ih264e_process.c revision cb6a43532c0b863d46c82feb25b10dc8732a34f9
1/****************************************************************************** 2 * 3 * Copyright (C) 2015 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19*/ 20 21/** 22******************************************************************************* 23* @file 24* ih264e_process.c 25* 26* @brief 27* Contains functions for codec thread 28* 29* @author 30* Harish 31* 32* @par List of Functions: 33* - ih264e_generate_sps_pps() 34* - ih264e_init_entropy_ctxt() 35* - ih264e_entropy() 36* - ih264e_pack_header_data() 37* - ih264e_update_proc_ctxt() 38* - ih264e_init_proc_ctxt() 39* - ih264e_pad_recon_buffer() 40* - ih264e_dblk_pad_hpel_processing_n_mbs() 41* - ih264e_process() 42* - ih264e_set_rc_pic_params() 43* - ih264e_update_rc_post_enc() 44* - ih264e_process_thread() 45* 46* @remarks 47* None 48* 49******************************************************************************* 50*/ 51 52/*****************************************************************************/ 53/* File Includes */ 54/*****************************************************************************/ 55 56/* System include files */ 57#include <stdio.h> 58#include <stddef.h> 59#include <stdlib.h> 60#include <string.h> 61#include <limits.h> 62#include <assert.h> 63 64/* User include files */ 65#include "ih264_typedefs.h" 66#include "iv2.h" 67#include "ive2.h" 68#include "ih264_defs.h" 69#include "ih264_debug.h" 70#include "ime_distortion_metrics.h" 71#include "ime_defs.h" 72#include "ime_structs.h" 73#include "ih264_error.h" 74#include "ih264_structs.h" 75#include "ih264_trans_quant_itrans_iquant.h" 76#include "ih264_inter_pred_filters.h" 77#include "ih264_mem_fns.h" 78#include "ih264_padding.h" 79#include "ih264_intra_pred_filters.h" 80#include "ih264_deblk_edge_filters.h" 81#include "ih264_cabac_tables.h" 82#include "ih264_platform_macros.h" 83#include "ih264_macros.h" 84#include "ih264_buf_mgr.h" 85#include "ih264e_error.h" 86#include "ih264e_bitstream.h" 87#include "ih264_common_tables.h" 88#include "ih264_list.h" 89#include "ih264e_defs.h" 90#include "irc_cntrl_param.h" 91#include "irc_frame_info_collector.h" 92#include "ih264e_rate_control.h" 93#include "ih264e_cabac_structs.h" 94#include "ih264e_structs.h" 95#include "ih264e_cabac.h" 96#include "ih264e_process.h" 97#include "ithread.h" 98#include "ih264e_intra_modes_eval.h" 99#include "ih264e_encode_header.h" 100#include "ih264e_globals.h" 101#include "ih264e_config.h" 102#include "ih264e_trace.h" 103#include "ih264e_statistics.h" 104#include "ih264_cavlc_tables.h" 105#include "ih264e_cavlc.h" 106#include "ih264e_deblk.h" 107#include "ih264e_me.h" 108#include "ih264e_debug.h" 109#include "ih264e_master.h" 110#include "ih264e_utils.h" 111#include "irc_mem_req_and_acq.h" 112#include "irc_rate_control_api.h" 113#include "ih264e_platform_macros.h" 114#include "ime_statistics.h" 115 116 117/*****************************************************************************/ 118/* Function Definitions */ 119/*****************************************************************************/ 120 121/** 122****************************************************************************** 123* 124* @brief This function generates sps, pps set on request 125* 126* @par Description 127* When the encoder is set in header generation mode, the following function 128* is called. This generates sps and pps headers and returns the control back 129* to caller. 130* 131* @param[in] ps_codec 132* pointer to codec context 133* 134* @return success or failure error code 135* 136****************************************************************************** 137*/ 138IH264E_ERROR_T ih264e_generate_sps_pps(codec_t *ps_codec) 139{ 140 /* choose between ping-pong process buffer set */ 141 WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1; 142 143 /* entropy ctxt */ 144 entropy_ctxt_t *ps_entropy = &ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].s_entropy; 145 146 /* Bitstream structure */ 147 bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm; 148 149 /* sps */ 150 sps_t *ps_sps = NULL; 151 152 /* pps */ 153 pps_t *ps_pps = NULL; 154 155 /* output buff */ 156 out_buf_t *ps_out_buf = &ps_codec->as_out_buf[ctxt_sel]; 157 158 159 /********************************************************************/ 160 /* initialize the bit stream buffer */ 161 /********************************************************************/ 162 ih264e_bitstrm_init(ps_bitstrm, ps_out_buf->s_bits_buf.pv_buf, ps_out_buf->s_bits_buf.u4_bufsize); 163 164 /********************************************************************/ 165 /* BEGIN HEADER GENERATION */ 166 /********************************************************************/ 167 /*ps_codec->i4_pps_id ++;*/ 168 ps_codec->i4_pps_id %= MAX_PPS_CNT; 169 170 /*ps_codec->i4_sps_id ++;*/ 171 ps_codec->i4_sps_id %= MAX_SPS_CNT; 172 173 /* populate sps header */ 174 ps_sps = ps_codec->ps_sps_base + ps_codec->i4_sps_id; 175 ih264e_populate_sps(ps_codec, ps_sps); 176 177 /* populate pps header */ 178 ps_pps = ps_codec->ps_pps_base + ps_codec->i4_pps_id; 179 ih264e_populate_pps(ps_codec, ps_pps); 180 181 ps_entropy->i4_error_code = IH264E_SUCCESS; 182 183 /* generate sps */ 184 ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps); 185 186 /* generate pps */ 187 ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps); 188 189 /* queue output buffer */ 190 ps_out_buf->s_bits_buf.u4_bytes = ps_bitstrm->u4_strm_buf_offset; 191 192 return ps_entropy->i4_error_code; 193} 194 195/** 196******************************************************************************* 197* 198* @brief initialize entropy context. 199* 200* @par Description: 201* Before invoking the call to perform to entropy coding the entropy context 202* associated with the job needs to be initialized. This involves the start 203* mb address, end mb address, slice index and the pointer to location at 204* which the mb residue info and mb header info are packed. 205* 206* @param[in] ps_proc 207* Pointer to the current process context 208* 209* @returns error status 210* 211* @remarks none 212* 213******************************************************************************* 214*/ 215IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc) 216{ 217 /* codec context */ 218 codec_t *ps_codec = ps_proc->ps_codec; 219 220 /* entropy ctxt */ 221 entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy; 222 223 /* start address */ 224 ps_entropy->i4_mb_start_add = ps_entropy->i4_mb_y * ps_entropy->i4_wd_mbs + ps_entropy->i4_mb_x; 225 226 /* end address */ 227 ps_entropy->i4_mb_end_add = ps_entropy->i4_mb_start_add + ps_entropy->i4_mb_cnt; 228 229 /* slice index */ 230 ps_entropy->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_entropy->i4_mb_start_add]; 231 232 /* sof */ 233 /* @ start of frame or start of a new slice, set sof flag */ 234 if (ps_entropy->i4_mb_start_add == 0) 235 { 236 ps_entropy->i4_sof = 1; 237 } 238 239 if (ps_entropy->i4_mb_x == 0) 240 { 241 /* packed mb coeff data */ 242 ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) + 243 ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data; 244 245 /* packed mb header data */ 246 ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) + 247 ps_entropy->i4_mb_y * ps_codec->u4_size_header_data; 248 } 249 250 return IH264E_SUCCESS; 251} 252 253/** 254******************************************************************************* 255* 256* @brief entry point for entropy coding 257* 258* @par Description 259* This function calls lower level functions to perform entropy coding for a 260* group (n rows) of mb's. After encoding 1 row of mb's, the function takes 261* back the control, updates the ctxt and calls lower level functions again. 262* This process is repeated till all the rows or group of mb's (which ever is 263* minimum) are coded 264* 265* @param[in] ps_proc 266* process context 267* 268* @returns error status 269* 270* @remarks 271* 272******************************************************************************* 273*/ 274 275IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc) 276{ 277 /* codec context */ 278 codec_t *ps_codec = ps_proc->ps_codec; 279 280 /* entropy context */ 281 entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy; 282 283 /* cabac context */ 284 cabac_ctxt_t *ps_cabac_ctxt = ps_entropy->ps_cabac; 285 286 /* sps */ 287 sps_t *ps_sps = ps_entropy->ps_sps_base + (ps_entropy->u4_sps_id % MAX_SPS_CNT); 288 289 /* pps */ 290 pps_t *ps_pps = ps_entropy->ps_pps_base + (ps_entropy->u4_pps_id % MAX_PPS_CNT); 291 292 /* slice header */ 293 slice_header_t *ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (ps_entropy->i4_cur_slice_idx % MAX_SLICE_HDR_CNT); 294 295 /* slice type */ 296 WORD32 i4_slice_type = ps_proc->i4_slice_type; 297 298 /* Bitstream structure */ 299 bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm; 300 301 /* output buff */ 302 out_buf_t s_out_buf; 303 304 /* proc map */ 305 UWORD8 *pu1_proc_map; 306 307 /* entropy map */ 308 UWORD8 *pu1_entropy_map_curr; 309 310 /* proc base idx */ 311 WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt & 1; 312 313 /* temp var */ 314 WORD32 i4_wd_mbs, i4_ht_mbs; 315 UWORD32 u4_mb_cnt, u4_mb_idx, u4_mb_end_idx; 316 WORD32 bitstream_start_offset, bitstream_end_offset; 317 /********************************************************************/ 318 /* BEGIN INIT */ 319 /********************************************************************/ 320 321 /* entropy encode start address */ 322 u4_mb_idx = ps_entropy->i4_mb_start_add; 323 324 /* entropy encode end address */ 325 u4_mb_end_idx = ps_entropy->i4_mb_end_add; 326 327 /* width in mbs */ 328 i4_wd_mbs = ps_entropy->i4_wd_mbs; 329 330 /* height in mbs */ 331 i4_ht_mbs = ps_entropy->i4_ht_mbs; 332 333 /* total mb cnt */ 334 u4_mb_cnt = i4_wd_mbs * i4_ht_mbs; 335 336 /* proc map */ 337 pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs; 338 339 /* entropy map */ 340 pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs; 341 342 /********************************************************************/ 343 /* @ start of frame / slice, */ 344 /* initialize the output buffer, */ 345 /* initialize the bit stream buffer, */ 346 /* check if sps and pps headers have to be generated, */ 347 /* populate and generate slice header */ 348 /********************************************************************/ 349 if (ps_entropy->i4_sof) 350 { 351 /********************************************************************/ 352 /* initialize the output buffer */ 353 /********************************************************************/ 354 s_out_buf = ps_codec->as_out_buf[ctxt_sel]; 355 356 /* is last frame to encode */ 357 s_out_buf.u4_is_last = ps_entropy->u4_is_last; 358 359 /* frame idx */ 360 s_out_buf.u4_timestamp_high = ps_entropy->u4_timestamp_high; 361 s_out_buf.u4_timestamp_low = ps_entropy->u4_timestamp_low; 362 363 /********************************************************************/ 364 /* initialize the bit stream buffer */ 365 /********************************************************************/ 366 ih264e_bitstrm_init(ps_bitstrm, s_out_buf.s_bits_buf.pv_buf, s_out_buf.s_bits_buf.u4_bufsize); 367 368 /********************************************************************/ 369 /* BEGIN HEADER GENERATION */ 370 /********************************************************************/ 371 if (1 == ps_entropy->i4_gen_header) 372 { 373 /* generate sps */ 374 ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps); 375 376 /* generate pps */ 377 ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps); 378 379 /* reset i4_gen_header */ 380 ps_entropy->i4_gen_header = 0; 381 } 382 383 /* populate slice header */ 384 ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps); 385 386 /* generate slice header */ 387 ps_entropy->i4_error_code |= ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr, 388 ps_pps, ps_sps); 389 390 /* once start of frame / slice is done, you can reset it */ 391 /* it is the responsibility of the caller to set this flag */ 392 ps_entropy->i4_sof = 0; 393 394 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag) 395 { 396 BITSTREAM_BYTE_ALIGN(ps_bitstrm); 397 BITSTREAM_FLUSH(ps_bitstrm); 398 ih264e_init_cabac_ctxt(ps_entropy); 399 } 400 } 401 402 /* begin entropy coding for the mb set */ 403 while (u4_mb_idx < u4_mb_end_idx) 404 { 405 /* init ptrs/indices */ 406 if (ps_entropy->i4_mb_x == i4_wd_mbs) 407 { 408 ps_entropy->i4_mb_y++; 409 ps_entropy->i4_mb_x = 0; 410 411 /* packed mb coeff data */ 412 ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) + 413 ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data; 414 415 /* packed mb header data */ 416 ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) + 417 ps_entropy->i4_mb_y * ps_codec->u4_size_header_data; 418 419 /* proc map */ 420 pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs; 421 422 /* entropy map */ 423 pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs; 424 } 425 426 DEBUG("\nmb indices x, y %d, %d", ps_entropy->i4_mb_x, ps_entropy->i4_mb_y); 427 ENTROPY_TRACE("mb index x %d", ps_entropy->i4_mb_x); 428 ENTROPY_TRACE("mb index y %d", ps_entropy->i4_mb_y); 429 430 /* wait until the curr mb is core coded */ 431 /* The wait for curr mb to be core coded is essential when entropy is launched 432 * as a separate job 433 */ 434 while (1) 435 { 436 volatile UWORD8 *pu1_buf1; 437 WORD32 idx = ps_entropy->i4_mb_x; 438 439 pu1_buf1 = pu1_proc_map + idx; 440 if (*pu1_buf1) 441 break; 442 ithread_yield(); 443 } 444 445 446 /* write mb layer */ 447 ps_entropy->i4_error_code |= ps_codec->pf_write_mb_syntax_layer[ps_entropy->u1_entropy_coding_mode_flag][i4_slice_type](ps_entropy); 448 /* Starting bitstream offset for header in bits */ 449 bitstream_start_offset = GET_NUM_BITS(ps_bitstrm); 450 451 /* set entropy map */ 452 pu1_entropy_map_curr[ps_entropy->i4_mb_x] = 1; 453 454 u4_mb_idx++; 455 ps_entropy->i4_mb_x++; 456 /* check for eof */ 457 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag) 458 { 459 if (ps_entropy->i4_mb_x < i4_wd_mbs) 460 { 461 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0); 462 } 463 } 464 465 if (ps_entropy->i4_mb_x == i4_wd_mbs) 466 { 467 /* if slices are enabled */ 468 if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS) 469 { 470 /* current slice index */ 471 WORD32 i4_curr_slice_idx = ps_entropy->i4_cur_slice_idx; 472 473 /* slice map */ 474 UWORD8 *pu1_slice_idx = ps_entropy->pu1_slice_idx; 475 476 /* No need to open a slice at end of frame. The current slice can be closed at the time 477 * of signaling eof flag. 478 */ 479 if ((u4_mb_idx != u4_mb_cnt) && (i4_curr_slice_idx 480 != pu1_slice_idx[u4_mb_idx])) 481 { 482 if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag) 483 { /* mb skip run */ 484 if ((i4_slice_type != ISLICE) 485 && *ps_entropy->pi4_mb_skip_run) 486 { 487 if (*ps_entropy->pi4_mb_skip_run) 488 { 489 PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run"); 490 *ps_entropy->pi4_mb_skip_run = 0; 491 } 492 } 493 /* put rbsp trailing bits for the previous slice */ 494 ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm); 495 } 496 else 497 { 498 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1); 499 } 500 501 /* update slice header pointer */ 502 i4_curr_slice_idx = pu1_slice_idx[u4_mb_idx]; 503 ps_entropy->i4_cur_slice_idx = i4_curr_slice_idx; 504 ps_slice_hdr = ps_entropy->ps_slice_hdr_base+ (i4_curr_slice_idx % MAX_SLICE_HDR_CNT); 505 506 /* populate slice header */ 507 ps_entropy->i4_mb_start_add = u4_mb_idx; 508 ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, 509 ps_sps); 510 511 /* generate slice header */ 512 ps_entropy->i4_error_code |= ih264e_generate_slice_header( 513 ps_bitstrm, ps_slice_hdr, ps_pps, ps_sps); 514 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag) 515 { 516 BITSTREAM_BYTE_ALIGN(ps_bitstrm); 517 BITSTREAM_FLUSH(ps_bitstrm); 518 ih264e_init_cabac_ctxt(ps_entropy); 519 } 520 } 521 else 522 { 523 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag 524 && u4_mb_idx != u4_mb_cnt) 525 { 526 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0); 527 } 528 } 529 } 530 /* Dont execute any further instructions until store synchronization took place */ 531 DATA_SYNC(); 532 } 533 534 /* Ending bitstream offset for header in bits */ 535 bitstream_end_offset = GET_NUM_BITS(ps_bitstrm); 536 ps_entropy->u4_header_bits[i4_slice_type == PSLICE] += 537 bitstream_end_offset - bitstream_start_offset; 538 } 539 540 /* check for eof */ 541 if (u4_mb_idx == u4_mb_cnt) 542 { 543 /* set end of frame flag */ 544 ps_entropy->i4_eof = 1; 545 } 546 else 547 { 548 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag 549 && ps_codec->s_cfg.e_slice_mode 550 != IVE_SLICE_MODE_BLOCKS) 551 { 552 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0); 553 } 554 } 555 556 if (ps_entropy->i4_eof) 557 { 558 if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag) 559 { 560 /* mb skip run */ 561 if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run) 562 { 563 if (*ps_entropy->pi4_mb_skip_run) 564 { 565 PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, 566 ps_entropy->i4_error_code, "mb skip run"); 567 *ps_entropy->pi4_mb_skip_run = 0; 568 } 569 } 570 /* put rbsp trailing bits */ 571 ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm); 572 } 573 else 574 { 575 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1); 576 } 577 578 /* update current frame stats to rc library */ 579 { 580 /* number of bytes to stuff */ 581 WORD32 i4_stuff_bytes; 582 583 /* update */ 584 i4_stuff_bytes = ih264e_update_rc_post_enc( 585 ps_codec, ctxt_sel, 586 (ps_proc->ps_codec->i4_poc == 0)); 587 588 /* cbr rc - house keeping */ 589 if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel]) 590 { 591 ps_entropy->ps_bitstrm->u4_strm_buf_offset = 0; 592 } 593 else if (i4_stuff_bytes) 594 { 595 /* add filler nal units */ 596 ps_entropy->i4_error_code |= ih264e_add_filler_nal_unit(ps_bitstrm, i4_stuff_bytes); 597 } 598 } 599 600 /* 601 *Frame number is to be incremented only if the current frame is a 602 * reference frame. After each successful frame encode, we increment 603 * frame number by 1 604 */ 605 if (!ps_codec->s_rate_control.post_encode_skip[ctxt_sel] 606 && ps_codec->u4_is_curr_frm_ref) 607 { 608 ps_codec->i4_frame_num++; 609 } 610 /********************************************************************/ 611 /* signal the output */ 612 /********************************************************************/ 613 ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes = 614 ps_entropy->ps_bitstrm->u4_strm_buf_offset; 615 616 DEBUG("entropy status %x", ps_entropy->i4_error_code); 617 } 618 619 /* allow threads to dequeue entropy jobs */ 620 ps_codec->au4_entropy_thread_active[ctxt_sel] = 0; 621 622 return ps_entropy->i4_error_code; 623} 624 625/** 626******************************************************************************* 627* 628* @brief Packs header information of a mb in to a buffer 629* 630* @par Description: 631* After the deciding the mode info of a macroblock, the syntax elements 632* associated with the mb are packed and stored. The entropy thread unpacks 633* this buffer and generates the end bit stream. 634* 635* @param[in] ps_proc 636* Pointer to the current process context 637* 638* @returns error status 639* 640* @remarks none 641* 642******************************************************************************* 643*/ 644IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc) 645{ 646 /* curr mb type */ 647 UWORD32 u4_mb_type = ps_proc->u4_mb_type; 648 649 /* pack mb syntax layer of curr mb (used for entropy coding) */ 650 if (u4_mb_type == I4x4) 651 { 652 /* pointer to mb header storage space */ 653 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 654 655 /* temp var */ 656 WORD32 i4, byte; 657 658 /* mb type plus mode */ 659 *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + u4_mb_type; 660 661 /* cbp */ 662 *pu1_ptr++ = ps_proc->u4_cbp; 663 664 /* mb qp delta */ 665 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; 666 667 /* sub mb modes */ 668 for (i4 = 0; i4 < 16; i4 ++) 669 { 670 byte = 0; 671 672 if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] == 673 ps_proc->au1_intra_luma_mb_4x4_modes[i4]) 674 { 675 byte |= 1; 676 } 677 else 678 { 679 680 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] < 681 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4]) 682 { 683 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 1); 684 } 685 else 686 { 687 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 1; 688 } 689 } 690 691 i4++; 692 693 if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] == 694 ps_proc->au1_intra_luma_mb_4x4_modes[i4]) 695 { 696 byte |= 16; 697 } 698 else 699 { 700 701 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] < 702 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4]) 703 { 704 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 5); 705 } 706 else 707 { 708 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 5; 709 } 710 } 711 712 *pu1_ptr++ = byte; 713 } 714 715 /* end of mb layer */ 716 ps_proc->pv_mb_header_data = pu1_ptr; 717 } 718 else if (u4_mb_type == I16x16) 719 { 720 /* pointer to mb header storage space */ 721 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 722 723 /* mb type plus mode */ 724 *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + (ps_proc->u1_l_i16_mode << 4) + u4_mb_type; 725 726 /* cbp */ 727 *pu1_ptr++ = ps_proc->u4_cbp; 728 729 /* mb qp delta */ 730 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; 731 732 /* end of mb layer */ 733 ps_proc->pv_mb_header_data = pu1_ptr; 734 } 735 else if (u4_mb_type == P16x16) 736 { 737 /* pointer to mb header storage space */ 738 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 739 740 WORD16 *i2_mv_ptr; 741 742 /* mb type plus mode */ 743 *pu1_ptr++ = u4_mb_type; 744 745 /* cbp */ 746 *pu1_ptr++ = ps_proc->u4_cbp; 747 748 /* mb qp delta */ 749 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; 750 751 i2_mv_ptr = (WORD16 *)pu1_ptr; 752 753 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx - ps_proc->ps_pred_mv[0].s_mv.i2_mvx; 754 755 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy - ps_proc->ps_pred_mv[0].s_mv.i2_mvy; 756 757 /* end of mb layer */ 758 ps_proc->pv_mb_header_data = i2_mv_ptr; 759 } 760 else if (u4_mb_type == PSKIP) 761 { 762 /* pointer to mb header storage space */ 763 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 764 765 /* mb type plus mode */ 766 *pu1_ptr++ = u4_mb_type; 767 768 /* end of mb layer */ 769 ps_proc->pv_mb_header_data = pu1_ptr; 770 } 771 else if(u4_mb_type == B16x16) 772 { 773 774 /* pointer to mb header storage space */ 775 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 776 777 WORD16 *i2_mv_ptr; 778 779 UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode; 780 781 /* mb type plus mode */ 782 *pu1_ptr++ = (u4_pred_mode << 4) + u4_mb_type; 783 784 /* cbp */ 785 *pu1_ptr++ = ps_proc->u4_cbp; 786 787 /* mb qp delta */ 788 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; 789 790 /* l0 & l1 me data */ 791 i2_mv_ptr = (WORD16 *)pu1_ptr; 792 793 if (u4_pred_mode != PRED_L1) 794 { 795 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx 796 - ps_proc->ps_pred_mv[0].s_mv.i2_mvx; 797 798 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy 799 - ps_proc->ps_pred_mv[0].s_mv.i2_mvy; 800 } 801 if (u4_pred_mode != PRED_L0) 802 { 803 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx 804 - ps_proc->ps_pred_mv[1].s_mv.i2_mvx; 805 806 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy 807 - ps_proc->ps_pred_mv[1].s_mv.i2_mvy; 808 } 809 810 /* end of mb layer */ 811 ps_proc->pv_mb_header_data = i2_mv_ptr; 812 813 } 814 else if(u4_mb_type == BDIRECT) 815 { 816 /* pointer to mb header storage space */ 817 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 818 819 /* mb type plus mode */ 820 *pu1_ptr++ = u4_mb_type; 821 822 /* cbp */ 823 *pu1_ptr++ = ps_proc->u4_cbp; 824 825 /* mb qp delta */ 826 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; 827 828 ps_proc->pv_mb_header_data = pu1_ptr; 829 830 } 831 else if(u4_mb_type == BSKIP) 832 { 833 UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode; 834 835 /* pointer to mb header storage space */ 836 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 837 838 /* mb type plus mode */ 839 *pu1_ptr++ = (u4_pred_mode << 4) + u4_mb_type; 840 841 /* end of mb layer */ 842 ps_proc->pv_mb_header_data = pu1_ptr; 843 } 844 845 return IH264E_SUCCESS; 846} 847 848/** 849******************************************************************************* 850* 851* @brief update process context after encoding an mb. This involves preserving 852* the current mb information for later use, initialize the proc ctxt elements to 853* encode next mb. 854* 855* @par Description: 856* This function performs house keeping tasks after encoding an mb. 857* After encoding an mb, various elements of the process context needs to be 858* updated to encode the next mb. For instance, the source, recon and reference 859* pointers, mb indices have to be adjusted to the next mb. The slice index of 860* the current mb needs to be updated. If mb qp modulation is enabled, then if 861* the qp changes the quant param structure needs to be updated. Also to encoding 862* the next mb, the current mb info is used as part of mode prediction or mv 863* prediction. Hence the current mb info has to preserved at top/top left/left 864* locations. 865* 866* @param[in] ps_proc 867* Pointer to the current process context 868* 869* @returns none 870* 871* @remarks none 872* 873******************************************************************************* 874*/ 875WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc) 876{ 877 /* error status */ 878 WORD32 error_status = IH264_SUCCESS; 879 880 /* codec context */ 881 codec_t *ps_codec = ps_proc->ps_codec; 882 883 /* curr mb indices */ 884 WORD32 i4_mb_x = ps_proc->i4_mb_x; 885 WORD32 i4_mb_y = ps_proc->i4_mb_y; 886 887 /* mb syntax elements of neighbors */ 888 mb_info_t *ps_left_syn = &ps_proc->s_left_mb_syntax_ele; 889 mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + i4_mb_x; 890 mb_info_t *ps_top_left_syn = &ps_proc->s_top_left_mb_syntax_ele; 891 892 /* curr mb type */ 893 UWORD32 u4_mb_type = ps_proc->u4_mb_type; 894 895 /* curr mb type */ 896 UWORD32 u4_is_intra = ps_proc->u4_is_intra; 897 898 /* width in mbs */ 899 WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs; 900 901 /*height in mbs*/ 902 WORD32 i4_ht_mbs = ps_proc->i4_ht_mbs; 903 904 /* proc map */ 905 UWORD8 *pu1_proc_map = ps_proc->pu1_proc_map + (i4_mb_y * i4_wd_mbs); 906 907 /* deblk context */ 908 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt; 909 910 /* deblk bs context */ 911 bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt); 912 913 /* top row motion vector info */ 914 enc_pu_t *ps_top_row_pu = ps_proc->ps_top_row_pu + i4_mb_x; 915 916 /* top left mb motion vector */ 917 enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu; 918 919 /* left mb motion vector */ 920 enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu; 921 922 /* sub mb modes */ 923 UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (i4_mb_x << 4); 924 925 /*************************************************************/ 926 /* During MV prediction, when top right mb is not available, */ 927 /* top left mb info. is used for prediction. Hence the curr */ 928 /* top, which will be top left for the next mb needs to be */ 929 /* preserved before updating it with curr mb info. */ 930 /*************************************************************/ 931 932 /* mb type, mb class, csbp */ 933 *ps_top_left_syn = *ps_top_syn; 934 935 if (ps_proc->i4_slice_type != ISLICE) 936 { 937 /*****************************************/ 938 /* update top left with top info results */ 939 /*****************************************/ 940 /* mv */ 941 *ps_top_left_mb_pu = *ps_top_row_pu; 942 } 943 944 /*************************************************/ 945 /* update top and left with curr mb info results */ 946 /*************************************************/ 947 948 /* mb type */ 949 ps_left_syn->u2_mb_type = ps_top_syn->u2_mb_type = u4_mb_type; 950 951 /* mb class */ 952 ps_left_syn->u2_is_intra = ps_top_syn->u2_is_intra = u4_is_intra; 953 954 /* csbp */ 955 ps_left_syn->u4_csbp = ps_top_syn->u4_csbp = ps_proc->u4_csbp; 956 957 /* distortion */ 958 ps_left_syn->i4_mb_distortion = ps_top_syn->i4_mb_distortion = ps_proc->i4_mb_distortion; 959 960 if (u4_is_intra) 961 { 962 /* mb / sub mb modes */ 963 if (I16x16 == u4_mb_type) 964 { 965 pu1_top_mb_intra_modes[0] = ps_proc->au1_left_mb_intra_modes[0] = ps_proc->u1_l_i16_mode; 966 } 967 else if (I4x4 == u4_mb_type) 968 { 969 ps_codec->pf_mem_cpy_mul8(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16); 970 ps_codec->pf_mem_cpy_mul8(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16); 971 } 972 else if (I8x8 == u4_mb_type) 973 { 974 memcpy(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4); 975 memcpy(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4); 976 } 977 978 if ((ps_proc->i4_slice_type == PSLICE) ||(ps_proc->i4_slice_type == BSLICE)) 979 { 980 /* mv */ 981 *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu); 982 } 983 984 *ps_proc->pu4_mb_pu_cnt = 1; 985 } 986 else 987 { 988 /* mv */ 989 *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu); 990 } 991 992 /* 993 * Mark that the MB has been coded intra 994 * So that future AIRs can skip it 995 */ 996 ps_proc->pu1_is_intra_coded[i4_mb_x + (i4_mb_y * i4_wd_mbs)] = u4_is_intra; 997 998 /**************************************************/ 999 /* pack mb header info. for entropy coding */ 1000 /**************************************************/ 1001 ih264e_pack_header_data(ps_proc); 1002 1003 /* update previous mb qp */ 1004 ps_proc->u4_mb_qp_prev = ps_proc->u4_mb_qp; 1005 1006 /* store qp */ 1007 ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp; 1008 1009 /* 1010 * We need to sync the cache to make sure that the nmv content of proc 1011 * is updated to cache properly 1012 */ 1013 DATA_SYNC(); 1014 1015 /* Just before finishing the row, enqueue the job in to entropy queue. 1016 * The master thread depending on its convenience shall dequeue it and 1017 * performs entropy. 1018 * 1019 * WARN !! Placing this block post proc map update can cause queuing of 1020 * entropy jobs in out of order. 1021 */ 1022 if (i4_mb_x == i4_wd_mbs - 1) 1023 { 1024 /* job structures */ 1025 job_t s_job; 1026 1027 /* job class */ 1028 s_job.i4_cmd = CMD_ENTROPY; 1029 1030 /* number of mbs to be processed in the current job */ 1031 s_job.i2_mb_cnt = ps_codec->s_cfg.i4_wd_mbs; 1032 1033 /* job start index x */ 1034 s_job.i2_mb_x = 0; 1035 1036 /* job start index y */ 1037 s_job.i2_mb_y = ps_proc->i4_mb_y; 1038 1039 /* proc base idx */ 1040 s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt & 1) ? (MAX_PROCESS_CTXT / 2): 0 ; 1041 1042 /* queue the job */ 1043 error_status |= ih264_list_queue(ps_proc->pv_entropy_jobq, &s_job, 1); 1044 1045 if(ps_proc->i4_mb_y == (i4_ht_mbs - 1)) 1046 ih264_list_terminate(ps_codec->pv_entropy_jobq); 1047 } 1048 1049 /* update proc map */ 1050 pu1_proc_map[i4_mb_x] = 1; 1051 1052 /**************************************************/ 1053 /* update proc ctxt elements for encoding next mb */ 1054 /**************************************************/ 1055 /* update indices */ 1056 i4_mb_x ++; 1057 ps_proc->i4_mb_x = i4_mb_x; 1058 1059 if (ps_proc->i4_mb_x == i4_wd_mbs) 1060 { 1061 ps_proc->i4_mb_y++; 1062 ps_proc->i4_mb_x = 0; 1063 } 1064 1065 /* update slice index */ 1066 ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_proc->i4_mb_y * i4_wd_mbs + ps_proc->i4_mb_x]; 1067 1068 /* update buffers pointers */ 1069 ps_proc->pu1_src_buf_luma += MB_SIZE; 1070 ps_proc->pu1_rec_buf_luma += MB_SIZE; 1071 ps_proc->apu1_ref_buf_luma[0] += MB_SIZE; 1072 ps_proc->apu1_ref_buf_luma[1] += MB_SIZE; 1073 1074 /* 1075 * Note: Although chroma mb size is 8, as the chroma buffers are interleaved, 1076 * the stride per MB is MB_SIZE 1077 */ 1078 ps_proc->pu1_src_buf_chroma += MB_SIZE; 1079 ps_proc->pu1_rec_buf_chroma += MB_SIZE; 1080 ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE; 1081 ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE; 1082 1083 1084 1085 /* Reset cost, distortion params */ 1086 ps_proc->i4_mb_cost = INT_MAX; 1087 ps_proc->i4_mb_distortion = SHRT_MAX; 1088 1089 ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt; 1090 1091 ps_proc->pu4_mb_pu_cnt += 1; 1092 1093 /* Update colocated pu */ 1094 if (ps_proc->i4_slice_type == BSLICE) 1095 ps_proc->ps_colpu += *(ps_proc->aps_mv_buf[1]->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x); 1096 1097 /* deblk ctxts */ 1098 if (ps_proc->u4_disable_deblock_level != 1) 1099 { 1100 /* indices */ 1101 ps_bs->i4_mb_x = ps_proc->i4_mb_x; 1102 ps_bs->i4_mb_y = ps_proc->i4_mb_y; 1103 1104#ifndef N_MB_ENABLE /* For N MB processing update take place inside deblocking function */ 1105 ps_deblk->i4_mb_x ++; 1106 1107 ps_deblk->pu1_cur_pic_luma += MB_SIZE; 1108 /* 1109 * Note: Although chroma mb size is 8, as the chroma buffers are interleaved, 1110 * the stride per MB is MB_SIZE 1111 */ 1112 ps_deblk->pu1_cur_pic_chroma += MB_SIZE; 1113#endif 1114 } 1115 1116 return error_status; 1117} 1118 1119/** 1120******************************************************************************* 1121* 1122* @brief initialize process context. 1123* 1124* @par Description: 1125* Before dispatching the current job to process thread, the process context 1126* associated with the job is initialized. Usually every job aims to encode one 1127* row of mb's. Basing on the row indices provided by the job, the process 1128* context's buffer ptrs, slice indices and other elements that are necessary 1129* during core-coding are initialized. 1130* 1131* @param[in] ps_proc 1132* Pointer to the current process context 1133* 1134* @returns error status 1135* 1136* @remarks none 1137* 1138******************************************************************************* 1139*/ 1140IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc) 1141{ 1142 /* codec context */ 1143 codec_t *ps_codec = ps_proc->ps_codec; 1144 1145 /* nmb processing context*/ 1146 n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt; 1147 1148 /* indices */ 1149 WORD32 i4_mb_x, i4_mb_y; 1150 1151 /* strides */ 1152 WORD32 i4_src_strd = ps_proc->i4_src_strd; 1153 WORD32 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd; 1154 WORD32 i4_rec_strd = ps_proc->i4_rec_strd; 1155 1156 /* quant params */ 1157 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0]; 1158 1159 /* deblk ctxt */ 1160 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt; 1161 1162 /* deblk bs context */ 1163 bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt); 1164 1165 /* Pointer to mv_buffer of current frame */ 1166 mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf; 1167 1168 /* Pointers for color space conversion */ 1169 UWORD8 *pu1_y_buf_base, *pu1_u_buf_base, *pu1_v_buf_base; 1170 1171 /* Pad the MB to support non standard sizes */ 1172 UWORD32 u4_pad_right_sz = ps_codec->s_cfg.u4_wd - ps_codec->s_cfg.u4_disp_wd; 1173 UWORD32 u4_pad_bottom_sz = ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht; 1174 UWORD16 u2_num_rows = MB_SIZE; 1175 WORD32 convert_uv_only; 1176 1177 /********************************************************************/ 1178 /* BEGIN INIT */ 1179 /********************************************************************/ 1180 1181 i4_mb_x = ps_proc->i4_mb_x; 1182 i4_mb_y = ps_proc->i4_mb_y; 1183 1184 /* Number of mbs processed in one loop of process function */ 1185 ps_proc->i4_nmb_ntrpy = (ps_proc->i4_wd_mbs > MAX_NMB) ? MAX_NMB : ps_proc->i4_wd_mbs; 1186 ps_proc->u4_nmb_me = (ps_proc->i4_wd_mbs > MAX_NMB)? MAX_NMB : ps_proc->i4_wd_mbs; 1187 1188 /* init buffer pointers */ 1189 convert_uv_only = 1; 1190 if ((u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)) || 1191 ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE) 1192 { 1193 if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) 1194 u2_num_rows = (UWORD16) MB_SIZE - u4_pad_bottom_sz; 1195 ps_proc->pu1_src_buf_luma_base = ps_codec->pu1_y_csc_buf_base; 1196 i4_src_strd = ps_proc->i4_src_strd = ps_codec->s_cfg.u4_max_wd; 1197 ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * MB_SIZE); 1198 convert_uv_only = 0; 1199 } 1200 else 1201 { 1202 i4_src_strd = ps_proc->i4_src_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[0]; 1203 ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * MB_SIZE); 1204 } 1205 1206 1207 if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE || 1208 ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420P || 1209 ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1)) 1210 { 1211 if ((ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_UV) || 1212 (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU)) 1213 ps_proc->pu1_src_buf_chroma_base = ps_codec->pu1_uv_csc_buf_base; 1214 1215 ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * BLK8x8SIZE); 1216 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_codec->s_cfg.u4_max_wd; 1217 } 1218 else 1219 { 1220 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[1]; 1221 ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_src_chroma_strd * (i4_mb_y * BLK8x8SIZE); 1222 } 1223 1224 ps_proc->pu1_rec_buf_luma = ps_proc->pu1_rec_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE); 1225 ps_proc->pu1_rec_buf_chroma = ps_proc->pu1_rec_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE); 1226 1227 /* Tempral back and forward reference buffer */ 1228 ps_proc->apu1_ref_buf_luma[0] = ps_proc->apu1_ref_buf_luma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE); 1229 ps_proc->apu1_ref_buf_chroma[0] = ps_proc->apu1_ref_buf_chroma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE); 1230 ps_proc->apu1_ref_buf_luma[1] = ps_proc->apu1_ref_buf_luma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE); 1231 ps_proc->apu1_ref_buf_chroma[1] = ps_proc->apu1_ref_buf_chroma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE); 1232 1233 /* 1234 * Do color space conversion 1235 * NOTE : We assume there that the number of MB's to process will not span multiple rows 1236 */ 1237 switch (ps_codec->s_cfg.e_inp_color_fmt) 1238 { 1239 case IV_YUV_420SP_UV: 1240 case IV_YUV_420SP_VU: 1241 /* In case of 420 semi-planar input, copy last few rows to intermediate 1242 buffer as chroma trans functions access one extra byte due to interleaved input. 1243 This data will be padded if required */ 1244 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1)) 1245 { 1246 WORD32 num_rows = ps_codec->s_cfg.u4_disp_ht & 0xF; 1247 UWORD8 *pu1_src; 1248 UWORD8 *pu1_dst; 1249 WORD32 i; 1250 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) + 1251 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE); 1252 1253 pu1_dst = ps_proc->pu1_src_buf_luma; 1254 1255 for (i = 0; i < num_rows; i++) 1256 { 1257 memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd); 1258 pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[0]; 1259 pu1_dst += ps_proc->i4_src_strd; 1260 } 1261 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) + 1262 ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE); 1263 pu1_dst = ps_proc->pu1_src_buf_chroma; 1264 1265 /* Last MB row of chroma is copied unconditionally, since trans functions access an extra byte 1266 * due to interleaved input 1267 */ 1268 num_rows = (ps_codec->s_cfg.u4_disp_ht >> 1) - (ps_proc->i4_mb_y * BLK8x8SIZE); 1269 for (i = 0; i < num_rows; i++) 1270 { 1271 memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd); 1272 pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[1]; 1273 pu1_dst += ps_proc->i4_src_chroma_strd; 1274 } 1275 1276 } 1277 break; 1278 1279 case IV_YUV_420P : 1280 pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) + 1281 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE); 1282 1283 pu1_u_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) + 1284 ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE); 1285 1286 pu1_v_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[2] + (i4_mb_x * BLK8x8SIZE) + 1287 ps_proc->s_inp_buf.s_raw_buf.au4_strd[2] * (i4_mb_y * BLK8x8SIZE); 1288 1289 ps_codec->pf_ih264e_conv_420p_to_420sp( 1290 pu1_y_buf_base, pu1_u_buf_base, pu1_v_buf_base, 1291 ps_proc->pu1_src_buf_luma, 1292 ps_proc->pu1_src_buf_chroma, u2_num_rows, 1293 ps_codec->s_cfg.u4_disp_wd, 1294 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0], 1295 ps_proc->s_inp_buf.s_raw_buf.au4_strd[1], 1296 ps_proc->s_inp_buf.s_raw_buf.au4_strd[2], 1297 ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd, 1298 convert_uv_only); 1299 break; 1300 1301 case IV_YUV_422ILE : 1302 pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE * 2) 1303 + ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE); 1304 1305 ps_codec->pf_ih264e_fmt_conv_422i_to_420sp( 1306 ps_proc->pu1_src_buf_luma, 1307 ps_proc->pu1_src_buf_chroma, 1308 ps_proc->pu1_src_buf_chroma + 1, pu1_y_buf_base, 1309 ps_codec->s_cfg.u4_disp_wd, u2_num_rows, 1310 ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd, 1311 ps_proc->i4_src_chroma_strd, 1312 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] >> 1); 1313 break; 1314 1315 default: 1316 break; 1317 } 1318 1319 if (u4_pad_right_sz && (ps_proc->i4_mb_x == 0) && 1320 (ps_proc->i4_src_strd > (WORD32)ps_codec->s_cfg.u4_disp_wd) ) 1321 { 1322 UWORD32 u4_pad_wd, u4_pad_ht; 1323 u4_pad_wd = (UWORD32)(ps_proc->i4_src_strd - ps_codec->s_cfg.u4_disp_wd); 1324 u4_pad_wd = MIN(u4_pad_right_sz, u4_pad_wd); 1325 u4_pad_ht = MB_SIZE; 1326 if(ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) 1327 u4_pad_ht = MIN(MB_SIZE, (MB_SIZE - u4_pad_bottom_sz)); 1328 1329 ih264_pad_right_luma( 1330 ps_proc->pu1_src_buf_luma + ps_codec->s_cfg.u4_disp_wd, 1331 ps_proc->i4_src_strd, u4_pad_ht, u4_pad_wd); 1332 1333 ih264_pad_right_chroma( 1334 ps_proc->pu1_src_buf_chroma + ps_codec->s_cfg.u4_disp_wd, 1335 ps_proc->i4_src_chroma_strd, u4_pad_ht / 2, u4_pad_wd); 1336 } 1337 1338 /* pad bottom edge */ 1339 if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) && ps_proc->i4_mb_x == 0) 1340 { 1341 ih264_pad_bottom(ps_proc->pu1_src_buf_luma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd, 1342 ps_proc->i4_src_strd, ps_proc->i4_src_strd, u4_pad_bottom_sz); 1343 1344 ih264_pad_bottom(ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_chroma_strd / 2, 1345 ps_proc->i4_src_chroma_strd, ps_proc->i4_src_chroma_strd, (u4_pad_bottom_sz / 2)); 1346 } 1347 1348 1349 /* packed mb coeff data */ 1350 ps_proc->pv_mb_coeff_data = ((UWORD8 *)ps_proc->pv_pic_mb_coeff_data) + i4_mb_y * ps_codec->u4_size_coeff_data; 1351 1352 /* packed mb header data */ 1353 ps_proc->pv_mb_header_data = ((UWORD8 *)ps_proc->pv_pic_mb_header_data) + i4_mb_y * ps_codec->u4_size_header_data; 1354 1355 /* slice index */ 1356 ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[i4_mb_y * ps_proc->i4_wd_mbs + i4_mb_x]; 1357 1358 /*********************************************************************/ 1359 /* ih264e_init_quant_params() routine is called at the pic init level*/ 1360 /* this would have initialized the qp. */ 1361 /* TODO_LATER: currently it is assumed that quant params donot change*/ 1362 /* across mb's. When they do calculate update ps_qp_params accordingly*/ 1363 /*********************************************************************/ 1364 1365 /* init mv buffer ptr */ 1366 ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs * (MIN_PU_SIZE * MIN_PU_SIZE)); 1367 1368 /* Init co-located mv buffer */ 1369 ps_proc->ps_colpu = ps_proc->aps_mv_buf[1]->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs * (MIN_PU_SIZE * MIN_PU_SIZE)); 1370 1371 if (i4_mb_y == 0) 1372 { 1373 ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu; 1374 } 1375 else 1376 { 1377 ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu + ((i4_mb_y - 1) * ps_proc->i4_wd_mbs * (MIN_PU_SIZE * MIN_PU_SIZE)); 1378 } 1379 1380 ps_proc->pu4_mb_pu_cnt = ps_cur_mv_buf->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs); 1381 1382 /* mb type */ 1383 ps_proc->u4_mb_type = I16x16; 1384 1385 /* lambda */ 1386 ps_proc->u4_lambda = gu1_qp0[ps_qp_params->u1_mb_qp]; 1387 1388 /* mb distortion */ 1389 ps_proc->i4_mb_distortion = SHRT_MAX; 1390 1391 if (i4_mb_x == 0) 1392 { 1393 ps_proc->s_left_mb_syntax_ele.i4_mb_distortion = 0; 1394 1395 ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion = 0; 1396 1397 ps_proc->s_top_left_mb_syntax_ME.i4_mb_distortion = 0; 1398 1399 if (i4_mb_y == 0) 1400 { 1401 memset(ps_proc->ps_top_row_mb_syntax_ele, 0, (ps_proc->i4_wd_mbs + 1)*sizeof(mb_info_t)); 1402 } 1403 } 1404 1405 /* mb cost */ 1406 ps_proc->i4_mb_cost = INT_MAX; 1407 1408 /**********************/ 1409 /* init deblk context */ 1410 /**********************/ 1411 ps_deblk->i4_mb_x = ps_proc->i4_mb_x; 1412 /* deblk lags the current mb proc by 1 row */ 1413 /* NOTE: Intra prediction has to happen with non deblocked samples used as reference */ 1414 /* Hence to deblk MB 0 of row 0, you have wait till MB 0 of row 1 is encoded. */ 1415 /* For simplicity, we chose to lag deblking by 1 Row wrt to proc */ 1416 ps_deblk->i4_mb_y = ps_proc->i4_mb_y - 1; 1417 1418 /* buffer ptrs */ 1419 ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + i4_rec_strd * (ps_deblk->i4_mb_y * MB_SIZE); 1420 ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + i4_rec_strd * (ps_deblk->i4_mb_y * BLK8x8SIZE); 1421 1422 /* init deblk bs context */ 1423 /* mb indices */ 1424 ps_bs->i4_mb_x = ps_proc->i4_mb_x; 1425 ps_bs->i4_mb_y = ps_proc->i4_mb_y; 1426 1427 /* init n_mb_process context */ 1428 ps_n_mb_ctxt->i4_mb_x = 0; 1429 ps_n_mb_ctxt->i4_mb_y = ps_deblk->i4_mb_y; 1430 ps_n_mb_ctxt->i4_n_mbs = ps_proc->i4_nmb_ntrpy; 1431 1432 return IH264E_SUCCESS; 1433} 1434 1435/** 1436******************************************************************************* 1437* 1438* @brief This function performs luma & chroma padding 1439* 1440* @par Description: 1441* 1442* @param[in] ps_proc 1443* Process context corresponding to the job 1444* 1445* @param[in] pu1_curr_pic_luma 1446* Pointer to luma buffer 1447* 1448* @param[in] pu1_curr_pic_chroma 1449* Pointer to chroma buffer 1450* 1451* @param[in] i4_mb_x 1452* mb index x 1453* 1454* @param[in] i4_mb_y 1455* mb index y 1456* 1457* @param[in] i4_pad_ht 1458* number of rows to be padded 1459* 1460* @returns error status 1461* 1462* @remarks none 1463* 1464******************************************************************************* 1465*/ 1466IH264E_ERROR_T ih264e_pad_recon_buffer(process_ctxt_t *ps_proc, 1467 UWORD8 *pu1_curr_pic_luma, 1468 UWORD8 *pu1_curr_pic_chroma, 1469 WORD32 i4_mb_x, 1470 WORD32 i4_mb_y, 1471 WORD32 i4_pad_ht) 1472{ 1473 /* codec context */ 1474 codec_t *ps_codec = ps_proc->ps_codec; 1475 1476 /* strides */ 1477 WORD32 i4_rec_strd = ps_proc->i4_rec_strd; 1478 1479 if (i4_mb_x == 0) 1480 { 1481 /* padding left luma */ 1482 ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, i4_pad_ht, PAD_LEFT); 1483 1484 /* padding left chroma */ 1485 ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, i4_pad_ht >> 1, PAD_LEFT); 1486 } 1487 if (i4_mb_x == ps_proc->i4_wd_mbs - 1) 1488 { 1489 /* padding right luma */ 1490 ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, i4_pad_ht, PAD_RIGHT); 1491 1492 /* padding right chroma */ 1493 ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, i4_pad_ht >> 1, PAD_RIGHT); 1494 1495 if (i4_mb_y == ps_proc->i4_ht_mbs - 1) 1496 { 1497 UWORD8 *pu1_rec_luma = pu1_curr_pic_luma + MB_SIZE + PAD_RIGHT + ((i4_pad_ht - 1) * i4_rec_strd); 1498 UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma + MB_SIZE + PAD_RIGHT + (((i4_pad_ht >> 1) - 1) * i4_rec_strd); 1499 1500 /* padding bottom luma */ 1501 ps_codec->pf_pad_bottom(pu1_rec_luma, i4_rec_strd, i4_rec_strd, PAD_BOT); 1502 1503 /* padding bottom chroma */ 1504 ps_codec->pf_pad_bottom(pu1_rec_chroma, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1)); 1505 } 1506 } 1507 1508 if (i4_mb_y == 0) 1509 { 1510 UWORD8 *pu1_rec_luma = pu1_curr_pic_luma; 1511 UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma; 1512 WORD32 wd = MB_SIZE; 1513 1514 if (i4_mb_x == 0) 1515 { 1516 pu1_rec_luma -= PAD_LEFT; 1517 pu1_rec_chroma -= PAD_LEFT; 1518 1519 wd += PAD_LEFT; 1520 } 1521 if (i4_mb_x == ps_proc->i4_wd_mbs - 1) 1522 { 1523 wd += PAD_RIGHT; 1524 } 1525 1526 /* padding top luma */ 1527 ps_codec->pf_pad_top(pu1_rec_luma, i4_rec_strd, wd, PAD_TOP); 1528 1529 /* padding top chroma */ 1530 ps_codec->pf_pad_top(pu1_rec_chroma, i4_rec_strd, wd, (PAD_TOP >> 1)); 1531 } 1532 1533 return IH264E_SUCCESS; 1534} 1535 1536 1537 1538 1539/** 1540******************************************************************************* 1541* 1542* @brief This function performs deblocking, padding and halfpel generation for 1543* 'n' MBs 1544* 1545* @par Description: 1546* 1547* @param[in] ps_proc 1548* Process context corresponding to the job 1549* 1550* @param[in] pu1_curr_pic_luma 1551* Current MB being processed(Luma) 1552* 1553* @param[in] pu1_curr_pic_chroma 1554* Current MB being processed(Chroma) 1555* 1556* @param[in] i4_mb_x 1557* Column value of current MB processed 1558* 1559* @param[in] i4_mb_y 1560* Curent row processed 1561* 1562* @returns error status 1563* 1564* @remarks none 1565* 1566******************************************************************************* 1567*/ 1568IH264E_ERROR_T ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t *ps_proc, 1569 UWORD8 *pu1_curr_pic_luma, 1570 UWORD8 *pu1_curr_pic_chroma, 1571 WORD32 i4_mb_x, 1572 WORD32 i4_mb_y) 1573{ 1574 /* codec context */ 1575 codec_t *ps_codec = ps_proc->ps_codec; 1576 1577 /* n_mb processing context */ 1578 n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt; 1579 1580 /* deblk context */ 1581 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt; 1582 1583 /* strides */ 1584 WORD32 i4_rec_strd = ps_proc->i4_rec_strd; 1585 1586 /* loop variables */ 1587 WORD32 row, i, j, col; 1588 1589 /* Padding Width */ 1590 UWORD32 u4_pad_wd; 1591 1592 /* deblk_map of the row being deblocked */ 1593 UWORD8 *pu1_deblk_map = ps_proc->pu1_deblk_map + ps_deblk->i4_mb_y * ps_proc->i4_wd_mbs; 1594 1595 /* deblk_map_previous row */ 1596 UWORD8 *pu1_deblk_map_prev_row = pu1_deblk_map - ps_proc->i4_wd_mbs; 1597 1598 WORD32 u4_pad_top = 0; 1599 1600 WORD32 u4_deblk_prev_row = 0; 1601 1602 /* Number of mbs to be processed */ 1603 WORD32 i4_n_mbs = ps_n_mb_ctxt->i4_n_mbs; 1604 1605 /* Number of mbs actually processed 1606 * (at the end of a row, when remaining number of MBs are less than i4_n_mbs) */ 1607 WORD32 i4_n_mb_process_count = 0; 1608 1609 UWORD8 *pu1_pad_bottom_src = NULL; 1610 1611 UWORD8 *pu1_pad_src_luma = NULL; 1612 UWORD8 *pu1_pad_src_chroma = NULL; 1613 1614 if (ps_proc->u4_disable_deblock_level == 1) 1615 { 1616 /* If left most MB is processed, then pad left */ 1617 if (i4_mb_x == 0) 1618 { 1619 /* padding left luma */ 1620 ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, MB_SIZE, PAD_LEFT); 1621 1622 /* padding left chroma */ 1623 ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT); 1624 } 1625 /*last col*/ 1626 if (i4_mb_x == (ps_proc->i4_wd_mbs - 1)) 1627 { 1628 /* padding right luma */ 1629 ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT); 1630 1631 /* padding right chroma */ 1632 ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT); 1633 } 1634 } 1635 1636 if ((i4_mb_y > 0) || (i4_mb_y == (ps_proc->i4_ht_mbs - 1))) 1637 { 1638 /* if number of mb's to be processed are less than 'N', go back. 1639 * exception to the above clause is end of row */ 1640 if ( ((i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1)) < i4_n_mbs) && (i4_mb_x < (ps_proc->i4_wd_mbs - 1)) ) 1641 { 1642 return IH264E_SUCCESS; 1643 } 1644 else 1645 { 1646 i4_n_mb_process_count = MIN(i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1), i4_n_mbs); 1647 1648 /* performing deblocking for required number of MBs */ 1649 if ((i4_mb_y > 0) && (ps_proc->u4_disable_deblock_level != 1)) 1650 { 1651 u4_deblk_prev_row = 1; 1652 1653 /* checking whether the top rows are deblocked */ 1654 for (col = 0; col < i4_n_mb_process_count; col++) 1655 { 1656 u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + col]; 1657 } 1658 1659 /* checking whether the top right MB is deblocked */ 1660 if ((ps_deblk->i4_mb_x + i4_n_mb_process_count) != ps_proc->i4_wd_mbs) 1661 { 1662 u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + i4_n_mb_process_count]; 1663 } 1664 1665 /* Top or Top right MBs not deblocked */ 1666 if ((u4_deblk_prev_row != 1) && (i4_mb_y > 0)) 1667 { 1668 return IH264E_SUCCESS; 1669 } 1670 1671 for (row = 0; row < i4_n_mb_process_count; row++) 1672 { 1673 ih264e_deblock_mb(ps_proc, ps_deblk); 1674 1675 pu1_deblk_map[ps_deblk->i4_mb_x] = 1; 1676 1677 if (ps_deblk->i4_mb_y > 0) 1678 { 1679 if (ps_deblk->i4_mb_x == 0)/* If left most MB is processed, then pad left*/ 1680 { 1681 /* padding left luma */ 1682 ps_codec->pf_pad_left_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE, i4_rec_strd, MB_SIZE, PAD_LEFT); 1683 1684 /* padding left chroma */ 1685 ps_codec->pf_pad_left_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT); 1686 } 1687 1688 if (ps_deblk->i4_mb_x == (ps_proc->i4_wd_mbs - 1))/*last column*/ 1689 { 1690 /* padding right luma */ 1691 ps_codec->pf_pad_right_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT); 1692 1693 /* padding right chroma */ 1694 ps_codec->pf_pad_right_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT); 1695 } 1696 } 1697 ps_deblk->i4_mb_x++; 1698 1699 ps_deblk->pu1_cur_pic_luma += MB_SIZE; 1700 ps_deblk->pu1_cur_pic_chroma += MB_SIZE; 1701 1702 } 1703 } 1704 else if(i4_mb_y > 0) 1705 { 1706 ps_deblk->i4_mb_x += i4_n_mb_process_count; 1707 1708 ps_deblk->pu1_cur_pic_luma += i4_n_mb_process_count * MB_SIZE; 1709 ps_deblk->pu1_cur_pic_chroma += i4_n_mb_process_count * MB_SIZE; 1710 } 1711 1712 if (i4_mb_y == 2) 1713 { 1714 u4_pad_wd = i4_n_mb_process_count * MB_SIZE; 1715 u4_pad_top = ps_n_mb_ctxt->i4_mb_x * MB_SIZE; 1716 1717 if (ps_n_mb_ctxt->i4_mb_x == 0) 1718 { 1719 u4_pad_wd += PAD_LEFT; 1720 u4_pad_top = -PAD_LEFT; 1721 } 1722 1723 if (i4_mb_x == ps_proc->i4_wd_mbs - 1) 1724 { 1725 u4_pad_wd += PAD_RIGHT; 1726 } 1727 1728 /* padding top luma */ 1729 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_luma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, PAD_TOP); 1730 1731 /* padding top chroma */ 1732 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_chroma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, (PAD_TOP >> 1)); 1733 } 1734 1735 ps_n_mb_ctxt->i4_mb_x += i4_n_mb_process_count; 1736 1737 if (i4_mb_x == ps_proc->i4_wd_mbs - 1) 1738 { 1739 if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) 1740 { 1741 /* Bottom Padding is done in one stretch for the entire width */ 1742 if (ps_proc->u4_disable_deblock_level != 1) 1743 { 1744 ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * MB_SIZE; 1745 1746 ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * BLK8x8SIZE; 1747 1748 ps_n_mb_ctxt->i4_mb_x = 0; 1749 ps_n_mb_ctxt->i4_mb_y = ps_proc->i4_mb_y; 1750 ps_deblk->i4_mb_x = 0; 1751 ps_deblk->i4_mb_y = ps_proc->i4_mb_y; 1752 1753 /* update pic qp map (as update_proc_ctxt is still not called for the last MB) */ 1754 ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp; 1755 1756 i4_n_mb_process_count = (ps_proc->i4_wd_mbs) % i4_n_mbs; 1757 1758 j = (ps_proc->i4_wd_mbs) / i4_n_mbs; 1759 1760 for (i = 0; i < j; i++) 1761 { 1762 for (col = 0; col < i4_n_mbs; col++) 1763 { 1764 ih264e_deblock_mb(ps_proc, ps_deblk); 1765 1766 pu1_deblk_map[ps_deblk->i4_mb_x] = 1; 1767 1768 ps_deblk->i4_mb_x++; 1769 ps_deblk->pu1_cur_pic_luma += MB_SIZE; 1770 ps_deblk->pu1_cur_pic_chroma += MB_SIZE; 1771 ps_n_mb_ctxt->i4_mb_x++; 1772 } 1773 } 1774 1775 for (col = 0; col < i4_n_mb_process_count; col++) 1776 { 1777 ih264e_deblock_mb(ps_proc, ps_deblk); 1778 1779 pu1_deblk_map[ps_deblk->i4_mb_x] = 1; 1780 1781 ps_deblk->i4_mb_x++; 1782 ps_deblk->pu1_cur_pic_luma += MB_SIZE; 1783 ps_deblk->pu1_cur_pic_chroma += MB_SIZE; 1784 ps_n_mb_ctxt->i4_mb_x++; 1785 } 1786 1787 pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd; 1788 1789 pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd; 1790 1791 /* padding left luma */ 1792 ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT); 1793 1794 /* padding left chroma */ 1795 ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT); 1796 1797 pu1_pad_src_luma += i4_rec_strd * MB_SIZE; 1798 pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE; 1799 1800 /* padding left luma */ 1801 ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT); 1802 1803 /* padding left chroma */ 1804 ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT); 1805 1806 pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE; 1807 1808 pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE; 1809 1810 /* padding right luma */ 1811 ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT); 1812 1813 /* padding right chroma */ 1814 ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT); 1815 1816 pu1_pad_src_luma += i4_rec_strd * MB_SIZE; 1817 pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE; 1818 1819 /* padding right luma */ 1820 ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT); 1821 1822 /* padding right chroma */ 1823 ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT); 1824 1825 } 1826 1827 /* In case height is less than 2 MBs pad top */ 1828 if (ps_proc->i4_ht_mbs <= 2) 1829 { 1830 UWORD8 *pu1_pad_top_src; 1831 /* padding top luma */ 1832 pu1_pad_top_src = ps_proc->pu1_rec_buf_luma_base - PAD_LEFT; 1833 ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, PAD_TOP); 1834 1835 /* padding top chroma */ 1836 pu1_pad_top_src = ps_proc->pu1_rec_buf_chroma_base - PAD_LEFT; 1837 ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, (PAD_TOP >> 1)); 1838 } 1839 1840 /* padding bottom luma */ 1841 pu1_pad_bottom_src = ps_proc->pu1_rec_buf_luma_base + ps_proc->i4_ht_mbs * MB_SIZE * i4_rec_strd - PAD_LEFT; 1842 ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, PAD_BOT); 1843 1844 /* padding bottom chroma */ 1845 pu1_pad_bottom_src = ps_proc->pu1_rec_buf_chroma_base + ps_proc->i4_ht_mbs * (MB_SIZE >> 1) * i4_rec_strd - PAD_LEFT; 1846 ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1)); 1847 } 1848 } 1849 } 1850 } 1851 1852 return IH264E_SUCCESS; 1853} 1854 1855 1856/** 1857******************************************************************************* 1858* 1859* @brief This function performs luma & chroma core coding for a set of mb's. 1860* 1861* @par Description: 1862* The mb to be coded is taken and is evaluated over a predefined set of modes 1863* (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least cost 1864* is selected and using intra/inter prediction filters, prediction is carried out. 1865* The deviation between src and pred signal constitutes error signal. This error 1866* signal is transformed (hierarchical transform if necessary) and quantized. The 1867* quantized residue is packed in to entropy buffer for entropy coding. This is 1868* repeated for all the mb's enlisted under the job. 1869* 1870* @param[in] ps_proc 1871* Process context corresponding to the job 1872* 1873* @returns error status 1874* 1875* @remarks none 1876* 1877******************************************************************************* 1878*/ 1879WORD32 ih264e_process(process_ctxt_t *ps_proc) 1880{ 1881 /* error status */ 1882 WORD32 error_status = IH264_SUCCESS; 1883 1884 /* codec context */ 1885 codec_t *ps_codec = ps_proc->ps_codec; 1886 1887 /* cbp luma, chroma */ 1888 UWORD32 u4_cbp_l, u4_cbp_c; 1889 1890 /* width in mbs */ 1891 WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs; 1892 1893 /* loop var */ 1894 WORD32 i4_mb_idx, i4_mb_cnt = ps_proc->i4_mb_cnt; 1895 1896 /* valid modes */ 1897 UWORD32 u4_valid_modes = 0; 1898 1899 /* gate threshold */ 1900 WORD32 i4_gate_threshold = 0; 1901 1902 /* is intra */ 1903 WORD32 luma_idx, chroma_idx, is_intra; 1904 1905 /* temp variables */ 1906 WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt & 1; 1907 1908 /* list of modes for evaluation */ 1909 if (ps_proc->i4_slice_type == ISLICE) 1910 { 1911 /* enable intra 16x16 */ 1912 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0; 1913 1914 /* enable intra 8x8 */ 1915 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_8x8 ? (1 << I8x8) : 0; 1916 1917 /* enable intra 4x4 */ 1918 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0; 1919 } 1920 else if (ps_proc->i4_slice_type == PSLICE) 1921 { 1922 /* enable intra 16x16 */ 1923 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0; 1924 1925 /* enable intra 4x4 */ 1926 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST) 1927 { 1928 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0; 1929 } 1930 1931 /* enable inter P16x16 */ 1932 u4_valid_modes |= (1 << P16x16); 1933 } 1934 else if (ps_proc->i4_slice_type == BSLICE) 1935 { 1936 /* enable intra 16x16 */ 1937 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0; 1938 1939 /* enable intra 4x4 */ 1940 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST) 1941 { 1942 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0; 1943 } 1944 1945 /* enable inter B16x16 */ 1946 u4_valid_modes |= (1 << B16x16); 1947 } 1948 1949 1950 /* init entropy */ 1951 ps_proc->s_entropy.i4_mb_x = ps_proc->i4_mb_x; 1952 ps_proc->s_entropy.i4_mb_y = ps_proc->i4_mb_y; 1953 ps_proc->s_entropy.i4_mb_cnt = MIN(ps_proc->i4_nmb_ntrpy, i4_wd_mbs - ps_proc->i4_mb_x); 1954 1955 /* compute recon when : 1956 * 1. current frame is to be used as a reference 1957 * 2. dump recon for bit stream sanity check 1958 */ 1959 ps_proc->u4_compute_recon = ps_codec->u4_is_curr_frm_ref || 1960 ps_codec->s_cfg.u4_enable_recon; 1961 1962 /* Encode 'n' macroblocks, 1963 * 'n' being the number of mbs dictated by current proc ctxt */ 1964 for (i4_mb_idx = 0; i4_mb_idx < i4_mb_cnt; i4_mb_idx ++) 1965 { 1966 /* since we have not yet found sad, we have not yet got min sad */ 1967 /* we need to initialize these variables for each MB */ 1968 /* TODO how to get the min sad into the codec */ 1969 ps_proc->u4_min_sad = ps_codec->s_cfg.i4_min_sad; 1970 ps_proc->u4_min_sad_reached = 0; 1971 1972 /* mb analysis */ 1973 { 1974 /* temp var */ 1975 WORD32 i4_mb_id = ps_proc->i4_mb_x + ps_proc->i4_mb_y * i4_wd_mbs; 1976 1977 /* force intra refresh ? */ 1978 WORD32 i4_air_enable_inter = (ps_codec->s_cfg.e_air_mode == IVE_AIR_MODE_NONE) || 1979 (ps_proc->pu1_is_intra_coded[i4_mb_id] != 0) || 1980 (ps_codec->pu2_intr_rfrsh_map[i4_mb_id] != ps_codec->i4_air_pic_cnt); 1981 1982 /* evaluate inter 16x16 modes */ 1983 if ((u4_valid_modes & (1 << P16x16)) || (u4_valid_modes & (1 << B16x16))) 1984 { 1985 /* compute nmb me */ 1986 if (ps_proc->i4_mb_x % ps_proc->u4_nmb_me == 0) 1987 { 1988 ih264e_compute_me_nmb(ps_proc, MIN((WORD32)ps_proc->u4_nmb_me, 1989 i4_wd_mbs - ps_proc->i4_mb_x)); 1990 } 1991 1992 /* set pointers to ME data appropriately for other modules to use */ 1993 { 1994 UWORD32 u4_mb_index = ps_proc->i4_mb_x % ps_proc->u4_nmb_me ; 1995 1996 /* get the min sad condition for current mb */ 1997 ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached; 1998 ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad; 1999 2000 ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_skip_mv[0]); 2001 ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_mb_index].s_ngbr_avbl); 2002 ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_pred_mv[0]); 2003 2004 ps_proc->i4_mb_distortion = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_distortion; 2005 ps_proc->i4_mb_cost = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_cost; 2006 ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad; 2007 ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached; 2008 ps_proc->u4_mb_type = ps_proc->ps_nmb_info[u4_mb_index].u4_mb_type; 2009 2010 /* get the best sub pel buffer */ 2011 ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_mb_index].pu1_best_sub_pel_buf; 2012 ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_mb_index].u4_bst_spel_buf_strd; 2013 } 2014 ih264e_derive_nghbr_avbl_of_mbs(ps_proc); 2015 } 2016 else 2017 { 2018 /* Derive neighbor availability for the current macroblock */ 2019 ps_proc->ps_ngbr_avbl = &ps_proc->s_ngbr_avbl; 2020 2021 ih264e_derive_nghbr_avbl_of_mbs(ps_proc); 2022 } 2023 2024 /* 2025 * If air says intra, we need to force the following code path to evaluate intra 2026 * The easy way is just to say that the inter cost is too much 2027 */ 2028 if (!i4_air_enable_inter) 2029 { 2030 ps_proc->u4_min_sad_reached = 0; 2031 ps_proc->i4_mb_cost = INT_MAX; 2032 ps_proc->i4_mb_distortion = INT_MAX; 2033 } 2034 else if (ps_proc->u4_mb_type == PSKIP) 2035 { 2036 goto UPDATE_MB_INFO; 2037 } 2038 2039 /* wait until the proc of [top + 1] mb is computed. 2040 * We wait till the proc dependencies are satisfied */ 2041 if(ps_proc->i4_mb_y > 0) 2042 { 2043 /* proc map */ 2044 UWORD8 *pu1_proc_map_top; 2045 2046 pu1_proc_map_top = ps_proc->pu1_proc_map + ((ps_proc->i4_mb_y - 1) * i4_wd_mbs); 2047 2048 while (1) 2049 { 2050 volatile UWORD8 *pu1_buf; 2051 WORD32 idx = i4_mb_idx + 1; 2052 2053 idx = MIN(idx, ((WORD32)ps_codec->s_cfg.i4_wd_mbs - 1)); 2054 pu1_buf = pu1_proc_map_top + idx; 2055 if(*pu1_buf) 2056 break; 2057 ithread_yield(); 2058 } 2059 } 2060 2061 /* If we already have the minimum sad, there is no point in searching for sad again */ 2062 if (ps_proc->u4_min_sad_reached == 0) 2063 { 2064 /* intra gating in inter slices */ 2065 /* No need of gating if we want to force intra, we need to find the threshold only if inter is enabled by AIR*/ 2066 if (i4_air_enable_inter && ps_proc->i4_slice_type != ISLICE && ps_codec->u4_inter_gate) 2067 { 2068 /* distortion of neighboring blocks */ 2069 WORD32 i4_distortion[4]; 2070 2071 i4_distortion[0] = ps_proc->s_left_mb_syntax_ele.i4_mb_distortion; 2072 2073 i4_distortion[1] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x].i4_mb_distortion; 2074 2075 i4_distortion[2] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x + 1].i4_mb_distortion; 2076 2077 i4_distortion[3] = ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion; 2078 2079 i4_gate_threshold = (i4_distortion[0] + i4_distortion[1] + i4_distortion[2] + i4_distortion[3]) >> 2; 2080 2081 } 2082 2083 2084 /* If we are going to force intra we need to evaluate intra irrespective of gating */ 2085 if ( (!i4_air_enable_inter) || ((i4_gate_threshold + 16 *((WORD32) ps_proc->u4_lambda)) < ps_proc->i4_mb_distortion)) 2086 { 2087 /* evaluate intra 4x4 modes */ 2088 if (u4_valid_modes & (1 << I4x4)) 2089 { 2090 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST) 2091 { 2092 ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(ps_proc); 2093 } 2094 else 2095 { 2096 ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(ps_proc); 2097 } 2098 } 2099 2100 /* evaluate intra 16x16 modes */ 2101 if (u4_valid_modes & (1 << I16x16)) 2102 { 2103 ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(ps_proc); 2104 } 2105 2106 /* evaluate intra 8x8 modes */ 2107 if (u4_valid_modes & (1 << I8x8)) 2108 { 2109 ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(ps_proc); 2110 } 2111 2112 } 2113 } 2114 } 2115 2116 /* is intra */ 2117 if (ps_proc->u4_mb_type == I4x4 || ps_proc->u4_mb_type == I16x16 || ps_proc->u4_mb_type == I8x8) 2118 { 2119 luma_idx = ps_proc->u4_mb_type; 2120 chroma_idx = 0; 2121 is_intra = 1; 2122 2123 /* evaluate chroma blocks for intra */ 2124 ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(ps_proc); 2125 } 2126 else 2127 { 2128 luma_idx = 3; 2129 chroma_idx = 1; 2130 is_intra = 0; 2131 } 2132 ps_proc->u4_is_intra = is_intra; 2133 ps_proc->ps_pu->b1_intra_flag = is_intra; 2134 2135 /* redo MV pred of neighbors in the case intra mb */ 2136 /* TODO : currently called unconditionally, needs to be called only in the case of intra 2137 * to modify neighbors */ 2138 if (ps_proc->i4_slice_type != ISLICE) 2139 { 2140 ih264e_mv_pred(ps_proc, ps_proc->i4_slice_type); 2141 } 2142 2143 /* Perform luma mb core coding */ 2144 u4_cbp_l = (ps_codec->luma_energy_compaction)[luma_idx](ps_proc); 2145 2146 /* Perform luma mb core coding */ 2147 u4_cbp_c = (ps_codec->chroma_energy_compaction)[chroma_idx](ps_proc); 2148 2149 /* coded block pattern */ 2150 ps_proc->u4_cbp = (u4_cbp_c << 4) | u4_cbp_l; 2151 2152 if (!ps_proc->u4_is_intra) 2153 { 2154 if (ps_proc->i4_slice_type == BSLICE) 2155 { 2156 if (ih264e_find_bskip_params(ps_proc, PRED_L0)) 2157 { 2158 ps_proc->u4_mb_type = (ps_proc->u4_cbp) ? BDIRECT : BSKIP; 2159 } 2160 } 2161 else if(!ps_proc->u4_cbp) 2162 { 2163 if (ih264e_find_pskip_params(ps_proc, PRED_L0)) 2164 { 2165 ps_proc->u4_mb_type = PSKIP; 2166 } 2167 } 2168 } 2169 2170UPDATE_MB_INFO: 2171 2172 /* Update mb sad, mb qp and intra mb cost. Will be used by rate control */ 2173 ih264e_update_rc_mb_info(&ps_proc->s_frame_info, ps_proc); 2174 2175 /**********************************************************************/ 2176 /* if disable deblock level is '0' this implies enable deblocking for */ 2177 /* all edges of all macroblocks with out any restrictions */ 2178 /* */ 2179 /* if disable deblock level is '1' this implies disable deblocking for*/ 2180 /* all edges of all macroblocks with out any restrictions */ 2181 /* */ 2182 /* if disable deblock level is '2' this implies enable deblocking for */ 2183 /* all edges of all macroblocks except edges overlapping with slice */ 2184 /* boundaries. This option is not currently supported by the encoder */ 2185 /* hence the slice map should be of no significance to perform debloc */ 2186 /* king */ 2187 /**********************************************************************/ 2188 2189 if (ps_proc->u4_compute_recon) 2190 { 2191 /* deblk context */ 2192 /* src pointers */ 2193 UWORD8 *pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma; 2194 UWORD8 *pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma; 2195 2196 /* src indices */ 2197 UWORD32 i4_mb_x = ps_proc->i4_mb_x; 2198 UWORD32 i4_mb_y = ps_proc->i4_mb_y; 2199 2200 /* compute blocking strength */ 2201 if (ps_proc->u4_disable_deblock_level != 1) 2202 { 2203 ih264e_compute_bs(ps_proc); 2204 } 2205 2206 /* nmb deblocking and hpel and padding */ 2207 ih264e_dblk_pad_hpel_processing_n_mbs(ps_proc, pu1_cur_pic_luma, 2208 pu1_cur_pic_chroma, i4_mb_x, 2209 i4_mb_y); 2210 } 2211 2212 /* update the context after for coding next mb */ 2213 error_status |= ih264e_update_proc_ctxt(ps_proc); 2214 2215 /* Once the last row is processed, mark the buffer status appropriately */ 2216 if (ps_proc->i4_ht_mbs == ps_proc->i4_mb_y) 2217 { 2218 /* Pointer to current picture buffer structure */ 2219 pic_buf_t *ps_cur_pic = ps_proc->ps_cur_pic; 2220 2221 /* Pointer to current picture's mv buffer structure */ 2222 mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf; 2223 2224 /**********************************************************************/ 2225 /* if disable deblock level is '0' this implies enable deblocking for */ 2226 /* all edges of all macroblocks with out any restrictions */ 2227 /* */ 2228 /* if disable deblock level is '1' this implies disable deblocking for*/ 2229 /* all edges of all macroblocks with out any restrictions */ 2230 /* */ 2231 /* if disable deblock level is '2' this implies enable deblocking for */ 2232 /* all edges of all macroblocks except edges overlapping with slice */ 2233 /* boundaries. This option is not currently supported by the encoder */ 2234 /* hence the slice map should be of no significance to perform debloc */ 2235 /* king */ 2236 /**********************************************************************/ 2237 error_status |= ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, ps_cur_mv_buf->i4_buf_id , BUF_MGR_CODEC); 2238 2239 error_status |= ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id , BUF_MGR_CODEC); 2240 2241 if (ps_codec->s_cfg.u4_enable_recon) 2242 { 2243 /* pic cnt */ 2244 ps_codec->as_rec_buf[ctxt_sel].i4_pic_cnt = ps_proc->i4_pic_cnt; 2245 2246 /* rec buffers */ 2247 ps_codec->as_rec_buf[ctxt_sel].s_pic_buf = *ps_proc->ps_cur_pic; 2248 2249 /* is last? */ 2250 ps_codec->as_rec_buf[ctxt_sel].u4_is_last = ps_proc->s_entropy.u4_is_last; 2251 2252 /* frame time stamp */ 2253 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_high = ps_proc->s_entropy.u4_timestamp_high; 2254 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_low = ps_proc->s_entropy.u4_timestamp_low; 2255 } 2256 2257 } 2258 } 2259 2260 DEBUG_HISTOGRAM_DUMP(ps_codec->s_cfg.i4_ht_mbs == ps_proc->i4_mb_y); 2261 2262 return error_status; 2263} 2264 2265/** 2266******************************************************************************* 2267* 2268* @brief 2269* Function to update rc context after encoding 2270* 2271* @par Description 2272* This function updates the rate control context after the frame is encoded. 2273* Number of bits consumed by the current frame, frame distortion, frame cost, 2274* number of intra/inter mb's, ... are passed on to rate control context for 2275* updating the rc model. 2276* 2277* @param[in] ps_codec 2278* Handle to codec context 2279* 2280* @param[in] ctxt_sel 2281* frame context selector 2282* 2283* @param[in] pic_cnt 2284* pic count 2285* 2286* @returns i4_stuffing_byte 2287* number of stuffing bytes (if necessary) 2288* 2289* @remarks 2290* 2291******************************************************************************* 2292*/ 2293WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 i4_is_first_frm) 2294{ 2295 /* proc set base idx */ 2296 WORD32 i4_proc_ctxt_sel_base = ctxt_sel ? (MAX_PROCESS_CTXT / 2) : 0; 2297 2298 /* proc ctxt */ 2299 process_ctxt_t *ps_proc = &ps_codec->as_process[i4_proc_ctxt_sel_base]; 2300 2301 /* frame qp */ 2302 UWORD8 u1_frame_qp = ps_codec->u4_frame_qp; 2303 2304 /* cbr rc return status */ 2305 WORD32 i4_stuffing_byte = 0; 2306 2307 /* current frame stats */ 2308 frame_info_t s_frame_info; 2309 picture_type_e rc_pic_type; 2310 2311 /* temp var */ 2312 WORD32 i, j; 2313 2314 /********************************************************************/ 2315 /* BEGIN INIT */ 2316 /********************************************************************/ 2317 2318 /* init frame info */ 2319 irc_init_frame_info(&s_frame_info); 2320 2321 /* get frame info */ 2322 for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++) 2323 { 2324 /*****************************************************************/ 2325 /* One frame can be encoded by max of u4_num_cores threads */ 2326 /* Accumulating the num mbs, sad, qp and intra_mb_cost from */ 2327 /* u4_num_cores threads */ 2328 /*****************************************************************/ 2329 for (j = 0; j< MAX_MB_TYPE; j++) 2330 { 2331 s_frame_info.num_mbs[j] += ps_proc[i].s_frame_info.num_mbs[j]; 2332 2333 s_frame_info.tot_mb_sad[j] += ps_proc[i].s_frame_info.tot_mb_sad[j]; 2334 2335 s_frame_info.qp_sum[j] += ps_proc[i].s_frame_info.qp_sum[j]; 2336 } 2337 2338 s_frame_info.intra_mb_cost_sum += ps_proc[i].s_frame_info.intra_mb_cost_sum; 2339 2340 s_frame_info.activity_sum += ps_proc[i].s_frame_info.activity_sum; 2341 2342 /*****************************************************************/ 2343 /* gather number of residue and header bits consumed by the frame*/ 2344 /*****************************************************************/ 2345 ih264e_update_rc_bits_info(&s_frame_info, &ps_proc[i].s_entropy); 2346 } 2347 2348 /* get pic type */ 2349 switch (ps_codec->pic_type) 2350 { 2351 case PIC_I: 2352 case PIC_IDR: 2353 rc_pic_type = I_PIC; 2354 break; 2355 case PIC_P: 2356 rc_pic_type = P_PIC; 2357 break; 2358 case PIC_B: 2359 rc_pic_type = B_PIC; 2360 break; 2361 default: 2362 assert(0); 2363 break; 2364 } 2365 2366 /* update rc lib with current frame stats */ 2367 i4_stuffing_byte = ih264e_rc_post_enc(ps_codec->s_rate_control.pps_rate_control_api, 2368 &(s_frame_info), 2369 ps_codec->s_rate_control.pps_pd_frm_rate, 2370 ps_codec->s_rate_control.pps_time_stamp, 2371 ps_codec->s_rate_control.pps_frame_time, 2372 (ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs), 2373 &rc_pic_type, 2374 i4_is_first_frm, 2375 &ps_codec->s_rate_control.post_encode_skip[ctxt_sel], 2376 u1_frame_qp, 2377 &ps_codec->s_rate_control.num_intra_in_prev_frame, 2378 &ps_codec->s_rate_control.i4_avg_activity); 2379 return i4_stuffing_byte; 2380} 2381 2382/** 2383******************************************************************************* 2384* 2385* @brief 2386* entry point of a spawned encoder thread 2387* 2388* @par Description: 2389* The encoder thread dequeues a proc/entropy job from the encoder queue and 2390* calls necessary routines. 2391* 2392* @param[in] pv_proc 2393* Process context corresponding to the thread 2394* 2395* @returns error status 2396* 2397* @remarks 2398* 2399******************************************************************************* 2400*/ 2401WORD32 ih264e_process_thread(void *pv_proc) 2402{ 2403 /* error status */ 2404 IH264_ERROR_T ret = IH264_SUCCESS; 2405 WORD32 error_status = IH264_SUCCESS; 2406 2407 /* proc ctxt */ 2408 process_ctxt_t *ps_proc = pv_proc; 2409 2410 /* codec ctxt */ 2411 codec_t *ps_codec = ps_proc->ps_codec; 2412 2413 /* structure to represent a processing job entry */ 2414 job_t s_job; 2415 2416 /* blocking call : entropy dequeue is non-blocking till all 2417 * the proc jobs are processed */ 2418 WORD32 is_blocking = 0; 2419 2420 /* set affinity */ 2421 ithread_set_affinity(ps_proc->i4_id); 2422 2423 while(1) 2424 { 2425 /* dequeue a job from the entropy queue */ 2426 { 2427 int error = ithread_mutex_lock(ps_codec->pv_entropy_mutex); 2428 2429 /* codec context selector */ 2430 WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1; 2431 2432 volatile UWORD32 *pu4_buf = &ps_codec->au4_entropy_thread_active[ctxt_sel]; 2433 2434 /* have the lock */ 2435 if (error == 0) 2436 { 2437 if (*pu4_buf == 0) 2438 { 2439 /* no entropy threads are active, try dequeuing a job from the entropy queue */ 2440 ret = ih264_list_dequeue(ps_proc->pv_entropy_jobq, &s_job, is_blocking); 2441 if (IH264_SUCCESS == ret) 2442 { 2443 *pu4_buf = 1; 2444 ithread_mutex_unlock(ps_codec->pv_entropy_mutex); 2445 goto WORKER; 2446 } 2447 else if(is_blocking) 2448 { 2449 ithread_mutex_unlock(ps_codec->pv_entropy_mutex); 2450 break; 2451 } 2452 } 2453 ithread_mutex_unlock(ps_codec->pv_entropy_mutex); 2454 } 2455 } 2456 2457 /* dequeue a job from the process queue */ 2458 ret = ih264_list_dequeue(ps_proc->pv_proc_jobq, &s_job, 1); 2459 if (IH264_SUCCESS != ret) 2460 { 2461 if(ps_proc->i4_id) 2462 break; 2463 else 2464 { 2465 is_blocking = 1; 2466 continue; 2467 } 2468 } 2469 2470WORKER: 2471 /* choose appropriate proc context based on proc_base_idx */ 2472 ps_proc = &ps_codec->as_process[ps_proc->i4_id + s_job.i2_proc_base_idx]; 2473 2474 switch (s_job.i4_cmd) 2475 { 2476 case CMD_PROCESS: 2477 ps_proc->i4_mb_cnt = s_job.i2_mb_cnt; 2478 ps_proc->i4_mb_x = s_job.i2_mb_x; 2479 ps_proc->i4_mb_y = s_job.i2_mb_y; 2480 2481 /* init process context */ 2482 ih264e_init_proc_ctxt(ps_proc); 2483 2484 /* core code all mbs enlisted under the current job */ 2485 error_status |= ih264e_process(ps_proc); 2486 break; 2487 2488 case CMD_ENTROPY: 2489 ps_proc->s_entropy.i4_mb_x = s_job.i2_mb_x; 2490 ps_proc->s_entropy.i4_mb_y = s_job.i2_mb_y; 2491 ps_proc->s_entropy.i4_mb_cnt = s_job.i2_mb_cnt; 2492 2493 /* init entropy */ 2494 ih264e_init_entropy_ctxt(ps_proc); 2495 2496 /* entropy code all mbs enlisted under the current job */ 2497 error_status |= ih264e_entropy(ps_proc); 2498 break; 2499 2500 default: 2501 error_status |= IH264_FAIL; 2502 break; 2503 } 2504 } 2505 2506 /* send error code */ 2507 ps_proc->i4_error_code = error_status; 2508 return ret; 2509} 2510