ih264e_process.c revision 3749f6f435e79624f72841e866245d84195551cd
1/******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*/
20
21/**
22*******************************************************************************
23* @file
24*  ih264e_process.c
25*
26* @brief
27*  Contains functions for codec thread
28*
29* @author
30*  Harish
31*
32* @par List of Functions:
33* - ih264e_generate_sps_pps()
34* - ih264e_init_entropy_ctxt()
35* - ih264e_entropy()
36* - ih264e_pack_header_data()
37* - ih264e_update_proc_ctxt()
38* - ih264e_init_proc_ctxt()
39* - ih264e_pad_recon_buffer()
40* - ih264e_dblk_pad_hpel_processing_n_mbs()
41* - ih264e_process()
42* - ih264e_set_rc_pic_params()
43* - ih264e_update_rc_post_enc()
44* - ih264e_process_thread()
45*
46* @remarks
47*  None
48*
49*******************************************************************************
50*/
51
52/*****************************************************************************/
53/* File Includes                                                             */
54/*****************************************************************************/
55
56/* System include files */
57#include <stdio.h>
58#include <stddef.h>
59#include <stdlib.h>
60#include <string.h>
61#include <limits.h>
62#include <assert.h>
63
64/* User include files */
65#include "ih264_typedefs.h"
66#include "iv2.h"
67#include "ive2.h"
68#include "ih264_defs.h"
69#include "ih264_debug.h"
70#include "ime_distortion_metrics.h"
71#include "ime_defs.h"
72#include "ime_structs.h"
73#include "ih264_error.h"
74#include "ih264_structs.h"
75#include "ih264_trans_quant_itrans_iquant.h"
76#include "ih264_inter_pred_filters.h"
77#include "ih264_mem_fns.h"
78#include "ih264_padding.h"
79#include "ih264_intra_pred_filters.h"
80#include "ih264_deblk_edge_filters.h"
81#include "ih264_cabac_tables.h"
82#include "ih264_platform_macros.h"
83#include "ih264_macros.h"
84#include "ih264_buf_mgr.h"
85#include "ih264e_error.h"
86#include "ih264e_bitstream.h"
87#include "ih264_common_tables.h"
88#include "ih264_list.h"
89#include "ih264e_defs.h"
90#include "irc_cntrl_param.h"
91#include "irc_frame_info_collector.h"
92#include "ih264e_rate_control.h"
93#include "ih264e_cabac_structs.h"
94#include "ih264e_structs.h"
95#include "ih264e_cabac.h"
96#include "ih264e_process.h"
97#include "ithread.h"
98#include "ih264e_intra_modes_eval.h"
99#include "ih264e_encode_header.h"
100#include "ih264e_globals.h"
101#include "ih264e_config.h"
102#include "ih264e_trace.h"
103#include "ih264e_statistics.h"
104#include "ih264_cavlc_tables.h"
105#include "ih264e_cavlc.h"
106#include "ih264e_deblk.h"
107#include "ih264e_me.h"
108#include "ih264e_debug.h"
109#include "ih264e_master.h"
110#include "ih264e_utils.h"
111#include "irc_mem_req_and_acq.h"
112#include "irc_rate_control_api.h"
113#include "ih264e_platform_macros.h"
114#include "ime_statistics.h"
115
116
117/*****************************************************************************/
118/* Function Definitions                                                      */
119/*****************************************************************************/
120
121/**
122******************************************************************************
123*
124*  @brief This function generates sps, pps set on request
125*
126*  @par   Description
127*  When the encoder is set in header generation mode, the following function
128*  is called. This generates sps and pps headers and returns the control back
129*  to caller.
130*
131*  @param[in]    ps_codec
132*  pointer to codec context
133*
134*  @return      success or failure error code
135*
136******************************************************************************
137*/
138IH264E_ERROR_T ih264e_generate_sps_pps(codec_t *ps_codec)
139{
140    /* choose between ping-pong process buffer set */
141    WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1;
142
143    /* entropy ctxt */
144    entropy_ctxt_t *ps_entropy = &ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].s_entropy;
145
146    /* Bitstream structure */
147    bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
148
149    /* sps */
150    sps_t *ps_sps = NULL;
151
152    /* pps */
153    pps_t *ps_pps = NULL;
154
155    /* output buff */
156    out_buf_t *ps_out_buf = &ps_codec->as_out_buf[ctxt_sel];
157
158
159    /********************************************************************/
160    /*      initialize the bit stream buffer                            */
161    /********************************************************************/
162    ih264e_bitstrm_init(ps_bitstrm, ps_out_buf->s_bits_buf.pv_buf, ps_out_buf->s_bits_buf.u4_bufsize);
163
164    /********************************************************************/
165    /*                    BEGIN HEADER GENERATION                       */
166    /********************************************************************/
167    /*ps_codec->i4_pps_id ++;*/
168    ps_codec->i4_pps_id %= MAX_PPS_CNT;
169
170    /*ps_codec->i4_sps_id ++;*/
171    ps_codec->i4_sps_id %= MAX_SPS_CNT;
172
173    /* populate sps header */
174    ps_sps = ps_codec->ps_sps_base + ps_codec->i4_sps_id;
175    ih264e_populate_sps(ps_codec, ps_sps);
176
177    /* populate pps header */
178    ps_pps = ps_codec->ps_pps_base + ps_codec->i4_pps_id;
179    ih264e_populate_pps(ps_codec, ps_pps);
180
181    ps_entropy->i4_error_code = IH264E_SUCCESS;
182
183    /* generate sps */
184    ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps);
185
186    /* generate pps */
187    ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
188
189    /* queue output buffer */
190    ps_out_buf->s_bits_buf.u4_bytes = ps_bitstrm->u4_strm_buf_offset;
191
192    return ps_entropy->i4_error_code;
193}
194
195/**
196*******************************************************************************
197*
198* @brief   initialize entropy context.
199*
200* @par Description:
201*  Before invoking the call to perform to entropy coding the entropy context
202*  associated with the job needs to be initialized. This involves the start
203*  mb address, end mb address, slice index and the pointer to location at
204*  which the mb residue info and mb header info are packed.
205*
206* @param[in] ps_proc
207*  Pointer to the current process context
208*
209* @returns error status
210*
211* @remarks none
212*
213*******************************************************************************
214*/
215IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc)
216{
217    /* codec context */
218    codec_t *ps_codec = ps_proc->ps_codec;
219
220    /* entropy ctxt */
221    entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
222
223    /* start address */
224    ps_entropy->i4_mb_start_add = ps_entropy->i4_mb_y * ps_entropy->i4_wd_mbs + ps_entropy->i4_mb_x;
225
226    /* end address */
227    ps_entropy->i4_mb_end_add = ps_entropy->i4_mb_start_add + ps_entropy->i4_mb_cnt;
228
229    /* slice index */
230    ps_entropy->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_entropy->i4_mb_start_add];
231
232    /* sof */
233    /* @ start of frame or start of a new slice, set sof flag */
234    if (ps_entropy->i4_mb_start_add == 0)
235    {
236        ps_entropy->i4_sof = 1;
237    }
238
239    if (ps_entropy->i4_mb_x == 0)
240    {
241        /* packed mb coeff data */
242        ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
243                        ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
244
245        /* packed mb header data */
246        ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
247                        ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
248    }
249
250    return IH264E_SUCCESS;
251}
252
253/**
254*******************************************************************************
255*
256* @brief entry point for entropy coding
257*
258* @par Description
259*  This function calls lower level functions to perform entropy coding for a
260*  group (n rows) of mb's. After encoding 1 row of mb's,  the function takes
261*  back the control, updates the ctxt and calls lower level functions again.
262*  This process is repeated till all the rows or group of mb's (which ever is
263*  minimum) are coded
264*
265* @param[in] ps_proc
266*  process context
267*
268* @returns  error status
269*
270* @remarks
271*
272*******************************************************************************
273*/
274
275IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
276{
277    /* codec context */
278    codec_t *ps_codec = ps_proc->ps_codec;
279
280    /* entropy context */
281    entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
282
283    /* cabac context */
284    cabac_ctxt_t *ps_cabac_ctxt = ps_entropy->ps_cabac;
285
286    /* sps */
287    sps_t *ps_sps = ps_entropy->ps_sps_base + (ps_entropy->u4_sps_id % MAX_SPS_CNT);
288
289    /* pps */
290    pps_t *ps_pps = ps_entropy->ps_pps_base + (ps_entropy->u4_pps_id % MAX_PPS_CNT);
291
292    /* slice header */
293    slice_header_t *ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (ps_entropy->i4_cur_slice_idx % MAX_SLICE_HDR_CNT);
294
295    /* slice type */
296    WORD32 i4_slice_type = ps_proc->i4_slice_type;
297
298    /* Bitstream structure */
299    bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
300
301    /* output buff */
302    out_buf_t s_out_buf;
303
304    /* proc map */
305    UWORD8  *pu1_proc_map;
306
307    /* entropy map */
308    UWORD8  *pu1_entropy_map_curr;
309
310    /* proc base idx */
311    WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt & 1;
312
313    /* temp var */
314    WORD32 i4_wd_mbs, i4_ht_mbs;
315    UWORD32 u4_mb_cnt, u4_mb_idx, u4_mb_end_idx;
316    WORD32 bitstream_start_offset, bitstream_end_offset;
317    /********************************************************************/
318    /*                            BEGIN INIT                            */
319    /********************************************************************/
320
321    /* entropy encode start address */
322    u4_mb_idx = ps_entropy->i4_mb_start_add;
323
324    /* entropy encode end address */
325    u4_mb_end_idx = ps_entropy->i4_mb_end_add;
326
327    /* width in mbs */
328    i4_wd_mbs = ps_entropy->i4_wd_mbs;
329
330    /* height in mbs */
331    i4_ht_mbs = ps_entropy->i4_ht_mbs;
332
333    /* total mb cnt */
334    u4_mb_cnt = i4_wd_mbs * i4_ht_mbs;
335
336    /* proc map */
337    pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
338
339    /* entropy map */
340    pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
341
342    /********************************************************************/
343    /* @ start of frame / slice,                                        */
344    /*      initialize the output buffer,                               */
345    /*      initialize the bit stream buffer,                           */
346    /*      check if sps and pps headers have to be generated,          */
347    /*      populate and generate slice header                          */
348    /********************************************************************/
349    if (ps_entropy->i4_sof)
350    {
351        /********************************************************************/
352        /*      initialize the output buffer                                */
353        /********************************************************************/
354        s_out_buf = ps_codec->as_out_buf[ctxt_sel];
355
356        /* is last frame to encode */
357        s_out_buf.u4_is_last = ps_entropy->u4_is_last;
358
359        /* frame idx */
360        s_out_buf.u4_timestamp_high = ps_entropy->u4_timestamp_high;
361        s_out_buf.u4_timestamp_low = ps_entropy->u4_timestamp_low;
362
363        /********************************************************************/
364        /*      initialize the bit stream buffer                            */
365        /********************************************************************/
366        ih264e_bitstrm_init(ps_bitstrm, s_out_buf.s_bits_buf.pv_buf, s_out_buf.s_bits_buf.u4_bufsize);
367
368        /********************************************************************/
369        /*                    BEGIN HEADER GENERATION                       */
370        /********************************************************************/
371        if (1 == ps_entropy->i4_gen_header)
372        {
373            /* generate sps */
374            ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps);
375
376            /* generate pps */
377            ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
378
379            /* reset i4_gen_header */
380            ps_entropy->i4_gen_header = 0;
381        }
382
383        /* populate slice header */
384        ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps);
385
386        /* generate slice header */
387        ps_entropy->i4_error_code |= ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr,
388                                                                  ps_pps, ps_sps);
389
390        /* once start of frame / slice is done, you can reset it */
391        /* it is the responsibility of the caller to set this flag */
392        ps_entropy->i4_sof = 0;
393
394        if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
395        {
396            BITSTREAM_BYTE_ALIGN(ps_bitstrm);
397            BITSTREAM_FLUSH(ps_bitstrm);
398            ih264e_init_cabac_ctxt(ps_entropy);
399        }
400    }
401
402    /* begin entropy coding for the mb set */
403    while (u4_mb_idx < u4_mb_end_idx)
404    {
405        /* init ptrs/indices */
406        if (ps_entropy->i4_mb_x == i4_wd_mbs)
407        {
408            ps_entropy->i4_mb_y++;
409            ps_entropy->i4_mb_x = 0;
410
411            /* packed mb coeff data */
412            ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
413                            ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
414
415            /* packed mb header data */
416            ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
417                            ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
418
419            /* proc map */
420            pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
421
422            /* entropy map */
423            pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
424        }
425
426        DEBUG("\nmb indices x, y %d, %d", ps_entropy->i4_mb_x, ps_entropy->i4_mb_y);
427        ENTROPY_TRACE("mb index x %d", ps_entropy->i4_mb_x);
428        ENTROPY_TRACE("mb index y %d", ps_entropy->i4_mb_y);
429
430        /* wait until the curr mb is core coded */
431        /* The wait for curr mb to be core coded is essential when entropy is launched
432         * as a separate job
433         */
434        while (1)
435        {
436            volatile UWORD8 *pu1_buf1;
437            WORD32 idx = ps_entropy->i4_mb_x;
438
439            pu1_buf1 = pu1_proc_map + idx;
440            if (*pu1_buf1)
441                break;
442            ithread_yield();
443        }
444
445
446        /* write mb layer */
447        ps_entropy->i4_error_code |= ps_codec->pf_write_mb_syntax_layer[ps_entropy->u1_entropy_coding_mode_flag][i4_slice_type](ps_entropy);
448        /* Starting bitstream offset for header in bits */
449        bitstream_start_offset = GET_NUM_BITS(ps_bitstrm);
450
451        /* set entropy map */
452        pu1_entropy_map_curr[ps_entropy->i4_mb_x] = 1;
453
454        u4_mb_idx++;
455        ps_entropy->i4_mb_x++;
456        /* check for eof */
457        if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
458        {
459            if (ps_entropy->i4_mb_x < i4_wd_mbs)
460            {
461                ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
462            }
463        }
464
465        if (ps_entropy->i4_mb_x == i4_wd_mbs)
466        {
467            /* if slices are enabled */
468            if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS)
469            {
470                /* current slice index */
471                WORD32 i4_curr_slice_idx = ps_entropy->i4_cur_slice_idx;
472
473                /* slice map */
474                UWORD8 *pu1_slice_idx = ps_entropy->pu1_slice_idx;
475
476                /* No need to open a slice at end of frame. The current slice can be closed at the time
477                 * of signaling eof flag.
478                 */
479                if ((u4_mb_idx != u4_mb_cnt) && (i4_curr_slice_idx
480                                                != pu1_slice_idx[u4_mb_idx]))
481                {
482                    if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
483                    { /* mb skip run */
484                        if ((i4_slice_type != ISLICE)
485                                        && *ps_entropy->pi4_mb_skip_run)
486                        {
487                            if (*ps_entropy->pi4_mb_skip_run)
488                            {
489                            PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run");
490                                *ps_entropy->pi4_mb_skip_run = 0;
491                            }
492                        }
493                        /* put rbsp trailing bits for the previous slice */
494                                 ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
495                    }
496                    else
497                    {
498                        ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
499                    }
500
501                    /* update slice header pointer */
502                    i4_curr_slice_idx = pu1_slice_idx[u4_mb_idx];
503                    ps_entropy->i4_cur_slice_idx = i4_curr_slice_idx;
504                    ps_slice_hdr = ps_entropy->ps_slice_hdr_base+ (i4_curr_slice_idx % MAX_SLICE_HDR_CNT);
505
506                    /* populate slice header */
507                    ps_entropy->i4_mb_start_add = u4_mb_idx;
508                    ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps,
509                                                 ps_sps);
510
511                    /* generate slice header */
512                    ps_entropy->i4_error_code |= ih264e_generate_slice_header(
513                                    ps_bitstrm, ps_slice_hdr, ps_pps, ps_sps);
514                    if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
515                    {
516                        BITSTREAM_BYTE_ALIGN(ps_bitstrm);
517                        BITSTREAM_FLUSH(ps_bitstrm);
518                        ih264e_init_cabac_ctxt(ps_entropy);
519                    }
520                }
521                else
522                {
523                    if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
524                                    && u4_mb_idx != u4_mb_cnt)
525                    {
526                        ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
527                    }
528                }
529            }
530            /* Dont execute any further instructions until store synchronization took place */
531            DATA_SYNC();
532        }
533
534        /* Ending bitstream offset for header in bits */
535        bitstream_end_offset = GET_NUM_BITS(ps_bitstrm);
536        ps_entropy->u4_header_bits[i4_slice_type == PSLICE] +=
537                        bitstream_end_offset - bitstream_start_offset;
538    }
539
540    /* check for eof */
541    if (u4_mb_idx == u4_mb_cnt)
542    {
543        /* set end of frame flag */
544        ps_entropy->i4_eof = 1;
545    }
546    else
547    {
548        if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
549                        && ps_codec->s_cfg.e_slice_mode
550                                        != IVE_SLICE_MODE_BLOCKS)
551        {
552            ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
553        }
554    }
555
556    if (ps_entropy->i4_eof)
557    {
558        if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
559        {
560            /* mb skip run */
561            if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run)
562            {
563                if (*ps_entropy->pi4_mb_skip_run)
564                {
565                    PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run,
566                                 ps_entropy->i4_error_code, "mb skip run");
567                    *ps_entropy->pi4_mb_skip_run = 0;
568                }
569            }
570            /* put rbsp trailing bits */
571             ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
572        }
573        else
574        {
575            ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
576        }
577
578        /* update current frame stats to rc library */
579        {
580            /* number of bytes to stuff */
581            WORD32 i4_stuff_bytes;
582
583            /* update */
584            i4_stuff_bytes = ih264e_update_rc_post_enc(
585                            ps_codec, ctxt_sel,
586                            (ps_proc->ps_codec->i4_poc == 0));
587
588            /* cbr rc - house keeping */
589            if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel])
590            {
591                ps_entropy->ps_bitstrm->u4_strm_buf_offset = 0;
592            }
593            else if (i4_stuff_bytes)
594            {
595                /* add filler nal units */
596                ps_entropy->i4_error_code |= ih264e_add_filler_nal_unit(ps_bitstrm, i4_stuff_bytes);
597            }
598        }
599
600        /*
601         *Frame number is to be incremented only if the current frame is a
602         * reference frame. After each successful frame encode, we increment
603         * frame number by 1
604         */
605        if (!ps_codec->s_rate_control.post_encode_skip[ctxt_sel]
606                        && ps_codec->u4_is_curr_frm_ref)
607        {
608            ps_codec->i4_frame_num++;
609        }
610        /********************************************************************/
611        /*      signal the output                                           */
612        /********************************************************************/
613        ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes =
614                        ps_entropy->ps_bitstrm->u4_strm_buf_offset;
615
616        DEBUG("entropy status %x", ps_entropy->i4_error_code);
617    }
618
619    /* allow threads to dequeue entropy jobs */
620    ps_codec->au4_entropy_thread_active[ctxt_sel] = 0;
621
622    return ps_entropy->i4_error_code;
623}
624
625/**
626*******************************************************************************
627*
628* @brief Packs header information of a mb in to a buffer
629*
630* @par Description:
631*  After the deciding the mode info of a macroblock, the syntax elements
632*  associated with the mb are packed and stored. The entropy thread unpacks
633*  this buffer and generates the end bit stream.
634*
635* @param[in] ps_proc
636*  Pointer to the current process context
637*
638* @returns error status
639*
640* @remarks none
641*
642*******************************************************************************
643*/
644IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc)
645{
646    /* curr mb type */
647    UWORD32 u4_mb_type = ps_proc->u4_mb_type;
648
649    /* pack mb syntax layer of curr mb (used for entropy coding) */
650    if (u4_mb_type == I4x4)
651    {
652        /* pointer to mb header storage space */
653        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
654
655        /* temp var */
656        WORD32 i4, byte;
657
658        /* mb type plus mode */
659        *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + u4_mb_type;
660
661        /* cbp */
662        *pu1_ptr++ = ps_proc->u4_cbp;
663
664        /* mb qp delta */
665        *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
666
667        /* sub mb modes */
668        for (i4 = 0; i4 < 16; i4 ++)
669        {
670            byte = 0;
671
672            if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
673                            ps_proc->au1_intra_luma_mb_4x4_modes[i4])
674            {
675                byte |= 1;
676            }
677            else
678            {
679
680                if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
681                                ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
682                {
683                    byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 1);
684                }
685                else
686                {
687                    byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 1;
688                }
689            }
690
691            i4++;
692
693            if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
694                            ps_proc->au1_intra_luma_mb_4x4_modes[i4])
695            {
696                byte |= 16;
697            }
698            else
699            {
700
701                if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
702                                ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
703                {
704                    byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 5);
705                }
706                else
707                {
708                    byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 5;
709                }
710            }
711
712            *pu1_ptr++ = byte;
713        }
714
715        /* end of mb layer */
716        ps_proc->pv_mb_header_data = pu1_ptr;
717    }
718    else if (u4_mb_type == I16x16)
719    {
720        /* pointer to mb header storage space */
721        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
722
723        /* mb type plus mode */
724        *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + (ps_proc->u1_l_i16_mode << 4) + u4_mb_type;
725
726        /* cbp */
727        *pu1_ptr++ = ps_proc->u4_cbp;
728
729        /* mb qp delta */
730        *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
731
732        /* end of mb layer */
733        ps_proc->pv_mb_header_data = pu1_ptr;
734    }
735    else if (u4_mb_type == P16x16)
736    {
737        /* pointer to mb header storage space */
738        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
739
740        WORD16 *i2_mv_ptr;
741
742        /* mb type plus mode */
743        *pu1_ptr++ = u4_mb_type;
744
745        /* cbp */
746        *pu1_ptr++ = ps_proc->u4_cbp;
747
748        /* mb qp delta */
749        *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
750
751        i2_mv_ptr = (WORD16 *)pu1_ptr;
752
753        *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
754
755        *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
756
757        /* end of mb layer */
758        ps_proc->pv_mb_header_data = i2_mv_ptr;
759    }
760    else if (u4_mb_type == PSKIP)
761    {
762        /* pointer to mb header storage space */
763        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
764
765        /* mb type plus mode */
766        *pu1_ptr++ = u4_mb_type;
767
768        /* end of mb layer */
769        ps_proc->pv_mb_header_data = pu1_ptr;
770    }
771    else if(u4_mb_type == B16x16)
772    {
773
774        /* pointer to mb header storage space */
775        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
776
777        WORD16 *i2_mv_ptr;
778
779        UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
780
781        /* mb type plus mode */
782        *pu1_ptr++ = (u4_pred_mode << 4) + u4_mb_type;
783
784        /* cbp */
785        *pu1_ptr++ = ps_proc->u4_cbp;
786
787        /* mb qp delta */
788        *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
789
790        /* l0 & l1 me data */
791        i2_mv_ptr = (WORD16 *)pu1_ptr;
792
793        if (u4_pred_mode != PRED_L1)
794        {
795            *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx
796                            - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
797
798            *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy
799                            - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
800        }
801        if (u4_pred_mode != PRED_L0)
802        {
803            *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx
804                            - ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
805
806            *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy
807                            - ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
808        }
809
810        /* end of mb layer */
811        ps_proc->pv_mb_header_data = i2_mv_ptr;
812
813    }
814    else if(u4_mb_type == BDIRECT)
815    {
816        /* pointer to mb header storage space */
817        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
818
819        /* mb type plus mode */
820        *pu1_ptr++ = u4_mb_type;
821
822        /* cbp */
823        *pu1_ptr++ = ps_proc->u4_cbp;
824
825        /* mb qp delta */
826        *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
827
828        ps_proc->pv_mb_header_data = pu1_ptr;
829
830    }
831    else if(u4_mb_type == BSKIP)
832    {
833        UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
834
835        /* pointer to mb header storage space */
836        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
837
838        /* mb type plus mode */
839        *pu1_ptr++ = (u4_pred_mode << 4) + u4_mb_type;
840
841        /* end of mb layer */
842        ps_proc->pv_mb_header_data = pu1_ptr;
843    }
844
845    return IH264E_SUCCESS;
846}
847
848/**
849*******************************************************************************
850*
851* @brief   update process context after encoding an mb. This involves preserving
852* the current mb information for later use, initialize the proc ctxt elements to
853* encode next mb.
854*
855* @par Description:
856*  This function performs house keeping tasks after encoding an mb.
857*  After encoding an mb, various elements of the process context needs to be
858*  updated to encode the next mb. For instance, the source, recon and reference
859*  pointers, mb indices have to be adjusted to the next mb. The slice index of
860*  the current mb needs to be updated. If mb qp modulation is enabled, then if
861*  the qp changes, the quant param structure needs to be updated. Also, to encode
862*  the next mb, the current mb info is used as part of mode prediction or mv
863*  prediction. Hence the current mb info has to be preserved at top/top left/left
864*  locations.
865*
866* @param[in] ps_proc
867*  Pointer to the current process context
868*
869* @returns none
870*
871* @remarks none
872*
873*******************************************************************************
874*/
875WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc)
876{
877    /* error status */
878    WORD32 error_status = IH264_SUCCESS;
879
880    /* codec context */
881    codec_t *ps_codec = ps_proc->ps_codec;
882
883    /* curr mb indices */
884    WORD32 i4_mb_x = ps_proc->i4_mb_x;
885    WORD32 i4_mb_y = ps_proc->i4_mb_y;
886
887    /* mb syntax elements of neighbors */
888    mb_info_t *ps_left_syn =  &ps_proc->s_left_mb_syntax_ele;
889    mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + i4_mb_x;
890    mb_info_t *ps_top_left_syn = &ps_proc->s_top_left_mb_syntax_ele;
891
892    /* curr mb type */
893    UWORD32 u4_mb_type = ps_proc->u4_mb_type;
894
895    /* curr mb type */
896    UWORD32 u4_is_intra = ps_proc->u4_is_intra;
897
898    /* width in mbs */
899    WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
900
901    /*height in mbs*/
902    WORD32 i4_ht_mbs = ps_proc->i4_ht_mbs;
903
904    /* proc map */
905    UWORD8 *pu1_proc_map = ps_proc->pu1_proc_map + (i4_mb_y * i4_wd_mbs);
906
907    /* deblk context */
908    deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
909
910    /* deblk bs context */
911    bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
912
913    /* top row motion vector info */
914    enc_pu_t *ps_top_row_pu = ps_proc->ps_top_row_pu + i4_mb_x;
915
916    /* top left mb motion vector */
917    enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu;
918
919    /* left mb motion vector */
920    enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu;
921
922    /* sub mb modes */
923    UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (i4_mb_x << 4);
924
925    /*************************************************************/
926    /* During MV prediction, when top right mb is not available, */
927    /* top left mb info. is used for prediction. Hence the curr  */
928    /* top, which will be top left for the next mb needs to be   */
929    /* preserved before updating it with curr mb info.           */
930    /*************************************************************/
931
932    /* mb type, mb class, csbp */
933    *ps_top_left_syn = *ps_top_syn;
934
935    if (ps_proc->i4_slice_type != ISLICE)
936    {
937        /*****************************************/
938        /* update top left with top info results */
939        /*****************************************/
940        /* mv */
941        *ps_top_left_mb_pu = *ps_top_row_pu;
942    }
943
944    /*************************************************/
945    /* update top and left with curr mb info results */
946    /*************************************************/
947
948    /* mb type */
949    ps_left_syn->u2_mb_type = ps_top_syn->u2_mb_type = u4_mb_type;
950
951    /* mb class */
952    ps_left_syn->u2_is_intra = ps_top_syn->u2_is_intra = u4_is_intra;
953
954    /* csbp */
955    ps_left_syn->u4_csbp = ps_top_syn->u4_csbp = ps_proc->u4_csbp;
956
957    /* distortion */
958    ps_left_syn->i4_mb_distortion = ps_top_syn->i4_mb_distortion = ps_proc->i4_mb_distortion;
959
960    if (u4_is_intra)
961    {
962        /* mb / sub mb modes */
963        if (I16x16 == u4_mb_type)
964        {
965            pu1_top_mb_intra_modes[0] = ps_proc->au1_left_mb_intra_modes[0] = ps_proc->u1_l_i16_mode;
966        }
967        else if (I4x4 == u4_mb_type)
968        {
969            ps_codec->pf_mem_cpy_mul8(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
970            ps_codec->pf_mem_cpy_mul8(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
971        }
972        else if (I8x8 == u4_mb_type)
973        {
974            memcpy(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
975            memcpy(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
976        }
977
978        if ((ps_proc->i4_slice_type == PSLICE) ||(ps_proc->i4_slice_type == BSLICE))
979        {
980            /* mv */
981            *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
982        }
983
984        *ps_proc->pu4_mb_pu_cnt = 1;
985    }
986    else
987    {
988        /* mv */
989        *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
990    }
991
992    /*
993     * Mark that the MB has been coded intra
994     * So that future AIRs can skip it
995     */
996    ps_proc->pu1_is_intra_coded[i4_mb_x + (i4_mb_y * i4_wd_mbs)] = u4_is_intra;
997
998    /**************************************************/
999    /* pack mb header info. for entropy coding        */
1000    /**************************************************/
1001    ih264e_pack_header_data(ps_proc);
1002
1003    /* update previous mb qp */
1004    ps_proc->u4_mb_qp_prev = ps_proc->u4_mb_qp;
1005
1006    /* store qp */
1007    ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
1008
1009    /*
1010     * We need to sync the cache to make sure that the nmv content of proc
1011     * is updated to cache properly
1012     */
1013    DATA_SYNC();
1014
1015    /* Just before finishing the row, enqueue the job in to entropy queue.
1016     * The master thread depending on its convenience shall dequeue it and
1017     * performs entropy.
1018     *
1019     * WARN !! Placing this block post proc map update can cause queuing of
1020     * entropy jobs in out of order.
1021     */
1022    if (i4_mb_x == i4_wd_mbs - 1)
1023    {
1024        /* job structures */
1025        job_t s_job;
1026
1027        /* job class */
1028        s_job.i4_cmd = CMD_ENTROPY;
1029
1030        /* number of mbs to be processed in the current job */
1031        s_job.i2_mb_cnt = ps_codec->s_cfg.i4_wd_mbs;
1032
1033        /* job start index x */
1034        s_job.i2_mb_x = 0;
1035
1036        /* job start index y */
1037        s_job.i2_mb_y = ps_proc->i4_mb_y;
1038
1039        /* proc base idx */
1040        s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt & 1) ? (MAX_PROCESS_CTXT / 2): 0 ;
1041
1042        /* queue the job */
1043        error_status |= ih264_list_queue(ps_proc->pv_entropy_jobq, &s_job, 1);
1044
1045        if(ps_proc->i4_mb_y == (i4_ht_mbs - 1))
1046            ih264_list_terminate(ps_codec->pv_entropy_jobq);
1047    }
1048
1049    /* update proc map */
1050    pu1_proc_map[i4_mb_x] = 1;
1051
1052    /**************************************************/
1053    /* update proc ctxt elements for encoding next mb */
1054    /**************************************************/
1055    /* update indices */
1056    i4_mb_x ++;
1057    ps_proc->i4_mb_x = i4_mb_x;
1058
1059    if (ps_proc->i4_mb_x == i4_wd_mbs)
1060    {
1061        ps_proc->i4_mb_y++;
1062        ps_proc->i4_mb_x = 0;
1063    }
1064
1065    /* update slice index */
1066    ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_proc->i4_mb_y * i4_wd_mbs + ps_proc->i4_mb_x];
1067
1068    /* update buffers pointers */
1069    ps_proc->pu1_src_buf_luma += MB_SIZE;
1070    ps_proc->pu1_rec_buf_luma += MB_SIZE;
1071    ps_proc->apu1_ref_buf_luma[0] += MB_SIZE;
1072    ps_proc->apu1_ref_buf_luma[1] += MB_SIZE;
1073
1074    /*
1075     * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1076     * the stride per MB is MB_SIZE
1077     */
1078    ps_proc->pu1_src_buf_chroma += MB_SIZE;
1079    ps_proc->pu1_rec_buf_chroma += MB_SIZE;
1080    ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE;
1081    ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE;
1082
1083
1084
1085    /* Reset cost, distortion params */
1086    ps_proc->i4_mb_cost = INT_MAX;
1087    ps_proc->i4_mb_distortion = SHRT_MAX;
1088
1089    ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt;
1090
1091    ps_proc->pu4_mb_pu_cnt += 1;
1092
1093    /* Update colocated pu */
1094    if (ps_proc->i4_slice_type == BSLICE)
1095        ps_proc->ps_colpu += *(ps_proc->aps_mv_buf[1]->pu4_mb_pu_cnt +  (i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x);
1096
1097    /* deblk ctxts */
1098    if (ps_proc->u4_disable_deblock_level != 1)
1099    {
1100        /* indices */
1101        ps_bs->i4_mb_x = ps_proc->i4_mb_x;
1102        ps_bs->i4_mb_y = ps_proc->i4_mb_y;
1103
1104#ifndef N_MB_ENABLE /* For N MB processing update take place inside deblocking function */
1105        ps_deblk->i4_mb_x ++;
1106
1107        ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1108        /*
1109         * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1110         * the stride per MB is MB_SIZE
1111         */
1112        ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1113#endif
1114    }
1115
1116    return error_status;
1117}
1118
1119/**
1120*******************************************************************************
1121*
1122* @brief   initialize process context.
1123*
1124* @par Description:
1125*  Before dispatching the current job to process thread, the process context
1126*  associated with the job is initialized. Usually every job aims to encode one
1127*  row of mb's. Basing on the row indices provided by the job, the process
1128*  context's buffer ptrs, slice indices and other elements that are necessary
1129*  during core-coding are initialized.
1130*
1131* @param[in] ps_proc
1132*  Pointer to the current process context
1133*
1134* @returns error status
1135*
1136* @remarks none
1137*
1138*******************************************************************************
1139*/
1140IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
1141{
1142    /* codec context */
1143    codec_t *ps_codec = ps_proc->ps_codec;
1144
1145    /* nmb processing context*/
1146    n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
1147
1148    /* indices */
1149    WORD32 i4_mb_x, i4_mb_y;
1150
1151    /* strides */
1152    WORD32 i4_src_strd = ps_proc->i4_src_strd;
1153    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1154
1155    /* quant params */
1156    quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1157
1158    /* deblk ctxt */
1159    deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
1160
1161    /* deblk bs context */
1162    bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
1163
1164    /* Pointer to mv_buffer of current frame */
1165    mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
1166
1167    /* Pointers for color space conversion */
1168    UWORD8 *pu1_y_buf_base, *pu1_u_buf_base, *pu1_v_buf_base;
1169
1170    /* Pad the MB to support non standard sizes */
1171    UWORD32 u4_pad_right_sz = ps_codec->s_cfg.u4_wd - ps_codec->s_cfg.u4_disp_wd;
1172    UWORD32 u4_pad_bottom_sz = ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht;
1173    UWORD16 u2_num_rows = MB_SIZE;
1174    WORD32 convert_uv_only;
1175
1176    /********************************************************************/
1177    /*                            BEGIN INIT                            */
1178    /********************************************************************/
1179
1180    i4_mb_x = ps_proc->i4_mb_x;
1181    i4_mb_y = ps_proc->i4_mb_y;
1182
1183    /* Number of mbs processed in one loop of process function */
1184    ps_proc->i4_nmb_ntrpy = (ps_proc->i4_wd_mbs > MAX_NMB) ? MAX_NMB : ps_proc->i4_wd_mbs;
1185    ps_proc->u4_nmb_me = (ps_proc->i4_wd_mbs > MAX_NMB)? MAX_NMB : ps_proc->i4_wd_mbs;
1186
1187    /* init buffer pointers */
1188    convert_uv_only = 1;
1189    if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1))
1190    {
1191        u2_num_rows = (UWORD16) MB_SIZE - u4_pad_bottom_sz;
1192        ps_proc->pu1_src_buf_luma_base = ps_codec->pu1_y_csc_buf_base;
1193        ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * MB_SIZE);
1194        convert_uv_only = 0;
1195    }
1196    else
1197        ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * MB_SIZE);
1198
1199
1200    if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE ||
1201        ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420P ||
1202        ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1203    {
1204        if ((ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_UV) ||
1205            (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU))
1206            ps_proc->pu1_src_buf_chroma_base = ps_codec->pu1_uv_csc_buf_base;
1207
1208        ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * BLK8x8SIZE);
1209    }
1210    else
1211    {
1212        ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * BLK8x8SIZE);
1213    }
1214
1215    ps_proc->pu1_rec_buf_luma = ps_proc->pu1_rec_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1216    ps_proc->pu1_rec_buf_chroma = ps_proc->pu1_rec_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1217
1218    /* Tempral back and forward reference buffer */
1219    ps_proc->apu1_ref_buf_luma[0] = ps_proc->apu1_ref_buf_luma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1220    ps_proc->apu1_ref_buf_chroma[0] = ps_proc->apu1_ref_buf_chroma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1221    ps_proc->apu1_ref_buf_luma[1] = ps_proc->apu1_ref_buf_luma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1222    ps_proc->apu1_ref_buf_chroma[1] = ps_proc->apu1_ref_buf_chroma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1223
1224    /*
1225     * Do color space conversion
1226     * NOTE : We assume there that the number of MB's to process will not span multiple rows
1227     */
1228    switch (ps_codec->s_cfg.e_inp_color_fmt)
1229    {
1230        case IV_YUV_420SP_UV:
1231        case IV_YUV_420SP_VU:
1232            /* In case of 420 semi-planar input, copy last few rows to intermediate
1233               buffer as chroma trans functions access one extra byte due to interleaved input.
1234               This data will be padded if required */
1235            if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1236            {
1237                WORD32 num_rows = ps_codec->s_cfg.u4_disp_ht & 0xF;
1238                UWORD8 *pu1_src;
1239                UWORD8 *pu1_dst;
1240                WORD32 i;
1241                pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
1242                          ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1243
1244                pu1_dst = ps_proc->pu1_src_buf_luma;
1245
1246                for (i = 0; i < num_rows; i++)
1247                {
1248                    memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd);
1249                    pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
1250                    pu1_dst += ps_proc->i4_src_strd;
1251                }
1252                pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
1253                          ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
1254                pu1_dst = ps_proc->pu1_src_buf_chroma;
1255
1256                /* Last MB row of chroma is copied unconditionally, since trans functions access an extra byte
1257                 * due to interleaved input
1258                 */
1259                num_rows = (ps_codec->s_cfg.u4_disp_ht >> 1) - (ps_proc->i4_mb_y * BLK8x8SIZE);
1260                for (i = 0; i < num_rows; i++)
1261                {
1262                    memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd);
1263                    pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
1264                    pu1_dst += ps_proc->i4_src_strd;
1265                }
1266
1267            }
1268            break;
1269
1270        case IV_YUV_420P :
1271            pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
1272                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1273
1274            pu1_u_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
1275                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
1276
1277            pu1_v_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[2] + (i4_mb_x * BLK8x8SIZE) +
1278                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[2] * (i4_mb_y * BLK8x8SIZE);
1279
1280            ps_codec->pf_ih264e_conv_420p_to_420sp(
1281                            pu1_y_buf_base, pu1_u_buf_base, pu1_v_buf_base,
1282                            ps_proc->pu1_src_buf_luma,
1283                            ps_proc->pu1_src_buf_chroma, u2_num_rows,
1284                            ps_codec->s_cfg.u4_disp_wd,
1285                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[0],
1286                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[1],
1287                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[2],
1288                            ps_proc->i4_src_strd, ps_proc->i4_src_strd,
1289                            convert_uv_only);
1290            break;
1291
1292        case IV_YUV_422ILE :
1293            pu1_y_buf_base =  (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE * 2)
1294                              + ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1295
1296            ps_codec->pf_ih264e_fmt_conv_422i_to_420sp(
1297                            ps_proc->pu1_src_buf_luma,
1298                            ps_proc->pu1_src_buf_chroma,
1299                            ps_proc->pu1_src_buf_chroma + 1, pu1_y_buf_base,
1300                            ps_codec->s_cfg.u4_disp_wd, u2_num_rows,
1301                            ps_proc->i4_src_strd, ps_proc->i4_src_strd,
1302                            ps_proc->i4_src_strd,
1303                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] >> 1);
1304            break;
1305
1306        default:
1307            break;
1308    }
1309
1310    if (u4_pad_right_sz && (ps_proc->i4_mb_x == 0) &&
1311                    (ps_proc->i4_src_strd > (WORD32)ps_codec->s_cfg.u4_disp_wd) )
1312    {
1313        UWORD32 u4_pad_wd, u4_pad_ht;
1314        u4_pad_wd = (UWORD32)(ps_proc->i4_src_strd - ps_codec->s_cfg.u4_disp_wd);
1315        u4_pad_wd = MIN(u4_pad_right_sz, u4_pad_wd);
1316        u4_pad_ht = MB_SIZE;
1317        if(ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1318            u4_pad_ht = MIN(MB_SIZE, (MB_SIZE - u4_pad_bottom_sz));
1319
1320        ih264_pad_right_luma(
1321                        ps_proc->pu1_src_buf_luma + ps_codec->s_cfg.u4_disp_wd,
1322                        ps_proc->i4_src_strd, u4_pad_ht, u4_pad_wd);
1323
1324        ih264_pad_right_chroma(
1325                        ps_proc->pu1_src_buf_chroma + ps_codec->s_cfg.u4_disp_wd,
1326                        ps_proc->i4_src_strd, u4_pad_ht / 2, u4_pad_wd);
1327    }
1328
1329    /* pad bottom edge */
1330    if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) && ps_proc->i4_mb_x == 0)
1331    {
1332        ih264_pad_bottom(ps_proc->pu1_src_buf_luma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd,
1333                         ps_proc->i4_src_strd, ps_proc->i4_src_strd, u4_pad_bottom_sz);
1334
1335        ih264_pad_bottom(ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd / 2,
1336                         ps_proc->i4_src_strd, ps_proc->i4_src_strd, (u4_pad_bottom_sz / 2));
1337    }
1338
1339
1340    /* packed mb coeff data */
1341    ps_proc->pv_mb_coeff_data = ((UWORD8 *)ps_proc->pv_pic_mb_coeff_data) + i4_mb_y * ps_codec->u4_size_coeff_data;
1342
1343    /* packed mb header data */
1344    ps_proc->pv_mb_header_data = ((UWORD8 *)ps_proc->pv_pic_mb_header_data) + i4_mb_y * ps_codec->u4_size_header_data;
1345
1346    /* slice index */
1347    ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[i4_mb_y * ps_proc->i4_wd_mbs + i4_mb_x];
1348
1349    /*********************************************************************/
1350    /* ih264e_init_quant_params() routine is called at the pic init level*/
1351    /* this would have initialized the qp.                               */
1352    /* TODO_LATER: currently it is assumed that quant params donot change*/
1353    /* across mb's. When they do calculate update ps_qp_params accordingly*/
1354    /*********************************************************************/
1355
1356    /* init mv buffer ptr */
1357    ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs * (MIN_PU_SIZE * MIN_PU_SIZE));
1358
1359    /* Init co-located mv buffer */
1360    ps_proc->ps_colpu = ps_proc->aps_mv_buf[1]->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs * (MIN_PU_SIZE * MIN_PU_SIZE));
1361
1362    if (i4_mb_y == 0)
1363    {
1364        ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu;
1365    }
1366    else
1367    {
1368        ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu + ((i4_mb_y - 1) * ps_proc->i4_wd_mbs * (MIN_PU_SIZE * MIN_PU_SIZE));
1369    }
1370
1371    ps_proc->pu4_mb_pu_cnt = ps_cur_mv_buf->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs);
1372
1373    /* mb type */
1374    ps_proc->u4_mb_type = I16x16;
1375
1376    /* lambda */
1377    ps_proc->u4_lambda = gu1_qp0[ps_qp_params->u1_mb_qp];
1378
1379    /* mb distortion */
1380    ps_proc->i4_mb_distortion = SHRT_MAX;
1381
1382    if (i4_mb_x == 0)
1383    {
1384        ps_proc->s_left_mb_syntax_ele.i4_mb_distortion = 0;
1385
1386        ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion = 0;
1387
1388        ps_proc->s_top_left_mb_syntax_ME.i4_mb_distortion = 0;
1389
1390        if (i4_mb_y == 0)
1391        {
1392            memset(ps_proc->ps_top_row_mb_syntax_ele, 0, (ps_proc->i4_wd_mbs + 1)*sizeof(mb_info_t));
1393        }
1394    }
1395
1396    /* mb cost */
1397    ps_proc->i4_mb_cost = INT_MAX;
1398
1399    /**********************/
1400    /* init deblk context */
1401    /**********************/
1402    ps_deblk->i4_mb_x = ps_proc->i4_mb_x;
1403    /* deblk lags the current mb proc by 1 row */
1404    /* NOTE: Intra prediction has to happen with non deblocked samples used as reference */
1405    /* Hence to deblk MB 0 of row 0, you have wait till MB 0 of row 1 is encoded. */
1406    /* For simplicity, we chose to lag deblking by 1 Row wrt to proc */
1407    ps_deblk->i4_mb_y = ps_proc->i4_mb_y - 1;
1408
1409    /* buffer ptrs */
1410    ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + i4_rec_strd * (ps_deblk->i4_mb_y * MB_SIZE);
1411    ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + i4_rec_strd * (ps_deblk->i4_mb_y * BLK8x8SIZE);
1412
1413    /* init deblk bs context */
1414    /* mb indices */
1415    ps_bs->i4_mb_x = ps_proc->i4_mb_x;
1416    ps_bs->i4_mb_y = ps_proc->i4_mb_y;
1417
1418    /* init n_mb_process  context */
1419    ps_n_mb_ctxt->i4_mb_x = 0;
1420    ps_n_mb_ctxt->i4_mb_y = ps_deblk->i4_mb_y;
1421    ps_n_mb_ctxt->i4_n_mbs = ps_proc->i4_nmb_ntrpy;
1422
1423    return IH264E_SUCCESS;
1424}
1425
1426/**
1427*******************************************************************************
1428*
1429* @brief This function performs luma & chroma padding
1430*
1431* @par Description:
1432*
1433* @param[in] ps_proc
1434*  Process context corresponding to the job
1435*
1436* @param[in] pu1_curr_pic_luma
1437*  Pointer to luma buffer
1438*
1439* @param[in] pu1_curr_pic_chroma
1440*  Pointer to chroma buffer
1441*
1442* @param[in] i4_mb_x
1443*  mb index x
1444*
1445* @param[in] i4_mb_y
1446*  mb index y
1447*
1448*  @param[in] i4_pad_ht
1449*  number of rows to be padded
1450*
1451* @returns  error status
1452*
1453* @remarks none
1454*
1455*******************************************************************************
1456*/
1457IH264E_ERROR_T ih264e_pad_recon_buffer(process_ctxt_t *ps_proc,
1458                                       UWORD8 *pu1_curr_pic_luma,
1459                                       UWORD8 *pu1_curr_pic_chroma,
1460                                       WORD32 i4_mb_x,
1461                                       WORD32 i4_mb_y,
1462                                       WORD32 i4_pad_ht)
1463{
1464    /* codec context */
1465    codec_t *ps_codec = ps_proc->ps_codec;
1466
1467    /* strides */
1468    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1469
1470    if (i4_mb_x == 0)
1471    {
1472        /* padding left luma */
1473        ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, i4_pad_ht, PAD_LEFT);
1474
1475        /* padding left chroma */
1476        ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, i4_pad_ht >> 1, PAD_LEFT);
1477    }
1478    if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1479    {
1480        /* padding right luma */
1481        ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, i4_pad_ht, PAD_RIGHT);
1482
1483        /* padding right chroma */
1484        ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, i4_pad_ht >> 1, PAD_RIGHT);
1485
1486        if (i4_mb_y == ps_proc->i4_ht_mbs - 1)
1487        {
1488            UWORD8 *pu1_rec_luma = pu1_curr_pic_luma + MB_SIZE + PAD_RIGHT + ((i4_pad_ht - 1) * i4_rec_strd);
1489            UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma + MB_SIZE + PAD_RIGHT + (((i4_pad_ht >> 1) - 1) * i4_rec_strd);
1490
1491            /* padding bottom luma */
1492            ps_codec->pf_pad_bottom(pu1_rec_luma, i4_rec_strd, i4_rec_strd, PAD_BOT);
1493
1494            /* padding bottom chroma */
1495            ps_codec->pf_pad_bottom(pu1_rec_chroma, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
1496        }
1497    }
1498
1499    if (i4_mb_y == 0)
1500    {
1501        UWORD8 *pu1_rec_luma = pu1_curr_pic_luma;
1502        UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma;
1503        WORD32 wd = MB_SIZE;
1504
1505        if (i4_mb_x == 0)
1506        {
1507            pu1_rec_luma -= PAD_LEFT;
1508            pu1_rec_chroma -= PAD_LEFT;
1509
1510            wd += PAD_LEFT;
1511        }
1512        if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1513        {
1514            wd += PAD_RIGHT;
1515        }
1516
1517        /* padding top luma */
1518        ps_codec->pf_pad_top(pu1_rec_luma, i4_rec_strd, wd, PAD_TOP);
1519
1520        /* padding top chroma */
1521        ps_codec->pf_pad_top(pu1_rec_chroma, i4_rec_strd, wd, (PAD_TOP >> 1));
1522    }
1523
1524    return IH264E_SUCCESS;
1525}
1526
1527
1528
1529
1530/**
1531*******************************************************************************
1532*
1533* @brief This function performs deblocking, padding and halfpel generation for
1534*  'n' MBs
1535*
1536* @par Description:
1537*
1538* @param[in] ps_proc
1539*  Process context corresponding to the job
1540*
1541* @param[in] pu1_curr_pic_luma
1542* Current MB being processed(Luma)
1543*
1544* @param[in] pu1_curr_pic_chroma
1545* Current MB being processed(Chroma)
1546*
1547* @param[in] i4_mb_x
1548* Column value of current MB processed
1549*
1550* @param[in] i4_mb_y
1551* Curent row processed
1552*
1553* @returns  error status
1554*
1555* @remarks none
1556*
1557*******************************************************************************
1558*/
1559IH264E_ERROR_T ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t *ps_proc,
1560                                                     UWORD8 *pu1_curr_pic_luma,
1561                                                     UWORD8 *pu1_curr_pic_chroma,
1562                                                     WORD32 i4_mb_x,
1563                                                     WORD32 i4_mb_y)
1564{
1565    /* codec context */
1566    codec_t *ps_codec = ps_proc->ps_codec;
1567
1568    /* n_mb processing context */
1569    n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
1570
1571    /* deblk context */
1572    deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
1573
1574    /* strides */
1575    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1576
1577    /* loop variables */
1578    WORD32 row, i, j, col;
1579
1580    /* Padding Width */
1581    UWORD32 u4_pad_wd;
1582
1583    /* deblk_map of the row being deblocked */
1584    UWORD8 *pu1_deblk_map = ps_proc->pu1_deblk_map + ps_deblk->i4_mb_y * ps_proc->i4_wd_mbs;
1585
1586    /* deblk_map_previous row */
1587    UWORD8 *pu1_deblk_map_prev_row = pu1_deblk_map - ps_proc->i4_wd_mbs;
1588
1589    WORD32 u4_pad_top = 0;
1590
1591    WORD32 u4_deblk_prev_row = 0;
1592
1593    /* Number of mbs to be processed */
1594    WORD32 i4_n_mbs = ps_n_mb_ctxt->i4_n_mbs;
1595
1596    /* Number of mbs  actually processed
1597     * (at the end of a row, when remaining number of MBs are less than i4_n_mbs) */
1598    WORD32 i4_n_mb_process_count = 0;
1599
1600    UWORD8 *pu1_pad_bottom_src = NULL;
1601
1602    UWORD8 *pu1_pad_src_luma = NULL;
1603    UWORD8 *pu1_pad_src_chroma = NULL;
1604
1605    if (ps_proc->u4_disable_deblock_level == 1)
1606    {
1607        /* If left most MB is processed, then pad left */
1608        if (i4_mb_x == 0)
1609        {
1610            /* padding left luma */
1611            ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1612
1613            /* padding left chroma */
1614            ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
1615        }
1616        /*last col*/
1617        if (i4_mb_x == (ps_proc->i4_wd_mbs - 1))
1618        {
1619            /* padding right luma */
1620            ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1621
1622            /* padding right chroma */
1623            ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
1624        }
1625    }
1626
1627    if ((i4_mb_y > 0) || (i4_mb_y == (ps_proc->i4_ht_mbs - 1)))
1628    {
1629        /* if number of mb's to be processed are less than 'N', go back.
1630         * exception to the above clause is end of row */
1631        if ( ((i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1)) < i4_n_mbs) && (i4_mb_x < (ps_proc->i4_wd_mbs - 1)) )
1632        {
1633            return IH264E_SUCCESS;
1634        }
1635        else
1636        {
1637            i4_n_mb_process_count = MIN(i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1), i4_n_mbs);
1638
1639            /* performing deblocking for required number of MBs */
1640            if ((i4_mb_y > 0) && (ps_proc->u4_disable_deblock_level != 1))
1641            {
1642                u4_deblk_prev_row = 1;
1643
1644                /* checking whether the top rows are deblocked */
1645                for (col = 0; col < i4_n_mb_process_count; col++)
1646                {
1647                    u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + col];
1648                }
1649
1650                /* checking whether the top right MB is deblocked */
1651                if ((ps_deblk->i4_mb_x + i4_n_mb_process_count) != ps_proc->i4_wd_mbs)
1652                {
1653                    u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + i4_n_mb_process_count];
1654                }
1655
1656                /* Top or Top right MBs not deblocked */
1657                if ((u4_deblk_prev_row != 1) && (i4_mb_y > 0))
1658                {
1659                    return IH264E_SUCCESS;
1660                }
1661
1662                for (row = 0; row < i4_n_mb_process_count; row++)
1663                {
1664                    ih264e_deblock_mb(ps_proc, ps_deblk);
1665
1666                    pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1667
1668                    if (ps_deblk->i4_mb_y > 0)
1669                    {
1670                        if (ps_deblk->i4_mb_x == 0)/* If left most MB is processed, then pad left*/
1671                        {
1672                            /* padding left luma */
1673                            ps_codec->pf_pad_left_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE, i4_rec_strd, MB_SIZE, PAD_LEFT);
1674
1675                            /* padding left chroma */
1676                            ps_codec->pf_pad_left_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
1677                        }
1678
1679                        if (ps_deblk->i4_mb_x == (ps_proc->i4_wd_mbs - 1))/*last column*/
1680                        {
1681                            /* padding right luma */
1682                            ps_codec->pf_pad_right_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1683
1684                            /* padding right chroma */
1685                            ps_codec->pf_pad_right_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
1686                        }
1687                    }
1688                    ps_deblk->i4_mb_x++;
1689
1690                    ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1691                    ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1692
1693                }
1694            }
1695            else if(i4_mb_y > 0)
1696            {
1697                ps_deblk->i4_mb_x += i4_n_mb_process_count;
1698
1699                ps_deblk->pu1_cur_pic_luma += i4_n_mb_process_count * MB_SIZE;
1700                ps_deblk->pu1_cur_pic_chroma += i4_n_mb_process_count * MB_SIZE;
1701            }
1702
1703            if (i4_mb_y == 2)
1704            {
1705                u4_pad_wd = i4_n_mb_process_count * MB_SIZE;
1706                u4_pad_top = ps_n_mb_ctxt->i4_mb_x * MB_SIZE;
1707
1708                if (ps_n_mb_ctxt->i4_mb_x == 0)
1709                {
1710                    u4_pad_wd += PAD_LEFT;
1711                    u4_pad_top = -PAD_LEFT;
1712                }
1713
1714                if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1715                {
1716                    u4_pad_wd += PAD_RIGHT;
1717                }
1718
1719                /* padding top luma */
1720                ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_luma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, PAD_TOP);
1721
1722                /* padding top chroma */
1723                ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_chroma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, (PAD_TOP >> 1));
1724            }
1725
1726            ps_n_mb_ctxt->i4_mb_x += i4_n_mb_process_count;
1727
1728            if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1729            {
1730                if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1731                {
1732                    /* Bottom Padding is done in one stretch for the entire width */
1733                    if (ps_proc->u4_disable_deblock_level != 1)
1734                    {
1735                        ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * MB_SIZE;
1736
1737                        ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * BLK8x8SIZE;
1738
1739                        ps_n_mb_ctxt->i4_mb_x = 0;
1740                        ps_n_mb_ctxt->i4_mb_y = ps_proc->i4_mb_y;
1741                        ps_deblk->i4_mb_x = 0;
1742                        ps_deblk->i4_mb_y = ps_proc->i4_mb_y;
1743
1744                        /* update pic qp map (as update_proc_ctxt is still not called for the last MB) */
1745                        ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
1746
1747                        i4_n_mb_process_count = (ps_proc->i4_wd_mbs) % i4_n_mbs;
1748
1749                        j = (ps_proc->i4_wd_mbs) / i4_n_mbs;
1750
1751                        for (i = 0; i < j; i++)
1752                        {
1753                            for (col = 0; col < i4_n_mbs; col++)
1754                            {
1755                                ih264e_deblock_mb(ps_proc, ps_deblk);
1756
1757                                pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1758
1759                                ps_deblk->i4_mb_x++;
1760                                ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1761                                ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1762                                ps_n_mb_ctxt->i4_mb_x++;
1763                            }
1764                        }
1765
1766                        for (col = 0; col < i4_n_mb_process_count; col++)
1767                        {
1768                            ih264e_deblock_mb(ps_proc, ps_deblk);
1769
1770                            pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1771
1772                            ps_deblk->i4_mb_x++;
1773                            ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1774                            ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1775                            ps_n_mb_ctxt->i4_mb_x++;
1776                        }
1777
1778                        pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd;
1779
1780                        pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd;
1781
1782                        /* padding left luma */
1783                        ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1784
1785                        /* padding left chroma */
1786                        ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
1787
1788                        pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
1789                        pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
1790
1791                        /* padding left luma */
1792                        ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1793
1794                        /* padding left chroma */
1795                        ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
1796
1797                        pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
1798
1799                        pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
1800
1801                        /* padding right luma */
1802                        ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1803
1804                        /* padding right chroma */
1805                        ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
1806
1807                        pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
1808                        pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
1809
1810                        /* padding right luma */
1811                        ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1812
1813                        /* padding right chroma */
1814                        ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
1815
1816                    }
1817
1818                    /* In case height is less than 2 MBs pad top */
1819                    if (ps_proc->i4_ht_mbs <= 2)
1820                    {
1821                        UWORD8 *pu1_pad_top_src;
1822                        /* padding top luma */
1823                        pu1_pad_top_src = ps_proc->pu1_rec_buf_luma_base - PAD_LEFT;
1824                        ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, PAD_TOP);
1825
1826                        /* padding top chroma */
1827                        pu1_pad_top_src = ps_proc->pu1_rec_buf_chroma_base - PAD_LEFT;
1828                        ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, (PAD_TOP >> 1));
1829                    }
1830
1831                    /* padding bottom luma */
1832                    pu1_pad_bottom_src = ps_proc->pu1_rec_buf_luma_base + ps_proc->i4_ht_mbs * MB_SIZE * i4_rec_strd - PAD_LEFT;
1833                    ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, PAD_BOT);
1834
1835                    /* padding bottom chroma */
1836                    pu1_pad_bottom_src = ps_proc->pu1_rec_buf_chroma_base + ps_proc->i4_ht_mbs * (MB_SIZE >> 1) * i4_rec_strd - PAD_LEFT;
1837                    ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
1838                }
1839            }
1840        }
1841    }
1842
1843    return IH264E_SUCCESS;
1844}
1845
1846
1847/**
1848*******************************************************************************
1849*
1850* @brief This function performs luma & chroma core coding for a set of mb's.
1851*
1852* @par Description:
1853*  The mb to be coded is taken and is evaluated over a predefined set of modes
1854*  (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least cost
1855*  is selected and using intra/inter prediction filters, prediction is carried out.
1856*  The deviation between src and pred signal constitutes error signal. This error
1857*  signal is transformed (hierarchical transform if necessary) and quantized. The
1858*  quantized residue is packed in to entropy buffer for entropy coding. This is
1859*  repeated for all the mb's enlisted under the job.
1860*
1861* @param[in] ps_proc
1862*  Process context corresponding to the job
1863*
1864* @returns  error status
1865*
1866* @remarks none
1867*
1868*******************************************************************************
1869*/
1870WORD32 ih264e_process(process_ctxt_t *ps_proc)
1871{
1872    /* error status */
1873    WORD32 error_status = IH264_SUCCESS;
1874
1875    /* codec context */
1876    codec_t *ps_codec = ps_proc->ps_codec;
1877
1878    /* cbp luma, chroma */
1879    UWORD32 u4_cbp_l, u4_cbp_c;
1880
1881    /* width in mbs */
1882    WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
1883
1884    /* loop var */
1885    WORD32  i4_mb_idx, i4_mb_cnt = ps_proc->i4_mb_cnt;
1886
1887    /* valid modes */
1888    UWORD32 u4_valid_modes = 0;
1889
1890    /* gate threshold */
1891    WORD32 i4_gate_threshold = 0;
1892
1893    /* is intra */
1894    WORD32 luma_idx, chroma_idx, is_intra;
1895
1896    /* temp variables */
1897    WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt & 1;
1898
1899    /* list of modes for evaluation */
1900    if (ps_proc->i4_slice_type == ISLICE)
1901    {
1902        /* enable intra 16x16 */
1903        u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
1904
1905        /* enable intra 8x8 */
1906        u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_8x8 ? (1 << I8x8) : 0;
1907
1908        /* enable intra 4x4 */
1909        u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
1910    }
1911    else if (ps_proc->i4_slice_type == PSLICE)
1912    {
1913        /* enable intra 16x16 */
1914        u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
1915
1916        /* enable intra 4x4 */
1917        if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
1918        {
1919            u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
1920        }
1921
1922        /* enable inter P16x16 */
1923        u4_valid_modes |= (1 << P16x16);
1924    }
1925    else if (ps_proc->i4_slice_type == BSLICE)
1926    {
1927        /* enable intra 16x16 */
1928        u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
1929
1930        /* enable intra 4x4 */
1931        if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
1932        {
1933            u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
1934        }
1935
1936        /* enable inter B16x16 */
1937        u4_valid_modes |= (1 << B16x16);
1938    }
1939
1940
1941    /* init entropy */
1942    ps_proc->s_entropy.i4_mb_x = ps_proc->i4_mb_x;
1943    ps_proc->s_entropy.i4_mb_y = ps_proc->i4_mb_y;
1944    ps_proc->s_entropy.i4_mb_cnt = MIN(ps_proc->i4_nmb_ntrpy, i4_wd_mbs - ps_proc->i4_mb_x);
1945
1946    /* compute recon when :
1947     *   1. current frame is to be used as a reference
1948     *   2. dump recon for bit stream sanity check
1949     */
1950    ps_proc->u4_compute_recon = ps_codec->u4_is_curr_frm_ref ||
1951                                ps_codec->s_cfg.u4_enable_recon;
1952
1953    /* Encode 'n' macroblocks,
1954     * 'n' being the number of mbs dictated by current proc ctxt */
1955    for (i4_mb_idx = 0; i4_mb_idx < i4_mb_cnt; i4_mb_idx ++)
1956    {
1957        /* since we have not yet found sad, we have not yet got min sad */
1958        /* we need to initialize these variables for each MB */
1959        /* TODO how to get the min sad into the codec */
1960        ps_proc->u4_min_sad = ps_codec->s_cfg.i4_min_sad;
1961        ps_proc->u4_min_sad_reached = 0;
1962
1963        /* mb analysis */
1964        {
1965            /* temp var */
1966            WORD32 i4_mb_id = ps_proc->i4_mb_x + ps_proc->i4_mb_y * i4_wd_mbs;
1967
1968            /* force intra refresh ? */
1969            WORD32 i4_air_enable_inter = (ps_codec->s_cfg.e_air_mode == IVE_AIR_MODE_NONE) ||
1970                            (ps_proc->pu1_is_intra_coded[i4_mb_id] != 0) ||
1971                            (ps_codec->pu2_intr_rfrsh_map[i4_mb_id] != ps_codec->i4_air_pic_cnt);
1972
1973            /* evaluate inter 16x16 modes */
1974            if ((u4_valid_modes & (1 << P16x16)) || (u4_valid_modes & (1 << B16x16)))
1975            {
1976                /* compute nmb me */
1977                if (ps_proc->i4_mb_x % ps_proc->u4_nmb_me == 0)
1978                {
1979                    ih264e_compute_me_nmb(ps_proc, MIN((WORD32)ps_proc->u4_nmb_me,
1980                                                       i4_wd_mbs - ps_proc->i4_mb_x));
1981                }
1982
1983                /* set pointers to ME data appropriately for other modules to use */
1984                {
1985                    UWORD32 u4_mb_index = ps_proc->i4_mb_x % ps_proc->u4_nmb_me ;
1986
1987                    /* get the min sad condition for current mb */
1988                    ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
1989                    ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
1990
1991                    ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_skip_mv[0]);
1992                    ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_mb_index].s_ngbr_avbl);
1993                    ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_pred_mv[0]);
1994
1995                    ps_proc->i4_mb_distortion = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_distortion;
1996                    ps_proc->i4_mb_cost = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_cost;
1997                    ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
1998                    ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
1999                    ps_proc->u4_mb_type = ps_proc->ps_nmb_info[u4_mb_index].u4_mb_type;
2000
2001                    /* get the best sub pel buffer */
2002                    ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_mb_index].pu1_best_sub_pel_buf;
2003                    ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_mb_index].u4_bst_spel_buf_strd;
2004                }
2005                ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
2006            }
2007            else
2008            {
2009                /* Derive neighbor availability for the current macroblock */
2010                ps_proc->ps_ngbr_avbl = &ps_proc->s_ngbr_avbl;
2011
2012                ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
2013            }
2014
2015            /*
2016             * If air says intra, we need to force the following code path to evaluate intra
2017             * The easy way is just to say that the inter cost is too much
2018             */
2019            if (!i4_air_enable_inter)
2020            {
2021                ps_proc->u4_min_sad_reached = 0;
2022                ps_proc->i4_mb_cost = INT_MAX;
2023                ps_proc->i4_mb_distortion = INT_MAX;
2024            }
2025            else if (ps_proc->u4_mb_type == PSKIP)
2026            {
2027                goto UPDATE_MB_INFO;
2028            }
2029
2030            /* wait until the proc of [top + 1] mb is computed.
2031             * We wait till the proc dependencies are satisfied */
2032             if(ps_proc->i4_mb_y > 0)
2033             {
2034                /* proc map */
2035                UWORD8  *pu1_proc_map_top;
2036
2037                pu1_proc_map_top = ps_proc->pu1_proc_map + ((ps_proc->i4_mb_y - 1) * i4_wd_mbs);
2038
2039                while (1)
2040                {
2041                    volatile UWORD8 *pu1_buf;
2042                    WORD32 idx = i4_mb_idx + 1;
2043
2044                    idx = MIN(idx, ((WORD32)ps_codec->s_cfg.i4_wd_mbs - 1));
2045                    pu1_buf =  pu1_proc_map_top + idx;
2046                    if(*pu1_buf)
2047                        break;
2048                    ithread_yield();
2049                }
2050            }
2051
2052            /* If we already have the minimum sad, there is no point in searching for sad again */
2053            if (ps_proc->u4_min_sad_reached == 0)
2054            {
2055                /* intra gating in inter slices */
2056                /* No need of gating if we want to force intra, we need to find the threshold only if inter is enabled by AIR*/
2057                if (i4_air_enable_inter && ps_proc->i4_slice_type != ISLICE && ps_codec->u4_inter_gate)
2058                {
2059                    /* distortion of neighboring blocks */
2060                    WORD32 i4_distortion[4];
2061
2062                    i4_distortion[0] = ps_proc->s_left_mb_syntax_ele.i4_mb_distortion;
2063
2064                    i4_distortion[1] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x].i4_mb_distortion;
2065
2066                    i4_distortion[2] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x + 1].i4_mb_distortion;
2067
2068                    i4_distortion[3] = ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion;
2069
2070                    i4_gate_threshold = (i4_distortion[0] + i4_distortion[1] + i4_distortion[2] + i4_distortion[3]) >> 2;
2071
2072                }
2073
2074
2075                /* If we are going to force intra we need to evaluate intra irrespective of gating */
2076                if ( (!i4_air_enable_inter) || ((i4_gate_threshold + 16 *((WORD32) ps_proc->u4_lambda)) < ps_proc->i4_mb_distortion))
2077                {
2078                    /* evaluate intra 4x4 modes */
2079                    if (u4_valid_modes & (1 << I4x4))
2080                    {
2081                        if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
2082                        {
2083                            ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(ps_proc);
2084                        }
2085                        else
2086                        {
2087                            ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(ps_proc);
2088                        }
2089                    }
2090
2091                    /* evaluate intra 16x16 modes */
2092                    if (u4_valid_modes & (1 << I16x16))
2093                    {
2094                        ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(ps_proc);
2095                    }
2096
2097                    /* evaluate intra 8x8 modes */
2098                    if (u4_valid_modes & (1 << I8x8))
2099                    {
2100                        ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
2101                    }
2102
2103                }
2104        }
2105     }
2106
2107        /* is intra */
2108        if (ps_proc->u4_mb_type == I4x4 || ps_proc->u4_mb_type == I16x16 || ps_proc->u4_mb_type == I8x8)
2109        {
2110            luma_idx = ps_proc->u4_mb_type;
2111            chroma_idx = 0;
2112            is_intra = 1;
2113
2114            /* evaluate chroma blocks for intra */
2115            ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
2116        }
2117        else
2118        {
2119            luma_idx = 3;
2120            chroma_idx = 1;
2121            is_intra = 0;
2122        }
2123        ps_proc->u4_is_intra = is_intra;
2124        ps_proc->ps_pu->b1_intra_flag = is_intra;
2125
2126        /* redo MV pred of neighbors in the case intra mb */
2127        /* TODO : currently called unconditionally, needs to be called only in the case of intra
2128         * to modify neighbors */
2129        if (ps_proc->i4_slice_type != ISLICE)
2130        {
2131            ih264e_mv_pred(ps_proc, ps_proc->i4_slice_type);
2132        }
2133
2134        /* Perform luma mb core coding */
2135        u4_cbp_l = (ps_codec->luma_energy_compaction)[luma_idx](ps_proc);
2136
2137        /* Perform luma mb core coding */
2138        u4_cbp_c = (ps_codec->chroma_energy_compaction)[chroma_idx](ps_proc);
2139
2140        /* coded block pattern */
2141        ps_proc->u4_cbp = (u4_cbp_c << 4) | u4_cbp_l;
2142
2143        if (!ps_proc->u4_is_intra)
2144        {
2145            if (ps_proc->i4_slice_type == BSLICE)
2146            {
2147                if (ih264e_find_bskip_params(ps_proc, PRED_L0))
2148                {
2149                    ps_proc->u4_mb_type = (ps_proc->u4_cbp) ? BDIRECT : BSKIP;
2150                }
2151            }
2152            else if(!ps_proc->u4_cbp)
2153            {
2154                if (ih264e_find_pskip_params(ps_proc, PRED_L0))
2155                {
2156                    ps_proc->u4_mb_type = PSKIP;
2157                }
2158            }
2159        }
2160
2161UPDATE_MB_INFO:
2162
2163        /* Update mb sad, mb qp and intra mb cost. Will be used by rate control */
2164        ih264e_update_rc_mb_info(&ps_proc->s_frame_info, ps_proc);
2165
2166        /**********************************************************************/
2167        /* if disable deblock level is '0' this implies enable deblocking for */
2168        /* all edges of all macroblocks with out any restrictions             */
2169        /*                                                                    */
2170        /* if disable deblock level is '1' this implies disable deblocking for*/
2171        /* all edges of all macroblocks with out any restrictions             */
2172        /*                                                                    */
2173        /* if disable deblock level is '2' this implies enable deblocking for */
2174        /* all edges of all macroblocks except edges overlapping with slice   */
2175        /* boundaries. This option is not currently supported by the encoder  */
2176        /* hence the slice map should be of no significance to perform debloc */
2177        /* king                                                               */
2178        /**********************************************************************/
2179
2180        if (ps_proc->u4_compute_recon)
2181        {
2182            /* deblk context */
2183            /* src pointers */
2184            UWORD8 *pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma;
2185            UWORD8 *pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma;
2186
2187            /* src indices */
2188            UWORD32 i4_mb_x = ps_proc->i4_mb_x;
2189            UWORD32 i4_mb_y = ps_proc->i4_mb_y;
2190
2191            /* compute blocking strength */
2192            if (ps_proc->u4_disable_deblock_level != 1)
2193            {
2194                ih264e_compute_bs(ps_proc);
2195            }
2196
2197            /* nmb deblocking and hpel and padding */
2198            ih264e_dblk_pad_hpel_processing_n_mbs(ps_proc, pu1_cur_pic_luma,
2199                                                  pu1_cur_pic_chroma, i4_mb_x,
2200                                                  i4_mb_y);
2201        }
2202
2203        /* update the context after for coding next mb */
2204        error_status |= ih264e_update_proc_ctxt(ps_proc);
2205
2206        /* Once the last row is processed, mark the buffer status appropriately */
2207        if (ps_proc->i4_ht_mbs == ps_proc->i4_mb_y)
2208        {
2209            /* Pointer to current picture buffer structure */
2210            pic_buf_t *ps_cur_pic = ps_proc->ps_cur_pic;
2211
2212            /* Pointer to current picture's mv buffer structure */
2213            mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
2214
2215            /**********************************************************************/
2216            /* if disable deblock level is '0' this implies enable deblocking for */
2217            /* all edges of all macroblocks with out any restrictions             */
2218            /*                                                                    */
2219            /* if disable deblock level is '1' this implies disable deblocking for*/
2220            /* all edges of all macroblocks with out any restrictions             */
2221            /*                                                                    */
2222            /* if disable deblock level is '2' this implies enable deblocking for */
2223            /* all edges of all macroblocks except edges overlapping with slice   */
2224            /* boundaries. This option is not currently supported by the encoder  */
2225            /* hence the slice map should be of no significance to perform debloc */
2226            /* king                                                               */
2227            /**********************************************************************/
2228            error_status |= ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, ps_cur_mv_buf->i4_buf_id , BUF_MGR_CODEC);
2229
2230            error_status |= ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id , BUF_MGR_CODEC);
2231
2232            if (ps_codec->s_cfg.u4_enable_recon)
2233            {
2234                /* pic cnt */
2235                ps_codec->as_rec_buf[ctxt_sel].i4_pic_cnt = ps_proc->i4_pic_cnt;
2236
2237                /* rec buffers */
2238                ps_codec->as_rec_buf[ctxt_sel].s_pic_buf  = *ps_proc->ps_cur_pic;
2239
2240                /* is last? */
2241                ps_codec->as_rec_buf[ctxt_sel].u4_is_last = ps_proc->s_entropy.u4_is_last;
2242
2243                /* frame time stamp */
2244                ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_high = ps_proc->s_entropy.u4_timestamp_high;
2245                ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_low = ps_proc->s_entropy.u4_timestamp_low;
2246            }
2247
2248        }
2249    }
2250
2251    DEBUG_HISTOGRAM_DUMP(ps_codec->s_cfg.i4_ht_mbs == ps_proc->i4_mb_y);
2252
2253    return error_status;
2254}
2255
2256/**
2257*******************************************************************************
2258*
2259* @brief
2260*  Function to update rc context after encoding
2261*
2262* @par   Description
2263*  This function updates the rate control context after the frame is encoded.
2264*  Number of bits consumed by the current frame, frame distortion, frame cost,
2265*  number of intra/inter mb's, ... are passed on to rate control context for
2266*  updating the rc model.
2267*
2268* @param[in] ps_codec
2269*  Handle to codec context
2270*
2271* @param[in] ctxt_sel
2272*  frame context selector
2273*
2274* @param[in] pic_cnt
2275*  pic count
2276*
2277* @returns i4_stuffing_byte
2278*  number of stuffing bytes (if necessary)
2279*
2280* @remarks
2281*
2282*******************************************************************************
2283*/
2284WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 i4_is_first_frm)
2285{
2286    /* proc set base idx */
2287    WORD32 i4_proc_ctxt_sel_base = ctxt_sel ? (MAX_PROCESS_CTXT / 2) : 0;
2288
2289    /* proc ctxt */
2290    process_ctxt_t *ps_proc = &ps_codec->as_process[i4_proc_ctxt_sel_base];
2291
2292    /* frame qp */
2293    UWORD8 u1_frame_qp = ps_codec->u4_frame_qp;
2294
2295    /* cbr rc return status */
2296    WORD32 i4_stuffing_byte = 0;
2297
2298    /* current frame stats */
2299    frame_info_t s_frame_info;
2300    picture_type_e rc_pic_type;
2301
2302    /* temp var */
2303    WORD32 i, j;
2304
2305    /********************************************************************/
2306    /*                            BEGIN INIT                            */
2307    /********************************************************************/
2308
2309    /* init frame info */
2310    irc_init_frame_info(&s_frame_info);
2311
2312    /* get frame info */
2313    for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++)
2314    {
2315        /*****************************************************************/
2316        /* One frame can be encoded by max of u4_num_cores threads       */
2317        /* Accumulating the num mbs, sad, qp and intra_mb_cost from      */
2318        /* u4_num_cores threads                                          */
2319        /*****************************************************************/
2320        for (j = 0; j< MAX_MB_TYPE; j++)
2321        {
2322            s_frame_info.num_mbs[j] += ps_proc[i].s_frame_info.num_mbs[j];
2323
2324            s_frame_info.tot_mb_sad[j] += ps_proc[i].s_frame_info.tot_mb_sad[j];
2325
2326            s_frame_info.qp_sum[j] += ps_proc[i].s_frame_info.qp_sum[j];
2327        }
2328
2329        s_frame_info.intra_mb_cost_sum += ps_proc[i].s_frame_info.intra_mb_cost_sum;
2330
2331        s_frame_info.activity_sum += ps_proc[i].s_frame_info.activity_sum;
2332
2333        /*****************************************************************/
2334        /* gather number of residue and header bits consumed by the frame*/
2335        /*****************************************************************/
2336        ih264e_update_rc_bits_info(&s_frame_info, &ps_proc[i].s_entropy);
2337    }
2338
2339    /* get pic type */
2340    switch (ps_codec->pic_type)
2341    {
2342        case PIC_I:
2343        case PIC_IDR:
2344            rc_pic_type = I_PIC;
2345            break;
2346        case PIC_P:
2347            rc_pic_type = P_PIC;
2348            break;
2349        case PIC_B:
2350            rc_pic_type = B_PIC;
2351            break;
2352        default:
2353            assert(0);
2354            break;
2355    }
2356
2357    /* update rc lib with current frame stats */
2358    i4_stuffing_byte =  ih264e_rc_post_enc(ps_codec->s_rate_control.pps_rate_control_api,
2359                                          &(s_frame_info),
2360                                          ps_codec->s_rate_control.pps_pd_frm_rate,
2361                                          ps_codec->s_rate_control.pps_time_stamp,
2362                                          ps_codec->s_rate_control.pps_frame_time,
2363                                          (ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs),
2364                                          &rc_pic_type,
2365                                          i4_is_first_frm,
2366                                          &ps_codec->s_rate_control.post_encode_skip[ctxt_sel],
2367                                          u1_frame_qp,
2368                                          &ps_codec->s_rate_control.num_intra_in_prev_frame,
2369                                          &ps_codec->s_rate_control.i4_avg_activity);
2370    return i4_stuffing_byte;
2371}
2372
2373/**
2374*******************************************************************************
2375*
2376* @brief
2377*  entry point of a spawned encoder thread
2378*
2379* @par Description:
2380*  The encoder thread dequeues a proc/entropy job from the encoder queue and
2381*  calls necessary routines.
2382*
2383* @param[in] pv_proc
2384*  Process context corresponding to the thread
2385*
2386* @returns  error status
2387*
2388* @remarks
2389*
2390*******************************************************************************
2391*/
2392WORD32 ih264e_process_thread(void *pv_proc)
2393{
2394    /* error status */
2395    IH264_ERROR_T ret = IH264_SUCCESS;
2396    WORD32 error_status = IH264_SUCCESS;
2397
2398    /* proc ctxt */
2399    process_ctxt_t *ps_proc = pv_proc;
2400
2401    /* codec ctxt */
2402    codec_t *ps_codec = ps_proc->ps_codec;
2403
2404    /* structure to represent a processing job entry */
2405    job_t s_job;
2406
2407    /* blocking call : entropy dequeue is non-blocking till all
2408     * the proc jobs are processed */
2409    WORD32 is_blocking = 0;
2410
2411    /* set affinity */
2412    ithread_set_affinity(ps_proc->i4_id);
2413
2414    while(1)
2415    {
2416        /* dequeue a job from the entropy queue */
2417        {
2418            int error = ithread_mutex_lock(ps_codec->pv_entropy_mutex);
2419
2420            /* codec context selector */
2421            WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1;
2422
2423            volatile UWORD32 *pu4_buf = &ps_codec->au4_entropy_thread_active[ctxt_sel];
2424
2425            /* have the lock */
2426            if (error == 0)
2427            {
2428                if (*pu4_buf == 0)
2429                {
2430                    /* no entropy threads are active, try dequeuing a job from the entropy queue */
2431                    ret = ih264_list_dequeue(ps_proc->pv_entropy_jobq, &s_job, is_blocking);
2432                    if (IH264_SUCCESS == ret)
2433                    {
2434                        *pu4_buf = 1;
2435                        ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2436                        goto WORKER;
2437                    }
2438                    else if(is_blocking)
2439                    {
2440                        ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2441                        break;
2442                    }
2443                }
2444                ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2445            }
2446        }
2447
2448        /* dequeue a job from the process queue */
2449        ret = ih264_list_dequeue(ps_proc->pv_proc_jobq, &s_job, 1);
2450        if (IH264_SUCCESS != ret)
2451        {
2452            if(ps_proc->i4_id)
2453                break;
2454            else
2455            {
2456                is_blocking = 1;
2457                continue;
2458            }
2459        }
2460
2461WORKER:
2462        /* choose appropriate proc context based on proc_base_idx */
2463        ps_proc = &ps_codec->as_process[ps_proc->i4_id + s_job.i2_proc_base_idx];
2464
2465        switch (s_job.i4_cmd)
2466        {
2467            case CMD_PROCESS:
2468                ps_proc->i4_mb_cnt = s_job.i2_mb_cnt;
2469                ps_proc->i4_mb_x = s_job.i2_mb_x;
2470                ps_proc->i4_mb_y = s_job.i2_mb_y;
2471
2472                /* init process context */
2473                ih264e_init_proc_ctxt(ps_proc);
2474
2475                /* core code all mbs enlisted under the current job */
2476                error_status |= ih264e_process(ps_proc);
2477                break;
2478
2479            case CMD_ENTROPY:
2480                ps_proc->s_entropy.i4_mb_x = s_job.i2_mb_x;
2481                ps_proc->s_entropy.i4_mb_y = s_job.i2_mb_y;
2482                ps_proc->s_entropy.i4_mb_cnt = s_job.i2_mb_cnt;
2483
2484                /* init entropy */
2485                ih264e_init_entropy_ctxt(ps_proc);
2486
2487                /* entropy code all mbs enlisted under the current job */
2488                error_status |= ih264e_entropy(ps_proc);
2489                break;
2490
2491            default:
2492                error_status |= IH264_FAIL;
2493                break;
2494        }
2495    }
2496
2497    /* send error code */
2498    ps_proc->i4_error_code = error_status;
2499    return ret;
2500}
2501