1/******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*/
20
21/**
22*******************************************************************************
23* @file
24*  ih264e_process.c
25*
26* @brief
27*  Contains functions for codec thread
28*
29* @author
30*  Harish
31*
32* @par List of Functions:
33* - ih264e_generate_sps_pps()
34* - ih264e_init_entropy_ctxt()
35* - ih264e_entropy()
36* - ih264e_pack_header_data()
37* - ih264e_update_proc_ctxt()
38* - ih264e_init_proc_ctxt()
39* - ih264e_pad_recon_buffer()
40* - ih264e_dblk_pad_hpel_processing_n_mbs()
41* - ih264e_process()
42* - ih264e_set_rc_pic_params()
43* - ih264e_update_rc_post_enc()
44* - ih264e_process_thread()
45*
46* @remarks
47*  None
48*
49*******************************************************************************
50*/
51
52/*****************************************************************************/
53/* File Includes                                                             */
54/*****************************************************************************/
55
56/* System include files */
57#include <stdio.h>
58#include <stddef.h>
59#include <stdlib.h>
60#include <string.h>
61#include <limits.h>
62#include <assert.h>
63
64/* User include files */
65#include "ih264_typedefs.h"
66#include "iv2.h"
67#include "ive2.h"
68#include "ih264_defs.h"
69#include "ih264_debug.h"
70#include "ime_distortion_metrics.h"
71#include "ime_defs.h"
72#include "ime_structs.h"
73#include "ih264_error.h"
74#include "ih264_structs.h"
75#include "ih264_trans_quant_itrans_iquant.h"
76#include "ih264_inter_pred_filters.h"
77#include "ih264_mem_fns.h"
78#include "ih264_padding.h"
79#include "ih264_intra_pred_filters.h"
80#include "ih264_deblk_edge_filters.h"
81#include "ih264_cabac_tables.h"
82#include "ih264_platform_macros.h"
83#include "ih264_macros.h"
84#include "ih264_buf_mgr.h"
85#include "ih264e_error.h"
86#include "ih264e_bitstream.h"
87#include "ih264_common_tables.h"
88#include "ih264_list.h"
89#include "ih264e_defs.h"
90#include "irc_cntrl_param.h"
91#include "irc_frame_info_collector.h"
92#include "ih264e_rate_control.h"
93#include "ih264e_cabac_structs.h"
94#include "ih264e_structs.h"
95#include "ih264e_cabac.h"
96#include "ih264e_process.h"
97#include "ithread.h"
98#include "ih264e_intra_modes_eval.h"
99#include "ih264e_encode_header.h"
100#include "ih264e_globals.h"
101#include "ih264e_config.h"
102#include "ih264e_trace.h"
103#include "ih264e_statistics.h"
104#include "ih264_cavlc_tables.h"
105#include "ih264e_cavlc.h"
106#include "ih264e_deblk.h"
107#include "ih264e_me.h"
108#include "ih264e_debug.h"
109#include "ih264e_master.h"
110#include "ih264e_utils.h"
111#include "irc_mem_req_and_acq.h"
112#include "irc_rate_control_api.h"
113#include "ih264e_platform_macros.h"
114#include "ime_statistics.h"
115
116
117/*****************************************************************************/
118/* Function Definitions                                                      */
119/*****************************************************************************/
120
121/**
122******************************************************************************
123*
124*  @brief This function generates sps, pps set on request
125*
126*  @par   Description
127*  When the encoder is set in header generation mode, the following function
128*  is called. This generates sps and pps headers and returns the control back
129*  to caller.
130*
131*  @param[in]    ps_codec
132*  pointer to codec context
133*
134*  @return      success or failure error code
135*
136******************************************************************************
137*/
138IH264E_ERROR_T ih264e_generate_sps_pps(codec_t *ps_codec)
139{
140    /* choose between ping-pong process buffer set */
141    WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
142
143    /* entropy ctxt */
144    entropy_ctxt_t *ps_entropy = &ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].s_entropy;
145
146    /* Bitstream structure */
147    bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
148
149    /* sps */
150    sps_t *ps_sps = NULL;
151
152    /* pps */
153    pps_t *ps_pps = NULL;
154
155    /* output buff */
156    out_buf_t *ps_out_buf = &ps_codec->as_out_buf[ctxt_sel];
157
158
159    /********************************************************************/
160    /*      initialize the bit stream buffer                            */
161    /********************************************************************/
162    ih264e_bitstrm_init(ps_bitstrm, ps_out_buf->s_bits_buf.pv_buf, ps_out_buf->s_bits_buf.u4_bufsize);
163
164    /********************************************************************/
165    /*                    BEGIN HEADER GENERATION                       */
166    /********************************************************************/
167    /*ps_codec->i4_pps_id ++;*/
168    ps_codec->i4_pps_id %= MAX_PPS_CNT;
169
170    /*ps_codec->i4_sps_id ++;*/
171    ps_codec->i4_sps_id %= MAX_SPS_CNT;
172
173    /* populate sps header */
174    ps_sps = ps_codec->ps_sps_base + ps_codec->i4_sps_id;
175    ih264e_populate_sps(ps_codec, ps_sps);
176
177    /* populate pps header */
178    ps_pps = ps_codec->ps_pps_base + ps_codec->i4_pps_id;
179    ih264e_populate_pps(ps_codec, ps_pps);
180
181    ps_entropy->i4_error_code = IH264E_SUCCESS;
182
183    /* generate sps */
184    ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps, &ps_codec->s_vui);
185
186    /* generate pps */
187    ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
188
189    /* queue output buffer */
190    ps_out_buf->s_bits_buf.u4_bytes = ps_bitstrm->u4_strm_buf_offset;
191
192    return ps_entropy->i4_error_code;
193}
194
195/**
196*******************************************************************************
197*
198* @brief   initialize entropy context.
199*
200* @par Description:
201*  Before invoking the call to perform to entropy coding the entropy context
202*  associated with the job needs to be initialized. This involves the start
203*  mb address, end mb address, slice index and the pointer to location at
204*  which the mb residue info and mb header info are packed.
205*
206* @param[in] ps_proc
207*  Pointer to the current process context
208*
209* @returns error status
210*
211* @remarks none
212*
213*******************************************************************************
214*/
215IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc)
216{
217    /* codec context */
218    codec_t *ps_codec = ps_proc->ps_codec;
219
220    /* entropy ctxt */
221    entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
222
223    /* start address */
224    ps_entropy->i4_mb_start_add = ps_entropy->i4_mb_y * ps_entropy->i4_wd_mbs + ps_entropy->i4_mb_x;
225
226    /* end address */
227    ps_entropy->i4_mb_end_add = ps_entropy->i4_mb_start_add + ps_entropy->i4_mb_cnt;
228
229    /* slice index */
230    ps_entropy->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_entropy->i4_mb_start_add];
231
232    /* sof */
233    /* @ start of frame or start of a new slice, set sof flag */
234    if (ps_entropy->i4_mb_start_add == 0)
235    {
236        ps_entropy->i4_sof = 1;
237    }
238
239    if (ps_entropy->i4_mb_x == 0)
240    {
241        /* packed mb coeff data */
242        ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
243                        ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
244
245        /* packed mb header data */
246        ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
247                        ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
248    }
249
250    return IH264E_SUCCESS;
251}
252
253/**
254*******************************************************************************
255*
256* @brief entry point for entropy coding
257*
258* @par Description
259*  This function calls lower level functions to perform entropy coding for a
260*  group (n rows) of mb's. After encoding 1 row of mb's,  the function takes
261*  back the control, updates the ctxt and calls lower level functions again.
262*  This process is repeated till all the rows or group of mb's (which ever is
263*  minimum) are coded
264*
265* @param[in] ps_proc
266*  process context
267*
268* @returns  error status
269*
270* @remarks
271*
272*******************************************************************************
273*/
274
275IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
276{
277    /* codec context */
278    codec_t *ps_codec = ps_proc->ps_codec;
279
280    /* entropy context */
281    entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
282
283    /* cabac context */
284    cabac_ctxt_t *ps_cabac_ctxt = ps_entropy->ps_cabac;
285
286    /* sps */
287    sps_t *ps_sps = ps_entropy->ps_sps_base + (ps_entropy->u4_sps_id % MAX_SPS_CNT);
288
289    /* pps */
290    pps_t *ps_pps = ps_entropy->ps_pps_base + (ps_entropy->u4_pps_id % MAX_PPS_CNT);
291
292    /* slice header */
293    slice_header_t *ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (ps_entropy->i4_cur_slice_idx % MAX_SLICE_HDR_CNT);
294
295    /* slice type */
296    WORD32 i4_slice_type = ps_proc->i4_slice_type;
297
298    /* Bitstream structure */
299    bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
300
301    /* output buff */
302    out_buf_t s_out_buf;
303
304    /* proc map */
305    UWORD8  *pu1_proc_map;
306
307    /* entropy map */
308    UWORD8  *pu1_entropy_map_curr;
309
310    /* proc base idx */
311    WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
312
313    /* temp var */
314    WORD32 i4_wd_mbs, i4_ht_mbs;
315    UWORD32 u4_mb_cnt, u4_mb_idx, u4_mb_end_idx;
316    WORD32 bitstream_start_offset, bitstream_end_offset;
317    /********************************************************************/
318    /*                            BEGIN INIT                            */
319    /********************************************************************/
320
321    /* entropy encode start address */
322    u4_mb_idx = ps_entropy->i4_mb_start_add;
323
324    /* entropy encode end address */
325    u4_mb_end_idx = ps_entropy->i4_mb_end_add;
326
327    /* width in mbs */
328    i4_wd_mbs = ps_entropy->i4_wd_mbs;
329
330    /* height in mbs */
331    i4_ht_mbs = ps_entropy->i4_ht_mbs;
332
333    /* total mb cnt */
334    u4_mb_cnt = i4_wd_mbs * i4_ht_mbs;
335
336    /* proc map */
337    pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
338
339    /* entropy map */
340    pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
341
342    /********************************************************************/
343    /* @ start of frame / slice,                                        */
344    /*      initialize the output buffer,                               */
345    /*      initialize the bit stream buffer,                           */
346    /*      check if sps and pps headers have to be generated,          */
347    /*      populate and generate slice header                          */
348    /********************************************************************/
349    if (ps_entropy->i4_sof)
350    {
351        /********************************************************************/
352        /*      initialize the output buffer                                */
353        /********************************************************************/
354        s_out_buf = ps_codec->as_out_buf[ctxt_sel];
355
356        /* is last frame to encode */
357        s_out_buf.u4_is_last = ps_entropy->u4_is_last;
358
359        /* frame idx */
360        s_out_buf.u4_timestamp_high = ps_entropy->u4_timestamp_high;
361        s_out_buf.u4_timestamp_low = ps_entropy->u4_timestamp_low;
362
363        /********************************************************************/
364        /*      initialize the bit stream buffer                            */
365        /********************************************************************/
366        ih264e_bitstrm_init(ps_bitstrm, s_out_buf.s_bits_buf.pv_buf, s_out_buf.s_bits_buf.u4_bufsize);
367
368        /********************************************************************/
369        /*                    BEGIN HEADER GENERATION                       */
370        /********************************************************************/
371        if (1 == ps_entropy->i4_gen_header)
372        {
373            /* generate sps */
374            ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps, &ps_codec->s_vui);
375
376            /* generate pps */
377            ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
378
379            /* reset i4_gen_header */
380            ps_entropy->i4_gen_header = 0;
381        }
382
383        /* populate slice header */
384        ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps);
385
386        /* generate slice header */
387        ps_entropy->i4_error_code |= ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr,
388                                                                  ps_pps, ps_sps);
389
390        /* once start of frame / slice is done, you can reset it */
391        /* it is the responsibility of the caller to set this flag */
392        ps_entropy->i4_sof = 0;
393
394        if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
395        {
396            BITSTREAM_BYTE_ALIGN(ps_bitstrm);
397            BITSTREAM_FLUSH(ps_bitstrm);
398            ih264e_init_cabac_ctxt(ps_entropy);
399        }
400    }
401
402    /* begin entropy coding for the mb set */
403    while (u4_mb_idx < u4_mb_end_idx)
404    {
405        /* init ptrs/indices */
406        if (ps_entropy->i4_mb_x == i4_wd_mbs)
407        {
408            ps_entropy->i4_mb_y++;
409            ps_entropy->i4_mb_x = 0;
410
411            /* packed mb coeff data */
412            ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
413                            ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
414
415            /* packed mb header data */
416            ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
417                            ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
418
419            /* proc map */
420            pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
421
422            /* entropy map */
423            pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
424        }
425
426        DEBUG("\nmb indices x, y %d, %d", ps_entropy->i4_mb_x, ps_entropy->i4_mb_y);
427        ENTROPY_TRACE("mb index x %d", ps_entropy->i4_mb_x);
428        ENTROPY_TRACE("mb index y %d", ps_entropy->i4_mb_y);
429
430        /* wait until the curr mb is core coded */
431        /* The wait for curr mb to be core coded is essential when entropy is launched
432         * as a separate job
433         */
434        while (1)
435        {
436            volatile UWORD8 *pu1_buf1;
437            WORD32 idx = ps_entropy->i4_mb_x;
438
439            pu1_buf1 = pu1_proc_map + idx;
440            if (*pu1_buf1)
441                break;
442            ithread_yield();
443        }
444
445
446        /* write mb layer */
447        ps_entropy->i4_error_code |= ps_codec->pf_write_mb_syntax_layer[ps_entropy->u1_entropy_coding_mode_flag][i4_slice_type](ps_entropy);
448        /* Starting bitstream offset for header in bits */
449        bitstream_start_offset = GET_NUM_BITS(ps_bitstrm);
450
451        /* set entropy map */
452        pu1_entropy_map_curr[ps_entropy->i4_mb_x] = 1;
453
454        u4_mb_idx++;
455        ps_entropy->i4_mb_x++;
456        /* check for eof */
457        if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
458        {
459            if (ps_entropy->i4_mb_x < i4_wd_mbs)
460            {
461                ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
462            }
463        }
464
465        if (ps_entropy->i4_mb_x == i4_wd_mbs)
466        {
467            /* if slices are enabled */
468            if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS)
469            {
470                /* current slice index */
471                WORD32 i4_curr_slice_idx = ps_entropy->i4_cur_slice_idx;
472
473                /* slice map */
474                UWORD8 *pu1_slice_idx = ps_entropy->pu1_slice_idx;
475
476                /* No need to open a slice at end of frame. The current slice can be closed at the time
477                 * of signaling eof flag.
478                 */
479                if ((u4_mb_idx != u4_mb_cnt) && (i4_curr_slice_idx
480                                                != pu1_slice_idx[u4_mb_idx]))
481                {
482                    if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
483                    { /* mb skip run */
484                        if ((i4_slice_type != ISLICE)
485                                        && *ps_entropy->pi4_mb_skip_run)
486                        {
487                            if (*ps_entropy->pi4_mb_skip_run)
488                            {
489                            PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run");
490                                *ps_entropy->pi4_mb_skip_run = 0;
491                            }
492                        }
493                        /* put rbsp trailing bits for the previous slice */
494                                 ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
495                    }
496                    else
497                    {
498                        ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
499                    }
500
501                    /* update slice header pointer */
502                    i4_curr_slice_idx = pu1_slice_idx[u4_mb_idx];
503                    ps_entropy->i4_cur_slice_idx = i4_curr_slice_idx;
504                    ps_slice_hdr = ps_entropy->ps_slice_hdr_base+ (i4_curr_slice_idx % MAX_SLICE_HDR_CNT);
505
506                    /* populate slice header */
507                    ps_entropy->i4_mb_start_add = u4_mb_idx;
508                    ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps,
509                                                 ps_sps);
510
511                    /* generate slice header */
512                    ps_entropy->i4_error_code |= ih264e_generate_slice_header(
513                                    ps_bitstrm, ps_slice_hdr, ps_pps, ps_sps);
514                    if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
515                    {
516                        BITSTREAM_BYTE_ALIGN(ps_bitstrm);
517                        BITSTREAM_FLUSH(ps_bitstrm);
518                        ih264e_init_cabac_ctxt(ps_entropy);
519                    }
520                }
521                else
522                {
523                    if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
524                                    && u4_mb_idx != u4_mb_cnt)
525                    {
526                        ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
527                    }
528                }
529            }
530            /* Dont execute any further instructions until store synchronization took place */
531            DATA_SYNC();
532        }
533
534        /* Ending bitstream offset for header in bits */
535        bitstream_end_offset = GET_NUM_BITS(ps_bitstrm);
536        ps_entropy->u4_header_bits[i4_slice_type == PSLICE] +=
537                        bitstream_end_offset - bitstream_start_offset;
538    }
539
540    /* check for eof */
541    if (u4_mb_idx == u4_mb_cnt)
542    {
543        /* set end of frame flag */
544        ps_entropy->i4_eof = 1;
545    }
546    else
547    {
548        if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
549                        && ps_codec->s_cfg.e_slice_mode
550                                        != IVE_SLICE_MODE_BLOCKS)
551        {
552            ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
553        }
554    }
555
556    if (ps_entropy->i4_eof)
557    {
558        if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
559        {
560            /* mb skip run */
561            if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run)
562            {
563                if (*ps_entropy->pi4_mb_skip_run)
564                {
565                    PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run,
566                                 ps_entropy->i4_error_code, "mb skip run");
567                    *ps_entropy->pi4_mb_skip_run = 0;
568                }
569            }
570            /* put rbsp trailing bits */
571             ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
572        }
573        else
574        {
575            ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
576        }
577
578        /* update current frame stats to rc library */
579        {
580            /* number of bytes to stuff */
581            WORD32 i4_stuff_bytes;
582
583            /* update */
584            i4_stuff_bytes = ih264e_update_rc_post_enc(
585                            ps_codec, ctxt_sel,
586                            (ps_proc->ps_codec->i4_poc == 0));
587
588            /* cbr rc - house keeping */
589            if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel])
590            {
591                ps_entropy->ps_bitstrm->u4_strm_buf_offset = 0;
592            }
593            else if (i4_stuff_bytes)
594            {
595                /* add filler nal units */
596                ps_entropy->i4_error_code |= ih264e_add_filler_nal_unit(ps_bitstrm, i4_stuff_bytes);
597            }
598        }
599
600        /*
601         *Frame number is to be incremented only if the current frame is a
602         * reference frame. After each successful frame encode, we increment
603         * frame number by 1
604         */
605        if (!ps_codec->s_rate_control.post_encode_skip[ctxt_sel]
606                        && ps_codec->u4_is_curr_frm_ref)
607        {
608            ps_codec->i4_frame_num++;
609        }
610        /********************************************************************/
611        /*      signal the output                                           */
612        /********************************************************************/
613        ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes =
614                        ps_entropy->ps_bitstrm->u4_strm_buf_offset;
615
616        DEBUG("entropy status %x", ps_entropy->i4_error_code);
617    }
618
619    /* allow threads to dequeue entropy jobs */
620    ps_codec->au4_entropy_thread_active[ctxt_sel] = 0;
621
622    return ps_entropy->i4_error_code;
623}
624
625/**
626*******************************************************************************
627*
628* @brief Packs header information of a mb in to a buffer
629*
630* @par Description:
631*  After the deciding the mode info of a macroblock, the syntax elements
632*  associated with the mb are packed and stored. The entropy thread unpacks
633*  this buffer and generates the end bit stream.
634*
635* @param[in] ps_proc
636*  Pointer to the current process context
637*
638* @returns error status
639*
640* @remarks none
641*
642*******************************************************************************
643*/
644IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc)
645{
646    /* curr mb type */
647    UWORD32 u4_mb_type = ps_proc->u4_mb_type;
648
649    /* pack mb syntax layer of curr mb (used for entropy coding) */
650    if (u4_mb_type == I4x4)
651    {
652        /* pointer to mb header storage space */
653        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
654
655        /* temp var */
656        WORD32 i4, byte;
657
658        /* mb type plus mode */
659        *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + u4_mb_type;
660
661        /* cbp */
662        *pu1_ptr++ = ps_proc->u4_cbp;
663
664        /* mb qp delta */
665        *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
666
667        /* sub mb modes */
668        for (i4 = 0; i4 < 16; i4 ++)
669        {
670            byte = 0;
671
672            if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
673                            ps_proc->au1_intra_luma_mb_4x4_modes[i4])
674            {
675                byte |= 1;
676            }
677            else
678            {
679
680                if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
681                                ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
682                {
683                    byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 1);
684                }
685                else
686                {
687                    byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 1;
688                }
689            }
690
691            i4++;
692
693            if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
694                            ps_proc->au1_intra_luma_mb_4x4_modes[i4])
695            {
696                byte |= 16;
697            }
698            else
699            {
700
701                if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
702                                ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
703                {
704                    byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 5);
705                }
706                else
707                {
708                    byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 5;
709                }
710            }
711
712            *pu1_ptr++ = byte;
713        }
714
715        /* end of mb layer */
716        ps_proc->pv_mb_header_data = pu1_ptr;
717    }
718    else if (u4_mb_type == I16x16)
719    {
720        /* pointer to mb header storage space */
721        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
722
723        /* mb type plus mode */
724        *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + (ps_proc->u1_l_i16_mode << 4) + u4_mb_type;
725
726        /* cbp */
727        *pu1_ptr++ = ps_proc->u4_cbp;
728
729        /* mb qp delta */
730        *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
731
732        /* end of mb layer */
733        ps_proc->pv_mb_header_data = pu1_ptr;
734    }
735    else if (u4_mb_type == P16x16)
736    {
737        /* pointer to mb header storage space */
738        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
739
740        WORD16 *i2_mv_ptr;
741
742        /* mb type plus mode */
743        *pu1_ptr++ = u4_mb_type;
744
745        /* cbp */
746        *pu1_ptr++ = ps_proc->u4_cbp;
747
748        /* mb qp delta */
749        *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
750
751        i2_mv_ptr = (WORD16 *)pu1_ptr;
752
753        *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
754
755        *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
756
757        /* end of mb layer */
758        ps_proc->pv_mb_header_data = i2_mv_ptr;
759    }
760    else if (u4_mb_type == PSKIP)
761    {
762        /* pointer to mb header storage space */
763        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
764
765        /* mb type plus mode */
766        *pu1_ptr++ = u4_mb_type;
767
768        /* end of mb layer */
769        ps_proc->pv_mb_header_data = pu1_ptr;
770    }
771    else if(u4_mb_type == B16x16)
772    {
773
774        /* pointer to mb header storage space */
775        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
776
777        WORD16 *i2_mv_ptr;
778
779        UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
780
781        /* mb type plus mode */
782        *pu1_ptr++ = (u4_pred_mode << 4) + u4_mb_type;
783
784        /* cbp */
785        *pu1_ptr++ = ps_proc->u4_cbp;
786
787        /* mb qp delta */
788        *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
789
790        /* l0 & l1 me data */
791        i2_mv_ptr = (WORD16 *)pu1_ptr;
792
793        if (u4_pred_mode != PRED_L1)
794        {
795            *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx
796                            - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
797
798            *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy
799                            - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
800        }
801        if (u4_pred_mode != PRED_L0)
802        {
803            *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx
804                            - ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
805
806            *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy
807                            - ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
808        }
809
810        /* end of mb layer */
811        ps_proc->pv_mb_header_data = i2_mv_ptr;
812
813    }
814    else if(u4_mb_type == BDIRECT)
815    {
816        /* pointer to mb header storage space */
817        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
818
819        /* mb type plus mode */
820        *pu1_ptr++ = u4_mb_type;
821
822        /* cbp */
823        *pu1_ptr++ = ps_proc->u4_cbp;
824
825        /* mb qp delta */
826        *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
827
828        ps_proc->pv_mb_header_data = pu1_ptr;
829
830    }
831    else if(u4_mb_type == BSKIP)
832    {
833        UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
834
835        /* pointer to mb header storage space */
836        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
837
838        /* mb type plus mode */
839        *pu1_ptr++ = (u4_pred_mode << 4) + u4_mb_type;
840
841        /* end of mb layer */
842        ps_proc->pv_mb_header_data = pu1_ptr;
843    }
844
845    return IH264E_SUCCESS;
846}
847
848/**
849*******************************************************************************
850*
851* @brief   update process context after encoding an mb. This involves preserving
852* the current mb information for later use, initialize the proc ctxt elements to
853* encode next mb.
854*
855* @par Description:
856*  This function performs house keeping tasks after encoding an mb.
857*  After encoding an mb, various elements of the process context needs to be
858*  updated to encode the next mb. For instance, the source, recon and reference
859*  pointers, mb indices have to be adjusted to the next mb. The slice index of
860*  the current mb needs to be updated. If mb qp modulation is enabled, then if
*  the qp changes the quant param structure needs to be updated. Also, while
*  encoding the next mb, the current mb info is used as part of mode prediction
*  or mv prediction. Hence the current mb info has to be preserved at the
*  top/top left/left locations.
865*
866* @param[in] ps_proc
867*  Pointer to the current process context
868*
869* @returns none
870*
871* @remarks none
872*
873*******************************************************************************
874*/
875WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc)
876{
877    /* error status */
878    WORD32 error_status = IH264_SUCCESS;
879
880    /* codec context */
881    codec_t *ps_codec = ps_proc->ps_codec;
882
883    /* curr mb indices */
884    WORD32 i4_mb_x = ps_proc->i4_mb_x;
885    WORD32 i4_mb_y = ps_proc->i4_mb_y;
886
887    /* mb syntax elements of neighbors */
888    mb_info_t *ps_left_syn =  &ps_proc->s_left_mb_syntax_ele;
889    mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + i4_mb_x;
890    mb_info_t *ps_top_left_syn = &ps_proc->s_top_left_mb_syntax_ele;
891
892    /* curr mb type */
893    UWORD32 u4_mb_type = ps_proc->u4_mb_type;
894
895    /* curr mb type */
896    UWORD32 u4_is_intra = ps_proc->u4_is_intra;
897
898    /* width in mbs */
899    WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
900
901    /*height in mbs*/
902    WORD32 i4_ht_mbs = ps_proc->i4_ht_mbs;
903
904    /* proc map */
905    UWORD8 *pu1_proc_map = ps_proc->pu1_proc_map + (i4_mb_y * i4_wd_mbs);
906
907    /* deblk context */
908    deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
909
910    /* deblk bs context */
911    bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
912
913    /* top row motion vector info */
914    enc_pu_t *ps_top_row_pu = ps_proc->ps_top_row_pu + i4_mb_x;
915
916    /* top left mb motion vector */
917    enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu;
918
919    /* left mb motion vector */
920    enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu;
921
922    /* sub mb modes */
923    UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (i4_mb_x << 4);
924
925    /*************************************************************/
926    /* During MV prediction, when top right mb is not available, */
927    /* top left mb info. is used for prediction. Hence the curr  */
928    /* top, which will be top left for the next mb needs to be   */
929    /* preserved before updating it with curr mb info.           */
930    /*************************************************************/
931
932    /* mb type, mb class, csbp */
933    *ps_top_left_syn = *ps_top_syn;
934
935    if (ps_proc->i4_slice_type != ISLICE)
936    {
937        /*****************************************/
938        /* update top left with top info results */
939        /*****************************************/
940        /* mv */
941        *ps_top_left_mb_pu = *ps_top_row_pu;
942    }
943
944    /*************************************************/
945    /* update top and left with curr mb info results */
946    /*************************************************/
947
948    /* mb type */
949    ps_left_syn->u2_mb_type = ps_top_syn->u2_mb_type = u4_mb_type;
950
951    /* mb class */
952    ps_left_syn->u2_is_intra = ps_top_syn->u2_is_intra = u4_is_intra;
953
954    /* csbp */
955    ps_left_syn->u4_csbp = ps_top_syn->u4_csbp = ps_proc->u4_csbp;
956
957    /* distortion */
958    ps_left_syn->i4_mb_distortion = ps_top_syn->i4_mb_distortion = ps_proc->i4_mb_distortion;
959
960    if (u4_is_intra)
961    {
962        /* mb / sub mb modes */
963        if (I16x16 == u4_mb_type)
964        {
965            pu1_top_mb_intra_modes[0] = ps_proc->au1_left_mb_intra_modes[0] = ps_proc->u1_l_i16_mode;
966        }
967        else if (I4x4 == u4_mb_type)
968        {
969            ps_codec->pf_mem_cpy_mul8(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
970            ps_codec->pf_mem_cpy_mul8(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
971        }
972        else if (I8x8 == u4_mb_type)
973        {
974            memcpy(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
975            memcpy(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
976        }
977
978        if ((ps_proc->i4_slice_type == PSLICE) ||(ps_proc->i4_slice_type == BSLICE))
979        {
980            /* mv */
981            *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
982        }
983
984        *ps_proc->pu4_mb_pu_cnt = 1;
985    }
986    else
987    {
988        /* mv */
989        *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
990    }
991
992    /*
993     * Mark that the MB has been coded intra
994     * So that future AIRs can skip it
995     */
996    ps_proc->pu1_is_intra_coded[i4_mb_x + (i4_mb_y * i4_wd_mbs)] = u4_is_intra;
997
998    /**************************************************/
999    /* pack mb header info. for entropy coding        */
1000    /**************************************************/
1001    ih264e_pack_header_data(ps_proc);
1002
1003    /* update previous mb qp */
1004    ps_proc->u4_mb_qp_prev = ps_proc->u4_mb_qp;
1005
1006    /* store qp */
1007    ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
1008
1009    /*
1010     * We need to sync the cache to make sure that the nmv content of proc
1011     * is updated to cache properly
1012     */
1013    DATA_SYNC();
1014
1015    /* Just before finishing the row, enqueue the job in to entropy queue.
1016     * The master thread depending on its convenience shall dequeue it and
1017     * performs entropy.
1018     *
1019     * WARN !! Placing this block post proc map update can cause queuing of
1020     * entropy jobs in out of order.
1021     */
1022    if (i4_mb_x == i4_wd_mbs - 1)
1023    {
1024        /* job structures */
1025        job_t s_job;
1026
1027        /* job class */
1028        s_job.i4_cmd = CMD_ENTROPY;
1029
1030        /* number of mbs to be processed in the current job */
1031        s_job.i2_mb_cnt = ps_codec->s_cfg.i4_wd_mbs;
1032
1033        /* job start index x */
1034        s_job.i2_mb_x = 0;
1035
1036        /* job start index y */
1037        s_job.i2_mb_y = ps_proc->i4_mb_y;
1038
1039        /* proc base idx */
1040        s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS) ? (MAX_PROCESS_CTXT / 2) : 0;
1041
1042        /* queue the job */
1043        error_status |= ih264_list_queue(ps_proc->pv_entropy_jobq, &s_job, 1);
1044
1045        if(ps_proc->i4_mb_y == (i4_ht_mbs - 1))
1046            ih264_list_terminate(ps_codec->pv_entropy_jobq);
1047    }
1048
1049    /* update proc map */
1050    pu1_proc_map[i4_mb_x] = 1;
1051
1052    /**************************************************/
1053    /* update proc ctxt elements for encoding next mb */
1054    /**************************************************/
1055    /* update indices */
1056    i4_mb_x ++;
1057    ps_proc->i4_mb_x = i4_mb_x;
1058
1059    if (ps_proc->i4_mb_x == i4_wd_mbs)
1060    {
1061        ps_proc->i4_mb_y++;
1062        ps_proc->i4_mb_x = 0;
1063    }
1064
1065    /* update slice index */
1066    ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_proc->i4_mb_y * i4_wd_mbs + ps_proc->i4_mb_x];
1067
1068    /* update buffers pointers */
1069    ps_proc->pu1_src_buf_luma += MB_SIZE;
1070    ps_proc->pu1_rec_buf_luma += MB_SIZE;
1071    ps_proc->apu1_ref_buf_luma[0] += MB_SIZE;
1072    ps_proc->apu1_ref_buf_luma[1] += MB_SIZE;
1073
1074    /*
1075     * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1076     * the stride per MB is MB_SIZE
1077     */
1078    ps_proc->pu1_src_buf_chroma += MB_SIZE;
1079    ps_proc->pu1_rec_buf_chroma += MB_SIZE;
1080    ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE;
1081    ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE;
1082
1083
1084
1085    /* Reset cost, distortion params */
1086    ps_proc->i4_mb_cost = INT_MAX;
1087    ps_proc->i4_mb_distortion = SHRT_MAX;
1088
1089    ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt;
1090
1091    ps_proc->pu4_mb_pu_cnt += 1;
1092
1093    /* Update colocated pu */
1094    if (ps_proc->i4_slice_type == BSLICE)
1095        ps_proc->ps_colpu += *(ps_proc->aps_mv_buf[1]->pu4_mb_pu_cnt +  (i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x);
1096
1097    /* deblk ctxts */
1098    if (ps_proc->u4_disable_deblock_level != 1)
1099    {
1100        /* indices */
1101        ps_bs->i4_mb_x = ps_proc->i4_mb_x;
1102        ps_bs->i4_mb_y = ps_proc->i4_mb_y;
1103
1104#ifndef N_MB_ENABLE /* For N MB processing update take place inside deblocking function */
1105        ps_deblk->i4_mb_x ++;
1106
1107        ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1108        /*
1109         * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1110         * the stride per MB is MB_SIZE
1111         */
1112        ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1113#endif
1114    }
1115
1116    return error_status;
1117}
1118
1119/**
1120*******************************************************************************
1121*
1122* @brief   initialize process context.
1123*
1124* @par Description:
1125*  Before dispatching the current job to process thread, the process context
1126*  associated with the job is initialized. Usually every job aims to encode one
1127*  row of mb's. Basing on the row indices provided by the job, the process
1128*  context's buffer ptrs, slice indices and other elements that are necessary
1129*  during core-coding are initialized.
1130*
1131* @param[in] ps_proc
1132*  Pointer to the current process context
1133*
1134* @returns error status
1135*
1136* @remarks none
1137*
1138*******************************************************************************
1139*/
1140IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
1141{
1142    /* codec context */
1143    codec_t *ps_codec = ps_proc->ps_codec;
1144
1145    /* nmb processing context*/
1146    n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
1147
1148    /* indices */
1149    WORD32 i4_mb_x, i4_mb_y;
1150
1151    /* strides */
1152    WORD32 i4_src_strd = ps_proc->i4_src_strd;
1153    WORD32 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd;
1154    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1155
1156    /* quant params */
1157    quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1158
1159    /* deblk ctxt */
1160    deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
1161
1162    /* deblk bs context */
1163    bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
1164
1165    /* Pointer to mv_buffer of current frame */
1166    mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
1167
1168    /* Pointers for color space conversion */
1169    UWORD8 *pu1_y_buf_base, *pu1_u_buf_base, *pu1_v_buf_base;
1170
1171    /* Pad the MB to support non standard sizes */
1172    UWORD32 u4_pad_right_sz = ps_codec->s_cfg.u4_wd - ps_codec->s_cfg.u4_disp_wd;
1173    UWORD32 u4_pad_bottom_sz = ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht;
1174    UWORD16 u2_num_rows = MB_SIZE;
1175    WORD32 convert_uv_only;
1176
1177    /********************************************************************/
1178    /*                            BEGIN INIT                            */
1179    /********************************************************************/
1180
1181    i4_mb_x = ps_proc->i4_mb_x;
1182    i4_mb_y = ps_proc->i4_mb_y;
1183
1184    /* Number of mbs processed in one loop of process function */
1185    ps_proc->i4_nmb_ntrpy = ps_proc->i4_wd_mbs;
1186    ps_proc->u4_nmb_me = ps_proc->i4_wd_mbs;
1187
1188    /* init buffer pointers */
1189    convert_uv_only = 1;
1190    if (u4_pad_bottom_sz || u4_pad_right_sz ||
1191        ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE)
1192    {
1193        if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1194            u2_num_rows = (UWORD16) MB_SIZE - u4_pad_bottom_sz;
1195        ps_proc->pu1_src_buf_luma_base = ps_codec->pu1_y_csc_buf_base;
1196        i4_src_strd = ps_proc->i4_src_strd = ps_codec->s_cfg.u4_max_wd;
1197        ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * MB_SIZE);
1198        convert_uv_only = 0;
1199    }
1200    else
1201    {
1202        i4_src_strd = ps_proc->i4_src_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
1203        ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * MB_SIZE);
1204    }
1205
1206
1207    if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE ||
1208        ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420P ||
1209        ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) ||
1210        u4_pad_bottom_sz || u4_pad_right_sz)
1211    {
1212        if ((ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_UV) ||
1213            (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU))
1214            ps_proc->pu1_src_buf_chroma_base = ps_codec->pu1_uv_csc_buf_base;
1215
1216        ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * BLK8x8SIZE);
1217        i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_codec->s_cfg.u4_max_wd;
1218    }
1219    else
1220    {
1221        i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
1222        ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_src_chroma_strd * (i4_mb_y * BLK8x8SIZE);
1223    }
1224
1225    ps_proc->pu1_rec_buf_luma = ps_proc->pu1_rec_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1226    ps_proc->pu1_rec_buf_chroma = ps_proc->pu1_rec_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1227
1228    /* Tempral back and forward reference buffer */
1229    ps_proc->apu1_ref_buf_luma[0] = ps_proc->apu1_ref_buf_luma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1230    ps_proc->apu1_ref_buf_chroma[0] = ps_proc->apu1_ref_buf_chroma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1231    ps_proc->apu1_ref_buf_luma[1] = ps_proc->apu1_ref_buf_luma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1232    ps_proc->apu1_ref_buf_chroma[1] = ps_proc->apu1_ref_buf_chroma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1233
1234    /*
1235     * Do color space conversion
1236     * NOTE : We assume there that the number of MB's to process will not span multiple rows
1237     */
1238    switch (ps_codec->s_cfg.e_inp_color_fmt)
1239    {
1240        case IV_YUV_420SP_UV:
1241        case IV_YUV_420SP_VU:
1242            /* In case of 420 semi-planar input, copy last few rows to intermediate
1243               buffer as chroma trans functions access one extra byte due to interleaved input.
1244               This data will be padded if required */
1245            if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) || u4_pad_bottom_sz || u4_pad_right_sz)
1246            {
1247                WORD32 num_rows = MB_SIZE;
1248                UWORD8 *pu1_src;
1249                UWORD8 *pu1_dst;
1250                WORD32 i;
1251                pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
1252                          ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1253
1254                pu1_dst = ps_proc->pu1_src_buf_luma;
1255
1256                /* If padding is required, we always copy luma, if padding isn't required we never copy luma. */
1257                if (u4_pad_bottom_sz || u4_pad_right_sz) {
1258                    if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1259                        num_rows = MB_SIZE - u4_pad_bottom_sz;
1260                    for (i = 0; i < num_rows; i++)
1261                    {
1262                        memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd);
1263                        pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
1264                        pu1_dst += ps_proc->i4_src_strd;
1265                    }
1266                }
1267                pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
1268                          ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
1269                pu1_dst = ps_proc->pu1_src_buf_chroma;
1270
1271                /* Last MB row of chroma is copied unconditionally, since trans functions access an extra byte
1272                 * due to interleaved input
1273                 */
1274                if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1275                    num_rows = (ps_codec->s_cfg.u4_disp_ht >> 1) - (ps_proc->i4_mb_y * BLK8x8SIZE);
1276                else
1277                    num_rows = BLK8x8SIZE;
1278                for (i = 0; i < num_rows; i++)
1279                {
1280                    memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd);
1281                    pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
1282                    pu1_dst += ps_proc->i4_src_chroma_strd;
1283                }
1284
1285            }
1286            break;
1287
1288        case IV_YUV_420P :
1289            pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
1290                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1291
1292            pu1_u_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
1293                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
1294
1295            pu1_v_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[2] + (i4_mb_x * BLK8x8SIZE) +
1296                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[2] * (i4_mb_y * BLK8x8SIZE);
1297
1298            ps_codec->pf_ih264e_conv_420p_to_420sp(
1299                            pu1_y_buf_base, pu1_u_buf_base, pu1_v_buf_base,
1300                            ps_proc->pu1_src_buf_luma,
1301                            ps_proc->pu1_src_buf_chroma, u2_num_rows,
1302                            ps_codec->s_cfg.u4_disp_wd,
1303                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[0],
1304                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[1],
1305                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[2],
1306                            ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
1307                            convert_uv_only);
1308            break;
1309
1310        case IV_YUV_422ILE :
1311            pu1_y_buf_base =  (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE * 2)
1312                              + ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1313
1314            ps_codec->pf_ih264e_fmt_conv_422i_to_420sp(
1315                            ps_proc->pu1_src_buf_luma,
1316                            ps_proc->pu1_src_buf_chroma,
1317                            ps_proc->pu1_src_buf_chroma + 1, pu1_y_buf_base,
1318                            ps_codec->s_cfg.u4_disp_wd, u2_num_rows,
1319                            ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
1320                            ps_proc->i4_src_chroma_strd,
1321                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] >> 1);
1322            break;
1323
1324        default:
1325            break;
1326    }
1327
1328    if (u4_pad_right_sz && (ps_proc->i4_mb_x == 0))
1329    {
1330        UWORD32 u4_pad_wd, u4_pad_ht;
1331        u4_pad_wd = (UWORD32)(ps_proc->i4_src_strd - ps_codec->s_cfg.u4_disp_wd);
1332        u4_pad_wd = MIN(u4_pad_right_sz, u4_pad_wd);
1333        u4_pad_ht = MB_SIZE;
1334        if(ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1335            u4_pad_ht = MIN(MB_SIZE, (MB_SIZE - u4_pad_bottom_sz));
1336
1337        ih264_pad_right_luma(
1338                        ps_proc->pu1_src_buf_luma + ps_codec->s_cfg.u4_disp_wd,
1339                        ps_proc->i4_src_strd, u4_pad_ht, u4_pad_wd);
1340
1341        ih264_pad_right_chroma(
1342                        ps_proc->pu1_src_buf_chroma + ps_codec->s_cfg.u4_disp_wd,
1343                        ps_proc->i4_src_chroma_strd, u4_pad_ht / 2, u4_pad_wd);
1344    }
1345
1346    /* pad bottom edge */
1347    if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) && ps_proc->i4_mb_x == 0)
1348    {
1349        ih264_pad_bottom(ps_proc->pu1_src_buf_luma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd,
1350                         ps_proc->i4_src_strd, ps_proc->i4_src_strd, u4_pad_bottom_sz);
1351
1352        ih264_pad_bottom(ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_chroma_strd / 2,
1353                         ps_proc->i4_src_chroma_strd, ps_proc->i4_src_chroma_strd, (u4_pad_bottom_sz / 2));
1354    }
1355
1356
1357    /* packed mb coeff data */
1358    ps_proc->pv_mb_coeff_data = ((UWORD8 *)ps_proc->pv_pic_mb_coeff_data) + i4_mb_y * ps_codec->u4_size_coeff_data;
1359
1360    /* packed mb header data */
1361    ps_proc->pv_mb_header_data = ((UWORD8 *)ps_proc->pv_pic_mb_header_data) + i4_mb_y * ps_codec->u4_size_header_data;
1362
1363    /* slice index */
1364    ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[i4_mb_y * ps_proc->i4_wd_mbs + i4_mb_x];
1365
1366    /*********************************************************************/
1367    /* ih264e_init_quant_params() routine is called at the pic init level*/
1368    /* this would have initialized the qp.                               */
1369    /* TODO_LATER: currently it is assumed that quant params donot change*/
1370    /* across mb's. When they do calculate update ps_qp_params accordingly*/
1371    /*********************************************************************/
1372
1373    /* init mv buffer ptr */
1374    ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
1375                     ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1376
1377    /* Init co-located mv buffer */
1378    ps_proc->ps_colpu = ps_proc->aps_mv_buf[1]->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
1379                        ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1380
1381    if (i4_mb_y == 0)
1382    {
1383        ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu;
1384    }
1385    else
1386    {
1387        ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu + ((i4_mb_y - 1) * ps_proc->i4_wd_mbs *
1388                                    ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1389    }
1390
1391    ps_proc->pu4_mb_pu_cnt = ps_cur_mv_buf->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs);
1392
1393    /* mb type */
1394    ps_proc->u4_mb_type = I16x16;
1395
1396    /* lambda */
1397    ps_proc->u4_lambda = gu1_qp0[ps_qp_params->u1_mb_qp];
1398
1399    /* mb distortion */
1400    ps_proc->i4_mb_distortion = SHRT_MAX;
1401
1402    if (i4_mb_x == 0)
1403    {
1404        ps_proc->s_left_mb_syntax_ele.i4_mb_distortion = 0;
1405
1406        ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion = 0;
1407
1408        ps_proc->s_top_left_mb_syntax_ME.i4_mb_distortion = 0;
1409
1410        if (i4_mb_y == 0)
1411        {
1412            memset(ps_proc->ps_top_row_mb_syntax_ele, 0, (ps_proc->i4_wd_mbs + 1)*sizeof(mb_info_t));
1413        }
1414    }
1415
1416    /* mb cost */
1417    ps_proc->i4_mb_cost = INT_MAX;
1418
1419    /**********************/
1420    /* init deblk context */
1421    /**********************/
1422    ps_deblk->i4_mb_x = ps_proc->i4_mb_x;
1423    /* deblk lags the current mb proc by 1 row */
1424    /* NOTE: Intra prediction has to happen with non deblocked samples used as reference */
1425    /* Hence to deblk MB 0 of row 0, you have wait till MB 0 of row 1 is encoded. */
1426    /* For simplicity, we chose to lag deblking by 1 Row wrt to proc */
1427    ps_deblk->i4_mb_y = ps_proc->i4_mb_y - 1;
1428
1429    /* buffer ptrs */
1430    ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + i4_rec_strd * (ps_deblk->i4_mb_y * MB_SIZE);
1431    ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + i4_rec_strd * (ps_deblk->i4_mb_y * BLK8x8SIZE);
1432
1433    /* init deblk bs context */
1434    /* mb indices */
1435    ps_bs->i4_mb_x = ps_proc->i4_mb_x;
1436    ps_bs->i4_mb_y = ps_proc->i4_mb_y;
1437
1438    /* init n_mb_process  context */
1439    ps_n_mb_ctxt->i4_mb_x = 0;
1440    ps_n_mb_ctxt->i4_mb_y = ps_deblk->i4_mb_y;
1441    ps_n_mb_ctxt->i4_n_mbs = ps_proc->i4_nmb_ntrpy;
1442
1443    return IH264E_SUCCESS;
1444}
1445
1446/**
1447*******************************************************************************
1448*
1449* @brief This function performs luma & chroma padding
1450*
1451* @par Description:
1452*
1453* @param[in] ps_proc
1454*  Process context corresponding to the job
1455*
1456* @param[in] pu1_curr_pic_luma
1457*  Pointer to luma buffer
1458*
1459* @param[in] pu1_curr_pic_chroma
1460*  Pointer to chroma buffer
1461*
1462* @param[in] i4_mb_x
1463*  mb index x
1464*
1465* @param[in] i4_mb_y
1466*  mb index y
1467*
1468*  @param[in] i4_pad_ht
1469*  number of rows to be padded
1470*
1471* @returns  error status
1472*
1473* @remarks none
1474*
1475*******************************************************************************
1476*/
1477IH264E_ERROR_T ih264e_pad_recon_buffer(process_ctxt_t *ps_proc,
1478                                       UWORD8 *pu1_curr_pic_luma,
1479                                       UWORD8 *pu1_curr_pic_chroma,
1480                                       WORD32 i4_mb_x,
1481                                       WORD32 i4_mb_y,
1482                                       WORD32 i4_pad_ht)
1483{
1484    /* codec context */
1485    codec_t *ps_codec = ps_proc->ps_codec;
1486
1487    /* strides */
1488    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1489
1490    if (i4_mb_x == 0)
1491    {
1492        /* padding left luma */
1493        ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, i4_pad_ht, PAD_LEFT);
1494
1495        /* padding left chroma */
1496        ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, i4_pad_ht >> 1, PAD_LEFT);
1497    }
1498    if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1499    {
1500        /* padding right luma */
1501        ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, i4_pad_ht, PAD_RIGHT);
1502
1503        /* padding right chroma */
1504        ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, i4_pad_ht >> 1, PAD_RIGHT);
1505
1506        if (i4_mb_y == ps_proc->i4_ht_mbs - 1)
1507        {
1508            UWORD8 *pu1_rec_luma = pu1_curr_pic_luma + MB_SIZE + PAD_RIGHT + ((i4_pad_ht - 1) * i4_rec_strd);
1509            UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma + MB_SIZE + PAD_RIGHT + (((i4_pad_ht >> 1) - 1) * i4_rec_strd);
1510
1511            /* padding bottom luma */
1512            ps_codec->pf_pad_bottom(pu1_rec_luma, i4_rec_strd, i4_rec_strd, PAD_BOT);
1513
1514            /* padding bottom chroma */
1515            ps_codec->pf_pad_bottom(pu1_rec_chroma, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
1516        }
1517    }
1518
1519    if (i4_mb_y == 0)
1520    {
1521        UWORD8 *pu1_rec_luma = pu1_curr_pic_luma;
1522        UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma;
1523        WORD32 wd = MB_SIZE;
1524
1525        if (i4_mb_x == 0)
1526        {
1527            pu1_rec_luma -= PAD_LEFT;
1528            pu1_rec_chroma -= PAD_LEFT;
1529
1530            wd += PAD_LEFT;
1531        }
1532        if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1533        {
1534            wd += PAD_RIGHT;
1535        }
1536
1537        /* padding top luma */
1538        ps_codec->pf_pad_top(pu1_rec_luma, i4_rec_strd, wd, PAD_TOP);
1539
1540        /* padding top chroma */
1541        ps_codec->pf_pad_top(pu1_rec_chroma, i4_rec_strd, wd, (PAD_TOP >> 1));
1542    }
1543
1544    return IH264E_SUCCESS;
1545}
1546
1547
1548
1549
1550/**
1551*******************************************************************************
1552*
1553* @brief This function performs deblocking, padding and halfpel generation for
1554*  'n' MBs
1555*
1556* @par Description:
1557*
1558* @param[in] ps_proc
1559*  Process context corresponding to the job
1560*
1561* @param[in] pu1_curr_pic_luma
1562* Current MB being processed(Luma)
1563*
1564* @param[in] pu1_curr_pic_chroma
1565* Current MB being processed(Chroma)
1566*
1567* @param[in] i4_mb_x
1568* Column value of current MB processed
1569*
1570* @param[in] i4_mb_y
* Current row processed
1572*
1573* @returns  error status
1574*
1575* @remarks none
1576*
1577*******************************************************************************
1578*/
1579IH264E_ERROR_T ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t *ps_proc,
1580                                                     UWORD8 *pu1_curr_pic_luma,
1581                                                     UWORD8 *pu1_curr_pic_chroma,
1582                                                     WORD32 i4_mb_x,
1583                                                     WORD32 i4_mb_y)
1584{
1585    /* codec context */
1586    codec_t *ps_codec = ps_proc->ps_codec;
1587
1588    /* n_mb processing context */
1589    n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
1590
1591    /* deblk context */
1592    deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
1593
1594    /* strides */
1595    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1596
1597    /* loop variables */
1598    WORD32 row, i, j, col;
1599
1600    /* Padding Width */
1601    UWORD32 u4_pad_wd;
1602
1603    /* deblk_map of the row being deblocked */
1604    UWORD8 *pu1_deblk_map = ps_proc->pu1_deblk_map + ps_deblk->i4_mb_y * ps_proc->i4_wd_mbs;
1605
1606    /* deblk_map_previous row */
1607    UWORD8 *pu1_deblk_map_prev_row = pu1_deblk_map - ps_proc->i4_wd_mbs;
1608
1609    WORD32 u4_pad_top = 0;
1610
1611    WORD32 u4_deblk_prev_row = 0;
1612
1613    /* Number of mbs to be processed */
1614    WORD32 i4_n_mbs = ps_n_mb_ctxt->i4_n_mbs;
1615
1616    /* Number of mbs  actually processed
1617     * (at the end of a row, when remaining number of MBs are less than i4_n_mbs) */
1618    WORD32 i4_n_mb_process_count = 0;
1619
1620    UWORD8 *pu1_pad_bottom_src = NULL;
1621
1622    UWORD8 *pu1_pad_src_luma = NULL;
1623    UWORD8 *pu1_pad_src_chroma = NULL;
1624
1625    if (ps_proc->u4_disable_deblock_level == 1)
1626    {
1627        /* If left most MB is processed, then pad left */
1628        if (i4_mb_x == 0)
1629        {
1630            /* padding left luma */
1631            ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1632
1633            /* padding left chroma */
1634            ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
1635        }
1636        /*last col*/
1637        if (i4_mb_x == (ps_proc->i4_wd_mbs - 1))
1638        {
1639            /* padding right luma */
1640            ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1641
1642            /* padding right chroma */
1643            ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
1644        }
1645    }
1646
1647    if ((i4_mb_y > 0) || (i4_mb_y == (ps_proc->i4_ht_mbs - 1)))
1648    {
1649        /* if number of mb's to be processed are less than 'N', go back.
1650         * exception to the above clause is end of row */
1651        if ( ((i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1)) < i4_n_mbs) && (i4_mb_x < (ps_proc->i4_wd_mbs - 1)) )
1652        {
1653            return IH264E_SUCCESS;
1654        }
1655        else
1656        {
1657            i4_n_mb_process_count = MIN(i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1), i4_n_mbs);
1658
1659            /* performing deblocking for required number of MBs */
1660            if ((i4_mb_y > 0) && (ps_proc->u4_disable_deblock_level != 1))
1661            {
1662                u4_deblk_prev_row = 1;
1663
1664                /* checking whether the top rows are deblocked */
1665                for (col = 0; col < i4_n_mb_process_count; col++)
1666                {
1667                    u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + col];
1668                }
1669
1670                /* checking whether the top right MB is deblocked */
1671                if ((ps_deblk->i4_mb_x + i4_n_mb_process_count) != ps_proc->i4_wd_mbs)
1672                {
1673                    u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + i4_n_mb_process_count];
1674                }
1675
1676                /* Top or Top right MBs not deblocked */
1677                if ((u4_deblk_prev_row != 1) && (i4_mb_y > 0))
1678                {
1679                    return IH264E_SUCCESS;
1680                }
1681
1682                for (row = 0; row < i4_n_mb_process_count; row++)
1683                {
1684                    ih264e_deblock_mb(ps_proc, ps_deblk);
1685
1686                    pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1687
1688                    if (ps_deblk->i4_mb_y > 0)
1689                    {
1690                        if (ps_deblk->i4_mb_x == 0)/* If left most MB is processed, then pad left*/
1691                        {
1692                            /* padding left luma */
1693                            ps_codec->pf_pad_left_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE, i4_rec_strd, MB_SIZE, PAD_LEFT);
1694
1695                            /* padding left chroma */
1696                            ps_codec->pf_pad_left_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
1697                        }
1698
1699                        if (ps_deblk->i4_mb_x == (ps_proc->i4_wd_mbs - 1))/*last column*/
1700                        {
1701                            /* padding right luma */
1702                            ps_codec->pf_pad_right_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1703
1704                            /* padding right chroma */
1705                            ps_codec->pf_pad_right_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
1706                        }
1707                    }
1708                    ps_deblk->i4_mb_x++;
1709
1710                    ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1711                    ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1712
1713                }
1714            }
1715            else if(i4_mb_y > 0)
1716            {
1717                ps_deblk->i4_mb_x += i4_n_mb_process_count;
1718
1719                ps_deblk->pu1_cur_pic_luma += i4_n_mb_process_count * MB_SIZE;
1720                ps_deblk->pu1_cur_pic_chroma += i4_n_mb_process_count * MB_SIZE;
1721            }
1722
1723            if (i4_mb_y == 2)
1724            {
1725                u4_pad_wd = i4_n_mb_process_count * MB_SIZE;
1726                u4_pad_top = ps_n_mb_ctxt->i4_mb_x * MB_SIZE;
1727
1728                if (ps_n_mb_ctxt->i4_mb_x == 0)
1729                {
1730                    u4_pad_wd += PAD_LEFT;
1731                    u4_pad_top = -PAD_LEFT;
1732                }
1733
1734                if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1735                {
1736                    u4_pad_wd += PAD_RIGHT;
1737                }
1738
1739                /* padding top luma */
1740                ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_luma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, PAD_TOP);
1741
1742                /* padding top chroma */
1743                ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_chroma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, (PAD_TOP >> 1));
1744            }
1745
1746            ps_n_mb_ctxt->i4_mb_x += i4_n_mb_process_count;
1747
1748            if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1749            {
1750                if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1751                {
1752                    /* Bottom Padding is done in one stretch for the entire width */
1753                    if (ps_proc->u4_disable_deblock_level != 1)
1754                    {
1755                        ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * MB_SIZE;
1756
1757                        ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * BLK8x8SIZE;
1758
1759                        ps_n_mb_ctxt->i4_mb_x = 0;
1760                        ps_n_mb_ctxt->i4_mb_y = ps_proc->i4_mb_y;
1761                        ps_deblk->i4_mb_x = 0;
1762                        ps_deblk->i4_mb_y = ps_proc->i4_mb_y;
1763
1764                        /* update pic qp map (as update_proc_ctxt is still not called for the last MB) */
1765                        ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
1766
1767                        i4_n_mb_process_count = (ps_proc->i4_wd_mbs) % i4_n_mbs;
1768
1769                        j = (ps_proc->i4_wd_mbs) / i4_n_mbs;
1770
1771                        for (i = 0; i < j; i++)
1772                        {
1773                            for (col = 0; col < i4_n_mbs; col++)
1774                            {
1775                                ih264e_deblock_mb(ps_proc, ps_deblk);
1776
1777                                pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1778
1779                                ps_deblk->i4_mb_x++;
1780                                ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1781                                ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1782                                ps_n_mb_ctxt->i4_mb_x++;
1783                            }
1784                        }
1785
1786                        for (col = 0; col < i4_n_mb_process_count; col++)
1787                        {
1788                            ih264e_deblock_mb(ps_proc, ps_deblk);
1789
1790                            pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1791
1792                            ps_deblk->i4_mb_x++;
1793                            ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1794                            ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1795                            ps_n_mb_ctxt->i4_mb_x++;
1796                        }
1797
1798                        pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd;
1799
1800                        pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd;
1801
1802                        /* padding left luma */
1803                        ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1804
1805                        /* padding left chroma */
1806                        ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
1807
1808                        pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
1809                        pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
1810
1811                        /* padding left luma */
1812                        ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1813
1814                        /* padding left chroma */
1815                        ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
1816
1817                        pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
1818
1819                        pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
1820
1821                        /* padding right luma */
1822                        ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1823
1824                        /* padding right chroma */
1825                        ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
1826
1827                        pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
1828                        pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
1829
1830                        /* padding right luma */
1831                        ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1832
1833                        /* padding right chroma */
1834                        ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
1835
1836                    }
1837
1838                    /* In case height is less than 2 MBs pad top */
1839                    if (ps_proc->i4_ht_mbs <= 2)
1840                    {
1841                        UWORD8 *pu1_pad_top_src;
1842                        /* padding top luma */
1843                        pu1_pad_top_src = ps_proc->pu1_rec_buf_luma_base - PAD_LEFT;
1844                        ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, PAD_TOP);
1845
1846                        /* padding top chroma */
1847                        pu1_pad_top_src = ps_proc->pu1_rec_buf_chroma_base - PAD_LEFT;
1848                        ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, (PAD_TOP >> 1));
1849                    }
1850
1851                    /* padding bottom luma */
1852                    pu1_pad_bottom_src = ps_proc->pu1_rec_buf_luma_base + ps_proc->i4_ht_mbs * MB_SIZE * i4_rec_strd - PAD_LEFT;
1853                    ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, PAD_BOT);
1854
1855                    /* padding bottom chroma */
1856                    pu1_pad_bottom_src = ps_proc->pu1_rec_buf_chroma_base + ps_proc->i4_ht_mbs * (MB_SIZE >> 1) * i4_rec_strd - PAD_LEFT;
1857                    ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
1858                }
1859            }
1860        }
1861    }
1862
1863    return IH264E_SUCCESS;
1864}
1865
1866
1867/**
1868*******************************************************************************
1869*
1870* @brief This function performs luma & chroma core coding for a set of mb's.
1871*
1872* @par Description:
1873*  The mb to be coded is taken and is evaluated over a predefined set of modes
1874*  (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least cost
1875*  is selected and using intra/inter prediction filters, prediction is carried out.
1876*  The deviation between src and pred signal constitutes error signal. This error
1877*  signal is transformed (hierarchical transform if necessary) and quantized. The
1878*  quantized residue is packed in to entropy buffer for entropy coding. This is
1879*  repeated for all the mb's enlisted under the job.
1880*
1881* @param[in] ps_proc
1882*  Process context corresponding to the job
1883*
1884* @returns  error status
1885*
1886* @remarks none
1887*
1888*******************************************************************************
1889*/
1890WORD32 ih264e_process(process_ctxt_t *ps_proc)
1891{
1892    /* error status */
1893    WORD32 error_status = IH264_SUCCESS;
1894
1895    /* codec context */
1896    codec_t *ps_codec = ps_proc->ps_codec;
1897
1898    /* cbp luma, chroma */
1899    UWORD32 u4_cbp_l, u4_cbp_c;
1900
1901    /* width in mbs */
1902    WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
1903
1904    /* loop var */
1905    WORD32  i4_mb_idx, i4_mb_cnt = ps_proc->i4_mb_cnt;
1906
1907    /* valid modes */
1908    UWORD32 u4_valid_modes = 0;
1909
1910    /* gate threshold */
1911    WORD32 i4_gate_threshold = 0;
1912
1913    /* is intra */
1914    WORD32 luma_idx, chroma_idx, is_intra;
1915
1916    /* temp variables */
1917    WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
1918
1919    /*
1920     * list of modes for evaluation
1921     * -------------------------------------------------------------------------
1922     * Note on enabling I4x4 and I16x16
1923     * At very low QP's the hadamard transform in I16x16 will push up the maximum
1924     * coeff value very high. CAVLC may not be able to represent the value and
1925     * hence the stream may not be decodable in some clips.
1926     * Hence at low QPs, we will enable I4x4 and disable I16x16 irrespective of preset.
1927     */
1928    if (ps_proc->i4_slice_type == ISLICE)
1929    {
1930        if (ps_proc->u4_frame_qp > 10)
1931        {
1932            /* enable intra 16x16 */
1933            u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
1934
1935            /* enable intra 8x8 */
1936            u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_8x8 ? (1 << I8x8) : 0;
1937        }
1938
1939        /* enable intra 4x4 */
1940        u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
1941        u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
1942
1943    }
1944    else if (ps_proc->i4_slice_type == PSLICE)
1945    {
1946        if (ps_proc->u4_frame_qp > 10)
1947        {
1948            /* enable intra 16x16 */
1949            u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
1950        }
1951
1952        /* enable intra 4x4 */
1953        if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
1954        {
1955            u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
1956        }
1957        u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
1958
1959        /* enable inter P16x16 */
1960        u4_valid_modes |= (1 << P16x16);
1961    }
1962    else if (ps_proc->i4_slice_type == BSLICE)
1963    {
1964        if (ps_proc->u4_frame_qp > 10)
1965        {
1966            /* enable intra 16x16 */
1967            u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
1968        }
1969
1970        /* enable intra 4x4 */
1971        if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
1972        {
1973            u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
1974        }
1975        u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
1976
1977        /* enable inter B16x16 */
1978        u4_valid_modes |= (1 << B16x16);
1979    }
1980
1981
1982    /* init entropy */
1983    ps_proc->s_entropy.i4_mb_x = ps_proc->i4_mb_x;
1984    ps_proc->s_entropy.i4_mb_y = ps_proc->i4_mb_y;
1985    ps_proc->s_entropy.i4_mb_cnt = MIN(ps_proc->i4_nmb_ntrpy, i4_wd_mbs - ps_proc->i4_mb_x);
1986
1987    /* compute recon when :
1988     *   1. current frame is to be used as a reference
1989     *   2. dump recon for bit stream sanity check
1990     */
1991    ps_proc->u4_compute_recon = ps_codec->u4_is_curr_frm_ref ||
1992                                ps_codec->s_cfg.u4_enable_recon;
1993
1994    /* Encode 'n' macroblocks,
1995     * 'n' being the number of mbs dictated by current proc ctxt */
1996    for (i4_mb_idx = 0; i4_mb_idx < i4_mb_cnt; i4_mb_idx ++)
1997    {
1998        /* since we have not yet found sad, we have not yet got min sad */
1999        /* we need to initialize these variables for each MB */
2000        /* TODO how to get the min sad into the codec */
2001        ps_proc->u4_min_sad = ps_codec->s_cfg.i4_min_sad;
2002        ps_proc->u4_min_sad_reached = 0;
2003
2004        /* mb analysis */
2005        {
2006            /* temp var */
2007            WORD32 i4_mb_id = ps_proc->i4_mb_x + ps_proc->i4_mb_y * i4_wd_mbs;
2008
2009            /* force intra refresh ? */
2010            WORD32 i4_air_enable_inter = (ps_codec->s_cfg.e_air_mode == IVE_AIR_MODE_NONE) ||
2011                            (ps_codec->pu2_intr_rfrsh_map[i4_mb_id] != ps_codec->i4_air_pic_cnt);
2012
2013            /* evaluate inter 16x16 modes */
2014            if ((u4_valid_modes & (1 << P16x16)) || (u4_valid_modes & (1 << B16x16)))
2015            {
2016                /* compute nmb me */
2017                if (ps_proc->i4_mb_x % ps_proc->u4_nmb_me == 0)
2018                {
2019                    ih264e_compute_me_nmb(ps_proc, MIN((WORD32)ps_proc->u4_nmb_me,
2020                                                       i4_wd_mbs - ps_proc->i4_mb_x));
2021                }
2022
2023                /* set pointers to ME data appropriately for other modules to use */
2024                {
2025                    UWORD32 u4_mb_index = ps_proc->i4_mb_x % ps_proc->u4_nmb_me ;
2026
2027                    /* get the min sad condition for current mb */
2028                    ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
2029                    ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
2030
2031                    ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_skip_mv[0]);
2032                    ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_mb_index].s_ngbr_avbl);
2033                    ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_pred_mv[0]);
2034
2035                    ps_proc->i4_mb_distortion = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_distortion;
2036                    ps_proc->i4_mb_cost = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_cost;
2037                    ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
2038                    ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
2039                    ps_proc->u4_mb_type = ps_proc->ps_nmb_info[u4_mb_index].u4_mb_type;
2040
2041                    /* get the best sub pel buffer */
2042                    ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_mb_index].pu1_best_sub_pel_buf;
2043                    ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_mb_index].u4_bst_spel_buf_strd;
2044                }
2045                ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
2046            }
2047            else
2048            {
2049                /* Derive neighbor availability for the current macroblock */
2050                ps_proc->ps_ngbr_avbl = &ps_proc->s_ngbr_avbl;
2051
2052                ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
2053            }
2054
2055            /*
2056             * If air says intra, we need to force the following code path to evaluate intra
2057             * The easy way is just to say that the inter cost is too much
2058             */
2059            if (!i4_air_enable_inter)
2060            {
2061                ps_proc->u4_min_sad_reached = 0;
2062                ps_proc->i4_mb_cost = INT_MAX;
2063                ps_proc->i4_mb_distortion = INT_MAX;
2064            }
2065            else if (ps_proc->u4_mb_type == PSKIP)
2066            {
2067                goto UPDATE_MB_INFO;
2068            }
2069
2070            /* wait until the proc of [top + 1] mb is computed.
2071             * We wait till the proc dependencies are satisfied */
2072             if(ps_proc->i4_mb_y > 0)
2073             {
2074                /* proc map */
2075                UWORD8  *pu1_proc_map_top;
2076
2077                pu1_proc_map_top = ps_proc->pu1_proc_map + ((ps_proc->i4_mb_y - 1) * i4_wd_mbs);
2078
2079                while (1)
2080                {
2081                    volatile UWORD8 *pu1_buf;
2082                    WORD32 idx = i4_mb_idx + 1;
2083
2084                    idx = MIN(idx, ((WORD32)ps_codec->s_cfg.i4_wd_mbs - 1));
2085                    pu1_buf =  pu1_proc_map_top + idx;
2086                    if(*pu1_buf)
2087                        break;
2088                    ithread_yield();
2089                }
2090            }
2091
2092            /* If we already have the minimum sad, there is no point in searching for sad again */
2093            if (ps_proc->u4_min_sad_reached == 0)
2094            {
2095                /* intra gating in inter slices */
2096                /* No need of gating if we want to force intra, we need to find the threshold only if inter is enabled by AIR*/
2097                if (i4_air_enable_inter && ps_proc->i4_slice_type != ISLICE && ps_codec->u4_inter_gate)
2098                {
2099                    /* distortion of neighboring blocks */
2100                    WORD32 i4_distortion[4];
2101
2102                    i4_distortion[0] = ps_proc->s_left_mb_syntax_ele.i4_mb_distortion;
2103
2104                    i4_distortion[1] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x].i4_mb_distortion;
2105
2106                    i4_distortion[2] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x + 1].i4_mb_distortion;
2107
2108                    i4_distortion[3] = ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion;
2109
2110                    i4_gate_threshold = (i4_distortion[0] + i4_distortion[1] + i4_distortion[2] + i4_distortion[3]) >> 2;
2111
2112                }
2113
2114
2115                /* If we are going to force intra we need to evaluate intra irrespective of gating */
2116                if ( (!i4_air_enable_inter) || ((i4_gate_threshold + 16 *((WORD32) ps_proc->u4_lambda)) < ps_proc->i4_mb_distortion))
2117                {
2118                    /* evaluate intra 4x4 modes */
2119                    if (u4_valid_modes & (1 << I4x4))
2120                    {
2121                        if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
2122                        {
2123                            ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(ps_proc);
2124                        }
2125                        else
2126                        {
2127                            ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(ps_proc);
2128                        }
2129                    }
2130
2131                    /* evaluate intra 16x16 modes */
2132                    if (u4_valid_modes & (1 << I16x16))
2133                    {
2134                        ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(ps_proc);
2135                    }
2136
2137                    /* evaluate intra 8x8 modes */
2138                    if (u4_valid_modes & (1 << I8x8))
2139                    {
2140                        ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
2141                    }
2142
2143                }
2144        }
2145     }
2146
2147        /* is intra */
2148        if (ps_proc->u4_mb_type == I4x4 || ps_proc->u4_mb_type == I16x16 || ps_proc->u4_mb_type == I8x8)
2149        {
2150            luma_idx = ps_proc->u4_mb_type;
2151            chroma_idx = 0;
2152            is_intra = 1;
2153
2154            /* evaluate chroma blocks for intra */
2155            ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
2156        }
2157        else
2158        {
2159            luma_idx = 3;
2160            chroma_idx = 1;
2161            is_intra = 0;
2162        }
2163        ps_proc->u4_is_intra = is_intra;
2164        ps_proc->ps_pu->b1_intra_flag = is_intra;
2165
2166        /* redo MV pred of neighbors in the case intra mb */
2167        /* TODO : currently called unconditionally, needs to be called only in the case of intra
2168         * to modify neighbors */
2169        if (ps_proc->i4_slice_type != ISLICE)
2170        {
2171            ih264e_mv_pred(ps_proc, ps_proc->i4_slice_type);
2172        }
2173
2174        /* Perform luma mb core coding */
2175        u4_cbp_l = (ps_codec->luma_energy_compaction)[luma_idx](ps_proc);
2176
2177        /* Perform luma mb core coding */
2178        u4_cbp_c = (ps_codec->chroma_energy_compaction)[chroma_idx](ps_proc);
2179
2180        /* coded block pattern */
2181        ps_proc->u4_cbp = (u4_cbp_c << 4) | u4_cbp_l;
2182
2183        if (!ps_proc->u4_is_intra)
2184        {
2185            if (ps_proc->i4_slice_type == BSLICE)
2186            {
2187                if (ih264e_find_bskip_params(ps_proc, PRED_L0))
2188                {
2189                    ps_proc->u4_mb_type = (ps_proc->u4_cbp) ? BDIRECT : BSKIP;
2190                }
2191            }
2192            else if(!ps_proc->u4_cbp)
2193            {
2194                if (ih264e_find_pskip_params(ps_proc, PRED_L0))
2195                {
2196                    ps_proc->u4_mb_type = PSKIP;
2197                }
2198            }
2199        }
2200
2201UPDATE_MB_INFO:
2202
2203        /* Update mb sad, mb qp and intra mb cost. Will be used by rate control */
2204        ih264e_update_rc_mb_info(&ps_proc->s_frame_info, ps_proc);
2205
2206        /**********************************************************************/
2207        /* if disable deblock level is '0' this implies enable deblocking for */
2208        /* all edges of all macroblocks with out any restrictions             */
2209        /*                                                                    */
2210        /* if disable deblock level is '1' this implies disable deblocking for*/
2211        /* all edges of all macroblocks with out any restrictions             */
2212        /*                                                                    */
2213        /* if disable deblock level is '2' this implies enable deblocking for */
2214        /* all edges of all macroblocks except edges overlapping with slice   */
2215        /* boundaries. This option is not currently supported by the encoder  */
2216        /* hence the slice map should be of no significance to perform debloc */
2217        /* king                                                               */
2218        /**********************************************************************/
2219
2220        if (ps_proc->u4_compute_recon)
2221        {
2222            /* deblk context */
2223            /* src pointers */
2224            UWORD8 *pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma;
2225            UWORD8 *pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma;
2226
2227            /* src indices */
2228            UWORD32 i4_mb_x = ps_proc->i4_mb_x;
2229            UWORD32 i4_mb_y = ps_proc->i4_mb_y;
2230
2231            /* compute blocking strength */
2232            if (ps_proc->u4_disable_deblock_level != 1)
2233            {
2234                ih264e_compute_bs(ps_proc);
2235            }
2236
2237            /* nmb deblocking and hpel and padding */
2238            ih264e_dblk_pad_hpel_processing_n_mbs(ps_proc, pu1_cur_pic_luma,
2239                                                  pu1_cur_pic_chroma, i4_mb_x,
2240                                                  i4_mb_y);
2241        }
2242
2243        /* update the context after for coding next mb */
2244        error_status |= ih264e_update_proc_ctxt(ps_proc);
2245
2246        /* Once the last row is processed, mark the buffer status appropriately */
2247        if (ps_proc->i4_ht_mbs == ps_proc->i4_mb_y)
2248        {
2249            /* Pointer to current picture buffer structure */
2250            pic_buf_t *ps_cur_pic = ps_proc->ps_cur_pic;
2251
2252            /* Pointer to current picture's mv buffer structure */
2253            mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
2254
2255            /**********************************************************************/
2256            /* if disable deblock level is '0' this implies enable deblocking for */
2257            /* all edges of all macroblocks with out any restrictions             */
2258            /*                                                                    */
2259            /* if disable deblock level is '1' this implies disable deblocking for*/
2260            /* all edges of all macroblocks with out any restrictions             */
2261            /*                                                                    */
2262            /* if disable deblock level is '2' this implies enable deblocking for */
2263            /* all edges of all macroblocks except edges overlapping with slice   */
2264            /* boundaries. This option is not currently supported by the encoder  */
2265            /* hence the slice map should be of no significance to perform debloc */
2266            /* king                                                               */
2267            /**********************************************************************/
2268            error_status |= ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, ps_cur_mv_buf->i4_buf_id , BUF_MGR_CODEC);
2269
2270            error_status |= ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id , BUF_MGR_CODEC);
2271
2272            if (ps_codec->s_cfg.u4_enable_recon)
2273            {
2274                /* pic cnt */
2275                ps_codec->as_rec_buf[ctxt_sel].i4_pic_cnt = ps_proc->i4_pic_cnt;
2276
2277                /* rec buffers */
2278                ps_codec->as_rec_buf[ctxt_sel].s_pic_buf  = *ps_proc->ps_cur_pic;
2279
2280                /* is last? */
2281                ps_codec->as_rec_buf[ctxt_sel].u4_is_last = ps_proc->s_entropy.u4_is_last;
2282
2283                /* frame time stamp */
2284                ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_high = ps_proc->s_entropy.u4_timestamp_high;
2285                ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_low = ps_proc->s_entropy.u4_timestamp_low;
2286            }
2287
2288        }
2289    }
2290
2291    DEBUG_HISTOGRAM_DUMP(ps_codec->s_cfg.i4_ht_mbs == ps_proc->i4_mb_y);
2292
2293    return error_status;
2294}
2295
2296/**
2297*******************************************************************************
2298*
2299* @brief
2300*  Function to update rc context after encoding
2301*
2302* @par   Description
2303*  This function updates the rate control context after the frame is encoded.
2304*  Number of bits consumed by the current frame, frame distortion, frame cost,
2305*  number of intra/inter mb's, ... are passed on to rate control context for
2306*  updating the rc model.
2307*
2308* @param[in] ps_codec
2309*  Handle to codec context
2310*
2311* @param[in] ctxt_sel
2312*  frame context selector
2313*
2314* @param[in] pic_cnt
2315*  pic count
2316*
2317* @returns i4_stuffing_byte
2318*  number of stuffing bytes (if necessary)
2319*
2320* @remarks
2321*
2322*******************************************************************************
2323*/
2324WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 i4_is_first_frm)
2325{
2326    /* proc set base idx */
2327    WORD32 i4_proc_ctxt_sel_base = ctxt_sel ? (MAX_PROCESS_CTXT / 2) : 0;
2328
2329    /* proc ctxt */
2330    process_ctxt_t *ps_proc = &ps_codec->as_process[i4_proc_ctxt_sel_base];
2331
2332    /* frame qp */
2333    UWORD8 u1_frame_qp = ps_codec->u4_frame_qp;
2334
2335    /* cbr rc return status */
2336    WORD32 i4_stuffing_byte = 0;
2337
2338    /* current frame stats */
2339    frame_info_t s_frame_info;
2340    picture_type_e rc_pic_type;
2341
2342    /* temp var */
2343    WORD32 i, j;
2344
2345    /********************************************************************/
2346    /*                            BEGIN INIT                            */
2347    /********************************************************************/
2348
2349    /* init frame info */
2350    irc_init_frame_info(&s_frame_info);
2351
2352    /* get frame info */
2353    for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++)
2354    {
2355        /*****************************************************************/
2356        /* One frame can be encoded by max of u4_num_cores threads       */
2357        /* Accumulating the num mbs, sad, qp and intra_mb_cost from      */
2358        /* u4_num_cores threads                                          */
2359        /*****************************************************************/
2360        for (j = 0; j< MAX_MB_TYPE; j++)
2361        {
2362            s_frame_info.num_mbs[j] += ps_proc[i].s_frame_info.num_mbs[j];
2363
2364            s_frame_info.tot_mb_sad[j] += ps_proc[i].s_frame_info.tot_mb_sad[j];
2365
2366            s_frame_info.qp_sum[j] += ps_proc[i].s_frame_info.qp_sum[j];
2367        }
2368
2369        s_frame_info.intra_mb_cost_sum += ps_proc[i].s_frame_info.intra_mb_cost_sum;
2370
2371        s_frame_info.activity_sum += ps_proc[i].s_frame_info.activity_sum;
2372
2373        /*****************************************************************/
2374        /* gather number of residue and header bits consumed by the frame*/
2375        /*****************************************************************/
2376        ih264e_update_rc_bits_info(&s_frame_info, &ps_proc[i].s_entropy);
2377    }
2378
2379    /* get pic type */
2380    switch (ps_codec->pic_type)
2381    {
2382        case PIC_I:
2383        case PIC_IDR:
2384            rc_pic_type = I_PIC;
2385            break;
2386        case PIC_P:
2387            rc_pic_type = P_PIC;
2388            break;
2389        case PIC_B:
2390            rc_pic_type = B_PIC;
2391            break;
2392        default:
2393            assert(0);
2394            break;
2395    }
2396
2397    /* update rc lib with current frame stats */
2398    i4_stuffing_byte =  ih264e_rc_post_enc(ps_codec->s_rate_control.pps_rate_control_api,
2399                                          &(s_frame_info),
2400                                          ps_codec->s_rate_control.pps_pd_frm_rate,
2401                                          ps_codec->s_rate_control.pps_time_stamp,
2402                                          ps_codec->s_rate_control.pps_frame_time,
2403                                          (ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs),
2404                                          &rc_pic_type,
2405                                          i4_is_first_frm,
2406                                          &ps_codec->s_rate_control.post_encode_skip[ctxt_sel],
2407                                          u1_frame_qp,
2408                                          &ps_codec->s_rate_control.num_intra_in_prev_frame,
2409                                          &ps_codec->s_rate_control.i4_avg_activity);
2410    return i4_stuffing_byte;
2411}
2412
2413/**
2414*******************************************************************************
2415*
2416* @brief
2417*  entry point of a spawned encoder thread
2418*
2419* @par Description:
2420*  The encoder thread dequeues a proc/entropy job from the encoder queue and
2421*  calls necessary routines.
2422*
2423* @param[in] pv_proc
2424*  Process context corresponding to the thread
2425*
2426* @returns  error status
2427*
2428* @remarks
2429*
2430*******************************************************************************
2431*/
2432WORD32 ih264e_process_thread(void *pv_proc)
2433{
2434    /* error status */
2435    IH264_ERROR_T ret = IH264_SUCCESS;
2436    WORD32 error_status = IH264_SUCCESS;
2437
2438    /* proc ctxt */
2439    process_ctxt_t *ps_proc = pv_proc;
2440
2441    /* codec ctxt */
2442    codec_t *ps_codec = ps_proc->ps_codec;
2443
2444    /* structure to represent a processing job entry */
2445    job_t s_job;
2446
2447    /* blocking call : entropy dequeue is non-blocking till all
2448     * the proc jobs are processed */
2449    WORD32 is_blocking = 0;
2450
2451    /* set affinity */
2452    ithread_set_affinity(ps_proc->i4_id);
2453
2454    while(1)
2455    {
2456        /* dequeue a job from the entropy queue */
2457        {
2458            int error = ithread_mutex_lock(ps_codec->pv_entropy_mutex);
2459
2460            /* codec context selector */
2461            WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
2462
2463            volatile UWORD32 *pu4_buf = &ps_codec->au4_entropy_thread_active[ctxt_sel];
2464
2465            /* have the lock */
2466            if (error == 0)
2467            {
2468                if (*pu4_buf == 0)
2469                {
2470                    /* no entropy threads are active, try dequeuing a job from the entropy queue */
2471                    ret = ih264_list_dequeue(ps_proc->pv_entropy_jobq, &s_job, is_blocking);
2472                    if (IH264_SUCCESS == ret)
2473                    {
2474                        *pu4_buf = 1;
2475                        ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2476                        goto WORKER;
2477                    }
2478                    else if(is_blocking)
2479                    {
2480                        ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2481                        break;
2482                    }
2483                }
2484                ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2485            }
2486        }
2487
2488        /* dequeue a job from the process queue */
2489        ret = ih264_list_dequeue(ps_proc->pv_proc_jobq, &s_job, 1);
2490        if (IH264_SUCCESS != ret)
2491        {
2492            if(ps_proc->i4_id)
2493                break;
2494            else
2495            {
2496                is_blocking = 1;
2497                continue;
2498            }
2499        }
2500
2501WORKER:
2502        /* choose appropriate proc context based on proc_base_idx */
2503        ps_proc = &ps_codec->as_process[ps_proc->i4_id + s_job.i2_proc_base_idx];
2504
2505        switch (s_job.i4_cmd)
2506        {
2507            case CMD_PROCESS:
2508                ps_proc->i4_mb_cnt = s_job.i2_mb_cnt;
2509                ps_proc->i4_mb_x = s_job.i2_mb_x;
2510                ps_proc->i4_mb_y = s_job.i2_mb_y;
2511
2512                /* init process context */
2513                ih264e_init_proc_ctxt(ps_proc);
2514
2515                /* core code all mbs enlisted under the current job */
2516                error_status |= ih264e_process(ps_proc);
2517                break;
2518
2519            case CMD_ENTROPY:
2520                ps_proc->s_entropy.i4_mb_x = s_job.i2_mb_x;
2521                ps_proc->s_entropy.i4_mb_y = s_job.i2_mb_y;
2522                ps_proc->s_entropy.i4_mb_cnt = s_job.i2_mb_cnt;
2523
2524                /* init entropy */
2525                ih264e_init_entropy_ctxt(ps_proc);
2526
2527                /* entropy code all mbs enlisted under the current job */
2528                error_status |= ih264e_entropy(ps_proc);
2529                break;
2530
2531            default:
2532                error_status |= IH264_FAIL;
2533                break;
2534        }
2535    }
2536
2537    /* send error code */
2538    ps_proc->i4_error_code = error_status;
2539    return ret;
2540}
2541