ih264e_process.c revision cb6a43532c0b863d46c82feb25b10dc8732a34f9
1/******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*/
20
21/**
22*******************************************************************************
23* @file
24*  ih264e_process.c
25*
26* @brief
27*  Contains functions for codec thread
28*
29* @author
30*  Harish
31*
32* @par List of Functions:
33* - ih264e_generate_sps_pps()
34* - ih264e_init_entropy_ctxt()
35* - ih264e_entropy()
36* - ih264e_pack_header_data()
37* - ih264e_update_proc_ctxt()
38* - ih264e_init_proc_ctxt()
39* - ih264e_pad_recon_buffer()
40* - ih264e_dblk_pad_hpel_processing_n_mbs()
41* - ih264e_process()
42* - ih264e_set_rc_pic_params()
43* - ih264e_update_rc_post_enc()
44* - ih264e_process_thread()
45*
46* @remarks
47*  None
48*
49*******************************************************************************
50*/
51
52/*****************************************************************************/
53/* File Includes                                                             */
54/*****************************************************************************/
55
56/* System include files */
57#include <stdio.h>
58#include <stddef.h>
59#include <stdlib.h>
60#include <string.h>
61#include <limits.h>
62#include <assert.h>
63
64/* User include files */
65#include "ih264_typedefs.h"
66#include "iv2.h"
67#include "ive2.h"
68#include "ih264_defs.h"
69#include "ih264_debug.h"
70#include "ime_distortion_metrics.h"
71#include "ime_defs.h"
72#include "ime_structs.h"
73#include "ih264_error.h"
74#include "ih264_structs.h"
75#include "ih264_trans_quant_itrans_iquant.h"
76#include "ih264_inter_pred_filters.h"
77#include "ih264_mem_fns.h"
78#include "ih264_padding.h"
79#include "ih264_intra_pred_filters.h"
80#include "ih264_deblk_edge_filters.h"
81#include "ih264_cabac_tables.h"
82#include "ih264_platform_macros.h"
83#include "ih264_macros.h"
84#include "ih264_buf_mgr.h"
85#include "ih264e_error.h"
86#include "ih264e_bitstream.h"
87#include "ih264_common_tables.h"
88#include "ih264_list.h"
89#include "ih264e_defs.h"
90#include "irc_cntrl_param.h"
91#include "irc_frame_info_collector.h"
92#include "ih264e_rate_control.h"
93#include "ih264e_cabac_structs.h"
94#include "ih264e_structs.h"
95#include "ih264e_cabac.h"
96#include "ih264e_process.h"
97#include "ithread.h"
98#include "ih264e_intra_modes_eval.h"
99#include "ih264e_encode_header.h"
100#include "ih264e_globals.h"
101#include "ih264e_config.h"
102#include "ih264e_trace.h"
103#include "ih264e_statistics.h"
104#include "ih264_cavlc_tables.h"
105#include "ih264e_cavlc.h"
106#include "ih264e_deblk.h"
107#include "ih264e_me.h"
108#include "ih264e_debug.h"
109#include "ih264e_master.h"
110#include "ih264e_utils.h"
111#include "irc_mem_req_and_acq.h"
112#include "irc_rate_control_api.h"
113#include "ih264e_platform_macros.h"
114#include "ime_statistics.h"
115
116
117/*****************************************************************************/
118/* Function Definitions                                                      */
119/*****************************************************************************/
120
121/**
122******************************************************************************
123*
124*  @brief This function generates sps, pps set on request
125*
126*  @par   Description
127*  When the encoder is set in header generation mode, the following function
128*  is called. This generates sps and pps headers and returns the control back
129*  to caller.
130*
131*  @param[in]    ps_codec
132*  pointer to codec context
133*
134*  @return      success or failure error code
135*
136******************************************************************************
137*/
138IH264E_ERROR_T ih264e_generate_sps_pps(codec_t *ps_codec)
139{
140    /* choose between ping-pong process buffer set */
141    WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1;
142
143    /* entropy ctxt */
144    entropy_ctxt_t *ps_entropy = &ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].s_entropy;
145
146    /* Bitstream structure */
147    bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
148
149    /* sps */
150    sps_t *ps_sps = NULL;
151
152    /* pps */
153    pps_t *ps_pps = NULL;
154
155    /* output buff */
156    out_buf_t *ps_out_buf = &ps_codec->as_out_buf[ctxt_sel];
157
158
159    /********************************************************************/
160    /*      initialize the bit stream buffer                            */
161    /********************************************************************/
162    ih264e_bitstrm_init(ps_bitstrm, ps_out_buf->s_bits_buf.pv_buf, ps_out_buf->s_bits_buf.u4_bufsize);
163
164    /********************************************************************/
165    /*                    BEGIN HEADER GENERATION                       */
166    /********************************************************************/
167    /*ps_codec->i4_pps_id ++;*/
168    ps_codec->i4_pps_id %= MAX_PPS_CNT;
169
170    /*ps_codec->i4_sps_id ++;*/
171    ps_codec->i4_sps_id %= MAX_SPS_CNT;
172
173    /* populate sps header */
174    ps_sps = ps_codec->ps_sps_base + ps_codec->i4_sps_id;
175    ih264e_populate_sps(ps_codec, ps_sps);
176
177    /* populate pps header */
178    ps_pps = ps_codec->ps_pps_base + ps_codec->i4_pps_id;
179    ih264e_populate_pps(ps_codec, ps_pps);
180
181    ps_entropy->i4_error_code = IH264E_SUCCESS;
182
183    /* generate sps */
184    ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps);
185
186    /* generate pps */
187    ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
188
189    /* queue output buffer */
190    ps_out_buf->s_bits_buf.u4_bytes = ps_bitstrm->u4_strm_buf_offset;
191
192    return ps_entropy->i4_error_code;
193}
194
195/**
196*******************************************************************************
197*
198* @brief   initialize entropy context.
199*
200* @par Description:
201*  Before invoking the call to perform entropy coding, the entropy context
202*  associated with the job needs to be initialized. This involves setting the
203*  start mb address, end mb address, slice index and the pointers to the
204*  locations at which the mb residue info and mb header info are packed.
205*
206* @param[in] ps_proc
207*  Pointer to the current process context
208*
209* @returns error status
210*
211* @remarks none
212*
213*******************************************************************************
214*/
215IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc)
216{
217    /* codec context */
218    codec_t *ps_codec = ps_proc->ps_codec;
219
220    /* entropy ctxt */
221    entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
222
223    /* start address */
224    ps_entropy->i4_mb_start_add = ps_entropy->i4_mb_y * ps_entropy->i4_wd_mbs + ps_entropy->i4_mb_x;
225
226    /* end address */
227    ps_entropy->i4_mb_end_add = ps_entropy->i4_mb_start_add + ps_entropy->i4_mb_cnt;
228
229    /* slice index */
230    ps_entropy->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_entropy->i4_mb_start_add];
231
232    /* sof */
233    /* @ start of frame or start of a new slice, set sof flag */
234    if (ps_entropy->i4_mb_start_add == 0)
235    {
236        ps_entropy->i4_sof = 1;
237    }
238
239    if (ps_entropy->i4_mb_x == 0)
240    {
241        /* packed mb coeff data */
242        ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
243                        ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
244
245        /* packed mb header data */
246        ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
247                        ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
248    }
249
250    return IH264E_SUCCESS;
251}
252
253/**
254*******************************************************************************
255*
256* @brief entry point for entropy coding
257*
258* @par Description
259*  This function calls lower level functions to perform entropy coding for a
260*  group (n rows) of mb's. After encoding 1 row of mb's,  the function takes
261*  back the control, updates the ctxt and calls lower level functions again.
262*  This process is repeated till all the rows or group of mb's (which ever is
263*  minimum) are coded
264*
265* @param[in] ps_proc
266*  process context
267*
268* @returns  error status
269*
270* @remarks
271*
272*******************************************************************************
273*/
274
275IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
276{
277    /* codec context */
278    codec_t *ps_codec = ps_proc->ps_codec;
279
280    /* entropy context */
281    entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
282
283    /* cabac context */
284    cabac_ctxt_t *ps_cabac_ctxt = ps_entropy->ps_cabac;
285
286    /* sps */
287    sps_t *ps_sps = ps_entropy->ps_sps_base + (ps_entropy->u4_sps_id % MAX_SPS_CNT);
288
289    /* pps */
290    pps_t *ps_pps = ps_entropy->ps_pps_base + (ps_entropy->u4_pps_id % MAX_PPS_CNT);
291
292    /* slice header */
293    slice_header_t *ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (ps_entropy->i4_cur_slice_idx % MAX_SLICE_HDR_CNT);
294
295    /* slice type */
296    WORD32 i4_slice_type = ps_proc->i4_slice_type;
297
298    /* Bitstream structure */
299    bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
300
301    /* output buff */
302    out_buf_t s_out_buf;
303
304    /* proc map */
305    UWORD8  *pu1_proc_map;
306
307    /* entropy map */
308    UWORD8  *pu1_entropy_map_curr;
309
310    /* proc base idx */
311    WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt & 1;
312
313    /* temp var */
314    WORD32 i4_wd_mbs, i4_ht_mbs;
315    UWORD32 u4_mb_cnt, u4_mb_idx, u4_mb_end_idx;
316    WORD32 bitstream_start_offset, bitstream_end_offset;
317    /********************************************************************/
318    /*                            BEGIN INIT                            */
319    /********************************************************************/
320
321    /* entropy encode start address */
322    u4_mb_idx = ps_entropy->i4_mb_start_add;
323
324    /* entropy encode end address */
325    u4_mb_end_idx = ps_entropy->i4_mb_end_add;
326
327    /* width in mbs */
328    i4_wd_mbs = ps_entropy->i4_wd_mbs;
329
330    /* height in mbs */
331    i4_ht_mbs = ps_entropy->i4_ht_mbs;
332
333    /* total mb cnt */
334    u4_mb_cnt = i4_wd_mbs * i4_ht_mbs;
335
336    /* proc map */
337    pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
338
339    /* entropy map */
340    pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
341
342    /********************************************************************/
343    /* @ start of frame / slice,                                        */
344    /*      initialize the output buffer,                               */
345    /*      initialize the bit stream buffer,                           */
346    /*      check if sps and pps headers have to be generated,          */
347    /*      populate and generate slice header                          */
348    /********************************************************************/
349    if (ps_entropy->i4_sof)
350    {
351        /********************************************************************/
352        /*      initialize the output buffer                                */
353        /********************************************************************/
354        s_out_buf = ps_codec->as_out_buf[ctxt_sel];
355
356        /* is last frame to encode */
357        s_out_buf.u4_is_last = ps_entropy->u4_is_last;
358
359        /* frame idx */
360        s_out_buf.u4_timestamp_high = ps_entropy->u4_timestamp_high;
361        s_out_buf.u4_timestamp_low = ps_entropy->u4_timestamp_low;
362
363        /********************************************************************/
364        /*      initialize the bit stream buffer                            */
365        /********************************************************************/
366        ih264e_bitstrm_init(ps_bitstrm, s_out_buf.s_bits_buf.pv_buf, s_out_buf.s_bits_buf.u4_bufsize);
367
368        /********************************************************************/
369        /*                    BEGIN HEADER GENERATION                       */
370        /********************************************************************/
371        if (1 == ps_entropy->i4_gen_header)
372        {
373            /* generate sps */
374            ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps);
375
376            /* generate pps */
377            ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
378
379            /* reset i4_gen_header */
380            ps_entropy->i4_gen_header = 0;
381        }
382
383        /* populate slice header */
384        ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps);
385
386        /* generate slice header */
387        ps_entropy->i4_error_code |= ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr,
388                                                                  ps_pps, ps_sps);
389
390        /* once start of frame / slice is done, you can reset it */
391        /* it is the responsibility of the caller to set this flag */
392        ps_entropy->i4_sof = 0;
393
394        if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
395        {
396            BITSTREAM_BYTE_ALIGN(ps_bitstrm);
397            BITSTREAM_FLUSH(ps_bitstrm);
398            ih264e_init_cabac_ctxt(ps_entropy);
399        }
400    }
401
402    /* begin entropy coding for the mb set */
403    while (u4_mb_idx < u4_mb_end_idx)
404    {
405        /* init ptrs/indices */
406        if (ps_entropy->i4_mb_x == i4_wd_mbs)
407        {
408            ps_entropy->i4_mb_y++;
409            ps_entropy->i4_mb_x = 0;
410
411            /* packed mb coeff data */
412            ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
413                            ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
414
415            /* packed mb header data */
416            ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
417                            ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
418
419            /* proc map */
420            pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
421
422            /* entropy map */
423            pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
424        }
425
426        DEBUG("\nmb indices x, y %d, %d", ps_entropy->i4_mb_x, ps_entropy->i4_mb_y);
427        ENTROPY_TRACE("mb index x %d", ps_entropy->i4_mb_x);
428        ENTROPY_TRACE("mb index y %d", ps_entropy->i4_mb_y);
429
430        /* wait until the curr mb is core coded */
431        /* The wait for curr mb to be core coded is essential when entropy is launched
432         * as a separate job
433         */
434        while (1)
435        {
436            volatile UWORD8 *pu1_buf1;
437            WORD32 idx = ps_entropy->i4_mb_x;
438
439            pu1_buf1 = pu1_proc_map + idx;
440            if (*pu1_buf1)
441                break;
442            ithread_yield();
443        }
444
445
446        /* write mb layer */
447        ps_entropy->i4_error_code |= ps_codec->pf_write_mb_syntax_layer[ps_entropy->u1_entropy_coding_mode_flag][i4_slice_type](ps_entropy);
448        /* Starting bitstream offset for header in bits */
449        bitstream_start_offset = GET_NUM_BITS(ps_bitstrm);
450
451        /* set entropy map */
452        pu1_entropy_map_curr[ps_entropy->i4_mb_x] = 1;
453
454        u4_mb_idx++;
455        ps_entropy->i4_mb_x++;
456        /* check for eof */
457        if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
458        {
459            if (ps_entropy->i4_mb_x < i4_wd_mbs)
460            {
461                ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
462            }
463        }
464
465        if (ps_entropy->i4_mb_x == i4_wd_mbs)
466        {
467            /* if slices are enabled */
468            if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS)
469            {
470                /* current slice index */
471                WORD32 i4_curr_slice_idx = ps_entropy->i4_cur_slice_idx;
472
473                /* slice map */
474                UWORD8 *pu1_slice_idx = ps_entropy->pu1_slice_idx;
475
476                /* No need to open a slice at end of frame. The current slice can be closed at the time
477                 * of signaling eof flag.
478                 */
479                if ((u4_mb_idx != u4_mb_cnt) && (i4_curr_slice_idx
480                                                != pu1_slice_idx[u4_mb_idx]))
481                {
482                    if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
483                    { /* mb skip run */
484                        if ((i4_slice_type != ISLICE)
485                                        && *ps_entropy->pi4_mb_skip_run)
486                        {
487                            if (*ps_entropy->pi4_mb_skip_run)
488                            {
489                            PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run");
490                                *ps_entropy->pi4_mb_skip_run = 0;
491                            }
492                        }
493                        /* put rbsp trailing bits for the previous slice */
494                                 ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
495                    }
496                    else
497                    {
498                        ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
499                    }
500
501                    /* update slice header pointer */
502                    i4_curr_slice_idx = pu1_slice_idx[u4_mb_idx];
503                    ps_entropy->i4_cur_slice_idx = i4_curr_slice_idx;
504                    ps_slice_hdr = ps_entropy->ps_slice_hdr_base+ (i4_curr_slice_idx % MAX_SLICE_HDR_CNT);
505
506                    /* populate slice header */
507                    ps_entropy->i4_mb_start_add = u4_mb_idx;
508                    ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps,
509                                                 ps_sps);
510
511                    /* generate slice header */
512                    ps_entropy->i4_error_code |= ih264e_generate_slice_header(
513                                    ps_bitstrm, ps_slice_hdr, ps_pps, ps_sps);
514                    if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
515                    {
516                        BITSTREAM_BYTE_ALIGN(ps_bitstrm);
517                        BITSTREAM_FLUSH(ps_bitstrm);
518                        ih264e_init_cabac_ctxt(ps_entropy);
519                    }
520                }
521                else
522                {
523                    if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
524                                    && u4_mb_idx != u4_mb_cnt)
525                    {
526                        ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
527                    }
528                }
529            }
530            /* Dont execute any further instructions until store synchronization took place */
531            DATA_SYNC();
532        }
533
534        /* Ending bitstream offset for header in bits */
535        bitstream_end_offset = GET_NUM_BITS(ps_bitstrm);
536        ps_entropy->u4_header_bits[i4_slice_type == PSLICE] +=
537                        bitstream_end_offset - bitstream_start_offset;
538    }
539
540    /* check for eof */
541    if (u4_mb_idx == u4_mb_cnt)
542    {
543        /* set end of frame flag */
544        ps_entropy->i4_eof = 1;
545    }
546    else
547    {
548        if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
549                        && ps_codec->s_cfg.e_slice_mode
550                                        != IVE_SLICE_MODE_BLOCKS)
551        {
552            ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
553        }
554    }
555
556    if (ps_entropy->i4_eof)
557    {
558        if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
559        {
560            /* mb skip run */
561            if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run)
562            {
563                if (*ps_entropy->pi4_mb_skip_run)
564                {
565                    PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run,
566                                 ps_entropy->i4_error_code, "mb skip run");
567                    *ps_entropy->pi4_mb_skip_run = 0;
568                }
569            }
570            /* put rbsp trailing bits */
571             ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
572        }
573        else
574        {
575            ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
576        }
577
578        /* update current frame stats to rc library */
579        {
580            /* number of bytes to stuff */
581            WORD32 i4_stuff_bytes;
582
583            /* update */
584            i4_stuff_bytes = ih264e_update_rc_post_enc(
585                            ps_codec, ctxt_sel,
586                            (ps_proc->ps_codec->i4_poc == 0));
587
588            /* cbr rc - house keeping */
589            if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel])
590            {
591                ps_entropy->ps_bitstrm->u4_strm_buf_offset = 0;
592            }
593            else if (i4_stuff_bytes)
594            {
595                /* add filler nal units */
596                ps_entropy->i4_error_code |= ih264e_add_filler_nal_unit(ps_bitstrm, i4_stuff_bytes);
597            }
598        }
599
600        /*
601         *Frame number is to be incremented only if the current frame is a
602         * reference frame. After each successful frame encode, we increment
603         * frame number by 1
604         */
605        if (!ps_codec->s_rate_control.post_encode_skip[ctxt_sel]
606                        && ps_codec->u4_is_curr_frm_ref)
607        {
608            ps_codec->i4_frame_num++;
609        }
610        /********************************************************************/
611        /*      signal the output                                           */
612        /********************************************************************/
613        ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes =
614                        ps_entropy->ps_bitstrm->u4_strm_buf_offset;
615
616        DEBUG("entropy status %x", ps_entropy->i4_error_code);
617    }
618
619    /* allow threads to dequeue entropy jobs */
620    ps_codec->au4_entropy_thread_active[ctxt_sel] = 0;
621
622    return ps_entropy->i4_error_code;
623}
624
625/**
626*******************************************************************************
627*
628* @brief Packs header information of a mb in to a buffer
629*
630* @par Description:
631*  After the deciding the mode info of a macroblock, the syntax elements
632*  associated with the mb are packed and stored. The entropy thread unpacks
633*  this buffer and generates the end bit stream.
634*
635* @param[in] ps_proc
636*  Pointer to the current process context
637*
638* @returns error status
639*
640* @remarks none
641*
642*******************************************************************************
643*/
644IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc)
645{
646    /* curr mb type */
647    UWORD32 u4_mb_type = ps_proc->u4_mb_type;
648
649    /* pack mb syntax layer of curr mb (used for entropy coding) */
650    if (u4_mb_type == I4x4)
651    {
652        /* pointer to mb header storage space */
653        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
654
655        /* temp var */
656        WORD32 i4, byte;
657
658        /* mb type plus mode */
659        *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + u4_mb_type;
660
661        /* cbp */
662        *pu1_ptr++ = ps_proc->u4_cbp;
663
664        /* mb qp delta */
665        *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
666
667        /* sub mb modes */
668        for (i4 = 0; i4 < 16; i4 ++)
669        {
670            byte = 0;
671
672            if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
673                            ps_proc->au1_intra_luma_mb_4x4_modes[i4])
674            {
675                byte |= 1;
676            }
677            else
678            {
679
680                if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
681                                ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
682                {
683                    byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 1);
684                }
685                else
686                {
687                    byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 1;
688                }
689            }
690
691            i4++;
692
693            if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
694                            ps_proc->au1_intra_luma_mb_4x4_modes[i4])
695            {
696                byte |= 16;
697            }
698            else
699            {
700
701                if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
702                                ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
703                {
704                    byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 5);
705                }
706                else
707                {
708                    byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 5;
709                }
710            }
711
712            *pu1_ptr++ = byte;
713        }
714
715        /* end of mb layer */
716        ps_proc->pv_mb_header_data = pu1_ptr;
717    }
718    else if (u4_mb_type == I16x16)
719    {
720        /* pointer to mb header storage space */
721        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
722
723        /* mb type plus mode */
724        *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + (ps_proc->u1_l_i16_mode << 4) + u4_mb_type;
725
726        /* cbp */
727        *pu1_ptr++ = ps_proc->u4_cbp;
728
729        /* mb qp delta */
730        *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
731
732        /* end of mb layer */
733        ps_proc->pv_mb_header_data = pu1_ptr;
734    }
735    else if (u4_mb_type == P16x16)
736    {
737        /* pointer to mb header storage space */
738        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
739
740        WORD16 *i2_mv_ptr;
741
742        /* mb type plus mode */
743        *pu1_ptr++ = u4_mb_type;
744
745        /* cbp */
746        *pu1_ptr++ = ps_proc->u4_cbp;
747
748        /* mb qp delta */
749        *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
750
751        i2_mv_ptr = (WORD16 *)pu1_ptr;
752
753        *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
754
755        *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
756
757        /* end of mb layer */
758        ps_proc->pv_mb_header_data = i2_mv_ptr;
759    }
760    else if (u4_mb_type == PSKIP)
761    {
762        /* pointer to mb header storage space */
763        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
764
765        /* mb type plus mode */
766        *pu1_ptr++ = u4_mb_type;
767
768        /* end of mb layer */
769        ps_proc->pv_mb_header_data = pu1_ptr;
770    }
771    else if(u4_mb_type == B16x16)
772    {
773
774        /* pointer to mb header storage space */
775        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
776
777        WORD16 *i2_mv_ptr;
778
779        UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
780
781        /* mb type plus mode */
782        *pu1_ptr++ = (u4_pred_mode << 4) + u4_mb_type;
783
784        /* cbp */
785        *pu1_ptr++ = ps_proc->u4_cbp;
786
787        /* mb qp delta */
788        *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
789
790        /* l0 & l1 me data */
791        i2_mv_ptr = (WORD16 *)pu1_ptr;
792
793        if (u4_pred_mode != PRED_L1)
794        {
795            *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx
796                            - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
797
798            *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy
799                            - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
800        }
801        if (u4_pred_mode != PRED_L0)
802        {
803            *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx
804                            - ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
805
806            *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy
807                            - ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
808        }
809
810        /* end of mb layer */
811        ps_proc->pv_mb_header_data = i2_mv_ptr;
812
813    }
814    else if(u4_mb_type == BDIRECT)
815    {
816        /* pointer to mb header storage space */
817        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
818
819        /* mb type plus mode */
820        *pu1_ptr++ = u4_mb_type;
821
822        /* cbp */
823        *pu1_ptr++ = ps_proc->u4_cbp;
824
825        /* mb qp delta */
826        *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
827
828        ps_proc->pv_mb_header_data = pu1_ptr;
829
830    }
831    else if(u4_mb_type == BSKIP)
832    {
833        UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
834
835        /* pointer to mb header storage space */
836        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
837
838        /* mb type plus mode */
839        *pu1_ptr++ = (u4_pred_mode << 4) + u4_mb_type;
840
841        /* end of mb layer */
842        ps_proc->pv_mb_header_data = pu1_ptr;
843    }
844
845    return IH264E_SUCCESS;
846}
847
848/**
849*******************************************************************************
850*
851* @brief   update process context after encoding an mb. This involves preserving
852* the current mb information for later use, initialize the proc ctxt elements to
853* encode next mb.
854*
855* @par Description:
856*  This function performs house keeping tasks after encoding an mb.
857*  After encoding an mb, various elements of the process context needs to be
858*  updated to encode the next mb. For instance, the source, recon and reference
859*  pointers, mb indices have to be adjusted to the next mb. The slice index of
860*  the current mb needs to be updated. If mb qp modulation is enabled, then if
861*  the qp changes the quant param structure needs to be updated. Also, to encode
862*  the next mb, the current mb info is used as part of mode prediction or mv
863*  prediction. Hence the current mb info has to be preserved at top/top left/left
864*  locations.
865*
866* @param[in] ps_proc
867*  Pointer to the current process context
868*
869* @returns error status
870*
871* @remarks none
872*
873*******************************************************************************
874*/
875WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc)
876{
877    /* error status */
878    WORD32 error_status = IH264_SUCCESS;
879
880    /* codec context */
881    codec_t *ps_codec = ps_proc->ps_codec;
882
883    /* curr mb indices */
884    WORD32 i4_mb_x = ps_proc->i4_mb_x;
885    WORD32 i4_mb_y = ps_proc->i4_mb_y;
886
887    /* mb syntax elements of neighbors */
888    mb_info_t *ps_left_syn =  &ps_proc->s_left_mb_syntax_ele;
889    mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + i4_mb_x;
890    mb_info_t *ps_top_left_syn = &ps_proc->s_top_left_mb_syntax_ele;
891
892    /* curr mb type */
893    UWORD32 u4_mb_type = ps_proc->u4_mb_type;
894
895    /* curr mb type */
896    UWORD32 u4_is_intra = ps_proc->u4_is_intra;
897
898    /* width in mbs */
899    WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
900
901    /*height in mbs*/
902    WORD32 i4_ht_mbs = ps_proc->i4_ht_mbs;
903
904    /* proc map */
905    UWORD8 *pu1_proc_map = ps_proc->pu1_proc_map + (i4_mb_y * i4_wd_mbs);
906
907    /* deblk context */
908    deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
909
910    /* deblk bs context */
911    bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
912
913    /* top row motion vector info */
914    enc_pu_t *ps_top_row_pu = ps_proc->ps_top_row_pu + i4_mb_x;
915
916    /* top left mb motion vector */
917    enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu;
918
919    /* left mb motion vector */
920    enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu;
921
922    /* sub mb modes */
923    UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (i4_mb_x << 4);
924
925    /*************************************************************/
926    /* During MV prediction, when top right mb is not available, */
927    /* top left mb info. is used for prediction. Hence the curr  */
928    /* top, which will be top left for the next mb needs to be   */
929    /* preserved before updating it with curr mb info.           */
930    /*************************************************************/
931
932    /* mb type, mb class, csbp */
933    *ps_top_left_syn = *ps_top_syn;
934
935    if (ps_proc->i4_slice_type != ISLICE)
936    {
937        /*****************************************/
938        /* update top left with top info results */
939        /*****************************************/
940        /* mv */
941        *ps_top_left_mb_pu = *ps_top_row_pu;
942    }
943
944    /*************************************************/
945    /* update top and left with curr mb info results */
946    /*************************************************/
947
948    /* mb type */
949    ps_left_syn->u2_mb_type = ps_top_syn->u2_mb_type = u4_mb_type;
950
951    /* mb class */
952    ps_left_syn->u2_is_intra = ps_top_syn->u2_is_intra = u4_is_intra;
953
954    /* csbp */
955    ps_left_syn->u4_csbp = ps_top_syn->u4_csbp = ps_proc->u4_csbp;
956
957    /* distortion */
958    ps_left_syn->i4_mb_distortion = ps_top_syn->i4_mb_distortion = ps_proc->i4_mb_distortion;
959
960    if (u4_is_intra)
961    {
962        /* mb / sub mb modes */
963        if (I16x16 == u4_mb_type)
964        {
965            pu1_top_mb_intra_modes[0] = ps_proc->au1_left_mb_intra_modes[0] = ps_proc->u1_l_i16_mode;
966        }
967        else if (I4x4 == u4_mb_type)
968        {
969            ps_codec->pf_mem_cpy_mul8(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
970            ps_codec->pf_mem_cpy_mul8(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
971        }
972        else if (I8x8 == u4_mb_type)
973        {
974            memcpy(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
975            memcpy(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
976        }
977
978        if ((ps_proc->i4_slice_type == PSLICE) ||(ps_proc->i4_slice_type == BSLICE))
979        {
980            /* mv */
981            *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
982        }
983
984        *ps_proc->pu4_mb_pu_cnt = 1;
985    }
986    else
987    {
988        /* mv */
989        *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
990    }
991
992    /*
993     * Mark that the MB has been coded intra
994     * So that future AIRs can skip it
995     */
996    ps_proc->pu1_is_intra_coded[i4_mb_x + (i4_mb_y * i4_wd_mbs)] = u4_is_intra;
997
998    /**************************************************/
999    /* pack mb header info. for entropy coding        */
1000    /**************************************************/
1001    ih264e_pack_header_data(ps_proc);
1002
1003    /* update previous mb qp */
1004    ps_proc->u4_mb_qp_prev = ps_proc->u4_mb_qp;
1005
1006    /* store qp */
1007    ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
1008
1009    /*
1010     * We need to sync the cache to make sure that the nmv content of proc
1011     * is updated to cache properly
1012     */
1013    DATA_SYNC();
1014
1015    /* Just before finishing the row, enqueue the job in to entropy queue.
1016     * The master thread depending on its convenience shall dequeue it and
1017     * performs entropy.
1018     *
1019     * WARN !! Placing this block post proc map update can cause queuing of
1020     * entropy jobs in out of order.
1021     */
1022    if (i4_mb_x == i4_wd_mbs - 1)
1023    {
1024        /* job structures */
1025        job_t s_job;
1026
1027        /* job class */
1028        s_job.i4_cmd = CMD_ENTROPY;
1029
1030        /* number of mbs to be processed in the current job */
1031        s_job.i2_mb_cnt = ps_codec->s_cfg.i4_wd_mbs;
1032
1033        /* job start index x */
1034        s_job.i2_mb_x = 0;
1035
1036        /* job start index y */
1037        s_job.i2_mb_y = ps_proc->i4_mb_y;
1038
1039        /* proc base idx */
1040        s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt & 1) ? (MAX_PROCESS_CTXT / 2): 0 ;
1041
1042        /* queue the job */
1043        error_status |= ih264_list_queue(ps_proc->pv_entropy_jobq, &s_job, 1);
1044
1045        if(ps_proc->i4_mb_y == (i4_ht_mbs - 1))
1046            ih264_list_terminate(ps_codec->pv_entropy_jobq);
1047    }
1048
1049    /* update proc map */
1050    pu1_proc_map[i4_mb_x] = 1;
1051
1052    /**************************************************/
1053    /* update proc ctxt elements for encoding next mb */
1054    /**************************************************/
1055    /* update indices */
1056    i4_mb_x ++;
1057    ps_proc->i4_mb_x = i4_mb_x;
1058
1059    if (ps_proc->i4_mb_x == i4_wd_mbs)
1060    {
1061        ps_proc->i4_mb_y++;
1062        ps_proc->i4_mb_x = 0;
1063    }
1064
1065    /* update slice index */
1066    ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_proc->i4_mb_y * i4_wd_mbs + ps_proc->i4_mb_x];
1067
1068    /* update buffers pointers */
1069    ps_proc->pu1_src_buf_luma += MB_SIZE;
1070    ps_proc->pu1_rec_buf_luma += MB_SIZE;
1071    ps_proc->apu1_ref_buf_luma[0] += MB_SIZE;
1072    ps_proc->apu1_ref_buf_luma[1] += MB_SIZE;
1073
1074    /*
1075     * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1076     * the stride per MB is MB_SIZE
1077     */
1078    ps_proc->pu1_src_buf_chroma += MB_SIZE;
1079    ps_proc->pu1_rec_buf_chroma += MB_SIZE;
1080    ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE;
1081    ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE;
1082
1083
1084
1085    /* Reset cost, distortion params */
1086    ps_proc->i4_mb_cost = INT_MAX;
1087    ps_proc->i4_mb_distortion = SHRT_MAX;
1088
1089    ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt;
1090
1091    ps_proc->pu4_mb_pu_cnt += 1;
1092
1093    /* Update colocated pu */
1094    if (ps_proc->i4_slice_type == BSLICE)
1095        ps_proc->ps_colpu += *(ps_proc->aps_mv_buf[1]->pu4_mb_pu_cnt +  (i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x);
1096
1097    /* deblk ctxts */
1098    if (ps_proc->u4_disable_deblock_level != 1)
1099    {
1100        /* indices */
1101        ps_bs->i4_mb_x = ps_proc->i4_mb_x;
1102        ps_bs->i4_mb_y = ps_proc->i4_mb_y;
1103
1104#ifndef N_MB_ENABLE /* For N MB processing update take place inside deblocking function */
1105        ps_deblk->i4_mb_x ++;
1106
1107        ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1108        /*
1109         * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1110         * the stride per MB is MB_SIZE
1111         */
1112        ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1113#endif
1114    }
1115
1116    return error_status;
1117}
1118
1119/**
1120*******************************************************************************
1121*
1122* @brief   initialize process context.
1123*
1124* @par Description:
1125*  Before dispatching the current job to process thread, the process context
1126*  associated with the job is initialized. Usually every job aims to encode one
1127*  row of mb's. Based on the mb indices already stored in the process context,
1128*  the context's buffer ptrs, slice indices and other elements that are necessary
1129*  during core-coding are initialized. The function also performs the input
*  colour space conversion / copy into the intermediate buffers where needed
*  and pads the source on the right and bottom edges for non standard sizes.
1130*
1131* @param[in,out] ps_proc
1132*  Pointer to the current process context; i4_mb_x / i4_mb_y are read from it
1133*
1134* @returns IH264E_SUCCESS (no failure path is visible in this function)
1135*
1136* @remarks none
1137*
1138*******************************************************************************
1139*/
1140IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
1141{
1142    /* codec context */
1143    codec_t *ps_codec = ps_proc->ps_codec;
1144
1145    /* nmb processing context*/
1146    n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
1147
1148    /* indices */
1149    WORD32 i4_mb_x, i4_mb_y;
1150
1151    /* strides (the source strides are overwritten below once the source buffer is chosen) */
1152    WORD32 i4_src_strd = ps_proc->i4_src_strd;
1153    WORD32 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd;
1154    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1155
1156    /* quant params (entry 0; only used here to look up lambda) */
1157    quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1158
1159    /* deblk ctxt */
1160    deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
1161
1162    /* deblk bs context */
1163    bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
1164
1165    /* Pointer to mv_buffer of current frame */
1166    mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
1167
1168    /* Pointers for color space conversion */
1169    UWORD8 *pu1_y_buf_base, *pu1_u_buf_base, *pu1_v_buf_base;
1170
1171    /* Pad the MB to support non standard sizes: pad = coded size - display size */
1172    UWORD32 u4_pad_right_sz = ps_codec->s_cfg.u4_wd - ps_codec->s_cfg.u4_disp_wd;
1173    UWORD32 u4_pad_bottom_sz = ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht;
    /* number of rows handed to the conversion routines; reduced on the last mb row below */
1174    UWORD16 u2_num_rows = MB_SIZE;
    /* flag forwarded to pf_ih264e_conv_420p_to_420sp(); cleared when luma is routed to the csc buffer */
1175    WORD32 convert_uv_only;
1176
1177    /********************************************************************/
1178    /*                            BEGIN INIT                            */
1179    /********************************************************************/
1180
1181    i4_mb_x = ps_proc->i4_mb_x;
1182    i4_mb_y = ps_proc->i4_mb_y;
1183
1184    /* Number of mbs processed in one loop of process function: row width clamped to MAX_NMB */
1185    ps_proc->i4_nmb_ntrpy = (ps_proc->i4_wd_mbs > MAX_NMB) ? MAX_NMB : ps_proc->i4_wd_mbs;
1186    ps_proc->u4_nmb_me = (ps_proc->i4_wd_mbs > MAX_NMB)? MAX_NMB : ps_proc->i4_wd_mbs;
1187
1188    /* init buffer pointers */
    /* luma comes from the intermediate csc buffer (stride u4_max_wd) when the last mb row */
    /* needs bottom padding or the input is IV_YUV_422ILE; otherwise from the raw input    */
1189    convert_uv_only = 1;
1190    if ((u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)) ||
1191        ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE)
1192    {
1193        if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1194            u2_num_rows = (UWORD16) MB_SIZE - u4_pad_bottom_sz;
1195        ps_proc->pu1_src_buf_luma_base = ps_codec->pu1_y_csc_buf_base;
1196        i4_src_strd = ps_proc->i4_src_strd = ps_codec->s_cfg.u4_max_wd;
1197        ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * MB_SIZE);
1198        convert_uv_only = 0;
1199    }
1200    else
1201    {
1202        i4_src_strd = ps_proc->i4_src_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
1203        ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * MB_SIZE);
1204    }
1205
1206
    /* chroma uses stride u4_max_wd for IV_YUV_422ILE / IV_YUV_420P input and on the last mb row; */
    /* for semi-planar input on that path the chroma base is redirected to the uv csc buffer.     */
    /* NOTE(review): for 420P / 422ILE pu1_src_buf_chroma_base is used as found; presumably it    */
    /* already points to the csc buffer - confirm against the caller                              */
1207    if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE ||
1208        ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420P ||
1209        ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1210    {
1211        if ((ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_UV) ||
1212            (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU))
1213            ps_proc->pu1_src_buf_chroma_base = ps_codec->pu1_uv_csc_buf_base;
1214
1215        ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * BLK8x8SIZE);
1216        i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_codec->s_cfg.u4_max_wd;
1217    }
1218    else
1219    {
1220        i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
1221        ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_src_chroma_strd * (i4_mb_y * BLK8x8SIZE);
1222    }
1223
    /* recon pointers; luma and chroma both use i4_rec_strd, chroma rows advance by BLK8x8SIZE per mb row */
1224    ps_proc->pu1_rec_buf_luma = ps_proc->pu1_rec_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1225    ps_proc->pu1_rec_buf_chroma = ps_proc->pu1_rec_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1226
1227    /* Temporal back and forward reference buffer */
1228    ps_proc->apu1_ref_buf_luma[0] = ps_proc->apu1_ref_buf_luma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1229    ps_proc->apu1_ref_buf_chroma[0] = ps_proc->apu1_ref_buf_chroma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1230    ps_proc->apu1_ref_buf_luma[1] = ps_proc->apu1_ref_buf_luma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1231    ps_proc->apu1_ref_buf_chroma[1] = ps_proc->apu1_ref_buf_chroma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1232
1233    /*
1234     * Do color space conversion
1235     * NOTE : We assume here that the number of MB's to process will not span multiple rows
1236     */
1237    switch (ps_codec->s_cfg.e_inp_color_fmt)
1238    {
1239        case IV_YUV_420SP_UV:
1240        case IV_YUV_420SP_VU:
1241            /* In case of 420 semi-planar input, copy last few rows to intermediate
1242               buffer as chroma trans functions access one extra byte due to interleaved input.
1243               This data will be padded if required */
1244            if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1245            {
                /* u4_disp_ht modulo 16; zero when the display height is a multiple of 16, */
                /* in which case the luma loop below copies nothing                        */
1246                WORD32 num_rows = ps_codec->s_cfg.u4_disp_ht & 0xF;
1247                UWORD8 *pu1_src;
1248                UWORD8 *pu1_dst;
1249                WORD32 i;
1250                pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
1251                          ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1252
1253                pu1_dst = ps_proc->pu1_src_buf_luma;
1254
                /* NOTE(review): u4_wd bytes are copied per row starting at column i4_mb_x * MB_SIZE; */
                /* assumes i4_mb_x is 0 here and that the input rows hold at least u4_wd bytes - confirm */
1255                for (i = 0; i < num_rows; i++)
1256                {
1257                    memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd);
1258                    pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
1259                    pu1_dst += ps_proc->i4_src_strd;
1260                }
                /* NOTE(review): the column offset uses BLK8x8SIZE here while the destination pointer */
                /* was built with MB_SIZE per mb; the two agree only when i4_mb_x is 0 - confirm      */
1261                pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
1262                          ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
1263                pu1_dst = ps_proc->pu1_src_buf_chroma;
1264
1265                /* Last MB row of chroma is copied unconditionally, since trans functions access an extra byte
1266                 * due to interleaved input. Row count: (u4_disp_ht / 2) minus the chroma rows above this mb row
1267                 */
1268                num_rows = (ps_codec->s_cfg.u4_disp_ht >> 1) - (ps_proc->i4_mb_y * BLK8x8SIZE);
1269                for (i = 0; i < num_rows; i++)
1270                {
1271                    memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd);
1272                    pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
1273                    pu1_dst += ps_proc->i4_src_chroma_strd;
1274                }
1275
1276            }
1277            break;
1278
1279        case IV_YUV_420P :
            /* planar input: locate the y, u and v planes at the current mb and convert to semi-planar */
1280            pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
1281                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1282
1283            pu1_u_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
1284                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
1285
1286            pu1_v_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[2] + (i4_mb_x * BLK8x8SIZE) +
1287                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[2] * (i4_mb_y * BLK8x8SIZE);
1288
            /* NOTE(review): presumably the routine skips the luma copy when convert_uv_only is set - confirm */
1289            ps_codec->pf_ih264e_conv_420p_to_420sp(
1290                            pu1_y_buf_base, pu1_u_buf_base, pu1_v_buf_base,
1291                            ps_proc->pu1_src_buf_luma,
1292                            ps_proc->pu1_src_buf_chroma, u2_num_rows,
1293                            ps_codec->s_cfg.u4_disp_wd,
1294                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[0],
1295                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[1],
1296                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[2],
1297                            ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
1298                            convert_uv_only);
1299            break;
1300
1301        case IV_YUV_422ILE :
            /* single interleaved plane; the column offset is doubled (MB_SIZE * 2 bytes per mb), */
            /* presumably because each pixel takes two bytes in this format - confirm             */
1302            pu1_y_buf_base =  (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE * 2)
1303                              + ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1304
            /* u and v destinations are the interleaved chroma buffer and the same buffer offset by one byte */
1305            ps_codec->pf_ih264e_fmt_conv_422i_to_420sp(
1306                            ps_proc->pu1_src_buf_luma,
1307                            ps_proc->pu1_src_buf_chroma,
1308                            ps_proc->pu1_src_buf_chroma + 1, pu1_y_buf_base,
1309                            ps_codec->s_cfg.u4_disp_wd, u2_num_rows,
1310                            ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
1311                            ps_proc->i4_src_chroma_strd,
1312                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] >> 1);
1313            break;
1314
1315        default:
1316            break;
1317    }
1318
    /* pad right edge: done once per row (i4_mb_x == 0) and only when the source stride */
    /* leaves room beyond the display width; the pad width is capped by that room       */
1319    if (u4_pad_right_sz && (ps_proc->i4_mb_x == 0) &&
1320                    (ps_proc->i4_src_strd > (WORD32)ps_codec->s_cfg.u4_disp_wd) )
1321    {
1322        UWORD32 u4_pad_wd, u4_pad_ht;
1323        u4_pad_wd = (UWORD32)(ps_proc->i4_src_strd - ps_codec->s_cfg.u4_disp_wd);
1324        u4_pad_wd = MIN(u4_pad_right_sz, u4_pad_wd);
1325        u4_pad_ht = MB_SIZE;
1326        if(ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1327            u4_pad_ht = MIN(MB_SIZE, (MB_SIZE - u4_pad_bottom_sz));
1328
1329        ih264_pad_right_luma(
1330                        ps_proc->pu1_src_buf_luma + ps_codec->s_cfg.u4_disp_wd,
1331                        ps_proc->i4_src_strd, u4_pad_ht, u4_pad_wd);
1332
        /* chroma: half the rows, same byte width as luma */
1333        ih264_pad_right_chroma(
1334                        ps_proc->pu1_src_buf_chroma + ps_codec->s_cfg.u4_disp_wd,
1335                        ps_proc->i4_src_chroma_strd, u4_pad_ht / 2, u4_pad_wd);
1336    }
1337
1338    /* pad bottom edge: last mb row only, once per row; a full stride is replicated per padded row */
1339    if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) && ps_proc->i4_mb_x == 0)
1340    {
1341        ih264_pad_bottom(ps_proc->pu1_src_buf_luma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd,
1342                         ps_proc->i4_src_strd, ps_proc->i4_src_strd, u4_pad_bottom_sz);
1343
        /* NOTE(review): the chroma offset is (rows * stride) / 2, which is a whole number of */
        /* chroma rows only when (MB_SIZE - u4_pad_bottom_sz) is even - confirm               */
1344        ih264_pad_bottom(ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_chroma_strd / 2,
1345                         ps_proc->i4_src_chroma_strd, ps_proc->i4_src_chroma_strd, (u4_pad_bottom_sz / 2));
1346    }
1347
1348
1349    /* packed mb coeff data: start of the current mb row (u4_size_coeff_data bytes per row) */
1350    ps_proc->pv_mb_coeff_data = ((UWORD8 *)ps_proc->pv_pic_mb_coeff_data) + i4_mb_y * ps_codec->u4_size_coeff_data;
1351
1352    /* packed mb header data: start of the current mb row (u4_size_header_data bytes per row) */
1353    ps_proc->pv_mb_header_data = ((UWORD8 *)ps_proc->pv_pic_mb_header_data) + i4_mb_y * ps_codec->u4_size_header_data;
1354
1355    /* slice index */
1356    ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[i4_mb_y * ps_proc->i4_wd_mbs + i4_mb_x];
1357
1358    /*********************************************************************/
1359    /* ih264e_init_quant_params() routine is called at the pic init level*/
1360    /* this would have initialized the qp.                               */
1361    /* TODO_LATER: currently it is assumed that quant params do not change*/
1362    /* across mb's. When they do calculate update ps_qp_params accordingly*/
1363    /*********************************************************************/
1364
1365    /* init mv buffer ptr: MIN_PU_SIZE * MIN_PU_SIZE pu entries are reserved per mb */
1366    ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs * (MIN_PU_SIZE * MIN_PU_SIZE));
1367
1368    /* Init co-located mv buffer */
1369    ps_proc->ps_colpu = ps_proc->aps_mv_buf[1]->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs * (MIN_PU_SIZE * MIN_PU_SIZE));
1370
    /* top row pu for ME: the previous mb row, or the start of the pu buffer for row 0 */
1371    if (i4_mb_y == 0)
1372    {
1373        ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu;
1374    }
1375    else
1376    {
1377        ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu + ((i4_mb_y - 1) * ps_proc->i4_wd_mbs * (MIN_PU_SIZE * MIN_PU_SIZE));
1378    }
1379
    /* per-mb pu count slots of the current row */
1380    ps_proc->pu4_mb_pu_cnt = ps_cur_mv_buf->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs);
1381
1382    /* mb type */
1383    ps_proc->u4_mb_type = I16x16;
1384
1385    /* lambda, looked up in gu1_qp0 by the mb qp */
1386    ps_proc->u4_lambda = gu1_qp0[ps_qp_params->u1_mb_qp];
1387
1388    /* mb distortion */
1389    ps_proc->i4_mb_distortion = SHRT_MAX;
1390
    /* at the start of a row the left / top-left neighbour distortions are cleared; */
    /* at the first mb of the picture the top row syntax array is zeroed as well    */
1391    if (i4_mb_x == 0)
1392    {
1393        ps_proc->s_left_mb_syntax_ele.i4_mb_distortion = 0;
1394
1395        ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion = 0;
1396
1397        ps_proc->s_top_left_mb_syntax_ME.i4_mb_distortion = 0;
1398
1399        if (i4_mb_y == 0)
1400        {
1401            memset(ps_proc->ps_top_row_mb_syntax_ele, 0, (ps_proc->i4_wd_mbs + 1)*sizeof(mb_info_t));
1402        }
1403    }
1404
1405    /* mb cost */
1406    ps_proc->i4_mb_cost = INT_MAX;
1407
1408    /**********************/
1409    /* init deblk context */
1410    /**********************/
1411    ps_deblk->i4_mb_x = ps_proc->i4_mb_x;
1412    /* deblk lags the current mb proc by 1 row */
1413    /* NOTE: Intra prediction has to happen with non deblocked samples used as reference */
1414    /* Hence to deblk MB 0 of row 0, you have to wait till MB 0 of row 1 is encoded. */
1415    /* For simplicity, we chose to lag deblocking by 1 Row wrt to proc */
1416    ps_deblk->i4_mb_y = ps_proc->i4_mb_y - 1;
1417
1418    /* buffer ptrs (start of the deblk row; for proc row 0 the row index is -1, */
    /* so these point one mb row above the recon base)                          */
1419    ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + i4_rec_strd * (ps_deblk->i4_mb_y * MB_SIZE);
1420    ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + i4_rec_strd * (ps_deblk->i4_mb_y * BLK8x8SIZE);
1421
1422    /* init deblk bs context */
1423    /* mb indices */
1424    ps_bs->i4_mb_x = ps_proc->i4_mb_x;
1425    ps_bs->i4_mb_y = ps_proc->i4_mb_y;
1426
1427    /* init n_mb_process  context: starts at column 0 of the deblk row */
1428    ps_n_mb_ctxt->i4_mb_x = 0;
1429    ps_n_mb_ctxt->i4_mb_y = ps_deblk->i4_mb_y;
1430    ps_n_mb_ctxt->i4_n_mbs = ps_proc->i4_nmb_ntrpy;
1431
1432    return IH264E_SUCCESS;
1433}
1434
1435/**
1436*******************************************************************************
1437*
1438* @brief This function performs luma & chroma padding
1439*
1440* @par Description:
*  Pads the reconstructed picture around the given mb, depending on where the
*  mb lies: on the left for the first column, on the right for the last
*  column, at the bottom for the last mb of the last row, and at the top for
*  every mb of the first row. Chroma is padded over half the luma row count
*  with the same stride.
1441*
1442* @param[in] ps_proc
1443*  Process context corresponding to the job
1444*
1445* @param[in] pu1_curr_pic_luma
1446*  Pointer to luma buffer (position of the mb being padded)
1447*
1448* @param[in] pu1_curr_pic_chroma
1449*  Pointer to chroma buffer (position of the mb being padded)
1450*
1451* @param[in] i4_mb_x
1452*  mb index x
1453*
1454* @param[in] i4_mb_y
1455*  mb index y
1456*
1457*  @param[in] i4_pad_ht
1458*  number of rows to be padded
1459*
1460* @returns  IH264E_SUCCESS (no failure path is visible in this function)
1461*
1462* @remarks The left/right padding is done before the top/bottom padding; the
*  latter replicate rows that include the left/right pad area
1463*
1464*******************************************************************************
1465*/
1466IH264E_ERROR_T ih264e_pad_recon_buffer(process_ctxt_t *ps_proc,
1467                                       UWORD8 *pu1_curr_pic_luma,
1468                                       UWORD8 *pu1_curr_pic_chroma,
1469                                       WORD32 i4_mb_x,
1470                                       WORD32 i4_mb_y,
1471                                       WORD32 i4_pad_ht)
1472{
1473    /* codec context (provides the padding function pointers) */
1474    codec_t *ps_codec = ps_proc->ps_codec;
1475
1476    /* strides: the recon stride is used for both luma and chroma */
1477    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1478
    /* first column: pad to the left of the mb */
1479    if (i4_mb_x == 0)
1480    {
1481        /* padding left luma */
1482        ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, i4_pad_ht, PAD_LEFT);
1483
1484        /* padding left chroma (half the number of rows) */
1485        ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, i4_pad_ht >> 1, PAD_LEFT);
1486    }
    /* last column: pad to the right of the mb, and below the picture on the last row */
1487    if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1488    {
1489        /* padding right luma */
1490        ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, i4_pad_ht, PAD_RIGHT);
1491
1492        /* padding right chroma (half the number of rows) */
1493        ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, i4_pad_ht >> 1, PAD_RIGHT);
1494
1495        if (i4_mb_y == ps_proc->i4_ht_mbs - 1)
1496        {
            /* NOTE(review): presumably the start of the first row below the picture, assuming   */
            /* i4_rec_strd spans left pad + picture + right pad - confirm against pf_pad_bottom */
1497            UWORD8 *pu1_rec_luma = pu1_curr_pic_luma + MB_SIZE + PAD_RIGHT + ((i4_pad_ht - 1) * i4_rec_strd);
1498            UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma + MB_SIZE + PAD_RIGHT + (((i4_pad_ht >> 1) - 1) * i4_rec_strd);
1499
1500            /* padding bottom luma: a full stride wide, PAD_BOT rows */
1501            ps_codec->pf_pad_bottom(pu1_rec_luma, i4_rec_strd, i4_rec_strd, PAD_BOT);
1502
1503            /* padding bottom chroma: a full stride wide, PAD_BOT / 2 rows */
1504            ps_codec->pf_pad_bottom(pu1_rec_chroma, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
1505        }
1506    }
1507
    /* first row: pad above the mb; the padded span is widened to cover the */
    /* left pad (first column) and the right pad (last column)              */
1508    if (i4_mb_y == 0)
1509    {
1510        UWORD8 *pu1_rec_luma = pu1_curr_pic_luma;
1511        UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma;
1512        WORD32 wd = MB_SIZE;
1513
1514        if (i4_mb_x == 0)
1515        {
            /* start from the left pad area */
1516            pu1_rec_luma -= PAD_LEFT;
1517            pu1_rec_chroma -= PAD_LEFT;
1518
1519            wd += PAD_LEFT;
1520        }
1521        if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1522        {
1523            wd += PAD_RIGHT;
1524        }
1525
1526        /* padding top luma */
1527        ps_codec->pf_pad_top(pu1_rec_luma, i4_rec_strd, wd, PAD_TOP);
1528
1529        /* padding top chroma (same width, PAD_TOP / 2 rows) */
1530        ps_codec->pf_pad_top(pu1_rec_chroma, i4_rec_strd, wd, (PAD_TOP >> 1));
1531    }
1532
1533    return IH264E_SUCCESS;
1534}
1535
1536
1537
1538
1539/**
1540*******************************************************************************
1541*
1542* @brief This function performs deblocking, padding and halfpel generation for
1543*  'n' MBs
1544*
1545* @par Description:
1546*
1547* @param[in] ps_proc
1548*  Process context corresponding to the job
1549*
1550* @param[in] pu1_curr_pic_luma
1551* Current MB being processed(Luma)
1552*
1553* @param[in] pu1_curr_pic_chroma
1554* Current MB being processed(Chroma)
1555*
1556* @param[in] i4_mb_x
1557* Column value of current MB processed
1558*
1559* @param[in] i4_mb_y
1560* Current row processed
1561*
1562* @returns  error status
1563*
1564* @remarks none
1565*
1566*******************************************************************************
1567*/
1568IH264E_ERROR_T ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t *ps_proc,
1569                                                     UWORD8 *pu1_curr_pic_luma,
1570                                                     UWORD8 *pu1_curr_pic_chroma,
1571                                                     WORD32 i4_mb_x,
1572                                                     WORD32 i4_mb_y)
1573{
1574    /* codec context */
1575    codec_t *ps_codec = ps_proc->ps_codec;
1576
1577    /* n_mb processing context */
1578    n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
1579
1580    /* deblk context */
1581    deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
1582
1583    /* strides */
1584    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1585
1586    /* loop variables */
1587    WORD32 row, i, j, col;
1588
1589    /* Padding Width */
1590    UWORD32 u4_pad_wd;
1591
1592    /* deblk_map of the row being deblocked */
1593    UWORD8 *pu1_deblk_map = ps_proc->pu1_deblk_map + ps_deblk->i4_mb_y * ps_proc->i4_wd_mbs;
1594
1595    /* deblk_map_previous row */
1596    UWORD8 *pu1_deblk_map_prev_row = pu1_deblk_map - ps_proc->i4_wd_mbs;
1597
1598    WORD32 u4_pad_top = 0;
1599
1600    WORD32 u4_deblk_prev_row = 0;
1601
1602    /* Number of mbs to be processed */
1603    WORD32 i4_n_mbs = ps_n_mb_ctxt->i4_n_mbs;
1604
1605    /* Number of mbs  actually processed
1606     * (at the end of a row, when remaining number of MBs are less than i4_n_mbs) */
1607    WORD32 i4_n_mb_process_count = 0;
1608
1609    UWORD8 *pu1_pad_bottom_src = NULL;
1610
1611    UWORD8 *pu1_pad_src_luma = NULL;
1612    UWORD8 *pu1_pad_src_chroma = NULL;
1613
1614    if (ps_proc->u4_disable_deblock_level == 1)
1615    {
1616        /* If left most MB is processed, then pad left */
1617        if (i4_mb_x == 0)
1618        {
1619            /* padding left luma */
1620            ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1621
1622            /* padding left chroma */
1623            ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
1624        }
1625        /*last col*/
1626        if (i4_mb_x == (ps_proc->i4_wd_mbs - 1))
1627        {
1628            /* padding right luma */
1629            ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1630
1631            /* padding right chroma */
1632            ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
1633        }
1634    }
1635
1636    if ((i4_mb_y > 0) || (i4_mb_y == (ps_proc->i4_ht_mbs - 1)))
1637    {
1638        /* if number of mb's to be processed are less than 'N', go back.
1639         * exception to the above clause is end of row */
1640        if ( ((i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1)) < i4_n_mbs) && (i4_mb_x < (ps_proc->i4_wd_mbs - 1)) )
1641        {
1642            return IH264E_SUCCESS;
1643        }
1644        else
1645        {
1646            i4_n_mb_process_count = MIN(i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1), i4_n_mbs);
1647
1648            /* performing deblocking for required number of MBs */
1649            if ((i4_mb_y > 0) && (ps_proc->u4_disable_deblock_level != 1))
1650            {
1651                u4_deblk_prev_row = 1;
1652
1653                /* checking whether the top rows are deblocked */
1654                for (col = 0; col < i4_n_mb_process_count; col++)
1655                {
1656                    u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + col];
1657                }
1658
1659                /* checking whether the top right MB is deblocked */
1660                if ((ps_deblk->i4_mb_x + i4_n_mb_process_count) != ps_proc->i4_wd_mbs)
1661                {
1662                    u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + i4_n_mb_process_count];
1663                }
1664
1665                /* Top or Top right MBs not deblocked */
1666                if ((u4_deblk_prev_row != 1) && (i4_mb_y > 0))
1667                {
1668                    return IH264E_SUCCESS;
1669                }
1670
1671                for (row = 0; row < i4_n_mb_process_count; row++)
1672                {
1673                    ih264e_deblock_mb(ps_proc, ps_deblk);
1674
1675                    pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1676
1677                    if (ps_deblk->i4_mb_y > 0)
1678                    {
1679                        if (ps_deblk->i4_mb_x == 0)/* If left most MB is processed, then pad left*/
1680                        {
1681                            /* padding left luma */
1682                            ps_codec->pf_pad_left_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE, i4_rec_strd, MB_SIZE, PAD_LEFT);
1683
1684                            /* padding left chroma */
1685                            ps_codec->pf_pad_left_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
1686                        }
1687
1688                        if (ps_deblk->i4_mb_x == (ps_proc->i4_wd_mbs - 1))/*last column*/
1689                        {
1690                            /* padding right luma */
1691                            ps_codec->pf_pad_right_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1692
1693                            /* padding right chroma */
1694                            ps_codec->pf_pad_right_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
1695                        }
1696                    }
1697                    ps_deblk->i4_mb_x++;
1698
1699                    ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1700                    ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1701
1702                }
1703            }
1704            else if(i4_mb_y > 0)
1705            {
1706                ps_deblk->i4_mb_x += i4_n_mb_process_count;
1707
1708                ps_deblk->pu1_cur_pic_luma += i4_n_mb_process_count * MB_SIZE;
1709                ps_deblk->pu1_cur_pic_chroma += i4_n_mb_process_count * MB_SIZE;
1710            }
1711
1712            if (i4_mb_y == 2)
1713            {
1714                u4_pad_wd = i4_n_mb_process_count * MB_SIZE;
1715                u4_pad_top = ps_n_mb_ctxt->i4_mb_x * MB_SIZE;
1716
1717                if (ps_n_mb_ctxt->i4_mb_x == 0)
1718                {
1719                    u4_pad_wd += PAD_LEFT;
1720                    u4_pad_top = -PAD_LEFT;
1721                }
1722
1723                if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1724                {
1725                    u4_pad_wd += PAD_RIGHT;
1726                }
1727
1728                /* padding top luma */
1729                ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_luma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, PAD_TOP);
1730
1731                /* padding top chroma */
1732                ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_chroma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, (PAD_TOP >> 1));
1733            }
1734
1735            ps_n_mb_ctxt->i4_mb_x += i4_n_mb_process_count;
1736
1737            if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1738            {
1739                if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1740                {
1741                    /* Bottom Padding is done in one stretch for the entire width */
1742                    if (ps_proc->u4_disable_deblock_level != 1)
1743                    {
1744                        ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * MB_SIZE;
1745
1746                        ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * BLK8x8SIZE;
1747
1748                        ps_n_mb_ctxt->i4_mb_x = 0;
1749                        ps_n_mb_ctxt->i4_mb_y = ps_proc->i4_mb_y;
1750                        ps_deblk->i4_mb_x = 0;
1751                        ps_deblk->i4_mb_y = ps_proc->i4_mb_y;
1752
1753                        /* update pic qp map (as update_proc_ctxt is still not called for the last MB) */
1754                        ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
1755
1756                        i4_n_mb_process_count = (ps_proc->i4_wd_mbs) % i4_n_mbs;
1757
1758                        j = (ps_proc->i4_wd_mbs) / i4_n_mbs;
1759
1760                        for (i = 0; i < j; i++)
1761                        {
1762                            for (col = 0; col < i4_n_mbs; col++)
1763                            {
1764                                ih264e_deblock_mb(ps_proc, ps_deblk);
1765
1766                                pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1767
1768                                ps_deblk->i4_mb_x++;
1769                                ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1770                                ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1771                                ps_n_mb_ctxt->i4_mb_x++;
1772                            }
1773                        }
1774
1775                        for (col = 0; col < i4_n_mb_process_count; col++)
1776                        {
1777                            ih264e_deblock_mb(ps_proc, ps_deblk);
1778
1779                            pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1780
1781                            ps_deblk->i4_mb_x++;
1782                            ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1783                            ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1784                            ps_n_mb_ctxt->i4_mb_x++;
1785                        }
1786
1787                        pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd;
1788
1789                        pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd;
1790
1791                        /* padding left luma */
1792                        ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1793
1794                        /* padding left chroma */
1795                        ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
1796
1797                        pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
1798                        pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
1799
1800                        /* padding left luma */
1801                        ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1802
1803                        /* padding left chroma */
1804                        ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
1805
1806                        pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
1807
1808                        pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
1809
1810                        /* padding right luma */
1811                        ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1812
1813                        /* padding right chroma */
1814                        ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
1815
1816                        pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
1817                        pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
1818
1819                        /* padding right luma */
1820                        ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1821
1822                        /* padding right chroma */
1823                        ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
1824
1825                    }
1826
1827                    /* In case height is less than 2 MBs pad top */
1828                    if (ps_proc->i4_ht_mbs <= 2)
1829                    {
1830                        UWORD8 *pu1_pad_top_src;
1831                        /* padding top luma */
1832                        pu1_pad_top_src = ps_proc->pu1_rec_buf_luma_base - PAD_LEFT;
1833                        ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, PAD_TOP);
1834
1835                        /* padding top chroma */
1836                        pu1_pad_top_src = ps_proc->pu1_rec_buf_chroma_base - PAD_LEFT;
1837                        ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, (PAD_TOP >> 1));
1838                    }
1839
1840                    /* padding bottom luma */
1841                    pu1_pad_bottom_src = ps_proc->pu1_rec_buf_luma_base + ps_proc->i4_ht_mbs * MB_SIZE * i4_rec_strd - PAD_LEFT;
1842                    ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, PAD_BOT);
1843
1844                    /* padding bottom chroma */
1845                    pu1_pad_bottom_src = ps_proc->pu1_rec_buf_chroma_base + ps_proc->i4_ht_mbs * (MB_SIZE >> 1) * i4_rec_strd - PAD_LEFT;
1846                    ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
1847                }
1848            }
1849        }
1850    }
1851
1852    return IH264E_SUCCESS;
1853}
1854
1855
1856/**
1857*******************************************************************************
1858*
1859* @brief This function performs luma & chroma core coding for a set of mb's.
1860*
1861* @par Description:
1862*  The mb to be coded is taken and is evaluated over a predefined set of modes
1863*  (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least cost
1864*  is selected and using intra/inter prediction filters, prediction is carried out.
1865*  The deviation between src and pred signal constitutes error signal. This error
1866*  signal is transformed (hierarchical transform if necessary) and quantized. The
1867*  quantized residue is packed in to entropy buffer for entropy coding. This is
1868*  repeated for all the mb's enlisted under the job.
1869*
1870* @param[in] ps_proc
1871*  Process context corresponding to the job
1872*
1873* @returns  error status
1874*
1875* @remarks none
1876*
1877*******************************************************************************
1878*/
1879WORD32 ih264e_process(process_ctxt_t *ps_proc)
1880{
1881    /* error status */
1882    WORD32 error_status = IH264_SUCCESS;
1883
1884    /* codec context */
1885    codec_t *ps_codec = ps_proc->ps_codec;
1886
1887    /* cbp luma, chroma */
1888    UWORD32 u4_cbp_l, u4_cbp_c;
1889
1890    /* width in mbs */
1891    WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
1892
1893    /* loop var */
1894    WORD32  i4_mb_idx, i4_mb_cnt = ps_proc->i4_mb_cnt;
1895
1896    /* valid modes */
1897    UWORD32 u4_valid_modes = 0;
1898
1899    /* gate threshold */
1900    WORD32 i4_gate_threshold = 0;
1901
1902    /* is intra */
1903    WORD32 luma_idx, chroma_idx, is_intra;
1904
1905    /* temp variables */
1906    WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt & 1;
1907
1908    /* list of modes for evaluation */
1909    if (ps_proc->i4_slice_type == ISLICE)
1910    {
1911        /* enable intra 16x16 */
1912        u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
1913
1914        /* enable intra 8x8 */
1915        u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_8x8 ? (1 << I8x8) : 0;
1916
1917        /* enable intra 4x4 */
1918        u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
1919    }
1920    else if (ps_proc->i4_slice_type == PSLICE)
1921    {
1922        /* enable intra 16x16 */
1923        u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
1924
1925        /* enable intra 4x4 */
1926        if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
1927        {
1928            u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
1929        }
1930
1931        /* enable inter P16x16 */
1932        u4_valid_modes |= (1 << P16x16);
1933    }
1934    else if (ps_proc->i4_slice_type == BSLICE)
1935    {
1936        /* enable intra 16x16 */
1937        u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
1938
1939        /* enable intra 4x4 */
1940        if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
1941        {
1942            u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
1943        }
1944
1945        /* enable inter B16x16 */
1946        u4_valid_modes |= (1 << B16x16);
1947    }
1948
1949
1950    /* init entropy */
1951    ps_proc->s_entropy.i4_mb_x = ps_proc->i4_mb_x;
1952    ps_proc->s_entropy.i4_mb_y = ps_proc->i4_mb_y;
1953    ps_proc->s_entropy.i4_mb_cnt = MIN(ps_proc->i4_nmb_ntrpy, i4_wd_mbs - ps_proc->i4_mb_x);
1954
1955    /* compute recon when :
1956     *   1. current frame is to be used as a reference
1957     *   2. dump recon for bit stream sanity check
1958     */
1959    ps_proc->u4_compute_recon = ps_codec->u4_is_curr_frm_ref ||
1960                                ps_codec->s_cfg.u4_enable_recon;
1961
1962    /* Encode 'n' macroblocks,
1963     * 'n' being the number of mbs dictated by current proc ctxt */
1964    for (i4_mb_idx = 0; i4_mb_idx < i4_mb_cnt; i4_mb_idx ++)
1965    {
1966        /* since we have not yet found sad, we have not yet got min sad */
1967        /* we need to initialize these variables for each MB */
1968        /* TODO how to get the min sad into the codec */
1969        ps_proc->u4_min_sad = ps_codec->s_cfg.i4_min_sad;
1970        ps_proc->u4_min_sad_reached = 0;
1971
1972        /* mb analysis */
1973        {
1974            /* temp var */
1975            WORD32 i4_mb_id = ps_proc->i4_mb_x + ps_proc->i4_mb_y * i4_wd_mbs;
1976
1977            /* force intra refresh ? */
1978            WORD32 i4_air_enable_inter = (ps_codec->s_cfg.e_air_mode == IVE_AIR_MODE_NONE) ||
1979                            (ps_proc->pu1_is_intra_coded[i4_mb_id] != 0) ||
1980                            (ps_codec->pu2_intr_rfrsh_map[i4_mb_id] != ps_codec->i4_air_pic_cnt);
1981
1982            /* evaluate inter 16x16 modes */
1983            if ((u4_valid_modes & (1 << P16x16)) || (u4_valid_modes & (1 << B16x16)))
1984            {
1985                /* compute nmb me */
1986                if (ps_proc->i4_mb_x % ps_proc->u4_nmb_me == 0)
1987                {
1988                    ih264e_compute_me_nmb(ps_proc, MIN((WORD32)ps_proc->u4_nmb_me,
1989                                                       i4_wd_mbs - ps_proc->i4_mb_x));
1990                }
1991
1992                /* set pointers to ME data appropriately for other modules to use */
1993                {
1994                    UWORD32 u4_mb_index = ps_proc->i4_mb_x % ps_proc->u4_nmb_me ;
1995
1996                    /* get the min sad condition for current mb */
1997                    ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
1998                    ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
1999
2000                    ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_skip_mv[0]);
2001                    ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_mb_index].s_ngbr_avbl);
2002                    ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_pred_mv[0]);
2003
2004                    ps_proc->i4_mb_distortion = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_distortion;
2005                    ps_proc->i4_mb_cost = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_cost;
2006                    ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
2007                    ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
2008                    ps_proc->u4_mb_type = ps_proc->ps_nmb_info[u4_mb_index].u4_mb_type;
2009
2010                    /* get the best sub pel buffer */
2011                    ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_mb_index].pu1_best_sub_pel_buf;
2012                    ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_mb_index].u4_bst_spel_buf_strd;
2013                }
2014                ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
2015            }
2016            else
2017            {
2018                /* Derive neighbor availability for the current macroblock */
2019                ps_proc->ps_ngbr_avbl = &ps_proc->s_ngbr_avbl;
2020
2021                ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
2022            }
2023
2024            /*
2025             * If air says intra, we need to force the following code path to evaluate intra
2026             * The easy way is just to say that the inter cost is too much
2027             */
2028            if (!i4_air_enable_inter)
2029            {
2030                ps_proc->u4_min_sad_reached = 0;
2031                ps_proc->i4_mb_cost = INT_MAX;
2032                ps_proc->i4_mb_distortion = INT_MAX;
2033            }
2034            else if (ps_proc->u4_mb_type == PSKIP)
2035            {
2036                goto UPDATE_MB_INFO;
2037            }
2038
2039            /* wait until the proc of [top + 1] mb is computed.
2040             * We wait till the proc dependencies are satisfied */
2041             if(ps_proc->i4_mb_y > 0)
2042             {
2043                /* proc map */
2044                UWORD8  *pu1_proc_map_top;
2045
2046                pu1_proc_map_top = ps_proc->pu1_proc_map + ((ps_proc->i4_mb_y - 1) * i4_wd_mbs);
2047
2048                while (1)
2049                {
2050                    volatile UWORD8 *pu1_buf;
2051                    WORD32 idx = i4_mb_idx + 1;
2052
2053                    idx = MIN(idx, ((WORD32)ps_codec->s_cfg.i4_wd_mbs - 1));
2054                    pu1_buf =  pu1_proc_map_top + idx;
2055                    if(*pu1_buf)
2056                        break;
2057                    ithread_yield();
2058                }
2059            }
2060
2061            /* If we already have the minimum sad, there is no point in searching for sad again */
2062            if (ps_proc->u4_min_sad_reached == 0)
2063            {
2064                /* intra gating in inter slices */
2065                /* No need of gating if we want to force intra, we need to find the threshold only if inter is enabled by AIR*/
2066                if (i4_air_enable_inter && ps_proc->i4_slice_type != ISLICE && ps_codec->u4_inter_gate)
2067                {
2068                    /* distortion of neighboring blocks */
2069                    WORD32 i4_distortion[4];
2070
2071                    i4_distortion[0] = ps_proc->s_left_mb_syntax_ele.i4_mb_distortion;
2072
2073                    i4_distortion[1] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x].i4_mb_distortion;
2074
2075                    i4_distortion[2] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x + 1].i4_mb_distortion;
2076
2077                    i4_distortion[3] = ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion;
2078
2079                    i4_gate_threshold = (i4_distortion[0] + i4_distortion[1] + i4_distortion[2] + i4_distortion[3]) >> 2;
2080
2081                }
2082
2083
2084                /* If we are going to force intra we need to evaluate intra irrespective of gating */
2085                if ( (!i4_air_enable_inter) || ((i4_gate_threshold + 16 *((WORD32) ps_proc->u4_lambda)) < ps_proc->i4_mb_distortion))
2086                {
2087                    /* evaluate intra 4x4 modes */
2088                    if (u4_valid_modes & (1 << I4x4))
2089                    {
2090                        if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
2091                        {
2092                            ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(ps_proc);
2093                        }
2094                        else
2095                        {
2096                            ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(ps_proc);
2097                        }
2098                    }
2099
2100                    /* evaluate intra 16x16 modes */
2101                    if (u4_valid_modes & (1 << I16x16))
2102                    {
2103                        ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(ps_proc);
2104                    }
2105
2106                    /* evaluate intra 8x8 modes */
2107                    if (u4_valid_modes & (1 << I8x8))
2108                    {
2109                        ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
2110                    }
2111
2112                }
2113        }
2114     }
2115
2116        /* is intra */
2117        if (ps_proc->u4_mb_type == I4x4 || ps_proc->u4_mb_type == I16x16 || ps_proc->u4_mb_type == I8x8)
2118        {
2119            luma_idx = ps_proc->u4_mb_type;
2120            chroma_idx = 0;
2121            is_intra = 1;
2122
2123            /* evaluate chroma blocks for intra */
2124            ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
2125        }
2126        else
2127        {
2128            luma_idx = 3;
2129            chroma_idx = 1;
2130            is_intra = 0;
2131        }
2132        ps_proc->u4_is_intra = is_intra;
2133        ps_proc->ps_pu->b1_intra_flag = is_intra;
2134
2135        /* redo MV pred of neighbors in the case intra mb */
2136        /* TODO : currently called unconditionally, needs to be called only in the case of intra
2137         * to modify neighbors */
2138        if (ps_proc->i4_slice_type != ISLICE)
2139        {
2140            ih264e_mv_pred(ps_proc, ps_proc->i4_slice_type);
2141        }
2142
2143        /* Perform luma mb core coding */
2144        u4_cbp_l = (ps_codec->luma_energy_compaction)[luma_idx](ps_proc);
2145
2146        /* Perform luma mb core coding */
2147        u4_cbp_c = (ps_codec->chroma_energy_compaction)[chroma_idx](ps_proc);
2148
2149        /* coded block pattern */
2150        ps_proc->u4_cbp = (u4_cbp_c << 4) | u4_cbp_l;
2151
2152        if (!ps_proc->u4_is_intra)
2153        {
2154            if (ps_proc->i4_slice_type == BSLICE)
2155            {
2156                if (ih264e_find_bskip_params(ps_proc, PRED_L0))
2157                {
2158                    ps_proc->u4_mb_type = (ps_proc->u4_cbp) ? BDIRECT : BSKIP;
2159                }
2160            }
2161            else if(!ps_proc->u4_cbp)
2162            {
2163                if (ih264e_find_pskip_params(ps_proc, PRED_L0))
2164                {
2165                    ps_proc->u4_mb_type = PSKIP;
2166                }
2167            }
2168        }
2169
2170UPDATE_MB_INFO:
2171
2172        /* Update mb sad, mb qp and intra mb cost. Will be used by rate control */
2173        ih264e_update_rc_mb_info(&ps_proc->s_frame_info, ps_proc);
2174
2175        /**********************************************************************/
2176        /* if disable deblock level is '0' this implies enable deblocking for */
2177        /* all edges of all macroblocks with out any restrictions             */
2178        /*                                                                    */
2179        /* if disable deblock level is '1' this implies disable deblocking for*/
2180        /* all edges of all macroblocks with out any restrictions             */
2181        /*                                                                    */
2182        /* if disable deblock level is '2' this implies enable deblocking for */
2183        /* all edges of all macroblocks except edges overlapping with slice   */
2184        /* boundaries. This option is not currently supported by the encoder  */
2185        /* hence the slice map should be of no significance to perform debloc */
2186        /* king                                                               */
2187        /**********************************************************************/
2188
2189        if (ps_proc->u4_compute_recon)
2190        {
2191            /* deblk context */
2192            /* src pointers */
2193            UWORD8 *pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma;
2194            UWORD8 *pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma;
2195
2196            /* src indices */
2197            UWORD32 i4_mb_x = ps_proc->i4_mb_x;
2198            UWORD32 i4_mb_y = ps_proc->i4_mb_y;
2199
2200            /* compute blocking strength */
2201            if (ps_proc->u4_disable_deblock_level != 1)
2202            {
2203                ih264e_compute_bs(ps_proc);
2204            }
2205
2206            /* nmb deblocking and hpel and padding */
2207            ih264e_dblk_pad_hpel_processing_n_mbs(ps_proc, pu1_cur_pic_luma,
2208                                                  pu1_cur_pic_chroma, i4_mb_x,
2209                                                  i4_mb_y);
2210        }
2211
2212        /* update the context after for coding next mb */
2213        error_status |= ih264e_update_proc_ctxt(ps_proc);
2214
2215        /* Once the last row is processed, mark the buffer status appropriately */
2216        if (ps_proc->i4_ht_mbs == ps_proc->i4_mb_y)
2217        {
2218            /* Pointer to current picture buffer structure */
2219            pic_buf_t *ps_cur_pic = ps_proc->ps_cur_pic;
2220
2221            /* Pointer to current picture's mv buffer structure */
2222            mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
2223
2224            /**********************************************************************/
2225            /* if disable deblock level is '0' this implies enable deblocking for */
2226            /* all edges of all macroblocks with out any restrictions             */
2227            /*                                                                    */
2228            /* if disable deblock level is '1' this implies disable deblocking for*/
2229            /* all edges of all macroblocks with out any restrictions             */
2230            /*                                                                    */
2231            /* if disable deblock level is '2' this implies enable deblocking for */
2232            /* all edges of all macroblocks except edges overlapping with slice   */
2233            /* boundaries. This option is not currently supported by the encoder  */
2234            /* hence the slice map should be of no significance to perform debloc */
2235            /* king                                                               */
2236            /**********************************************************************/
2237            error_status |= ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, ps_cur_mv_buf->i4_buf_id , BUF_MGR_CODEC);
2238
2239            error_status |= ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id , BUF_MGR_CODEC);
2240
2241            if (ps_codec->s_cfg.u4_enable_recon)
2242            {
2243                /* pic cnt */
2244                ps_codec->as_rec_buf[ctxt_sel].i4_pic_cnt = ps_proc->i4_pic_cnt;
2245
2246                /* rec buffers */
2247                ps_codec->as_rec_buf[ctxt_sel].s_pic_buf  = *ps_proc->ps_cur_pic;
2248
2249                /* is last? */
2250                ps_codec->as_rec_buf[ctxt_sel].u4_is_last = ps_proc->s_entropy.u4_is_last;
2251
2252                /* frame time stamp */
2253                ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_high = ps_proc->s_entropy.u4_timestamp_high;
2254                ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_low = ps_proc->s_entropy.u4_timestamp_low;
2255            }
2256
2257        }
2258    }
2259
2260    DEBUG_HISTOGRAM_DUMP(ps_codec->s_cfg.i4_ht_mbs == ps_proc->i4_mb_y);
2261
2262    return error_status;
2263}
2264
2265/**
2266*******************************************************************************
2267*
2268* @brief
2269*  Function to update rc context after encoding
2270*
2271* @par   Description
2272*  This function updates the rate control context after the frame is encoded.
2273*  Number of bits consumed by the current frame, frame distortion, frame cost,
2274*  number of intra/inter mb's, ... are passed on to rate control context for
2275*  updating the rc model.
2276*
2277* @param[in] ps_codec
2278*  Handle to codec context
2279*
2280* @param[in] ctxt_sel
2281*  frame context selector
2282*
2283* @param[in] pic_cnt
2284*  pic count
2285*
2286* @returns i4_stuffing_byte
2287*  number of stuffing bytes (if necessary)
2288*
2289* @remarks
2290*
2291*******************************************************************************
2292*/
2293WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 i4_is_first_frm)
2294{
2295    /* proc set base idx */
2296    WORD32 i4_proc_ctxt_sel_base = ctxt_sel ? (MAX_PROCESS_CTXT / 2) : 0;
2297
2298    /* proc ctxt */
2299    process_ctxt_t *ps_proc = &ps_codec->as_process[i4_proc_ctxt_sel_base];
2300
2301    /* frame qp */
2302    UWORD8 u1_frame_qp = ps_codec->u4_frame_qp;
2303
2304    /* cbr rc return status */
2305    WORD32 i4_stuffing_byte = 0;
2306
2307    /* current frame stats */
2308    frame_info_t s_frame_info;
2309    picture_type_e rc_pic_type;
2310
2311    /* temp var */
2312    WORD32 i, j;
2313
2314    /********************************************************************/
2315    /*                            BEGIN INIT                            */
2316    /********************************************************************/
2317
2318    /* init frame info */
2319    irc_init_frame_info(&s_frame_info);
2320
2321    /* get frame info */
2322    for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++)
2323    {
2324        /*****************************************************************/
2325        /* One frame can be encoded by max of u4_num_cores threads       */
2326        /* Accumulating the num mbs, sad, qp and intra_mb_cost from      */
2327        /* u4_num_cores threads                                          */
2328        /*****************************************************************/
2329        for (j = 0; j< MAX_MB_TYPE; j++)
2330        {
2331            s_frame_info.num_mbs[j] += ps_proc[i].s_frame_info.num_mbs[j];
2332
2333            s_frame_info.tot_mb_sad[j] += ps_proc[i].s_frame_info.tot_mb_sad[j];
2334
2335            s_frame_info.qp_sum[j] += ps_proc[i].s_frame_info.qp_sum[j];
2336        }
2337
2338        s_frame_info.intra_mb_cost_sum += ps_proc[i].s_frame_info.intra_mb_cost_sum;
2339
2340        s_frame_info.activity_sum += ps_proc[i].s_frame_info.activity_sum;
2341
2342        /*****************************************************************/
2343        /* gather number of residue and header bits consumed by the frame*/
2344        /*****************************************************************/
2345        ih264e_update_rc_bits_info(&s_frame_info, &ps_proc[i].s_entropy);
2346    }
2347
2348    /* get pic type */
2349    switch (ps_codec->pic_type)
2350    {
2351        case PIC_I:
2352        case PIC_IDR:
2353            rc_pic_type = I_PIC;
2354            break;
2355        case PIC_P:
2356            rc_pic_type = P_PIC;
2357            break;
2358        case PIC_B:
2359            rc_pic_type = B_PIC;
2360            break;
2361        default:
2362            assert(0);
2363            break;
2364    }
2365
2366    /* update rc lib with current frame stats */
2367    i4_stuffing_byte =  ih264e_rc_post_enc(ps_codec->s_rate_control.pps_rate_control_api,
2368                                          &(s_frame_info),
2369                                          ps_codec->s_rate_control.pps_pd_frm_rate,
2370                                          ps_codec->s_rate_control.pps_time_stamp,
2371                                          ps_codec->s_rate_control.pps_frame_time,
2372                                          (ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs),
2373                                          &rc_pic_type,
2374                                          i4_is_first_frm,
2375                                          &ps_codec->s_rate_control.post_encode_skip[ctxt_sel],
2376                                          u1_frame_qp,
2377                                          &ps_codec->s_rate_control.num_intra_in_prev_frame,
2378                                          &ps_codec->s_rate_control.i4_avg_activity);
2379    return i4_stuffing_byte;
2380}
2381
2382/**
2383*******************************************************************************
2384*
2385* @brief
2386*  entry point of a spawned encoder thread
2387*
2388* @par Description:
2389*  The encoder thread dequeues a proc/entropy job from the encoder queue and
2390*  calls necessary routines.
*
*  Each loop iteration first tries the entropy job queue and then the process
*  job queue, and dispatches the dequeued job on its command:
*   - CMD_PROCESS : ih264e_init_proc_ctxt() followed by ih264e_process()
*   - CMD_ENTROPY : ih264e_init_entropy_ctxt() followed by ih264e_entropy()
*   - any other   : IH264_FAIL is OR-ed into the accumulated error status
*
*  An entropy job is dequeued only while pv_entropy_mutex is held and only
*  when au4_entropy_thread_active[ctxt_sel] reads 0; the thread that obtains
*  the job sets that entry to 1 before releasing the mutex.
*
*  The loop ends when a dequeue fails:
*   - a thread whose context has a non-zero i4_id leaves on a failed process
*     queue dequeue;
*   - the thread whose context has i4_id 0 instead sets is_blocking to 1 and
*     retries; it leaves when an entropy dequeue made with is_blocking set
*     fails.
2391*
2392* @param[in] pv_proc
2393*  Process context corresponding to the thread
*  (a process_ctxt_t pointer passed as void pointer)
2394*
2395* @returns  error status
*  The value returned is the status of the last ih264_list_dequeue() call.
*  Both loop exits are taken only when that status is not IH264_SUCCESS, so
*  the return value never carries the result of the jobs themselves. The OR
*  of the values returned by ih264e_process() / ih264e_entropy() is stored in
*  i4_error_code of the process context selected for the most recent job.
2396*
2397* @remarks
*  NOTE(review): because job return values are combined with a bitwise OR,
*  distinct error codes may not be recoverable from i4_error_code.
2398*
2399*******************************************************************************
2400*/
2401WORD32 ih264e_process_thread(void *pv_proc)
2402{
2403    /* error status
     * ret          : status of the most recent job queue dequeue (returned)
     * error_status : OR of the values returned by the jobs run by this thread */
2404    IH264_ERROR_T ret = IH264_SUCCESS;
2405    WORD32 error_status = IH264_SUCCESS;
2406
2407    /* proc ctxt (re-pointed at WORKER for every job, see below) */
2408    process_ctxt_t *ps_proc = pv_proc;
2409
2410    /* codec ctxt */
2411    codec_t *ps_codec = ps_proc->ps_codec;
2412
2413    /* structure to represent a processing job entry */
2414    job_t s_job;
2415
2416    /* blocking flag handed to the entropy queue dequeue : starts at 0 and is
2417     * set to 1 only after a dequeue from the proc queue has failed (and only
     * on the thread whose context has i4_id 0) */
2418    WORD32 is_blocking = 0;
2419
2420    /* set affinity, passing the i4_id of the context this thread started with */
2421    ithread_set_affinity(ps_proc->i4_id);
2422
2423    while(1)
2424    {
2425        /* dequeue a job from the entropy queue; this is attempted before the
         * process queue on every iteration */
2426        {
2427            int error = ithread_mutex_lock(ps_codec->pv_entropy_mutex);
2428
2429            /* codec context selector : low bit of the encode api call count */
2430            WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1;
2431
2432            volatile UWORD32 *pu4_buf = &ps_codec->au4_entropy_thread_active[ctxt_sel]; /* only dereferenced below while the mutex is held */
2433
2434            /* have the lock (lock call returned 0); on a non-zero return the
             * entropy queue is skipped for this iteration and no unlock is done */
2435            if (error == 0)
2436            {
2437                if (*pu4_buf == 0)
2438                {
2439                    /* no entropy threads are active, try dequeuing a job from the entropy queue */
2440                    ret = ih264_list_dequeue(ps_proc->pv_entropy_jobq, &s_job, is_blocking);
2441                    if (IH264_SUCCESS == ret)
2442                    {
2443                        *pu4_buf = 1; /* mark entropy as active before the mutex is released. NOTE(review): the flag is not cleared in this function - presumably done by the entropy path; confirm */
2444                        ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2445                        goto WORKER;
2446                    }
2447                    else if(is_blocking) /* dequeue made with is_blocking set has failed : leave the thread loop */
2448                    {
2449                        ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2450                        break;
2451                    }
2452                }
2453                ithread_mutex_unlock(ps_codec->pv_entropy_mutex); /* reached when the flag was already set, or the dequeue failed with is_blocking == 0 */
2454            }
2455        }
2456
2457        /* dequeue a job from the process queue (third argument is 1, the
         * position where the entropy dequeue above passes is_blocking).
         * On failure a thread with non-zero i4_id leaves the loop, while the
         * thread with i4_id 0 sets is_blocking and goes back to the entropy
         * queue */
2458        ret = ih264_list_dequeue(ps_proc->pv_proc_jobq, &s_job, 1);
2459        if (IH264_SUCCESS != ret)
2460        {
2461            if(ps_proc->i4_id)
2462                break;
2463            else
2464            {
2465                is_blocking = 1;
2466                continue;
2467            }
2468        }
2469
2470WORKER:
2471        /* choose appropriate proc context based on proc_base_idx.
         * ps_proc itself is overwritten, so the i4_id read here on later
         * iterations, the queue handles used above and the i4_error_code store
         * after the loop all refer to the context picked for the latest job.
         * NOTE(review): this presumably relies on every context selected this
         * way carrying the same i4_id as the one the thread started with -
         * confirm against the context initialisation */
2472        ps_proc = &ps_codec->as_process[ps_proc->i4_id + s_job.i2_proc_base_idx];
2473
2474        switch (s_job.i4_cmd)
2475        {
2476            case CMD_PROCESS:
                /* copy the mb count and start position of the job into the proc context */
2477                ps_proc->i4_mb_cnt = s_job.i2_mb_cnt;
2478                ps_proc->i4_mb_x = s_job.i2_mb_x;
2479                ps_proc->i4_mb_y = s_job.i2_mb_y;
2480
2481                /* init process context */
2482                ih264e_init_proc_ctxt(ps_proc);
2483
2484                /* core code all mbs enlisted under the current job;
                 * the return value is OR-ed into the running error status */
2485                error_status |= ih264e_process(ps_proc);
2486                break;
2487
2488            case CMD_ENTROPY:
                /* copy the start position and mb count of the job into the entropy context */
2489                ps_proc->s_entropy.i4_mb_x = s_job.i2_mb_x;
2490                ps_proc->s_entropy.i4_mb_y = s_job.i2_mb_y;
2491                ps_proc->s_entropy.i4_mb_cnt = s_job.i2_mb_cnt;
2492
2493                /* init entropy */
2494                ih264e_init_entropy_ctxt(ps_proc);
2495
2496                /* entropy code all mbs enlisted under the current job;
                 * the return value is OR-ed into the running error status */
2497                error_status |= ih264e_entropy(ps_proc);
2498                break;
2499
2500            default:
                /* command is neither CMD_PROCESS nor CMD_ENTROPY : record a failure and keep looping */
2501                error_status |= IH264_FAIL;
2502                break;
2503        }
2504    }
2505
2506    /* send error code : stored in the context ps_proc points to at loop exit;
     * the function itself returns the failing dequeue status held in ret */
2507    ps_proc->i4_error_code = error_status;
2508    return ret;
2509}
2510