1/******************************************************************************
2*
3* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4*
5* Licensed under the Apache License, Version 2.0 (the "License");
6* you may not use this file except in compliance with the License.
7* You may obtain a copy of the License at:
8*
9* http://www.apache.org/licenses/LICENSE-2.0
10*
11* Unless required by applicable law or agreed to in writing, software
12* distributed under the License is distributed on an "AS IS" BASIS,
13* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14* See the License for the specific language governing permissions and
15* limitations under the License.
16*
17******************************************************************************/
18/**
19 *******************************************************************************
20 * @file
21 *  ihevc_inter_pred.c
22 *
23 * @brief
 *  Calculates the prediction samples for a given CTB
25 *
26 * @author
27 *  Srinivas T
28 *
29 * @par List of Functions:
 *   - ihevcd_inter_pred_ctb()
31 *
32 * @remarks
33 *  None
34 *
35 *******************************************************************************
36 */
37#include <stdio.h>
38#include <stddef.h>
39#include <stdlib.h>
40#include <string.h>
41#include <assert.h>
42
43#include "ihevc_typedefs.h"
44#include "iv.h"
45#include "ivd.h"
46#include "ihevcd_cxa.h"
47#include "ithread.h"
48
49#include "ihevc_defs.h"
50#include "ihevc_debug.h"
51#include "ihevc_structs.h"
52#include "ihevc_macros.h"
53#include "ihevc_platform_macros.h"
54#include "ihevc_cabac_tables.h"
55#include "ihevc_weighted_pred.h"
56
57#include "ihevc_error.h"
58#include "ihevc_common_tables.h"
59
60#include "ihevcd_trace.h"
61#include "ihevcd_defs.h"
62#include "ihevcd_function_selector.h"
63#include "ihevcd_structs.h"
64#include "ihevcd_error.h"
65#include "ihevcd_nal.h"
66#include "ihevcd_bitstream.h"
67#include "ihevcd_job_queue.h"
68#include "ihevcd_utils.h"
69
70#include "ihevc_inter_pred.h"
71#include "ihevcd_profile.h"
72
73WORD8 luma_filter[4][NTAPS_LUMA] =
74{
75    { 0, 0, 0, 64, 0, 0, 0, 0 },
76    { -1, 4, -10, 58, 17, -5, 1, 0 },
77    { -1, 4, -11, 40, 40, -11, 4, -1 },
78    { 0, 1, -5, 17, 58, -10, 4, -1 } };
79
80/* The filter uses only the first four elements in each array */
81WORD8 chroma_filter[8][NTAPS_LUMA] =
82{
83    { 0, 64, 0, 0, 0, 0, 0, 0 },
84    { -2, 58, 10, -2, 0, 0, 0, 0 },
85    { -4, 54, 16, -2, 0, 0, 0, 0 },
86    { -6, 46, 28, -4, 0, 0, 0, 0 },
87    { -4, 36, 36, -4, 0, 0, 0, 0 },
88    { -4, 28, 46, -6, 0, 0, 0, 0 },
89    { -2, 16, 54, -4, 0, 0, 0, 0 },
90    { -2, 10, 58, -2, 0, 0, 0, 0 } };
91
92/**
93*******************************************************************************
94*
95* @brief
96*  Inter prediction CTB level function
97*
98* @par Description:
99*  For a given CTB, Inter prediction followed by weighted  prediction is
100* done for all the PUs present in the CTB
101*
102* @param[in] ps_ctb
103*  Pointer to the CTB context
104*
105* @returns
106*
107* @remarks
108*
109*
110*******************************************************************************
111*/
112
113void ihevcd_inter_pred_ctb(process_ctxt_t *ps_proc)
114{
115    UWORD8 *ref_pic_luma_l0, *ref_pic_chroma_l0;
116    UWORD8 *ref_pic_luma_l1, *ref_pic_chroma_l1;
117
118    UWORD8 *ref_pic_l0 = NULL, *ref_pic_l1 = NULL;
119
120    slice_header_t *ps_slice_hdr;
121    sps_t *ps_sps;
122    pps_t *ps_pps;
123    pu_t *ps_pu;
124    codec_t *ps_codec;
125    WORD32 pu_indx;
126    WORD32 pu_x, pu_y;
127    WORD32 pu_wd, pu_ht;
128    WORD32 i4_pu_cnt;
129    WORD32 cur_ctb_idx;
130
131    WORD32 clr_indx;
132    WORD32 ntaps;
133
134
135
136    WORD32 ai2_xint[2] = { 0, 0 }, ai2_yint[2] = { 0, 0 };
137    WORD32 ai2_xfrac[2] = { 0, 0 }, ai2_yfrac[2] = { 0, 0 };
138
139    WORD32 weighted_pred, bi_pred;
140
141    WORD32 ref_strd;
142    UWORD8 *pu1_dst_luma, *pu1_dst_chroma;
143
144    UWORD8 *pu1_dst;
145
146    WORD16 *pi2_tmp1, *pi2_tmp2;
147
148    WORD32 luma_weight_l0, luma_weight_l1;
149    WORD32 chroma_weight_l0_cb, chroma_weight_l1_cb, chroma_weight_l0_cr, chroma_weight_l1_cr;
150    WORD32 luma_offset_l0, luma_offset_l1;
151    WORD32 chroma_offset_l0_cb, chroma_offset_l1_cb, chroma_offset_l0_cr, chroma_offset_l1_cr;
152    WORD32 shift, lvl_shift1, lvl_shift2;
153
154    pf_inter_pred func_ptr1, func_ptr2, func_ptr3, func_ptr4;
155    WORD32 func_indx1, func_indx2, func_indx3, func_indx4;
156    void *func_src;
157    void *func_dst;
158    WORD32 func_src_strd;
159    WORD32 func_dst_strd;
160    WORD8 *func_coeff;
161    WORD32 func_wd;
162    WORD32 func_ht;
163    WORD32 next_ctb_idx;
164    WORD8(*coeff)[8];
165    WORD32  chroma_yuv420sp_vu;
166
167    PROFILE_DISABLE_INTER_PRED();
168    ps_codec = ps_proc->ps_codec;
169    ps_slice_hdr = ps_proc->ps_slice_hdr;
170    ps_pps = ps_proc->ps_pps;
171    ps_sps = ps_proc->ps_sps;
172    cur_ctb_idx = ps_proc->i4_ctb_x
173                    + ps_proc->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb);
174    /*
175     * In case of tiles, the next ctb belonging to the same tile must be used to get the PU index
176     */
177
178    next_ctb_idx = ps_proc->i4_next_pu_ctb_cnt;
179    i4_pu_cnt = ps_proc->pu4_pic_pu_idx[next_ctb_idx] - ps_proc->pu4_pic_pu_idx[cur_ctb_idx];
180
181    ps_pu = ps_proc->ps_pu;
182    ref_strd = ps_codec->i4_strd;
183    pi2_tmp1 = ps_proc->pi2_inter_pred_tmp_buf1;
184    pi2_tmp2 = ps_proc->pi2_inter_pred_tmp_buf2;
185    pu1_dst_luma = ps_proc->pu1_cur_pic_luma;
186    pu1_dst_chroma = ps_proc->pu1_cur_pic_chroma;
187
188    chroma_yuv420sp_vu = (ps_codec->e_ref_chroma_fmt == IV_YUV_420SP_VU);
189
190    ASSERT(PSLICE == ps_slice_hdr->i1_slice_type || BSLICE == ps_slice_hdr->i1_slice_type);
191
192    ref_pic_luma_l0 = NULL;
193    ref_pic_chroma_l0 = NULL;
194
195    luma_weight_l0 = 0;
196    chroma_weight_l0_cb = 0;
197    chroma_weight_l0_cr = 0;
198
199    luma_offset_l0 = 0;
200    chroma_offset_l0_cb = 0;
201    chroma_offset_l0_cr = 0;
202
203    ref_pic_luma_l1 = NULL;
204    ref_pic_chroma_l1 = NULL;
205
206    luma_weight_l1 = 0;
207    chroma_weight_l1_cb = 0;
208    chroma_weight_l1_cr = 0;
209
210    luma_offset_l1 = 0;
211    chroma_offset_l1_cb = 0;
212    chroma_offset_l1_cr = 0;
213
214    for(pu_indx = 0; pu_indx < i4_pu_cnt; pu_indx++, ps_pu++)
215    {
216        /* If the PU is intra then proceed to the next */
217        if(1 == ps_pu->b1_intra_flag)
218            continue;
219        pu_x = (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size) + (ps_pu->b4_pos_x << 2);
220        pu_y = (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size) + (ps_pu->b4_pos_y << 2);
221
222        pu_wd = (ps_pu->b4_wd + 1) << 2;
223        pu_ht = (ps_pu->b4_ht + 1) << 2;
224
225        weighted_pred = (ps_slice_hdr->i1_slice_type == PSLICE) ? ps_pps->i1_weighted_pred_flag :
226                        ps_pps->i1_weighted_bipred_flag;
227        bi_pred = (ps_pu->b2_pred_mode == PRED_BI);
228
229        if(ps_pu->b2_pred_mode != PRED_L1)
230        {
231            pic_buf_t *ps_pic_buf_l0;
232
233            ps_pic_buf_l0 = (pic_buf_t *)((ps_slice_hdr->as_ref_pic_list0[ps_pu->mv.i1_l0_ref_idx].pv_pic_buf));
234
235            ref_pic_luma_l0 = ps_pic_buf_l0->pu1_luma;
236            ref_pic_chroma_l0 = ps_pic_buf_l0->pu1_chroma;
237
238            luma_weight_l0 = ps_slice_hdr->s_wt_ofst.i2_luma_weight_l0[ps_pu->mv.i1_l0_ref_idx];
239            chroma_weight_l0_cb = ps_slice_hdr->s_wt_ofst.i2_chroma_weight_l0_cb[ps_pu->mv.i1_l0_ref_idx];
240            chroma_weight_l0_cr = ps_slice_hdr->s_wt_ofst.i2_chroma_weight_l0_cr[ps_pu->mv.i1_l0_ref_idx];
241
242            luma_offset_l0 = ps_slice_hdr->s_wt_ofst.i2_luma_offset_l0[ps_pu->mv.i1_l0_ref_idx];
243            chroma_offset_l0_cb = ps_slice_hdr->s_wt_ofst.i2_chroma_offset_l0_cb[ps_pu->mv.i1_l0_ref_idx];
244            chroma_offset_l0_cr = ps_slice_hdr->s_wt_ofst.i2_chroma_offset_l0_cr[ps_pu->mv.i1_l0_ref_idx];
245        }
246
247        if(ps_pu->b2_pred_mode != PRED_L0)
248        {
249            pic_buf_t *ps_pic_buf_l1;
250            ps_pic_buf_l1 = (pic_buf_t *)((ps_slice_hdr->as_ref_pic_list1[ps_pu->mv.i1_l1_ref_idx].pv_pic_buf));
251            ref_pic_luma_l1 = ps_pic_buf_l1->pu1_luma;
252            ref_pic_chroma_l1 = ps_pic_buf_l1->pu1_chroma;
253
254            luma_weight_l1 = ps_slice_hdr->s_wt_ofst.i2_luma_weight_l1[ps_pu->mv.i1_l1_ref_idx];
255            chroma_weight_l1_cb = ps_slice_hdr->s_wt_ofst.i2_chroma_weight_l1_cb[ps_pu->mv.i1_l1_ref_idx];
256            chroma_weight_l1_cr = ps_slice_hdr->s_wt_ofst.i2_chroma_weight_l1_cr[ps_pu->mv.i1_l1_ref_idx];
257
258            luma_offset_l1 = ps_slice_hdr->s_wt_ofst.i2_luma_offset_l1[ps_pu->mv.i1_l1_ref_idx];
259            chroma_offset_l1_cb = ps_slice_hdr->s_wt_ofst.i2_chroma_offset_l1_cb[ps_pu->mv.i1_l1_ref_idx];
260            chroma_offset_l1_cr = ps_slice_hdr->s_wt_ofst.i2_chroma_offset_l1_cr[ps_pu->mv.i1_l1_ref_idx];
261        }
262
263        /*luma and chroma components*/
264        for(clr_indx = 0; clr_indx < 2; clr_indx++)
265        {
266            PROFILE_DISABLE_INTER_PRED_LUMA(clr_indx);
267            PROFILE_DISABLE_INTER_PRED_CHROMA(clr_indx);
268
269            if(clr_indx == 0)
270            {
271                WORD32 mv;
272                if(ps_pu->b2_pred_mode != PRED_L1)
273                {
274                    mv = CLIP3(ps_pu->mv.s_l0_mv.i2_mvx, (-((MAX_CTB_SIZE + pu_x + 7) << 2)), ((ps_sps->i2_pic_width_in_luma_samples - pu_x + 7) << 2));
275                    ai2_xint[0] = pu_x + (mv >> 2);
276                    ai2_xfrac[0] = mv & 3;
277
278                    mv = CLIP3(ps_pu->mv.s_l0_mv.i2_mvy, (-((MAX_CTB_SIZE + pu_y + 7) << 2)), ((ps_sps->i2_pic_height_in_luma_samples - pu_y + 7) << 2));
279                    ai2_yint[0] = pu_y + (mv >> 2);
280                    ai2_yfrac[0] = mv & 3;
281
282                    ai2_xfrac[0] &= ps_codec->i4_mv_frac_mask;
283                    ai2_yfrac[0] &= ps_codec->i4_mv_frac_mask;
284
285
286                    ref_pic_l0 = ref_pic_luma_l0 + ai2_yint[0] * ref_strd
287                                    + ai2_xint[0];
288                }
289
290                if(ps_pu->b2_pred_mode != PRED_L0)
291                {
292
293                    mv = CLIP3(ps_pu->mv.s_l1_mv.i2_mvx, (-((MAX_CTB_SIZE + pu_x + 7) << 2)), ((ps_sps->i2_pic_width_in_luma_samples - pu_x + 7) << 2));
294                    ai2_xint[1] = pu_x + (mv >> 2);
295                    ai2_xfrac[1] = mv & 3;
296
297                    mv = CLIP3(ps_pu->mv.s_l1_mv.i2_mvy, (-((MAX_CTB_SIZE + pu_y + 7) << 2)), ((ps_sps->i2_pic_height_in_luma_samples - pu_y + 7) << 2));
298                    ai2_yint[1] = pu_y + (mv >> 2);
299                    ai2_yfrac[1] = mv & 3;
300
301                    ref_pic_l1 = ref_pic_luma_l1 + ai2_yint[1] * ref_strd
302                                    + ai2_xint[1];
303                    ai2_xfrac[1] &= ps_codec->i4_mv_frac_mask;
304                    ai2_yfrac[1] &= ps_codec->i4_mv_frac_mask;
305
306                }
307
308                pu1_dst = pu1_dst_luma + pu_y * ref_strd + pu_x;
309
310                ntaps = NTAPS_LUMA;
311                coeff = luma_filter;
312            }
313
314            else
315            {
316                WORD32 mv;
317                /* xint is upshifted by 1 because the chroma components are  */
318                /* interleaved which is not the assumption made by standard  */
319                if(ps_pu->b2_pred_mode != PRED_L1)
320                {
321                    mv = CLIP3(ps_pu->mv.s_l0_mv.i2_mvx, (-((MAX_CTB_SIZE + pu_x + 7) << 2)), ((ps_sps->i2_pic_width_in_luma_samples - pu_x + 7) << 2));
322                    ai2_xint[0] = (pu_x / 2 + (mv >> 3)) << 1;
323                    ai2_xfrac[0] = mv & 7;
324
325                    mv = CLIP3(ps_pu->mv.s_l0_mv.i2_mvy, (-((MAX_CTB_SIZE + pu_y + 7) << 2)), ((ps_sps->i2_pic_height_in_luma_samples - pu_y + 7) << 2));
326                    ai2_yint[0] = pu_y / 2 + (mv >> 3);
327                    ai2_yfrac[0] = mv & 7;
328
329                    ref_pic_l0 = ref_pic_chroma_l0 + ai2_yint[0] * ref_strd
330                                    + ai2_xint[0];
331
332                    ai2_xfrac[0] &= ps_codec->i4_mv_frac_mask;
333                    ai2_yfrac[0] &= ps_codec->i4_mv_frac_mask;
334
335                }
336
337                if(ps_pu->b2_pred_mode != PRED_L0)
338                {
339                    mv = CLIP3(ps_pu->mv.s_l1_mv.i2_mvx, (-((MAX_CTB_SIZE + pu_x + 7) << 2)), ((ps_sps->i2_pic_width_in_luma_samples - pu_x + 7) << 2));
340                    ai2_xint[1] = (pu_x / 2 + (mv >> 3)) << 1;
341                    ai2_xfrac[1] = mv & 7;
342
343                    mv = CLIP3(ps_pu->mv.s_l1_mv.i2_mvy, (-((MAX_CTB_SIZE + pu_y + 7) << 2)), ((ps_sps->i2_pic_height_in_luma_samples - pu_y + 7) << 2));
344                    ai2_yint[1] = pu_y / 2 + (mv >> 3);
345                    ai2_yfrac[1] = mv & 7;
346
347                    ref_pic_l1 = ref_pic_chroma_l1 + ai2_yint[1] * ref_strd
348                                    + ai2_xint[1];
349                    ai2_xfrac[1] &= ps_codec->i4_mv_frac_mask;
350                    ai2_yfrac[1] &= ps_codec->i4_mv_frac_mask;
351
352                }
353
354                pu1_dst = pu1_dst_chroma + pu_y * ref_strd / 2 + pu_x;
355
356                ntaps = NTAPS_CHROMA;
357                coeff = chroma_filter;
358            }
359
360            if(ps_pu->b2_pred_mode != PRED_L1)
361            {
362                func_indx1 = 4 * (weighted_pred || bi_pred) + 1 + 11 * clr_indx;
363                func_indx1 += ai2_xfrac[0] ? 2 : 0;
364                func_indx1 += ai2_yfrac[0] ? 1 : 0;
365
366                func_indx2 = (ai2_xfrac[0] && ai2_yfrac[0])
367                                * (9 + (weighted_pred || bi_pred)) + 11 * clr_indx;
368
369                func_ptr1 = ps_codec->apf_inter_pred[func_indx1];
370                func_ptr2 = ps_codec->apf_inter_pred[func_indx2];
371            }
372            else
373            {
374                func_ptr1 = NULL;
375                func_ptr2 = NULL;
376            }
377            if(ps_pu->b2_pred_mode != PRED_L0)
378            {
379                func_indx3 = 4 * (weighted_pred || bi_pred) + 1 + 11 * clr_indx;
380                func_indx3 += ai2_xfrac[1] ? 2 : 0;
381                func_indx3 += ai2_yfrac[1] ? 1 : 0;
382
383                func_indx4 = (ai2_xfrac[1] && ai2_yfrac[1])
384                                * (9 + (weighted_pred || bi_pred)) + 11 * clr_indx;
385
386                func_ptr3 = ps_codec->apf_inter_pred[func_indx3];
387                func_ptr4 = ps_codec->apf_inter_pred[func_indx4];
388            }
389            else
390            {
391                func_ptr3 = NULL;
392                func_ptr4 = NULL;
393            }
394
395            /*Function 1*/
396            if(func_ptr1 != NULL)
397            {
398                func_src_strd = ref_strd;
399                func_src = (ai2_xfrac[0] && ai2_yfrac[0]) ?
400                                ref_pic_l0 - (ntaps / 2 - 1) * func_src_strd :
401                                ref_pic_l0;
402                func_dst = (weighted_pred || bi_pred) ?
403                                (void *)pi2_tmp1 : (void *)pu1_dst;
404                if(ai2_xfrac[0] && ai2_yfrac[0])
405                {
406                    func_dst = pi2_tmp1;
407                }
408
409                func_dst_strd = (weighted_pred || bi_pred
410                                || (ai2_xfrac[0] && ai2_yfrac[0])) ?
411                                pu_wd : ref_strd;
412                func_coeff = ai2_xfrac[0] ?
413                                coeff[ai2_xfrac[0]] : coeff[ai2_yfrac[0]];
414                func_wd = pu_wd >> clr_indx;
415                func_ht = pu_ht >> clr_indx;
416                func_ht += (ai2_xfrac[0] && ai2_yfrac[0]) ? ntaps - 1 : 0;
417                func_ptr1(func_src, func_dst, func_src_strd, func_dst_strd,
418                          func_coeff, func_ht, func_wd);
419            }
420
421            /*Function 2*/
422            if(func_ptr2 != NULL)
423            {
424                func_src_strd = pu_wd;
425                func_src = pi2_tmp1 + (ntaps / 2 - 1) * func_src_strd;
426                func_dst = (weighted_pred || bi_pred) ?
427                                (void *)pi2_tmp1 : (void *)pu1_dst;
428
429                func_dst_strd = (weighted_pred || bi_pred) ?
430                                pu_wd : ref_strd;
431                func_coeff = coeff[ai2_yfrac[0]];
432                func_wd = pu_wd >> clr_indx;
433                func_ht = pu_ht >> clr_indx;
434                func_ptr2(func_src, func_dst, func_src_strd, func_dst_strd,
435                          func_coeff, func_ht, func_wd);
436            }
437
438            if(func_ptr3 != NULL)
439            {
440                func_src_strd = ref_strd;
441                func_src = (ai2_xfrac[1] && ai2_yfrac[1]) ?
442                                ref_pic_l1 - (ntaps / 2 - 1) * func_src_strd :
443                                ref_pic_l1;
444
445                func_dst = (weighted_pred || bi_pred) ?
446                                (void *)pi2_tmp2 : (void *)pu1_dst;
447                if(ai2_xfrac[1] && ai2_yfrac[1])
448                {
449                    func_dst = pi2_tmp2;
450                }
451                func_dst_strd = (weighted_pred || bi_pred
452                                || (ai2_xfrac[1] && ai2_yfrac[1])) ?
453                                pu_wd : ref_strd;
454                func_coeff = ai2_xfrac[1] ?
455                                coeff[ai2_xfrac[1]] : coeff[ai2_yfrac[1]];
456                func_wd = pu_wd >> clr_indx;
457                func_ht = pu_ht >> clr_indx;
458                func_ht += (ai2_xfrac[1] && ai2_yfrac[1]) ? ntaps - 1 : 0;
459                func_ptr3(func_src, func_dst, func_src_strd, func_dst_strd,
460                          func_coeff, func_ht, func_wd);
461
462            }
463
464            if(func_ptr4 != NULL)
465            {
466                func_src_strd = pu_wd;
467                func_src = pi2_tmp2 + (ntaps / 2 - 1) * func_src_strd;
468
469                func_dst = (weighted_pred || bi_pred) ?
470                                (void *)pi2_tmp2 : (void *)pu1_dst;
471                func_dst_strd = (weighted_pred || bi_pred) ?
472                                pu_wd : ref_strd;
473                func_coeff = coeff[ai2_yfrac[1]];
474                func_wd = pu_wd >> clr_indx;
475                func_ht = pu_ht >> clr_indx;
476                func_ptr4(func_src, func_dst, func_src_strd, func_dst_strd,
477                          func_coeff, func_ht, func_wd);
478
479            }
480
481            PROFILE_DISABLE_INTER_PRED_LUMA_AVERAGING(clr_indx);
482            PROFILE_DISABLE_INTER_PRED_CHROMA_AVERAGING(clr_indx);
483
484
485            if((weighted_pred != 0) && (bi_pred != 0))
486            {
487                lvl_shift1 = 0;
488                lvl_shift2 = 0;
489                if((0 == clr_indx) && (ai2_xfrac[0] && ai2_yfrac[0]))
490                    lvl_shift1 = (1 << 13);
491
492                if((0 == clr_indx) && (ai2_xfrac[1] && ai2_yfrac[1]))
493                    lvl_shift2 = (1 << 13);
494
495
496                if(0 == clr_indx)
497                {
498                    shift = ps_slice_hdr->s_wt_ofst.i1_luma_log2_weight_denom
499                                    + SHIFT_14_MINUS_BIT_DEPTH + 1;
500
501                    ps_codec->s_func_selector.ihevc_weighted_pred_bi_fptr(pi2_tmp1,
502                                                                          pi2_tmp2,
503                                                                          pu1_dst,
504                                                                          pu_wd,
505                                                                          pu_wd,
506                                                                          ref_strd,
507                                                                          luma_weight_l0,
508                                                                          luma_offset_l0,
509                                                                          luma_weight_l1,
510                                                                          luma_offset_l1,
511                                                                          shift,
512                                                                          lvl_shift1,
513                                                                          lvl_shift2,
514                                                                          pu_ht,
515                                                                          pu_wd);
516                }
517                else
518                {
519                    shift = ps_slice_hdr->s_wt_ofst.i1_chroma_log2_weight_denom
520                                    + SHIFT_14_MINUS_BIT_DEPTH + 1;
521
522                    if(chroma_yuv420sp_vu)
523                    {
524                        ps_codec->s_func_selector.ihevc_weighted_pred_chroma_bi_fptr(pi2_tmp1,
525                                                                                     pi2_tmp2,
526                                                                                     pu1_dst,
527                                                                                     pu_wd,
528                                                                                     pu_wd,
529                                                                                     ref_strd,
530                                                                                     chroma_weight_l0_cr,
531                                                                                     chroma_weight_l0_cb,
532                                                                                     chroma_offset_l0_cr,
533                                                                                     chroma_offset_l0_cb,
534                                                                                     chroma_weight_l1_cr,
535                                                                                     chroma_weight_l1_cb,
536                                                                                     chroma_offset_l1_cr,
537                                                                                     chroma_offset_l1_cb,
538                                                                                     shift,
539                                                                                     lvl_shift1,
540                                                                                     lvl_shift2,
541                                                                                     pu_ht >> 1,
542                                                                                     pu_wd >> 1);
543                    }
544                    else
545                    {
546                        ps_codec->s_func_selector.ihevc_weighted_pred_chroma_bi_fptr(pi2_tmp1,
547                                                                                     pi2_tmp2,
548                                                                                     pu1_dst,
549                                                                                     pu_wd,
550                                                                                     pu_wd,
551                                                                                     ref_strd,
552                                                                                     chroma_weight_l0_cb,
553                                                                                     chroma_weight_l0_cr,
554                                                                                     chroma_offset_l0_cb,
555                                                                                     chroma_offset_l0_cr,
556                                                                                     chroma_weight_l1_cb,
557                                                                                     chroma_weight_l1_cr,
558                                                                                     chroma_offset_l1_cb,
559                                                                                     chroma_offset_l1_cr,
560                                                                                     shift,
561                                                                                     lvl_shift1,
562                                                                                     lvl_shift2,
563                                                                                     pu_ht >> 1,
564                                                                                     pu_wd >> 1);
565                    }
566                }
567            }
568
569            else if((weighted_pred != 0) && (bi_pred == 0))
570            {
571                lvl_shift1 = 0;
572                if(ps_pu->b2_pred_mode == PRED_L0)
573                {
574                    if((0 == clr_indx) && (ai2_xfrac[0] && ai2_yfrac[0]))
575                        lvl_shift1 = (1 << 13);
576                }
577                else
578                {
579                    if((0 == clr_indx) && (ai2_xfrac[1] && ai2_yfrac[1]))
580                        lvl_shift1 = (1 << 13);
581                }
582
583                if(0 == clr_indx)
584                {
585                    shift = ps_slice_hdr->s_wt_ofst.i1_luma_log2_weight_denom
586                                    + SHIFT_14_MINUS_BIT_DEPTH;
587
588                    ps_codec->s_func_selector.ihevc_weighted_pred_uni_fptr(ps_pu->b2_pred_mode == PRED_L0 ? pi2_tmp1 : pi2_tmp2,
589                                                                           pu1_dst,
590                                                                           pu_wd,
591                                                                           ref_strd,
592                                                                           ps_pu->b2_pred_mode == PRED_L0 ? luma_weight_l0 : luma_weight_l1,
593                                                                           ps_pu->b2_pred_mode == PRED_L0 ? luma_offset_l0 : luma_offset_l1,
594                                                                           shift,
595                                                                           lvl_shift1,
596                                                                           pu_ht,
597                                                                           pu_wd);
598                }
599                else
600                {
601                    shift = ps_slice_hdr->s_wt_ofst.i1_chroma_log2_weight_denom
602                                    + SHIFT_14_MINUS_BIT_DEPTH;
603
604                    if(chroma_yuv420sp_vu)
605                    {
606                        ps_codec->s_func_selector.ihevc_weighted_pred_chroma_uni_fptr(ps_pu->b2_pred_mode == PRED_L0 ? pi2_tmp1 : pi2_tmp2,
607                                                                                      pu1_dst,
608                                                                                      pu_wd,
609                                                                                      ref_strd,
610                                                                                      ps_pu->b2_pred_mode == PRED_L0 ? chroma_weight_l0_cr : chroma_weight_l1_cr,
611                                                                                      ps_pu->b2_pred_mode == PRED_L0 ? chroma_weight_l0_cb : chroma_weight_l1_cb,
612                                                                                      ps_pu->b2_pred_mode == PRED_L0 ? chroma_offset_l0_cr : chroma_offset_l1_cr,
613                                                                                      ps_pu->b2_pred_mode == PRED_L0 ? chroma_offset_l0_cb : chroma_offset_l1_cb,
614                                                                                      shift,
615                                                                                      lvl_shift1,
616                                                                                      pu_ht >> 1,
617                                                                                      pu_wd >> 1);
618                    }
619                    else
620                    {
621                        ps_codec->s_func_selector.ihevc_weighted_pred_chroma_uni_fptr(ps_pu->b2_pred_mode == PRED_L0 ? pi2_tmp1 : pi2_tmp2,
622                                                                                      pu1_dst,
623                                                                                      pu_wd,
624                                                                                      ref_strd,
625                                                                                      ps_pu->b2_pred_mode == PRED_L0 ? chroma_weight_l0_cb : chroma_weight_l1_cb,
626                                                                                      ps_pu->b2_pred_mode == PRED_L0 ? chroma_weight_l0_cr : chroma_weight_l1_cr,
627                                                                                      ps_pu->b2_pred_mode == PRED_L0 ? chroma_offset_l0_cb : chroma_offset_l1_cb,
628                                                                                      ps_pu->b2_pred_mode == PRED_L0 ? chroma_offset_l0_cr : chroma_offset_l1_cr,
629                                                                                      shift,
630                                                                                      lvl_shift1,
631                                                                                      pu_ht >> 1,
632                                                                                      pu_wd >> 1);
633                    }
634                }
635            }
636
637            else if((weighted_pred == 0) && (bi_pred != 0))
638            {
639                lvl_shift1 = 0;
640                lvl_shift2 = 0;
641                if((0 == clr_indx) && (ai2_xfrac[0] && ai2_yfrac[0]))
642                    lvl_shift1 = (1 << 13);
643
644                if((0 == clr_indx) && (ai2_xfrac[1] && ai2_yfrac[1]))
645                    lvl_shift2 = (1 << 13);
646
647                if(clr_indx != 0)
648                {
649                    pu_ht = (pu_ht >> 1);
650                }
651                ps_codec->s_func_selector.ihevc_weighted_pred_bi_default_fptr(pi2_tmp1,
652                                                                              pi2_tmp2,
653                                                                              pu1_dst,
654                                                                              pu_wd,
655                                                                              pu_wd,
656                                                                              ref_strd,
657                                                                              lvl_shift1,
658                                                                              lvl_shift2,
659                                                                              pu_ht,
660                                                                              pu_wd);
661
662            }
663        }
664    }
665}
666