1/******************************************************************************
2*
3* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4*
5* Licensed under the Apache License, Version 2.0 (the "License");
6* you may not use this file except in compliance with the License.
7* You may obtain a copy of the License at:
8*
9* http://www.apache.org/licenses/LICENSE-2.0
10*
11* Unless required by applicable law or agreed to in writing, software
12* distributed under the License is distributed on an "AS IS" BASIS,
13* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14* See the License for the specific language governing permissions and
15* limitations under the License.
16*
17******************************************************************************/
18/**
19 *******************************************************************************
20 * @file
21 *  ihevcd_iquant_itrans_recon_ctb.c
22 *
23 * @brief
24 *  Contains functions for inverse quantization, inverse transform and recon
25 *
26 * @author
27 *  Ittiam
28 *
29 * @par List of Functions:
30 * - ihevcd_iquant_itrans_recon_ctb()
31 *
32 * @remarks
33 *  None
34 *
35 *******************************************************************************
36 */
37/*****************************************************************************/
38/* File Includes                                                             */
39/*****************************************************************************/
40#include <stdio.h>
41#include <stddef.h>
42#include <stdlib.h>
43#include <string.h>
44
45#include "ihevc_typedefs.h"
46#include "iv.h"
47#include "ivd.h"
48#include "ihevcd_cxa.h"
49
50#include "ihevc_defs.h"
51#include "ihevc_debug.h"
52#include "ihevc_structs.h"
53#include "ihevc_cabac_tables.h"
54#include "ihevc_macros.h"
55#include "ihevc_platform_macros.h"
56
57#include "ihevcd_defs.h"
58#include "ihevcd_function_selector.h"
59#include "ihevcd_structs.h"
60#include "ihevcd_error.h"
61#include "ihevcd_bitstream.h"
62#include "ihevc_common_tables.h"
63
64/* Intra pred includes */
65#include "ihevc_intra_pred.h"
66
67/* Inverse transform common module includes */
68#include "ihevc_trans_tables.h"
69#include "ihevc_trans_macros.h"
70#include "ihevc_itrans_recon.h"
71#include "ihevc_recon.h"
72#include "ihevc_chroma_itrans_recon.h"
73#include "ihevc_chroma_recon.h"
74
75/* Decoder includes */
76#include "ihevcd_common_tables.h"
77#include "ihevcd_iquant_itrans_recon_ctb.h"
78#include "ihevcd_debug.h"
79#include "ihevcd_profile.h"
80#include "ihevcd_statistics.h"
81#include "ihevcd_itrans_recon_dc.h"
82
83static const UWORD32 gau4_ihevcd_4_bit_reverse[] = { 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 };
84
85
86/* Globals */
87static const WORD32 g_i4_ip_funcs[MAX_NUM_IP_MODES] =
88  { IP_FUNC_MODE_0, /* Mode 0 */
89    IP_FUNC_MODE_1, /* Mode 1 */
90    IP_FUNC_MODE_2, /* Mode 2 */
91    IP_FUNC_MODE_3TO9, /* Mode 3 */
92    IP_FUNC_MODE_3TO9, /* Mode 4 */
93    IP_FUNC_MODE_3TO9, /* Mode 5 */
94    IP_FUNC_MODE_3TO9, /* Mode 6 */
95    IP_FUNC_MODE_3TO9, /* Mode 7 */
96    IP_FUNC_MODE_3TO9, /* Mode 8 */
97    IP_FUNC_MODE_3TO9, /* Mode 9 */
98    IP_FUNC_MODE_10, /* Mode 10 */
99    IP_FUNC_MODE_11TO17, /* Mode 11 */
100    IP_FUNC_MODE_11TO17, /* Mode 12 */
101    IP_FUNC_MODE_11TO17, /* Mode 13 */
102    IP_FUNC_MODE_11TO17, /* Mode 14 */
103    IP_FUNC_MODE_11TO17, /* Mode 15 */
104    IP_FUNC_MODE_11TO17, /* Mode 16 */
105    IP_FUNC_MODE_11TO17, /* Mode 17 */
106    IP_FUNC_MODE_18_34, /* Mode 18 */
107    IP_FUNC_MODE_19TO25, /* Mode 19 */
108    IP_FUNC_MODE_19TO25, /* Mode 20 */
109    IP_FUNC_MODE_19TO25, /* Mode 21 */
110    IP_FUNC_MODE_19TO25, /* Mode 22 */
111    IP_FUNC_MODE_19TO25, /* Mode 23 */
112    IP_FUNC_MODE_19TO25, /* Mode 24 */
113    IP_FUNC_MODE_19TO25, /* Mode 25 */
114    IP_FUNC_MODE_26, /* Mode 26 */
115    IP_FUNC_MODE_27TO33, /* Mode 27 */
116    IP_FUNC_MODE_27TO33, /* Mode 26 */
117    IP_FUNC_MODE_27TO33, /* Mode 29 */
118    IP_FUNC_MODE_27TO33, /* Mode 30 */
119    IP_FUNC_MODE_27TO33, /* Mode 31 */
120    IP_FUNC_MODE_27TO33, /* Mode 32 */
121    IP_FUNC_MODE_27TO33, /* Mode 33 */
122    IP_FUNC_MODE_18_34, /* Mode 34 */
123};
124
125
126const WORD16 *g_ai2_ihevc_trans_tables[] =
127  { &g_ai2_ihevc_trans_dst_4[0][0],
128    &g_ai2_ihevc_trans_4[0][0],
129    &g_ai2_ihevc_trans_8[0][0],
130    &g_ai2_ihevc_trans_16[0][0],
131    &g_ai2_ihevc_trans_32[0][0]
132};
133
134
135/*****************************************************************************/
136/* Function Prototypes                                                       */
137/*****************************************************************************/
138/* Returns number of ai2_level read from ps_sblk_coeff */
139UWORD8* ihevcd_unpack_coeffs(WORD16 *pi2_tu_coeff,
140                             WORD32 log2_trans_size,
141                             UWORD8 *pu1_tu_coeff_data,
142                             WORD16 *pi2_dequant_matrix,
143                             WORD32 qp_rem,
144                             WORD32 qp_div,
145                             TRANSFORM_TYPE e_trans_type,
146                             WORD32 trans_quant_bypass,
147                             UWORD32 *pu4_zero_cols,
148                             UWORD32 *pu4_zero_rows,
149                             UWORD32 *pu4_coeff_type,
150                             WORD16 *pi2_coeff_value)
151{
152    /* Generating coeffs from coeff-map */
153    WORD32 i;
154    WORD16 *pi2_sblk_ptr;
155    WORD32 subblk_pos_x, subblk_pos_y;
156    WORD32 sblk_scan_idx, coeff_raster_idx;
157    WORD32 sblk_non_zero_coeff_idx;
158    tu_sblk_coeff_data_t *ps_tu_sblk_coeff_data;
159    UWORD8 u1_num_coded_sblks, u1_scan_type;
160    UWORD8 *pu1_new_tu_coeff_data;
161    WORD32 trans_size;
162    WORD32 xs, ys;
163    WORD32 trans_skip;
164    WORD16 iquant_out;
165    WORD32 shift_iq;
166    {
167        WORD32 bit_depth;
168
169        bit_depth = 8 + 0;
170        shift_iq = bit_depth + log2_trans_size - 5;
171    }
172    trans_size = (1 << log2_trans_size);
173
174    /* First byte points to number of coded blocks */
175    u1_num_coded_sblks = *pu1_tu_coeff_data++;
176
177    /* Next byte points to scan type */
178    u1_scan_type = *pu1_tu_coeff_data++;
179    /* 0th bit has trans_skip */
180    trans_skip = u1_scan_type & 1;
181    u1_scan_type >>= 1;
182
183    pi2_sblk_ptr = pi2_tu_coeff;
184
185    /* Initially all columns are assumed to be zero */
186    *pu4_zero_cols = 0xFFFFFFFF;
187    /* Initially all rows are assumed to be zero */
188    *pu4_zero_rows = 0xFFFFFFFF;
189
190    ps_tu_sblk_coeff_data = (tu_sblk_coeff_data_t *)(pu1_tu_coeff_data);
191
192    if(trans_skip)
193        memset(pi2_tu_coeff, 0, trans_size * trans_size * sizeof(WORD16));
194
195    STATS_INIT_SBLK_AND_COEFF_POS();
196
197    /* DC only case */
198    if((e_trans_type != DST_4x4) && (1 == u1_num_coded_sblks)
199                    && (0 == ps_tu_sblk_coeff_data->u2_subblk_pos)
200                    && (1 == ps_tu_sblk_coeff_data->u2_sig_coeff_map))
201    {
202        *pu4_coeff_type = 1;
203
204        if(!trans_quant_bypass)
205        {
206            if(4 == trans_size)
207            {
208                IQUANT_4x4(iquant_out,
209                           ps_tu_sblk_coeff_data->ai2_level[0],
210                           pi2_dequant_matrix[0]
211                                           * g_ihevc_iquant_scales[qp_rem],
212                           shift_iq, qp_div);
213            }
214            else
215            {
216                IQUANT(iquant_out, ps_tu_sblk_coeff_data->ai2_level[0],
217                       pi2_dequant_matrix[0] * g_ihevc_iquant_scales[qp_rem],
218                       shift_iq, qp_div);
219            }
220            if(trans_skip)
221                iquant_out = (iquant_out + 16) >> 5;
222        }
223        else
224        {
225            /* setting the column to zero */
226            for(i = 0; i < trans_size; i++)
227                *(pi2_tu_coeff + i * trans_size) = 0;
228
229            iquant_out = ps_tu_sblk_coeff_data->ai2_level[0];
230        }
231        *pi2_coeff_value = iquant_out;
232        *pi2_tu_coeff = iquant_out;
233        *pu4_zero_cols &= ~0x1;
234        *pu4_zero_rows &= ~0x1;
235        ps_tu_sblk_coeff_data =
236                        (void *)&ps_tu_sblk_coeff_data->ai2_level[1];
237
238        STATS_UPDATE_COEFF_COUNT();
239        STATS_LAST_SBLK_POS_UPDATE(e_trans_type, (trans_skip || trans_quant_bypass),  0, 0);
240        STATS_UPDATE_SBLK_AND_COEFF_HISTOGRAM(e_trans_type, (trans_quant_bypass || trans_skip));
241        return ((UWORD8 *)ps_tu_sblk_coeff_data);
242    }
243    else
244    {
245        *pu4_coeff_type = 0;
246        /* In case of trans skip, memset has already happened */
247        if(!trans_skip)
248            memset(pi2_tu_coeff, 0, trans_size * trans_size * sizeof(WORD16));
249    }
250
251    for(i = 0; i < u1_num_coded_sblks; i++)
252    {
253        UWORD32 u4_sig_coeff_map;
254        subblk_pos_x = ps_tu_sblk_coeff_data->u2_subblk_pos & 0x00FF;
255        subblk_pos_y = (ps_tu_sblk_coeff_data->u2_subblk_pos & 0xFF00) >> 8;
256
257        STATS_LAST_SBLK_POS_UPDATE(e_trans_type, (trans_skip || trans_quant_bypass), subblk_pos_x, subblk_pos_y);
258
259        subblk_pos_x = subblk_pos_x * MIN_TU_SIZE;
260        subblk_pos_y = subblk_pos_y * MIN_TU_SIZE;
261
262        pi2_sblk_ptr = pi2_tu_coeff + subblk_pos_y * trans_size
263                        + subblk_pos_x;
264
265        //*pu4_zero_cols &= ~(0xF << subblk_pos_x);
266
267        sblk_non_zero_coeff_idx = 0;
268        u4_sig_coeff_map = ps_tu_sblk_coeff_data->u2_sig_coeff_map;
269        //for(sblk_scan_idx = (31 - CLZ(u4_sig_coeff_map)); sblk_scan_idx >= 0; sblk_scan_idx--)
270        sblk_scan_idx = 31;
271        do
272        {
273            WORD32 clz = CLZ(u4_sig_coeff_map);
274
275            sblk_scan_idx -= clz;
276            /* when clz is 31, u4_sig_coeff_map << (clz+1) might result in unknown behaviour in some cases */
277            /* Hence either use SHL which takes care of handling these issues based on platform or shift in two stages */
278            u4_sig_coeff_map = u4_sig_coeff_map << clz;
279            /* Copying coeffs and storing in reverse order */
280            {
281                STATS_UPDATE_COEFF_COUNT();
282                coeff_raster_idx =
283                                gau1_ihevc_invscan4x4[u1_scan_type][sblk_scan_idx];
284
285                xs = coeff_raster_idx & 0x3;
286                ys = coeff_raster_idx >> 2;
287
288                if(!trans_quant_bypass)
289                {
290                    if(4 == trans_size)
291                    {
292                        IQUANT_4x4(iquant_out,
293                                   ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx],
294                                   pi2_dequant_matrix[(subblk_pos_x + xs)
295                                                   + (subblk_pos_y + ys)
296                                                   * trans_size]
297                                   * g_ihevc_iquant_scales[qp_rem],
298                                   shift_iq, qp_div);
299                        sblk_non_zero_coeff_idx++;
300                    }
301                    else
302                    {
303                        IQUANT(iquant_out,
304                               ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx],
305                               pi2_dequant_matrix[(subblk_pos_x + xs)
306                                               + (subblk_pos_y + ys)
307                                               * trans_size]
308                               * g_ihevc_iquant_scales[qp_rem],
309                               shift_iq, qp_div);
310                        sblk_non_zero_coeff_idx++;
311                    }
312
313                    if(trans_skip)
314                        iquant_out = (iquant_out + 16) >> 5;
315                }
316                else
317                {
318                    iquant_out = ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx++];
319                }
320                *pu4_zero_cols &= ~(0x1 << (subblk_pos_x + xs));
321                *pu4_zero_rows &= ~(0x1 << (subblk_pos_y + ys));
322                *(pi2_sblk_ptr + xs + ys * trans_size) = iquant_out;
323            }
324            sblk_scan_idx--;
325            u4_sig_coeff_map <<= 1;
326
327        }while(u4_sig_coeff_map);
328        /* Updating the sblk pointer */
329        ps_tu_sblk_coeff_data =
330                        (void *)&ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx];
331    }
332
333    STATS_UPDATE_SBLK_AND_COEFF_HISTOGRAM(e_trans_type, (trans_quant_bypass || trans_skip));
334
335    pu1_new_tu_coeff_data = (UWORD8 *)ps_tu_sblk_coeff_data;
336
337    return pu1_new_tu_coeff_data;
338}
339
340WORD32 ihevcd_get_intra_nbr_flag(process_ctxt_t *ps_proc,
341                                 tu_t *ps_tu,
342                                 UWORD32 *pu4_intra_nbr_avail,
343                                 WORD16 i2_pic_width_in_luma_samples,
344                                 UWORD8 i1_constrained_intra_pred_flag,
345                                 WORD32 trans_size,
346                                 WORD32 ctb_size)
347{
348    sps_t *ps_sps;
349    UWORD8 u1_bot_lt_avail, u1_left_avail, u1_top_avail, u1_top_rt_avail,
350                    u1_top_lt_avail;
351    WORD32 x_cur, y_cur, x_nbr, y_nbr;
352    UWORD8 *pu1_nbr_intra_flag;
353    UWORD8 *pu1_pic_intra_flag;
354    UWORD8 top_right, top, top_left, left, bot_left;
355    WORD32 intra_pos;
356    WORD32 num_8_blks, num_8_blks_in_bits;
357    WORD32 numbytes_row = (i2_pic_width_in_luma_samples + 63) / 64;
358    WORD32 cur_x, cur_y;
359    WORD32 i;
360    WORD32 nbr_flags;
361
362    ps_sps = ps_proc->ps_sps;
363    cur_x = ps_tu->b4_pos_x;
364    cur_y = ps_tu->b4_pos_y;
365
366    u1_bot_lt_avail = (pu4_intra_nbr_avail[1 + cur_y + trans_size / MIN_TU_SIZE]
367                    >> (31 - (1 + cur_x - 1))) & 1;
368    u1_left_avail = (pu4_intra_nbr_avail[1 + cur_y] >> (31 - (1 + cur_x - 1)))
369                    & 1;
370    u1_top_avail = (pu4_intra_nbr_avail[1 + cur_y - 1] >> (31 - (1 + cur_x)))
371                    & 1;
372    u1_top_rt_avail = (pu4_intra_nbr_avail[1 + cur_y - 1]
373                    >> (31 - (1 + cur_x + trans_size / MIN_TU_SIZE))) & 1;
374    u1_top_lt_avail = (pu4_intra_nbr_avail[1 + cur_y - 1]
375                    >> (31 - (1 + cur_x - 1))) & 1;
376
377    x_cur = ps_proc->i4_ctb_x * ctb_size + cur_x * MIN_TU_SIZE;
378    y_cur = ps_proc->i4_ctb_y * ctb_size + cur_y * MIN_TU_SIZE;
379
380    pu1_pic_intra_flag = ps_proc->pu1_pic_intra_flag;
381
382    /* WORD32 nbr_flags as below  MSB --> LSB */
383    /*    Top-Left | Top-Right | Top | Left | Bottom-Left
384     *       1         4         4     4         4
385     */
386    bot_left = 0;
387    left = 0;
388    top_right = 0;
389    top = 0;
390    top_left = 0;
391
392    num_8_blks = trans_size > 4 ? trans_size / 8 : 1;
393    num_8_blks_in_bits = ((1 << num_8_blks) - 1);
394
395    if(i1_constrained_intra_pred_flag)
396    {
397        /* TODO: constrained intra pred not tested */
398        if(u1_bot_lt_avail)
399        {
400            x_nbr = x_cur - 1;
401            y_nbr = y_cur + trans_size;
402
403            pu1_nbr_intra_flag = pu1_pic_intra_flag + y_nbr / 8 * numbytes_row
404                            + x_nbr / 64;
405            intra_pos = ((x_nbr / 8) % 8);
406            for(i = 0; i < num_8_blks; i++)
407            {
408                bot_left |= ((*(pu1_nbr_intra_flag + i * numbytes_row)
409                                >> intra_pos) & 1) << i;
410            }
411            bot_left &= num_8_blks_in_bits;
412        }
413        if(u1_left_avail)
414        {
415            x_nbr = x_cur - 1;
416            y_nbr = y_cur;
417
418            pu1_nbr_intra_flag = pu1_pic_intra_flag + y_nbr / 8 * numbytes_row
419                            + x_nbr / 64;
420            intra_pos = ((x_nbr / 8) % 8);
421
422            for(i = 0; i < num_8_blks; i++)
423            {
424                left |= ((*(pu1_nbr_intra_flag + i * numbytes_row) >> intra_pos)
425                                & 1) << i;
426            }
427            left &= num_8_blks_in_bits;
428        }
429        if(u1_top_avail)
430        {
431            x_nbr = x_cur;
432            y_nbr = y_cur - 1;
433
434            pu1_nbr_intra_flag = pu1_pic_intra_flag + y_nbr / 8 * numbytes_row
435                            + x_nbr / 64;
436            intra_pos = ((x_nbr / 8) % 8);
437
438            top = (*pu1_nbr_intra_flag >> intra_pos);
439            top &= num_8_blks_in_bits;
440            /*
441             for(i=0;i<num_8_blks;i++)
442             {
443             top |= ( (*pu1_nbr_intra_flag >> (intra_pos+i)) & 1) << i;
444             }
445             */
446        }
447        if(u1_top_rt_avail)
448        {
449            x_nbr = x_cur + trans_size;
450            y_nbr = y_cur - 1;
451
452            pu1_nbr_intra_flag = pu1_pic_intra_flag + y_nbr / 8 * numbytes_row
453                            + x_nbr / 64;
454            intra_pos = ((x_nbr / 8) % 8);
455
456            top_right = (*pu1_nbr_intra_flag >> intra_pos);
457            top_right &= num_8_blks_in_bits;
458            /*
459             for(i=0;i<num_8_blks;i++)
460             {
461             top_right |= ( (*pu1_nbr_intra_flag >> (intra_pos+i)) & 1) << i;
462             }
463             */
464        }
465        if(u1_top_lt_avail)
466        {
467            x_nbr = x_cur - 1;
468            y_nbr = y_cur - 1;
469
470            pu1_nbr_intra_flag = pu1_pic_intra_flag + y_nbr / 8 * numbytes_row
471                            + x_nbr / 64;
472            intra_pos = ((x_nbr / 8) % 8);
473
474            top_left = (*pu1_nbr_intra_flag >> intra_pos) & 1;
475        }
476    }
477    else
478    {
479        if(u1_top_avail)
480            top = 0xF;
481        if(u1_top_rt_avail)
482            top_right = 0xF;
483        if(u1_bot_lt_avail)
484            bot_left = 0xF;
485        if(u1_left_avail)
486            left = 0xF;
487        if(u1_top_lt_avail)
488            top_left = 0x1;
489    }
490
491    /* Handling incomplete CTBs */
492    {
493        WORD32 pu_size_limit = MIN(trans_size, 8);
494        WORD32 cols_remaining = ps_sps->i2_pic_width_in_luma_samples
495                        - (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size)
496                        - (ps_tu->b4_pos_x * MIN_TU_SIZE)
497                        - (1 << (ps_tu->b3_size + 2));
498        /* ctb_size_top gives number of valid pixels remaining in the current row */
499        WORD32 ctb_size_top = MIN(ctb_size, cols_remaining);
500        WORD32 ctb_size_top_bits = (1 << (ctb_size_top / pu_size_limit)) - 1;
501
502        WORD32 rows_remaining = ps_sps->i2_pic_height_in_luma_samples
503                        - (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size)
504                        - (ps_tu->b4_pos_y * MIN_TU_SIZE)
505                        - (1 << (ps_tu->b3_size + 2));
506        /* ctb_size_bot gives number of valid pixels remaining in the current column */
507        WORD32 ctb_size_bot = MIN(ctb_size, rows_remaining);
508        WORD32 ctb_size_bot_bits = (1 << (ctb_size_bot / pu_size_limit)) - 1;
509
510        top_right &= ctb_size_top_bits;
511        bot_left &= ctb_size_bot_bits;
512    }
513
514    /*    Top-Left | Top-Right | Top | Left | Bottom-Left
515     *      1         4         4     4         4
516     */
517
518    /*
519     nbr_flags = (top_left << 16) | (gau4_ihevcd_4_bit_reverse[top_right] << 12) | (gau4_ihevcd_4_bit_reverse[top] << 8) | (gau4_ihevcd_4_bit_reverse[left] << 4)
520     | gau4_ihevcd_4_bit_reverse[bot_left];
521     */
522    nbr_flags = (top_left << 16) | (top_right << 12) | (top << 8) | (gau4_ihevcd_4_bit_reverse[left] << 4)
523                    | gau4_ihevcd_4_bit_reverse[bot_left];
524
525
526    return nbr_flags;
527
528}
529
530WORD32 ihevcd_iquant_itrans_recon_ctb(process_ctxt_t *ps_proc)
531{
532    WORD16 *pi2_scaling_mat;
533    UWORD8 *pu1_y_dst_ctb;
534    UWORD8 *pu1_uv_dst_ctb;
535    WORD32 ctb_size;
536    codec_t *ps_codec;
537    slice_header_t *ps_slice_hdr;
538    tu_t *ps_tu;
539    WORD16 *pi2_ctb_coeff;
540    WORD32 tu_cnt;
541    WORD16 *pi2_tu_coeff;
542    WORD16 *pi2_tmp;
543    WORD32 pic_strd;
544    WORD32 luma_nbr_flags;
545    WORD32 chroma_nbr_flags = 0;
546    UWORD8 u1_luma_pred_mode_first_tu = 0;
547    /* Pointers for generating 2d coeffs from coeff-map */
548    UWORD8 *pu1_tu_coeff_data;
549    /* nbr avail map for CTB */
550    /* 1st bit points to neighbor (left/top_left/bot_left) */
551    /* 1Tb starts at 2nd bit from msb of 2nd value in array, followed by number of min_tu's in that ctb */
552    UWORD32 au4_intra_nbr_avail[MAX_CTB_SIZE / MIN_TU_SIZE
553                    + 2 /* Top nbr + bot nbr */]; UWORD32
554                    top_avail_bits;
555    sps_t *ps_sps;
556    pps_t *ps_pps;
557    WORD32 intra_flag;
558    UWORD8 *pu1_pic_intra_flag;
559    /*************************************************************************/
560    /* Contanis scaling matrix offset in the following order in a 1D buffer  */
561    /* Intra 4 x 4 Y, 4 x 4 U, 4 x 4 V                                       */
562    /* Inter 4 x 4 Y, 4 x 4 U, 4 x 4 V                                       */
563    /* Intra 8 x 8 Y, 8 x 8 U, 8 x 8 V                                       */
564    /* Inter 8 x 8 Y, 8 x 8 U, 8 x 8 V                                       */
565    /* Intra 16x16 Y, 16x16 U, 16x16 V                                       */
566    /* Inter 16x16 Y, 16x16 U, 16x16 V                                       */
567    /* Intra 32x32 Y                                                         */
568    /* Inter 32x32 Y                                                         */
569    /*************************************************************************/
570    /* Only first 20 entries are used. Array is extended to avoid out of bound
571       reads. Skip CUs (64x64) read this table, but don't really use the value */
572    static const WORD32 scaling_mat_offset[] =
573      { 0, 16, 32, 48, 64, 80, 96, 160, 224, 288, 352, 416, 480, 736, 992,
574        1248, 1504, 1760, 2016, 3040, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
575
576    PROFILE_DISABLE_IQ_IT_RECON_INTRA_PRED();
577
578    ps_sps = ps_proc->ps_sps;
579    ps_pps = ps_proc->ps_pps;
580    ps_slice_hdr = ps_proc->ps_slice_hdr;
581    ps_codec = ps_proc->ps_codec;
582
583    pu1_y_dst_ctb = ps_proc->pu1_cur_ctb_luma;
584    pu1_uv_dst_ctb = ps_proc->pu1_cur_ctb_chroma;
585
586    pi2_ctb_coeff = ps_proc->pi2_invscan_out;
587
588    ctb_size = (1 << ps_sps->i1_log2_ctb_size);
589    pu1_tu_coeff_data = (UWORD8 *)ps_proc->pv_tu_coeff_data;
590
591    pic_strd = ps_codec->i4_strd;
592
593    pi2_tmp = ps_proc->pi2_itrans_intrmd_buf;
594
595    pi2_tu_coeff = pi2_ctb_coeff;
596
597    ps_tu = ps_proc->ps_tu;
598
599    if((1 == ps_sps->i1_scaling_list_enable_flag) && (1 == ps_pps->i1_pps_scaling_list_data_present_flag))
600    {
601        pi2_scaling_mat = ps_pps->pi2_scaling_mat;
602    }
603    else
604    {
605        pi2_scaling_mat = ps_sps->pi2_scaling_mat;
606    }
607
608    {
609        /* Updating the initial availability map */
610        WORD32 i;
611        UWORD8 u1_left_ctb_avail, u1_top_lt_ctb_avail, u1_top_rt_ctb_avail,
612                        u1_top_ctb_avail;
613
614        u1_left_ctb_avail = ps_proc->u1_left_ctb_avail;
615        u1_top_lt_ctb_avail = ps_proc->u1_top_lt_ctb_avail;
616        u1_top_ctb_avail = ps_proc->u1_top_ctb_avail;
617        u1_top_rt_ctb_avail = ps_proc->u1_top_rt_ctb_avail;
618
619        /* Initializing the availability array */
620        memset(au4_intra_nbr_avail, 0,
621               (MAX_CTB_SIZE / MIN_TU_SIZE + 2) * sizeof(UWORD32));
622        /* Initializing the availability array with CTB level availability flags */
623        {
624            WORD32 rows_remaining = ps_sps->i2_pic_height_in_luma_samples - (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size);
625            WORD32 ctb_size_left = MIN(ctb_size, rows_remaining);
626            for(i = 0; i < ctb_size_left / MIN_TU_SIZE; i++)
627            {
628                au4_intra_nbr_avail[i + 1] = ((UWORD32)u1_left_ctb_avail << 31);
629            }
630        }
631        au4_intra_nbr_avail[0] |= (((UWORD32)u1_top_rt_ctb_avail << 31)
632                        >> (1 + ctb_size / MIN_TU_SIZE)); /* 1+ctb_size/4 position bit pos from msb */
633
634        au4_intra_nbr_avail[0] |= ((UWORD32)u1_top_lt_ctb_avail << 31);
635
636        {
637            WORD32 cols_remaining = ps_sps->i2_pic_width_in_luma_samples - (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size);
638            WORD32 ctb_size_top = MIN(ctb_size, cols_remaining);
639            WORD32 shift = (31 - (ctb_size / MIN_TU_SIZE));
640
641            /* ctb_size_top gives number of valid pixels remaining in the current row */
642            /* Since we need pattern of 1's starting from the MSB, an additional shift */
643            /* is needed */
644            shift += ((ctb_size - ctb_size_top) / MIN_TU_SIZE);
645
646            top_avail_bits = ((1 << (ctb_size_top / MIN_TU_SIZE)) - 1)
647                            << shift;
648        }
649        au4_intra_nbr_avail[0] |= (
650                        (u1_top_ctb_avail == 1) ? top_avail_bits : 0x0);
651        /* Starting from msb 2nd bit to (1+ctb_size/4) bit, set 1 if top avail,or 0 */
652
653    }
654
655    /* Applying Inverse transform on all the TU's in CTB */
656    for(tu_cnt = 0; tu_cnt < ps_proc->i4_ctb_tu_cnt; tu_cnt++, ps_tu++)
657    {
658        WORD32 transform_skip_flag = 0;
659        WORD32 transform_skip_flag_v = 0;
660        WORD32 num_comp, c_idx, func_idx;
661        WORD32 src_strd, pred_strd, dst_strd;
662        WORD32 qp_div = 0, qp_rem = 0;
663        WORD32 qp_div_v = 0, qp_rem_v = 0;
664        UWORD32 zero_cols = 0, zero_cols_v = 0;
665        UWORD32 zero_rows = 0, zero_rows_v = 0;
666        UWORD32 coeff_type = 0, coeff_type_v = 0;
667        WORD16 i2_coeff_value, i2_coeff_value_v;
668        WORD32 trans_size = 0;
669        TRANSFORM_TYPE e_trans_type;
670        WORD32 log2_y_trans_size_minus_2, log2_uv_trans_size_minus_2;
671        WORD32 log2_trans_size;
672        WORD32 chroma_qp_idx;
673        WORD16 *pi2_src = NULL, *pi2_src_v = NULL;
674        UWORD8 *pu1_pred = NULL, *pu1_pred_v = NULL;
675        UWORD8 *pu1_dst = NULL, *pu1_dst_v = NULL;
676        WORD16 *pi2_dequant_matrix = NULL, *pi2_dequant_matrix_v = NULL;
677        WORD32 tu_x, tu_y;
678        WORD32 tu_y_offset, tu_uv_offset;
679        WORD8 i1_chroma_pic_qp_offset, i1_chroma_slice_qp_offset;
680        UWORD8 u1_cbf = 0, u1_cbf_v = 0, u1_luma_pred_mode, u1_chroma_pred_mode;
681        WORD32 luma_nbr_flags_4x4[4];
682        WORD32 offset;
683        WORD32 pcm_flag;
684        WORD32  chroma_yuv420sp_vu = (ps_codec->e_ref_chroma_fmt == IV_YUV_420SP_VU);
685        /* If 420SP_VU is chroma format, pred and dst pointer   */
686        /* will be added +1 to point to U                       */
687        WORD32 chroma_yuv420sp_vu_u_offset = 1 * chroma_yuv420sp_vu;
688        /* If 420SP_VU is chroma format, pred and dst pointer   */
689        /* will be added U offset of +1 and subtracted 2        */
690        /* to point to V                                        */
691        WORD32 chroma_yuv420sp_vu_v_offset = -2 * chroma_yuv420sp_vu;
692
693        tu_x = ps_tu->b4_pos_x * 4; /* Converting minTU unit to pixel unit */
694        tu_y = ps_tu->b4_pos_y * 4; /* Converting minTU unit to pixel unit */
695        {
696            WORD32 tu_abs_x = (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size) + (tu_x);
697            WORD32 tu_abs_y = (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size) + (tu_y);
698
699            WORD32 numbytes_row =  (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
700
701            pu1_pic_intra_flag = ps_proc->pu1_pic_intra_flag;
702            pu1_pic_intra_flag += (tu_abs_y >> 3) * numbytes_row;
703            pu1_pic_intra_flag += (tu_abs_x >> 6);
704
705            intra_flag = *pu1_pic_intra_flag;
706            intra_flag &= (1 << ((tu_abs_x >> 3) % 8));
707        }
708
709        u1_luma_pred_mode = ps_tu->b6_luma_intra_mode;
710        u1_chroma_pred_mode = ps_tu->b3_chroma_intra_mode_idx;
711
712        if(u1_chroma_pred_mode != 7)
713            num_comp = 2; /* Y and UV */
714        else
715            num_comp = 1; /* Y */
716
717
718        pcm_flag = 0;
719
720        if((intra_flag) && (u1_luma_pred_mode == INTRA_PRED_NONE))
721        {
722            UWORD8 *pu1_buf;
723            UWORD8 *pu1_y_dst = pu1_y_dst_ctb;
724            UWORD8 *pu1_uv_dst = pu1_uv_dst_ctb;
725            WORD32 i, j;
726            tu_sblk_coeff_data_t *ps_tu_sblk_coeff_data;
727            WORD32 cb_size = 1 << (ps_tu->b3_size + 2);
728
729            /* trans_size is used to update availability after reconstruction */
730            trans_size = cb_size;
731
732            pcm_flag = 1;
733
734            tu_y_offset = tu_x + tu_y * pic_strd;
735            pu1_y_dst += tu_x + tu_y * pic_strd;
736            pu1_uv_dst += tu_x + (tu_y >> 1) * pic_strd;
737
738            /* First byte points to number of coded blocks */
739            pu1_tu_coeff_data++;
740
741            /* Next byte points to scan type */
742            pu1_tu_coeff_data++;
743
744            ps_tu_sblk_coeff_data = (tu_sblk_coeff_data_t *)pu1_tu_coeff_data;
745
746            pu1_buf = (UWORD8 *)&ps_tu_sblk_coeff_data->ai2_level[0];
747            {
748
749                for(i = 0; i < cb_size; i++)
750                {
751                    //pu1_y_dst[i * pic_strd + j] = *pu1_buf++;
752                    memcpy(&pu1_y_dst[i * pic_strd], pu1_buf, cb_size);
753                    pu1_buf += cb_size;
754                }
755
756                pu1_uv_dst = pu1_uv_dst + chroma_yuv420sp_vu_u_offset;
757
758                /* U */
759                for(i = 0; i < cb_size / 2; i++)
760                {
761                    for(j = 0; j < cb_size / 2; j++)
762                    {
763                        pu1_uv_dst[i * pic_strd + 2 * j] = *pu1_buf++;
764                    }
765                }
766
767                pu1_uv_dst = pu1_uv_dst + 1 + chroma_yuv420sp_vu_v_offset;
768
769                /* V */
770                for(i = 0; i < cb_size / 2; i++)
771                {
772                    for(j = 0; j < cb_size / 2; j++)
773                    {
774                        pu1_uv_dst[i * pic_strd + 2 * j] = *pu1_buf++;
775                    }
776                }
777            }
778
779            pu1_tu_coeff_data = pu1_buf;
780
781        }
782
783
784
785
786
787        for(c_idx = 0; c_idx < num_comp; c_idx++)
788        {
789            if(0 == pcm_flag)
790            {
791                /* Initializing variables */
792                pred_strd = pic_strd;
793                dst_strd = pic_strd;
794
795                if(c_idx == 0) /* Y */
796                {
797                    log2_y_trans_size_minus_2 = ps_tu->b3_size;
798                    trans_size = 1 << (log2_y_trans_size_minus_2 + 2);
799                    log2_trans_size = log2_y_trans_size_minus_2 + 2;
800
801                    tu_y_offset = tu_x + tu_y * pic_strd;
802
803                    pi2_src = pi2_tu_coeff;
804                    pu1_pred = pu1_y_dst_ctb + tu_y_offset;
805                    pu1_dst = pu1_y_dst_ctb + tu_y_offset;
806
807                    /* Calculating scaling matrix offset */
808                    offset = log2_y_trans_size_minus_2 * 6
809                                    + (!intra_flag)
810                                    * ((log2_y_trans_size_minus_2
811                                                    == 3) ? 1 : 3)
812                                    + c_idx;
813                    pi2_dequant_matrix = pi2_scaling_mat
814                                    + scaling_mat_offset[offset];
815
816                    src_strd = trans_size;
817
818                    /* 4x4 transform Luma in INTRA mode is DST */
819                    if(log2_y_trans_size_minus_2 == 0 && intra_flag)
820                    {
821                        func_idx = log2_y_trans_size_minus_2;
822                        e_trans_type = DST_4x4;
823                    }
824                    else
825                    {
826                        func_idx = log2_y_trans_size_minus_2 + 1;
827                        e_trans_type = (TRANSFORM_TYPE)(log2_y_trans_size_minus_2 + 1);
828                    }
829
830                    qp_div = ps_tu->b7_qp / 6;
831                    qp_rem = ps_tu->b7_qp % 6;
832
833                    u1_cbf = ps_tu->b1_y_cbf;
834
835                    transform_skip_flag = pu1_tu_coeff_data[1] & 1;
836                    /* Unpacking coeffs */
837                    if(1 == u1_cbf)
838                    {
839                        pu1_tu_coeff_data = ihevcd_unpack_coeffs(
840                                        pi2_src, log2_y_trans_size_minus_2 + 2,
841                                        pu1_tu_coeff_data, pi2_dequant_matrix,
842                                        qp_rem, qp_div, e_trans_type,
843                                        ps_tu->b1_transquant_bypass, &zero_cols,
844                                        &zero_rows, &coeff_type,
845                                        &i2_coeff_value);
846                    }
847                }
848                else /* UV interleaved */
849                {
850                    /* Chroma :If Transform size is 4x4, keep 4x4 else do transform on (trans_size/2 x trans_size/2) */
851                    if(ps_tu->b3_size == 0)
852                    {
853                        /* Chroma 4x4 is present with 4th luma 4x4 block. For this case chroma position has to be (luma pos x - 4, luma pos y - 4) */
854                        log2_uv_trans_size_minus_2 = ps_tu->b3_size;
855                        tu_uv_offset = (tu_x - 4) + ((tu_y - 4) / 2) * pic_strd;
856                    }
857                    else
858                    {
859                        log2_uv_trans_size_minus_2 = ps_tu->b3_size - 1;
860                        tu_uv_offset = tu_x + (tu_y >> 1) * pic_strd;
861                    }
862                    trans_size = 1 << (log2_uv_trans_size_minus_2 + 2);
863                    log2_trans_size = log2_uv_trans_size_minus_2 + 2;
864
865                    pi2_src = pi2_tu_coeff;
866                    pi2_src_v = pi2_tu_coeff + trans_size * trans_size;
867                    pu1_pred = pu1_uv_dst_ctb + tu_uv_offset + chroma_yuv420sp_vu_u_offset; /* Pointing to start byte of U*/
868                    pu1_pred_v = pu1_pred + 1 + chroma_yuv420sp_vu_v_offset; /* Pointing to start byte of V*/
869                    pu1_dst = pu1_uv_dst_ctb + tu_uv_offset + chroma_yuv420sp_vu_u_offset; /* Pointing to start byte of U*/
870                    pu1_dst_v = pu1_dst + 1 + chroma_yuv420sp_vu_v_offset; /* Pointing to start byte of V*/
871
872                    /*TODO: Add support for choosing different tables for U and V,
873                     * change this to a single array to handle flat/default/custom, intra/inter, luma/chroma and various sizes
874                     */
875                    /* Calculating scaling matrix offset */
876                    /* ((log2_uv_trans_size_minus_2 == 3) ? 1:3) condition check is not needed, since
877                     * max uv trans size is 16x16
878                     */
879                    offset = log2_uv_trans_size_minus_2 * 6
880                                    + (!intra_flag) * 3 + c_idx;
881                    pi2_dequant_matrix = pi2_scaling_mat
882                                    + scaling_mat_offset[offset];
883                    pi2_dequant_matrix_v = pi2_scaling_mat
884                                    + scaling_mat_offset[offset + 1];
885
886                    src_strd = trans_size;
887
888                    func_idx = 1 + 4 + log2_uv_trans_size_minus_2; /* DST func + Y funcs + cur func index*/
889
890                    /* Handle error cases where 64x64 TU is signalled which results in 32x32 chroma.
891                     * By limiting func_idx to 7, max of 16x16 chroma is called */
892                    func_idx = MIN(func_idx, 7);
893
894                    e_trans_type = (TRANSFORM_TYPE)(log2_uv_trans_size_minus_2 + 1);
895                    /* QP for U */
896                    i1_chroma_pic_qp_offset = ps_pps->i1_pic_cb_qp_offset;
897                    i1_chroma_slice_qp_offset = ps_slice_hdr->i1_slice_cb_qp_offset;
898                    u1_cbf = ps_tu->b1_cb_cbf;
899
900                    chroma_qp_idx = ps_tu->b7_qp + i1_chroma_pic_qp_offset
901                                    + i1_chroma_slice_qp_offset;
902                    chroma_qp_idx = CLIP3(chroma_qp_idx, 0, 57);
903                    qp_div = gai2_ihevcd_chroma_qp[chroma_qp_idx] / 6;
904                    qp_rem = gai2_ihevcd_chroma_qp[chroma_qp_idx] % 6;
905
906                    /* QP for V */
907                    i1_chroma_pic_qp_offset = ps_pps->i1_pic_cr_qp_offset;
908                    i1_chroma_slice_qp_offset = ps_slice_hdr->i1_slice_cr_qp_offset;
909                    u1_cbf_v = ps_tu->b1_cr_cbf;
910
911                    chroma_qp_idx = ps_tu->b7_qp + i1_chroma_pic_qp_offset
912                                    + i1_chroma_slice_qp_offset;
913                    chroma_qp_idx = CLIP3(chroma_qp_idx, 0, 57);
914                    qp_div_v = gai2_ihevcd_chroma_qp[chroma_qp_idx] / 6;
915                    qp_rem_v = gai2_ihevcd_chroma_qp[chroma_qp_idx] % 6;
916
917                    /* Unpacking coeffs */
918                    transform_skip_flag = pu1_tu_coeff_data[1] & 1;
919                    if(1 == u1_cbf)
920                    {
921                        pu1_tu_coeff_data = ihevcd_unpack_coeffs(
922                                        pi2_src, log2_uv_trans_size_minus_2 + 2,
923                                        pu1_tu_coeff_data, pi2_dequant_matrix,
924                                        qp_rem, qp_div, e_trans_type,
925                                        ps_tu->b1_transquant_bypass, &zero_cols,
926                                        &zero_rows, &coeff_type,
927                                        &i2_coeff_value);
928                    }
929
930                    transform_skip_flag_v = pu1_tu_coeff_data[1] & 1;
931                    if(1 == u1_cbf_v)
932                    {
933                        pu1_tu_coeff_data = ihevcd_unpack_coeffs(
934                                        pi2_src_v, log2_uv_trans_size_minus_2 + 2,
935                                        pu1_tu_coeff_data, pi2_dequant_matrix_v,
936                                        qp_rem_v, qp_div_v, e_trans_type,
937                                        ps_tu->b1_transquant_bypass, &zero_cols_v,
938                                        &zero_rows_v, &coeff_type_v, &i2_coeff_value_v);
939                    }
940                }
941                /***************************************************************/
942                /******************  Intra Prediction **************************/
943                /***************************************************************/
944                if(intra_flag) /* Intra */
945                {
946                    /* While (MAX_TU_SIZE * 2 * 2) + 1 is the actual size needed,
947                       au1_ref_sub_out size is kept as multiple of 8,
948                       so that SIMD functions can load 64 bits */
949                    UWORD8 au1_ref_sub_out[(MAX_TU_SIZE * 2 * 2) + 8];
950                    UWORD8 *pu1_top_left, *pu1_top, *pu1_left;
951                    WORD32 luma_pred_func_idx, chroma_pred_func_idx;
952
953                    /* Get the neighbour availability flags */
954                    /* Done for only Y */
955                    if(c_idx == 0)
956                    {
957                        /* Get neighbor availability for Y only */
958                        luma_nbr_flags = ihevcd_get_intra_nbr_flag(ps_proc,
959                                                                   ps_tu,
960                                                                   au4_intra_nbr_avail,
961                                                                   ps_sps->i2_pic_width_in_luma_samples,
962                                                                   ps_pps->i1_constrained_intra_pred_flag,
963                                                                   trans_size,
964                                                                   ctb_size);
965
966                        if(trans_size == 4)
967                            luma_nbr_flags_4x4[(ps_tu->b4_pos_x % 2) + (ps_tu->b4_pos_y % 2) * 2] = luma_nbr_flags;
968
969                        if((ps_tu->b4_pos_x % 2 == 0) && (ps_tu->b4_pos_y % 2 == 0))
970                        {
971                            chroma_nbr_flags = luma_nbr_flags;
972                        }
973
974                        /* Initializing nbr pointers */
975                        pu1_top = pu1_pred - pic_strd;
976                        pu1_left = pu1_pred - 1;
977                        pu1_top_left = pu1_pred - pic_strd - 1;
978
979                        /* call reference array substitution */
980                        if(luma_nbr_flags == 0x1ffff)
981                            ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_subst_all_avlble_fptr(
982                                            pu1_top_left,
983                                            pu1_top, pu1_left, pred_strd, trans_size, luma_nbr_flags, au1_ref_sub_out, 1);
984                        else
985                            ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_substitution_fptr(
986                                            pu1_top_left,
987                                            pu1_top, pu1_left, pred_strd, trans_size, luma_nbr_flags, au1_ref_sub_out, 1);
988
989                        /* call reference filtering */
990                        ps_codec->s_func_selector.ihevc_intra_pred_ref_filtering_fptr(
991                                        au1_ref_sub_out, trans_size,
992                                        au1_ref_sub_out,
993                                        u1_luma_pred_mode, ps_sps->i1_strong_intra_smoothing_enable_flag);
994
995                        /* use the look up to get the function idx */
996                        luma_pred_func_idx = g_i4_ip_funcs[u1_luma_pred_mode];
997
998                        /* call the intra prediction function */
999                        ps_codec->apf_intra_pred_luma[luma_pred_func_idx](au1_ref_sub_out, 1, pu1_pred, pred_strd, trans_size, u1_luma_pred_mode);
1000                    }
1001                    else
1002                    {
1003                        /* In case of yuv420sp_vu, prediction happens as usual.         */
1004                        /* So point the pu1_pred pointer to original prediction pointer */
1005                        UWORD8 *pu1_pred_orig = pu1_pred - chroma_yuv420sp_vu_u_offset;
1006
1007                        /*    Top-Left | Top-Right | Top | Left | Bottom-Left
1008                         *      1         4         4     4         4
1009                         *
1010                         * Generating chroma_nbr_flags depending upon the transform size */
1011                        if(ps_tu->b3_size == 0)
1012                        {
1013                            /* Take TL,T,L flags of First luma 4x4 block */
1014                            chroma_nbr_flags = (luma_nbr_flags_4x4[0] & 0x10FF0);
1015                            /* Take TR flags of Second luma 4x4 block */
1016                            chroma_nbr_flags |= (luma_nbr_flags_4x4[1] & 0x0F000);
1017                            /* Take BL flags of Third luma 4x4 block */
1018                            chroma_nbr_flags |= (luma_nbr_flags_4x4[2] & 0x0000F);
1019                        }
1020
1021                        /* Initializing nbr pointers */
1022                        pu1_top = pu1_pred_orig - pic_strd;
1023                        pu1_left = pu1_pred_orig - 2;
1024                        pu1_top_left = pu1_pred_orig - pic_strd - 2;
1025
1026                        /* Chroma pred  mode derivation from luma pred mode */
1027                        {
1028                            tu_t *ps_tu_tmp = ps_tu;
1029                            while(!ps_tu_tmp->b1_first_tu_in_cu)
1030                            {
1031                                ps_tu_tmp--;
1032                            }
1033                            u1_luma_pred_mode_first_tu = ps_tu_tmp->b6_luma_intra_mode;
1034                        }
1035                        if(4 == u1_chroma_pred_mode)
1036                            u1_chroma_pred_mode = u1_luma_pred_mode_first_tu;
1037                        else
1038                        {
1039                            u1_chroma_pred_mode = gau1_intra_pred_chroma_modes[u1_chroma_pred_mode];
1040
1041                            if(u1_chroma_pred_mode ==
1042                                                            u1_luma_pred_mode_first_tu)
1043                            {
1044                                u1_chroma_pred_mode = INTRA_ANGULAR(34);
1045                            }
1046                        }
1047
1048                        /* call the chroma reference array substitution */
1049                        ps_codec->s_func_selector.ihevc_intra_pred_chroma_ref_substitution_fptr(
1050                                        pu1_top_left,
1051                                        pu1_top, pu1_left, pic_strd, trans_size, chroma_nbr_flags, au1_ref_sub_out, 1);
1052
1053                        /* use the look up to get the function idx */
1054                        chroma_pred_func_idx =
1055                                        g_i4_ip_funcs[u1_chroma_pred_mode];
1056
1057                        /* call the intra prediction function */
1058                        ps_codec->apf_intra_pred_chroma[chroma_pred_func_idx](au1_ref_sub_out, 1, pu1_pred_orig, pred_strd, trans_size, u1_chroma_pred_mode);
1059                    }
1060                }
1061
1062                /* Updating number of transform types */
1063                STATS_UPDATE_ALL_TRANS(e_trans_type, c_idx);
1064
1065                /* IQ, IT and Recon for Y if c_idx == 0, and U if c_idx !=0 */
1066                if(1 == u1_cbf)
1067                {
1068                    if(ps_tu->b1_transquant_bypass || transform_skip_flag)
1069                    {
1070                        /* Recon */
1071                        ps_codec->apf_recon[func_idx](pi2_src, pu1_pred, pu1_dst,
1072                                                      src_strd, pred_strd, dst_strd,
1073                                                      zero_cols);
1074                    }
1075                    else
1076                    {
1077
1078                        /* Updating coded number of transform types(excluding trans skip and trans quant skip) */
1079                        STATS_UPDATE_CODED_TRANS(e_trans_type, c_idx, 0);
1080
1081                        /* iQuant , iTrans and Recon */
1082                        if((0 == coeff_type))
1083                        {
1084                            ps_codec->apf_itrans_recon[func_idx](pi2_src, pi2_tmp,
1085                                                                 pu1_pred, pu1_dst,
1086                                                                 src_strd, pred_strd,
1087                                                                 dst_strd, zero_cols,
1088                                                                 zero_rows);
1089                        }
1090                        else /* DC only */
1091                        {
1092                            STATS_UPDATE_CODED_TRANS(e_trans_type, c_idx, 1);
1093                            ps_codec->apf_itrans_recon_dc[c_idx](pu1_pred, pu1_dst,
1094                                                                 pred_strd, dst_strd,
1095                                                                 log2_trans_size,
1096                                                                 i2_coeff_value);
1097                        }
1098                    }
1099                }
1100                /* IQ, IT and Recon for V */
1101                if(c_idx != 0)
1102                {
1103                    if(1 == u1_cbf_v)
1104                    {
1105                        if(ps_tu->b1_transquant_bypass || transform_skip_flag_v)
1106                        {
1107                            /* Recon */
1108                            ps_codec->apf_recon[func_idx](pi2_src_v, pu1_pred_v,
1109                                                          pu1_dst_v, src_strd,
1110                                                          pred_strd, dst_strd,
1111                                                          zero_cols_v);
1112                        }
1113                        else
1114                        {
1115                            /* Updating number of transform types */
1116                            STATS_UPDATE_CODED_TRANS(e_trans_type, c_idx, 0);
1117
1118                            /* iQuant , iTrans and Recon */
1119                            if((0 == coeff_type_v))
1120                            {
1121                                ps_codec->apf_itrans_recon[func_idx](pi2_src_v,
1122                                                                     pi2_tmp,
1123                                                                     pu1_pred_v,
1124                                                                     pu1_dst_v,
1125                                                                     src_strd,
1126                                                                     pred_strd,
1127                                                                     dst_strd,
1128                                                                     zero_cols_v,
1129                                                                     zero_rows_v);
1130                            }
1131                            else  /* DC only */
1132                            {
1133                                STATS_UPDATE_CODED_TRANS(e_trans_type, c_idx, 1);
1134                                ps_codec->apf_itrans_recon_dc[c_idx](pu1_pred_v, pu1_dst_v,
1135                                                                     pred_strd, dst_strd,
1136                                                                     log2_trans_size,
1137                                                                     i2_coeff_value_v);
1138                            }
1139                        }
1140                    }
1141                }
1142            }
1143
1144            /* Neighbor availability inside CTB */
1145            /* 1 bit per 4x4. Indicates whether that 4x4 block has been reconstructed (available) */
1146            /* Used for neighbor availability in intra pred */
1147            if(c_idx == 0)
1148            {
1149                WORD32 i;
1150                WORD32 trans_in_min_tu;
1151                UWORD32 cur_tu_in_bits;
1152                UWORD32 cur_tu_avail_flag;
1153
1154                trans_in_min_tu = trans_size / MIN_TU_SIZE;
1155                cur_tu_in_bits = (1 << trans_in_min_tu) - 1;
1156                cur_tu_in_bits = cur_tu_in_bits << (32 - trans_in_min_tu);
1157
1158                cur_tu_avail_flag = cur_tu_in_bits >> (ps_tu->b4_pos_x + 1);
1159
1160                for(i = 0; i < trans_in_min_tu; i++)
1161                    au4_intra_nbr_avail[1 + ps_tu->b4_pos_y + i] |=
1162                                    cur_tu_avail_flag;
1163            }
1164        }
1165    }
1166    ps_proc->pv_tu_coeff_data = pu1_tu_coeff_data;
1167
1168    return ps_proc->i4_ctb_tu_cnt;
1169}
1170
1171