ihevcd_deblk.c revision 0d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098
1/******************************************************************************
2*
3* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4*
5* Licensed under the Apache License, Version 2.0 (the "License");
6* you may not use this file except in compliance with the License.
7* You may obtain a copy of the License at:
8*
9* http://www.apache.org/licenses/LICENSE-2.0
10*
11* Unless required by applicable law or agreed to in writing, software
12* distributed under the License is distributed on an "AS IS" BASIS,
13* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14* See the License for the specific language governing permissions and
15* limitations under the License.
16*
17******************************************************************************/
18/**
19*******************************************************************************
20* @file
21*  ihevcd_deblk.c
22*
23* @brief
24*  Contains definition for the ctb level deblk function
25*
26* @author
27*  Srinivas T
28*
29* @par List of Functions:
30*   - ihevcd_deblk_ctb()
31*
32* @remarks
33*  None
34*
35*******************************************************************************
36*/
37
38#include <stdio.h>
39#include <stddef.h>
40#include <stdlib.h>
41#include <string.h>
42#include <assert.h>
43
44#include "ihevc_typedefs.h"
45#include "iv.h"
46#include "ivd.h"
47#include "ihevcd_cxa.h"
48#include "ithread.h"
49
50#include "ihevc_defs.h"
51#include "ihevc_debug.h"
52#include "ihevc_defs.h"
53#include "ihevc_structs.h"
54#include "ihevc_macros.h"
55#include "ihevc_platform_macros.h"
56#include "ihevc_cabac_tables.h"
57
58#include "ihevc_error.h"
59#include "ihevc_common_tables.h"
60
61#include "ihevcd_trace.h"
62#include "ihevcd_defs.h"
63#include "ihevcd_function_selector.h"
64#include "ihevcd_structs.h"
65#include "ihevcd_error.h"
66#include "ihevcd_nal.h"
67#include "ihevcd_bitstream.h"
68#include "ihevcd_job_queue.h"
69#include "ihevcd_utils.h"
70#include "ihevcd_debug.h"
71
72#include "ihevc_deblk.h"
73#include "ihevc_deblk_tables.h"
74#include "ihevcd_profile.h"
75/**
76*******************************************************************************
77*
78* @brief
79*     Deblock CTB level function.
80*
81* @par Description:
82*     For a given CTB, deblocking on both vertical and
83*     horizontal edges is done. Both the luma and chroma
84*     blocks are processed
85*
86* @param[in] ps_deblk
87*  Pointer to the deblock context
88*
89* @returns
90*  None
91* @remarks
92*  None
93*
94*******************************************************************************
95*/
96
97void ihevcd_deblk_ctb(deblk_ctxt_t *ps_deblk,
98                      WORD32 i4_is_last_ctb_x,
99                      WORD32 i4_is_last_ctb_y)
100{
101    WORD32 ctb_size;
102    WORD32 log2_ctb_size;
103    UWORD32 u4_bs;
104    WORD32 bs_tz; /*Trailing zeros in boundary strength*/
105    WORD32 qp_p, qp_q;
106
107    WORD32 filter_p, filter_q;
108
109    UWORD8 *pu1_src;
110    WORD32 qp_strd;
111    UWORD32 *pu4_vert_bs, *pu4_horz_bs;
112    UWORD32 *pu4_ctb_vert_bs, *pu4_ctb_horz_bs;
113    WORD32 vert_bs_strd, horz_bs_strd;
114    WORD32 src_strd;
115    UWORD8 *pu1_qp;
116    UWORD16 *pu2_ctb_no_loop_filter_flag;
117    UWORD16 au2_ctb_no_loop_filter_flag[9];
118
119    WORD32 col, row;
120
121    /* Flag to indicate if QP is constant in CTB
122     * 0 - top_left, 1 - top, 2 - left, 3 - current */
123    UWORD32 u4_qp_const_in_ctb[4] = { 0, 0, 0, 0 };
124    WORD32 ctb_indx;
125    WORD32  chroma_yuv420sp_vu = ps_deblk->is_chroma_yuv420sp_vu;
126    sps_t *ps_sps;
127    pps_t *ps_pps;
128    codec_t *ps_codec;
129    slice_header_t *ps_slice_hdr;
130
131    PROFILE_DISABLE_DEBLK();
132
133    ps_sps = ps_deblk->ps_sps;
134    ps_pps = ps_deblk->ps_pps;
135    ps_codec = ps_deblk->ps_codec;
136    ps_slice_hdr = ps_deblk->ps_slice_hdr;
137
138    log2_ctb_size = ps_sps->i1_log2_ctb_size;
139    ctb_size = (1 << ps_sps->i1_log2_ctb_size);
140
141    /* strides are in units of number of bytes */
142    /* ctb_size * ctb_size / 8 / 16 is the number of bytes needed per CTB */
143    vert_bs_strd = ps_sps->i2_pic_wd_in_ctb << (2 * log2_ctb_size - 7);
144    horz_bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) << (2 * log2_ctb_size - 7);
145    pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_deblk->s_bs_ctxt.pu4_pic_vert_bs +
146                    (ps_deblk->i4_ctb_x << (2 * log2_ctb_size - 7)) +
147                    ps_deblk->i4_ctb_y * vert_bs_strd);
148    pu4_ctb_vert_bs = pu4_vert_bs;
149
150    pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_deblk->s_bs_ctxt.pu4_pic_horz_bs +
151                    (ps_deblk->i4_ctb_x << (2 * log2_ctb_size - 7)) +
152                    ps_deblk->i4_ctb_y * horz_bs_strd);
153    pu4_ctb_horz_bs = pu4_horz_bs;
154
155    qp_strd = ps_sps->i2_pic_wd_in_ctb << (log2_ctb_size - 3);
156    pu1_qp = ps_deblk->s_bs_ctxt.pu1_pic_qp + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * qp_strd) << (log2_ctb_size - 3));
157
158    pu2_ctb_no_loop_filter_flag = ps_deblk->au2_ctb_no_loop_filter_flag;
159
160    ctb_indx = ps_deblk->i4_ctb_x + ps_sps->i2_pic_wd_in_ctb * ps_deblk->i4_ctb_y;
161    if(i4_is_last_ctb_y)
162    {
163        pu4_vert_bs = (UWORD32 *)((UWORD8 *)pu4_vert_bs + vert_bs_strd);
164        pu4_ctb_vert_bs = pu4_vert_bs;
165        /* ctb_size/8 is the number of edges per CTB
166         * ctb_size/4 is the number of BS values needed per edge
167         * divided by 8 for the number of bytes
168         * 2 is the number of bits needed for each BS value */
169        memset(pu4_vert_bs, 0, 1 << (2 * log2_ctb_size - 7));
170
171        pu1_qp += (qp_strd << (log2_ctb_size - 3));
172        pu2_ctb_no_loop_filter_flag += (ctb_size >> 3);
173        ctb_indx += ps_sps->i2_pic_wd_in_ctb;
174    }
175
176    if(i4_is_last_ctb_x)
177    {
178        pu4_horz_bs = (UWORD32 *)((UWORD8 *)pu4_horz_bs + (1 << (2 * log2_ctb_size - 7)));
179        pu4_ctb_horz_bs = pu4_horz_bs;
180        memset(pu4_horz_bs, 0, 1 << (2 * log2_ctb_size - 7));
181
182        pu1_qp += (ctb_size >> 3);
183
184        for(row = 0; row < (ctb_size >> 3) + 1; row++)
185            au2_ctb_no_loop_filter_flag[row] = ps_deblk->au2_ctb_no_loop_filter_flag[row] >> (ctb_size >> 3);
186        pu2_ctb_no_loop_filter_flag = au2_ctb_no_loop_filter_flag;
187        ctb_indx += 1;
188    }
189
190    u4_qp_const_in_ctb[3] = ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx) >> 3] & (1 << (ctb_indx & 7));
191
192    if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
193    {
194        u4_qp_const_in_ctb[2] = ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - 1) >> 3] & (1 << ((ctb_indx - 1) & 7));
195    }
196
197    if((ps_deblk->i4_ctb_x || i4_is_last_ctb_x) && (ps_deblk->i4_ctb_y || i4_is_last_ctb_y))
198    {
199        u4_qp_const_in_ctb[0] =
200                        ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - ps_sps->i2_pic_wd_in_ctb - 1) >> 3] &
201                        (1 << ((ctb_indx - ps_sps->i2_pic_wd_in_ctb - 1) & 7));
202    }
203
204
205
206    if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
207    {
208        u4_qp_const_in_ctb[1] =
209                        ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - ps_sps->i2_pic_wd_in_ctb) >> 3] &
210                        (1 << ((ctb_indx - ps_sps->i2_pic_wd_in_ctb) & 7));
211    }
212
213    src_strd = ps_codec->i4_strd;
214
215    /* Luma Vertical Edge */
216
217    if(0 == i4_is_last_ctb_x)
218    {
219        /* Top CTB's slice header */
220        slice_header_t *ps_slice_hdr_top;
221#ifdef GPU_BUILD
222//TODO GPU : Later define it for ARM only version as well
223        {
224            WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
225            if(i4_is_last_ctb_y)
226                cur_ctb_indx += ps_sps->i2_pic_wd_in_ctb;
227            ps_slice_hdr_top = ps_deblk->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - ps_sps->i2_pic_wd_in_ctb];
228        }
229#else
230        {
231            WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
232            if(i4_is_last_ctb_y)
233                cur_ctb_indx += ps_sps->i2_pic_wd_in_ctb;
234            ps_slice_hdr_top = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - ps_sps->i2_pic_wd_in_ctb];
235        }
236#endif
237
238        pu1_src = ps_deblk->pu1_cur_pic_luma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd) << (log2_ctb_size));
239        pu1_src += i4_is_last_ctb_y ? ps_deblk->ps_codec->i4_strd << log2_ctb_size : 0;
240
241        /** Deblocking is done on a shifted CTB -
242         *  Vertical edge processing is done by shifting the CTB up by four pixels */
243        pu1_src -= 4 * src_strd;
244
245        for(col = 0; col < ctb_size / 8; col++)
246        {
247            WORD32 shift = 0;
248
249            /* downshift vert_bs by ctb_size/2 for each column
250             *  shift = (col & ((MAX_CTB_SIZE >> log2_ctb_size) - 1)) << (log2_ctb_size - 1);
251             *  which will reduce to the following assuming ctb size is one of 16, 32 and 64
252             *  and deblocking is done on 8x8 grid
253             */
254            if(6 != log2_ctb_size)
255                shift = (col & 1) << (log2_ctb_size - 1);
256
257            /* BS for the column - Last row is excluded and the top row is included*/
258            u4_bs = (pu4_vert_bs[0] >> shift) << 2;
259
260            if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
261            {
262                /* Picking the last BS of the previous CTB corresponding to the same column */
263                UWORD32 *pu4_vert_bs_top = (UWORD32 *)((UWORD8 *)pu4_vert_bs - vert_bs_strd);
264                UWORD32 u4_top_bs = (*pu4_vert_bs_top) >> (shift + (1 << (log2_ctb_size - 1)) - 2);
265                u4_bs |= u4_top_bs & 3;
266            }
267
268            for(row = 0; row < ctb_size / 4;)
269            {
270                WORD8 i1_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2;
271                WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
272
273                /* Trailing zeros are computed and the corresponding rows are not processed */
274                bs_tz = CTZ(u4_bs) >> 1;
275                if(0 != bs_tz)
276                {
277                    u4_bs = u4_bs >> (bs_tz << 1);
278                    if((row + bs_tz) >= (ctb_size / 4))
279                        pu1_src += 4 * (ctb_size / 4 - row) * src_strd;
280                    else
281                        pu1_src += 4 * bs_tz  * src_strd;
282
283                    row += bs_tz;
284                    continue;
285                }
286
287                if(0 == row)
288                {
289                    i1_beta_offset_div2 = ps_slice_hdr_top->i1_beta_offset_div2;
290                    i1_tc_offset_div2 = ps_slice_hdr_top->i1_tc_offset_div2;
291
292                    if(0 == col)
293                    {
294                        qp_p = u4_qp_const_in_ctb[0] ?
295                                        pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
296                                        pu1_qp[-qp_strd - 1];
297                    }
298                    else
299                    {
300                        qp_p = u4_qp_const_in_ctb[1] ?
301                                        pu1_qp[-ctb_size / 8 * qp_strd] :
302                                        pu1_qp[col - 1 - qp_strd];
303                    }
304
305                    qp_q = u4_qp_const_in_ctb[1] ?
306                                    pu1_qp[-ctb_size / 8 * qp_strd] :
307                                    pu1_qp[col - qp_strd];
308                }
309                else
310                {
311                    if(0 == col)
312                    {
313                        qp_p = u4_qp_const_in_ctb[2] ?
314                                        pu1_qp[-ctb_size / 8] :
315                                        pu1_qp[((row - 1) >> 1) * qp_strd - 1];
316                    }
317                    else
318                    {
319                        qp_p = u4_qp_const_in_ctb[3] ?
320                                        pu1_qp[0] :
321                                        pu1_qp[((row - 1) >> 1) * qp_strd + col - 1];
322                    }
323
324                    qp_q = u4_qp_const_in_ctb[3] ?
325                                    pu1_qp[0] :
326                                    pu1_qp[((row - 1) >> 1) * qp_strd + col];
327                }
328
329                filter_p = (pu2_ctb_no_loop_filter_flag[(row + 1) >> 1] >> col) & 1;
330                filter_q = (pu2_ctb_no_loop_filter_flag[(row + 1) >> 1] >> col) & 2;
331                /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
332                filter_p = !filter_p;
333                filter_q = !filter_q;
334
335                if(filter_p || filter_q)
336                {
337#if DEBUG_DEBLK_LEAF_LEVEL
338                    {
339                        DUMP_DEBLK_LUMA_VERT(pu1_src, src_strd,
340                                             u4_bs & 3, qp_p, qp_q,
341                                             ps_slice_hdr->i1_beta_offset_div2,
342                                             ps_slice_hdr->i1_tc_offset_div2,
343                                             filter_p, filter_q);
344                    }
345#endif
346                    ps_codec->s_func_selector.ihevc_deblk_luma_vert_fptr(pu1_src, src_strd,
347                                                                         u4_bs & 3, qp_p, qp_q,
348                                                                         i1_beta_offset_div2,
349                                                                         i1_tc_offset_div2,
350                                                                         filter_p, filter_q);
351                }
352
353                pu1_src += 4 * src_strd;
354                u4_bs = u4_bs >> 2;
355                row++;
356            }
357
358            if((64 == ctb_size) ||
359                            ((32 == ctb_size) && (col & 1)))
360            {
361                pu4_vert_bs++;
362            }
363            pu1_src -= (src_strd << log2_ctb_size);
364            pu1_src += 8;
365        }
366        pu4_vert_bs = pu4_ctb_vert_bs;
367    }
368
369
370    /* Luma Horizontal Edge */
371
372    if(0 == i4_is_last_ctb_y)
373    {
374
375        /* Left CTB's slice header */
376        slice_header_t *ps_slice_hdr_left;
377#ifdef GPU_BUILD
378//TODO GPU : Later define it for ARM only version as well
379        {
380            WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
381            if(i4_is_last_ctb_x)
382                cur_ctb_indx += 1;
383            ps_slice_hdr_left = ps_deblk->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - 1];
384        }
385#else
386        {
387            WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
388            if(i4_is_last_ctb_x)
389                cur_ctb_indx += 1;
390            ps_slice_hdr_left = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - 1];
391        }
392#endif
393        pu1_src = ps_deblk->pu1_cur_pic_luma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd) << log2_ctb_size);
394        pu1_src += i4_is_last_ctb_x ? ctb_size : 0;
395
396        /** Deblocking is done on a shifted CTB -
397         *  Horizontal edge processing is done by shifting the CTB left by four pixels */
398        pu1_src -= 4;
399        for(row = 0; row < ctb_size / 8; row++)
400        {
401            WORD32 shift = 0;
402
403            /* downshift horz_bs by ctb_size/2 for each row
404             *  shift = (row & (MAX_CTB_SIZE / ctb_size - 1)) * ctb_size / 2;
405             *  which will reduce to the following assuming ctb size is one of 16, 32 and 64
406             *  and deblocking is done on 8x8 grid
407             */
408            if(6 != log2_ctb_size)
409                shift = (row & 1) << (log2_ctb_size - 1);
410
411            /* BS for the row - Last column is excluded and the left column is included*/
412            u4_bs = (pu4_horz_bs[0] >> shift) << 2;
413
414            if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
415            {
416                /** Picking the last BS of the previous CTB corresponding to the same row
417                * UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (ctb_size / 8) * (ctb_size / 4) / 8 * 2);
418                */
419                UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (1 << (2 * log2_ctb_size - 7)));
420                UWORD32 u4_left_bs = (*pu4_horz_bs_left) >> (shift + (1 << (log2_ctb_size - 1)) - 2);
421                u4_bs |= u4_left_bs & 3;
422            }
423
424            for(col = 0; col < ctb_size / 4;)
425            {
426                WORD8 i1_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2;
427                WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
428
429                bs_tz = CTZ(u4_bs) >> 1;
430                if(0 != bs_tz)
431                {
432                    u4_bs = u4_bs >> (bs_tz << 1);
433
434                    if((col + bs_tz) >= (ctb_size / 4))
435                        pu1_src += 4 * (ctb_size / 4 - col);
436                    else
437                        pu1_src += 4 * bs_tz;
438
439                    col += bs_tz;
440                    continue;
441                }
442
443                if(0 == col)
444                {
445                    i1_beta_offset_div2 = ps_slice_hdr_left->i1_beta_offset_div2;
446                    i1_tc_offset_div2 = ps_slice_hdr_left->i1_tc_offset_div2;
447
448                    if(0 == row)
449                    {
450                        qp_p = u4_qp_const_in_ctb[0] ?
451                                        pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
452                                        pu1_qp[-qp_strd - 1];
453                    }
454                    else
455                    {
456                        qp_p = u4_qp_const_in_ctb[2] ?
457                                        pu1_qp[-ctb_size / 8] :
458                                        pu1_qp[(row - 1) * qp_strd - 1];
459                    }
460
461                    qp_q = u4_qp_const_in_ctb[2] ?
462                                    pu1_qp[-ctb_size / 8] :
463                                    pu1_qp[row * qp_strd - 1];
464                }
465                else
466                {
467                    if(0 == row)
468                    {
469                        qp_p = u4_qp_const_in_ctb[1] ?
470                                        pu1_qp[-ctb_size / 8 * qp_strd] :
471                                        pu1_qp[((col - 1) >> 1) - qp_strd];
472                    }
473                    else
474                    {
475                        qp_p = u4_qp_const_in_ctb[3] ?
476                                        pu1_qp[0] :
477                                        pu1_qp[((col - 1) >> 1) + (row - 1) * qp_strd];
478                    }
479
480                    qp_q = u4_qp_const_in_ctb[3] ?
481                                    pu1_qp[0] :
482                                    pu1_qp[((col - 1) >> 1) + row * qp_strd];
483                }
484
485                filter_p = (pu2_ctb_no_loop_filter_flag[row] >> ((col + 1) >> 1)) & 1;
486                filter_q = (pu2_ctb_no_loop_filter_flag[row + 1] >> ((col + 1) >> 1)) & 1;
487                /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
488                filter_p = !filter_p;
489                filter_q = !filter_q;
490
491                if(filter_p || filter_q)
492                {
493#if DEBUG_DEBLK_LEAF_LEVEL
494                    {
495                        DUMP_DEBLK_LUMA_HORZ(pu1_src, src_strd,
496                                             u4_bs & 3, qp_p, qp_q,
497                                             ps_slice_hdr->i1_beta_offset_div2,
498                                             ps_slice_hdr->i1_tc_offset_div2,
499                                             filter_p, filter_q);
500                    }
501#endif
502                    ps_codec->s_func_selector.ihevc_deblk_luma_horz_fptr(pu1_src, src_strd,
503                                                                         u4_bs & 3, qp_p, qp_q,
504                                                                         i1_beta_offset_div2,
505                                                                         i1_tc_offset_div2, filter_p, filter_q);
506                }
507
508                pu1_src += 4;
509                u4_bs = u4_bs >> 2;
510                col++;
511            }
512
513            if((64 == ctb_size) ||
514                            ((32 == ctb_size) && (row & 1)))
515            {
516                pu4_horz_bs++;
517            }
518            pu1_src -= ctb_size;
519            pu1_src += (src_strd << 3);
520        }
521        pu4_horz_bs = pu4_ctb_horz_bs;
522    }
523
524
525    /* Chroma Vertical Edge */
526
527    if(0 == i4_is_last_ctb_x)
528    {
529
530        /* Top CTB's slice header */
531        slice_header_t *ps_slice_hdr_top;
532#ifdef GPU_BUILD
533//TODO GPU : Later define it for ARM only version as well
534        {
535            WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
536            if(i4_is_last_ctb_y)
537                cur_ctb_indx += ps_sps->i2_pic_wd_in_ctb;
538            ps_slice_hdr_top = ps_deblk->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - ps_sps->i2_pic_wd_in_ctb];
539        }
540#else
541        {
542            WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
543            if(i4_is_last_ctb_y)
544                cur_ctb_indx += ps_sps->i2_pic_wd_in_ctb;
545            ps_slice_hdr_top = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - ps_sps->i2_pic_wd_in_ctb];
546        }
547#endif
548
549        pu1_src = ps_deblk->pu1_cur_pic_chroma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size);
550        pu1_src += i4_is_last_ctb_y ? (ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size : 0;
551
552        /** Deblocking is done on a shifted CTB -
553         *  Vertical edge processing is done by shifting the CTB up by four pixels */
554        pu1_src -= 4 * src_strd;
555
556        for(col = 0; col < ctb_size / 16; col++)
557        {
558
559            /* BS for the column - Last row is excluded and the top row is included*/
560            u4_bs = pu4_vert_bs[0] << 2;
561
562            if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
563            {
564                /* Picking the last BS of the previous CTB corresponding to the same column */
565                UWORD32 *pu4_vert_bs_top = (UWORD32 *)((UWORD8 *)pu4_vert_bs - vert_bs_strd);
566                UWORD32 u4_top_bs = (*pu4_vert_bs_top) >> ((1 << (log2_ctb_size - 1)) - 2);
567                u4_bs |= u4_top_bs & 3;
568            }
569
570            /* Every alternate boundary strength value is used for chroma */
571            u4_bs &= 0x22222222;
572
573            for(row = 0; row < ctb_size / 8;)
574            {
575                WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
576
577                bs_tz = CTZ(u4_bs) >> 2;
578                if(0 != bs_tz)
579                {
580                    if((row + bs_tz) >= (ctb_size / 8))
581                        pu1_src += 4 * (ctb_size / 8 - row) * src_strd;
582                    else
583                        pu1_src += 4 * bs_tz  * src_strd;
584                    row += bs_tz;
585                    u4_bs = u4_bs >> (bs_tz << 2);
586                    continue;
587                }
588
589                if(0 == row)
590                {
591                    i1_tc_offset_div2 = ps_slice_hdr_top->i1_tc_offset_div2;
592
593                    if(0 == col)
594                    {
595                        qp_p = u4_qp_const_in_ctb[0] ?
596                                        pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
597                                        pu1_qp[-qp_strd - 1];
598                    }
599                    else
600                    {
601                        qp_p = u4_qp_const_in_ctb[1] ?
602                                        pu1_qp[-ctb_size / 8 * qp_strd] :
603                                        pu1_qp[2 * col - 1 - qp_strd];
604                    }
605
606                    qp_q = u4_qp_const_in_ctb[1] ?
607                                    pu1_qp[-ctb_size / 8 * qp_strd] :
608                                    pu1_qp[2 * col - qp_strd];
609                }
610                else
611                {
612                    if(0 == col)
613                    {
614                        qp_p = u4_qp_const_in_ctb[2] ?
615                                        pu1_qp[-ctb_size / 8] :
616                                        pu1_qp[(row - 1) * qp_strd - 1];
617                    }
618                    else
619                    {
620                        qp_p = u4_qp_const_in_ctb[3] ?
621                                        pu1_qp[0] :
622                                        pu1_qp[(row - 1) * qp_strd + 2 * col - 1];
623                    }
624
625                    qp_q = u4_qp_const_in_ctb[3] ?
626                                    pu1_qp[0] :
627                                    pu1_qp[(row - 1) * qp_strd + 2 * col];
628                }
629
630                filter_p = (pu2_ctb_no_loop_filter_flag[row] >> (col << 1)) & 1;
631                filter_q = (pu2_ctb_no_loop_filter_flag[row] >> (col << 1)) & 2;
632                /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
633                filter_p = !filter_p;
634                filter_q = !filter_q;
635
636                if(filter_p || filter_q)
637                {
638                    ASSERT(1 == ((u4_bs & 3) >> 1));
639#if DEBUG_DEBLK_LEAF_LEVEL
640                    {
641                        DUMP_DEBLK_CHROMA_VERT(pu1_src, src_strd,
642                                               u4_bs & 3, qp_p, qp_q,
643                                               ps_pps->i1_pic_cb_qp_offset,
644                                               ps_pps->i1_pic_cr_qp_offset,
645                                               ps_slice_hdr->i1_tc_offset_div2,
646                                               filter_p, filter_q);
647                    }
648#endif
649                    if(chroma_yuv420sp_vu)
650                    {
651                        ps_codec->s_func_selector.ihevc_deblk_chroma_vert_fptr(pu1_src,
652                                                                               src_strd,
653                                                                               qp_q,
654                                                                               qp_p,
655                                                                               ps_pps->i1_pic_cr_qp_offset,
656                                                                               ps_pps->i1_pic_cb_qp_offset,
657                                                                               i1_tc_offset_div2,
658                                                                               filter_q,
659                                                                               filter_p);
660                    }
661                    else
662                    {
663                        ps_codec->s_func_selector.ihevc_deblk_chroma_vert_fptr(pu1_src,
664                                                                               src_strd,
665                                                                               qp_p,
666                                                                               qp_q,
667                                                                               ps_pps->i1_pic_cb_qp_offset,
668                                                                               ps_pps->i1_pic_cr_qp_offset,
669                                                                               i1_tc_offset_div2,
670                                                                               filter_p,
671                                                                               filter_q);
672                    }
673                }
674
675                pu1_src += 4 * src_strd;
676                u4_bs = u4_bs >> 4;
677                row++;
678            }
679
680            pu4_vert_bs += (64 == ctb_size) ? 2 : 1;
681            pu1_src -= ((src_strd / 2) << log2_ctb_size);
682            pu1_src += 16;
683        }
684    }
685
686    /* Chroma Horizontal Edge */
687
688    if(0 == i4_is_last_ctb_y)
689    {
690
691        /* Left CTB's slice header */
692        slice_header_t *ps_slice_hdr_left;
693#ifdef GPU_BUILD
694//TODO GPU : Later define it for ARM only version as well
695        {
696            WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
697            if(i4_is_last_ctb_x)
698                cur_ctb_indx += 1;
699            ps_slice_hdr_left = ps_deblk->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - 1];
700        }
701#else
702        {
703            WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
704            if(i4_is_last_ctb_x)
705                cur_ctb_indx += 1;
706            ps_slice_hdr_left = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - 1];
707        }
708#endif
709
710        pu1_src = ps_deblk->pu1_cur_pic_chroma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size);
711        pu1_src += i4_is_last_ctb_x ? ctb_size : 0;
712
713        /** Deblocking is done on a shifted CTB -
714         * Horizontal edge processing is done by shifting the CTB left by four pixels (8 here because UV are interleaved) */
715        pu1_src -= 8;
716        for(row = 0; row < ctb_size / 16; row++)
717        {
718            /* BS for the row - Last column is excluded and the left column is included*/
719            u4_bs = pu4_horz_bs[0] << 2;
720
721            if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
722            {
723                /** Picking the last BS of the previous CTB corresponding to the same row
724                * UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (ctb_size / 8) * (ctb_size / 4) / 8 * 2);
725                */
726                UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (1 << (2 * log2_ctb_size - 7)));
727                UWORD32 u4_left_bs = (*pu4_horz_bs_left) >> ((1 << (log2_ctb_size - 1)) - 2);
728                u4_bs |= u4_left_bs & 3;
729            }
730
731            /* Every alternate boundary strength value is used for chroma */
732            u4_bs &= 0x22222222;
733
734            for(col = 0; col < ctb_size / 8;)
735            {
736                WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
737
738                bs_tz = CTZ(u4_bs) >> 2;
739                if(0 != bs_tz)
740                {
741                    u4_bs = u4_bs >> (bs_tz << 2);
742
743                    if((col + bs_tz) >= (ctb_size / 8))
744                        pu1_src += 8 * (ctb_size / 8 - col);
745                    else
746                        pu1_src += 8 * bs_tz;
747
748                    col += bs_tz;
749                    continue;
750                }
751
752                if(0 == col)
753                {
754                    i1_tc_offset_div2 = ps_slice_hdr_left->i1_tc_offset_div2;
755
756                    if(0 == row)
757                    {
758                        qp_p = u4_qp_const_in_ctb[0] ?
759                                        pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
760                                        pu1_qp[-qp_strd - 1];
761                    }
762                    else
763                    {
764                        qp_p = u4_qp_const_in_ctb[2] ?
765                                        pu1_qp[-ctb_size / 8] :
766                                        pu1_qp[(2 * row - 1) * qp_strd - 1];
767                    }
768
769                    qp_q = u4_qp_const_in_ctb[2] ?
770                                    pu1_qp[-ctb_size / 8] :
771                                    pu1_qp[(2 * row) * qp_strd - 1];
772                }
773                else
774                {
775                    if(0 == row)
776                    {
777                        qp_p = u4_qp_const_in_ctb[1] ?
778                                        pu1_qp[-ctb_size / 8 * qp_strd] :
779                                        pu1_qp[col - 1 - qp_strd];
780                    }
781                    else
782                    {
783                        qp_p = u4_qp_const_in_ctb[3] ?
784                                        pu1_qp[0] :
785                                        pu1_qp[(col - 1) +  (2 * row - 1) * qp_strd];
786                    }
787
788                    qp_q = u4_qp_const_in_ctb[3] ?
789                                    pu1_qp[0] :
790                                    pu1_qp[(col - 1) + 2 * row * qp_strd];
791                }
792
793                filter_p = (pu2_ctb_no_loop_filter_flag[row << 1] >> col) & 1;
794                filter_q = (pu2_ctb_no_loop_filter_flag[(row << 1) + 1] >> col) & 1;
795                /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
796                filter_p = !filter_p;
797                filter_q = !filter_q;
798
799                if(filter_p || filter_q)
800                {
801                    ASSERT(1 == ((u4_bs & 3) >> 1));
802#if DEBUG_DEBLK_LEAF_LEVEL
803                    {
804                        DUMP_DEBLK_CHROMA_HORZ(pu1_src, src_strd,
805                                               u4_bs & 3, qp_p, qp_q,
806                                               ps_pps->i1_pic_cb_qp_offset,
807                                               ps_pps->i1_pic_cr_qp_offset,
808                                               ps_slice_hdr->i1_tc_offset_div2,
809                                               filter_p, filter_q);
810                    }
811#endif
812                    if(chroma_yuv420sp_vu)
813                    {
814                        ps_codec->s_func_selector.ihevc_deblk_chroma_horz_fptr(pu1_src,
815                                                                               src_strd,
816                                                                               qp_q,
817                                                                               qp_p,
818                                                                               ps_pps->i1_pic_cr_qp_offset,
819                                                                               ps_pps->i1_pic_cb_qp_offset,
820                                                                               i1_tc_offset_div2,
821                                                                               filter_q,
822                                                                               filter_p);
823                    }
824                    else
825                    {
826                        ps_codec->s_func_selector.ihevc_deblk_chroma_horz_fptr(pu1_src,
827                                                                               src_strd,
828                                                                               qp_p,
829                                                                               qp_q,
830                                                                               ps_pps->i1_pic_cb_qp_offset,
831                                                                               ps_pps->i1_pic_cr_qp_offset,
832                                                                               i1_tc_offset_div2,
833                                                                               filter_p,
834                                                                               filter_q);
835                    }
836                }
837
838                pu1_src += 8;
839                u4_bs = u4_bs >> 4;
840                col++;
841            }
842
843            pu4_horz_bs += (64 == ctb_size) ? 2 : 1;
844            pu1_src -= ctb_size;
845            pu1_src += 8 * src_strd;
846
847        }
848    }
849}
850