1/******************************************************************************
2*
3* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4*
5* Licensed under the Apache License, Version 2.0 (the "License");
6* you may not use this file except in compliance with the License.
7* You may obtain a copy of the License at:
8*
9* http://www.apache.org/licenses/LICENSE-2.0
10*
11* Unless required by applicable law or agreed to in writing, software
12* distributed under the License is distributed on an "AS IS" BASIS,
13* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14* See the License for the specific language governing permissions and
15* limitations under the License.
16*
17******************************************************************************/
18/**
19*******************************************************************************
20* @file
21*  ihevc_deblk.c
22*
23* @brief
24*  Contains definition for the ctb level deblk function
25*
26* @author
27*  Srinivas T
28*
29* @par List of Functions:
*   - ihevcd_deblk_ctb()
31*
32* @remarks
33*  None
34*
35*******************************************************************************
36*/
37
38#include <stdio.h>
39#include <stddef.h>
40#include <stdlib.h>
41#include <string.h>
42#include <assert.h>
43
44#include "ihevc_typedefs.h"
45#include "iv.h"
46#include "ivd.h"
47#include "ihevcd_cxa.h"
48#include "ithread.h"
49
50#include "ihevc_defs.h"
51#include "ihevc_debug.h"
52#include "ihevc_defs.h"
53#include "ihevc_structs.h"
54#include "ihevc_macros.h"
55#include "ihevc_platform_macros.h"
56#include "ihevc_cabac_tables.h"
57
58#include "ihevc_error.h"
59#include "ihevc_common_tables.h"
60
61#include "ihevcd_trace.h"
62#include "ihevcd_defs.h"
63#include "ihevcd_function_selector.h"
64#include "ihevcd_structs.h"
65#include "ihevcd_error.h"
66#include "ihevcd_nal.h"
67#include "ihevcd_bitstream.h"
68#include "ihevcd_job_queue.h"
69#include "ihevcd_utils.h"
70#include "ihevcd_debug.h"
71
72#include "ihevc_deblk.h"
73#include "ihevc_deblk_tables.h"
74#include "ihevcd_profile.h"
75/**
76*******************************************************************************
77*
78* @brief
79*     Deblock CTB level function.
80*
81* @par Description:
82*     For a given CTB, deblocking on both vertical and
83*     horizontal edges is done. Both the luma and chroma
84*     blocks are processed
85*
86* @param[in] ps_deblk
87*  Pointer to the deblock context
88*
89* @returns
90*
91* @remarks
92*  None
93*
94*******************************************************************************
95*/
96
97void ihevcd_deblk_ctb(deblk_ctxt_t *ps_deblk,
98                      WORD32 i4_is_last_ctb_x,
99                      WORD32 i4_is_last_ctb_y)
100{
101    WORD32 ctb_size;
102    WORD32 log2_ctb_size;
103    UWORD32 u4_bs;
104    WORD32 bs_tz; /*Leading zeros in boundary strength*/
105    WORD32 qp_p, qp_q;
106
107    WORD32 filter_p, filter_q;
108
109    UWORD8 *pu1_src;
110    WORD32 qp_strd;
111    UWORD32 *pu4_vert_bs, *pu4_horz_bs;
112    UWORD32 *pu4_ctb_vert_bs, *pu4_ctb_horz_bs;
113    WORD32 bs_strd;
114    WORD32 src_strd;
115    UWORD8 *pu1_qp;
116    UWORD16 *pu2_ctb_no_loop_filter_flag;
117    UWORD16 au2_ctb_no_loop_filter_flag[9];
118
119    WORD32 col, row;
120
121    /* Flag to indicate if QP is constant in CTB
122     * 0 - top_left, 1 - top, 2 - left, 3 - current */
123    UWORD32 u4_qp_const_in_ctb[4] = { 0, 0, 0, 0 };
124    WORD32 ctb_indx;
125    WORD32  chroma_yuv420sp_vu = ps_deblk->is_chroma_yuv420sp_vu;
126    sps_t *ps_sps;
127    pps_t *ps_pps;
128    codec_t *ps_codec;
129    slice_header_t *ps_slice_hdr;
130
131    PROFILE_DISABLE_DEBLK();
132
133    ps_sps = ps_deblk->ps_sps;
134    ps_pps = ps_deblk->ps_pps;
135    ps_codec = ps_deblk->ps_codec;
136    ps_slice_hdr = ps_deblk->ps_slice_hdr;
137
138    log2_ctb_size = ps_sps->i1_log2_ctb_size;
139    ctb_size = (1 << ps_sps->i1_log2_ctb_size);
140
141    /* strides are in units of number of bytes */
142    /* ctb_size * ctb_size / 8 / 16 is the number of bytes needed per CTB */
143    bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) << (2 * log2_ctb_size - 7);
144
145    pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_deblk->s_bs_ctxt.pu4_pic_vert_bs +
146                    (ps_deblk->i4_ctb_x << (2 * log2_ctb_size - 7)) +
147                    ps_deblk->i4_ctb_y * bs_strd);
148    pu4_ctb_vert_bs = pu4_vert_bs;
149
150    pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_deblk->s_bs_ctxt.pu4_pic_horz_bs +
151                    (ps_deblk->i4_ctb_x << (2 * log2_ctb_size - 7)) +
152                    ps_deblk->i4_ctb_y * bs_strd);
153    pu4_ctb_horz_bs = pu4_horz_bs;
154
155    qp_strd = ps_sps->i2_pic_wd_in_ctb << (log2_ctb_size - 3);
156    pu1_qp = ps_deblk->s_bs_ctxt.pu1_pic_qp + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * qp_strd) << (log2_ctb_size - 3));
157
158    pu2_ctb_no_loop_filter_flag = ps_deblk->au2_ctb_no_loop_filter_flag;
159
160    ctb_indx = ps_deblk->i4_ctb_x + ps_sps->i2_pic_wd_in_ctb * ps_deblk->i4_ctb_y;
161    if(i4_is_last_ctb_y)
162    {
163        pu4_vert_bs = (UWORD32 *)((UWORD8 *)pu4_vert_bs + bs_strd);
164        pu4_ctb_vert_bs = pu4_vert_bs;
165        /* ctb_size/8 is the number of edges per CTB
166         * ctb_size/4 is the number of BS values needed per edge
167         * divided by 8 for the number of bytes
168         * 2 is the number of bits needed for each BS value */
169        memset(pu4_vert_bs, 0, 1 << (2 * log2_ctb_size - 7));
170
171        pu1_qp += (qp_strd << (log2_ctb_size - 3));
172        pu2_ctb_no_loop_filter_flag += (ctb_size >> 3);
173        ctb_indx += ps_sps->i2_pic_wd_in_ctb;
174    }
175
176    if(i4_is_last_ctb_x)
177    {
178        pu4_horz_bs = (UWORD32 *)((UWORD8 *)pu4_horz_bs + (1 << (2 * log2_ctb_size - 7)));
179        pu4_ctb_horz_bs = pu4_horz_bs;
180        memset(pu4_horz_bs, 0, 1 << (2 * log2_ctb_size - 7));
181
182        pu1_qp += (ctb_size >> 3);
183
184        for(row = 0; row < (ctb_size >> 3) + 1; row++)
185            au2_ctb_no_loop_filter_flag[row] = ps_deblk->au2_ctb_no_loop_filter_flag[row] >> (ctb_size >> 3);
186        pu2_ctb_no_loop_filter_flag = au2_ctb_no_loop_filter_flag;
187        ctb_indx += 1;
188    }
189
190    u4_qp_const_in_ctb[3] = ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx) >> 3] & (1 << (ctb_indx & 7));
191
192    if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
193    {
194        u4_qp_const_in_ctb[2] = ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - 1) >> 3] & (1 << ((ctb_indx - 1) & 7));
195    }
196
197    if((ps_deblk->i4_ctb_x || i4_is_last_ctb_x) && (ps_deblk->i4_ctb_y || i4_is_last_ctb_y))
198    {
199        u4_qp_const_in_ctb[0] =
200                        ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - ps_sps->i2_pic_wd_in_ctb - 1) >> 3] &
201                        (1 << ((ctb_indx - ps_sps->i2_pic_wd_in_ctb - 1) & 7));
202    }
203
204
205
206    if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
207    {
208        u4_qp_const_in_ctb[1] =
209                        ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - ps_sps->i2_pic_wd_in_ctb) >> 3] &
210                        (1 << ((ctb_indx - ps_sps->i2_pic_wd_in_ctb) & 7));
211    }
212
213    src_strd = ps_codec->i4_strd;
214
215    /* Luma Vertical Edge */
216
217    if(0 == i4_is_last_ctb_x)
218    {
219        /* Top CTB's slice header */
220        slice_header_t *ps_slice_hdr_top;
221        {
222            WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
223            if(i4_is_last_ctb_y)
224                cur_ctb_indx += ps_sps->i2_pic_wd_in_ctb;
225            ps_slice_hdr_top = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - ps_sps->i2_pic_wd_in_ctb];
226        }
227
228        pu1_src = ps_deblk->pu1_cur_pic_luma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd) << (log2_ctb_size));
229        pu1_src += i4_is_last_ctb_y ? ps_deblk->ps_codec->i4_strd << log2_ctb_size : 0;
230
231        /** Deblocking is done on a shifted CTB -
232         *  Vertical edge processing is done by shifting the CTB up by four pixels */
233        pu1_src -= 4 * src_strd;
234
235        for(col = 0; col < ctb_size / 8; col++)
236        {
237            WORD32 shift = 0;
238
239            /*  downshift vert_bs by ctb_size/2 for each column
240             *  shift = (col & ((MAX_CTB_SIZE >> log2_ctb_size) - 1)) << (log2_ctb_size - 1);
241             *  which will reduce to the following assuming ctb size is one of 16, 32 and 64
242             *  and deblocking is done on 8x8 grid
243             */
244            if(6 != log2_ctb_size)
245                shift = (col & 1) << (log2_ctb_size - 1);
246
247            /* BS for the column - Last row is excluded and the top row is included*/
248            u4_bs = (pu4_vert_bs[0] >> shift) << 2;
249
250            if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
251            {
252                /* Picking the last BS of the previous CTB corresponding to the same column */
253                UWORD32 *pu4_vert_bs_top = (UWORD32 *)((UWORD8 *)pu4_vert_bs - bs_strd);
254                UWORD32 u4_top_bs = (*pu4_vert_bs_top) >> (shift + (1 << (log2_ctb_size - 1)) - 2);
255                u4_bs |= u4_top_bs & 3;
256            }
257
258            for(row = 0; row < ctb_size / 4;)
259            {
260                WORD8 i1_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2;
261                WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
262
263                /* Trailing zeros are computed and the corresponding rows are not processed */
264                bs_tz = CTZ(u4_bs) >> 1;
265                if(0 != bs_tz)
266                {
267                    u4_bs = u4_bs >> (bs_tz << 1);
268                    if((row + bs_tz) >= (ctb_size / 4))
269                        pu1_src += 4 * (ctb_size / 4 - row) * src_strd;
270                    else
271                        pu1_src += 4 * bs_tz  * src_strd;
272
273                    row += bs_tz;
274                    continue;
275                }
276
277                if(0 == row)
278                {
279                    i1_beta_offset_div2 = ps_slice_hdr_top->i1_beta_offset_div2;
280                    i1_tc_offset_div2 = ps_slice_hdr_top->i1_tc_offset_div2;
281
282                    if(0 == col)
283                    {
284                        qp_p = u4_qp_const_in_ctb[0] ?
285                                        pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
286                                        pu1_qp[-qp_strd - 1];
287                    }
288                    else
289                    {
290                        qp_p = u4_qp_const_in_ctb[1] ?
291                                        pu1_qp[-ctb_size / 8 * qp_strd] :
292                                        pu1_qp[col - 1 - qp_strd];
293                    }
294
295                    qp_q = u4_qp_const_in_ctb[1] ?
296                                    pu1_qp[-ctb_size / 8 * qp_strd] :
297                                    pu1_qp[col - qp_strd];
298                }
299                else
300                {
301                    if(0 == col)
302                    {
303                        qp_p = u4_qp_const_in_ctb[2] ?
304                                        pu1_qp[-ctb_size / 8] :
305                                        pu1_qp[((row - 1) >> 1) * qp_strd - 1];
306                    }
307                    else
308                    {
309                        qp_p = u4_qp_const_in_ctb[3] ?
310                                        pu1_qp[0] :
311                                        pu1_qp[((row - 1) >> 1) * qp_strd + col - 1];
312                    }
313
314                    qp_q = u4_qp_const_in_ctb[3] ?
315                                    pu1_qp[0] :
316                                    pu1_qp[((row - 1) >> 1) * qp_strd + col];
317                }
318
319                filter_p = (pu2_ctb_no_loop_filter_flag[(row + 1) >> 1] >> col) & 1;
320                filter_q = (pu2_ctb_no_loop_filter_flag[(row + 1) >> 1] >> col) & 2;
321                /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
322                filter_p = !filter_p;
323                filter_q = !filter_q;
324
325                if(filter_p || filter_q)
326                {
327                    DUMP_DEBLK_LUMA_VERT(pu1_src, src_strd,
328                                         u4_bs & 3, qp_p, qp_q,
329                                         ps_slice_hdr->i1_beta_offset_div2,
330                                         ps_slice_hdr->i1_tc_offset_div2,
331                                         filter_p, filter_q);
332                    ps_codec->s_func_selector.ihevc_deblk_luma_vert_fptr(pu1_src, src_strd,
333                                                                         u4_bs & 3, qp_p, qp_q,
334                                                                         i1_beta_offset_div2,
335                                                                         i1_tc_offset_div2,
336                                                                         filter_p, filter_q);
337                }
338
339                pu1_src += 4 * src_strd;
340                u4_bs = u4_bs >> 2;
341                row++;
342            }
343
344            if((64 == ctb_size) ||
345                            ((32 == ctb_size) && (col & 1)))
346            {
347                pu4_vert_bs++;
348            }
349            pu1_src -= (src_strd << log2_ctb_size);
350            pu1_src += 8;
351        }
352        pu4_vert_bs = pu4_ctb_vert_bs;
353    }
354
355
356    /* Luma Horizontal Edge */
357
358    if(0 == i4_is_last_ctb_y)
359    {
360
361        /* Left CTB's slice header */
362        slice_header_t *ps_slice_hdr_left;
363        {
364            WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
365            if(i4_is_last_ctb_x)
366                cur_ctb_indx += 1;
367            ps_slice_hdr_left = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - 1];
368        }
369        pu1_src = ps_deblk->pu1_cur_pic_luma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd) << log2_ctb_size);
370        pu1_src += i4_is_last_ctb_x ? ctb_size : 0;
371
372        /** Deblocking is done on a shifted CTB -
373         *  Horizontal edge processing is done by shifting the CTB left by four pixels */
374        pu1_src -= 4;
375        for(row = 0; row < ctb_size / 8; row++)
376        {
377            WORD32 shift = 0;
378
379            /* downshift vert_bs by ctb_size/2 for each column
380             *  shift = (row & (MAX_CTB_SIZE / ctb_size - 1)) * ctb_size / 2;
381             *  which will reduce to the following assuming ctb size is one of 16, 32 and 64
382             *  and deblocking is done on 8x8 grid
383             */
384            if(6 != log2_ctb_size)
385                shift = (row & 1) << (log2_ctb_size - 1);
386
387            /* BS for the row - Last column is excluded and the left column is included*/
388            u4_bs = (pu4_horz_bs[0] >> shift) << 2;
389
390            if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
391            {
392                /** Picking the last BS of the previous CTB corresponding to the same row
393                * UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (ctb_size / 8) * (ctb_size / 4) / 8 * 2);
394                */
395                UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (1 << (2 * log2_ctb_size - 7)));
396                UWORD32 u4_left_bs = (*pu4_horz_bs_left) >> (shift + (1 << (log2_ctb_size - 1)) - 2);
397                u4_bs |= u4_left_bs & 3;
398            }
399
400            for(col = 0; col < ctb_size / 4;)
401            {
402                WORD8 i1_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2;
403                WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
404
405                bs_tz = CTZ(u4_bs) >> 1;
406                if(0 != bs_tz)
407                {
408                    u4_bs = u4_bs >> (bs_tz << 1);
409
410                    if((col + bs_tz) >= (ctb_size / 4))
411                        pu1_src += 4 * (ctb_size / 4 - col);
412                    else
413                        pu1_src += 4 * bs_tz;
414
415                    col += bs_tz;
416                    continue;
417                }
418
419                if(0 == col)
420                {
421                    i1_beta_offset_div2 = ps_slice_hdr_left->i1_beta_offset_div2;
422                    i1_tc_offset_div2 = ps_slice_hdr_left->i1_tc_offset_div2;
423
424                    if(0 == row)
425                    {
426                        qp_p = u4_qp_const_in_ctb[0] ?
427                                        pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
428                                        pu1_qp[-qp_strd - 1];
429                    }
430                    else
431                    {
432                        qp_p = u4_qp_const_in_ctb[2] ?
433                                        pu1_qp[-ctb_size / 8] :
434                                        pu1_qp[(row - 1) * qp_strd - 1];
435                    }
436
437                    qp_q = u4_qp_const_in_ctb[2] ?
438                                    pu1_qp[-ctb_size / 8] :
439                                    pu1_qp[row * qp_strd - 1];
440                }
441                else
442                {
443                    if(0 == row)
444                    {
445                        qp_p = u4_qp_const_in_ctb[1] ?
446                                        pu1_qp[-ctb_size / 8 * qp_strd] :
447                                        pu1_qp[((col - 1) >> 1) - qp_strd];
448                    }
449                    else
450                    {
451                        qp_p = u4_qp_const_in_ctb[3] ?
452                                        pu1_qp[0] :
453                                        pu1_qp[((col - 1) >> 1) + (row - 1) * qp_strd];
454                    }
455
456                    qp_q = u4_qp_const_in_ctb[3] ?
457                                    pu1_qp[0] :
458                                    pu1_qp[((col - 1) >> 1) + row * qp_strd];
459                }
460
461                filter_p = (pu2_ctb_no_loop_filter_flag[row] >> ((col + 1) >> 1)) & 1;
462                filter_q = (pu2_ctb_no_loop_filter_flag[row + 1] >> ((col + 1) >> 1)) & 1;
463                /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
464                filter_p = !filter_p;
465                filter_q = !filter_q;
466
467                if(filter_p || filter_q)
468                {
469                    DUMP_DEBLK_LUMA_HORZ(pu1_src, src_strd,
470                                         u4_bs & 3, qp_p, qp_q,
471                                         ps_slice_hdr->i1_beta_offset_div2,
472                                         ps_slice_hdr->i1_tc_offset_div2,
473                                         filter_p, filter_q);
474                    ps_codec->s_func_selector.ihevc_deblk_luma_horz_fptr(pu1_src, src_strd,
475                                                                         u4_bs & 3, qp_p, qp_q,
476                                                                         i1_beta_offset_div2,
477                                                                         i1_tc_offset_div2, filter_p, filter_q);
478                }
479
480                pu1_src += 4;
481                u4_bs = u4_bs >> 2;
482                col++;
483            }
484
485            if((64 == ctb_size) ||
486                            ((32 == ctb_size) && (row & 1)))
487            {
488                pu4_horz_bs++;
489            }
490            pu1_src -= ctb_size;
491            pu1_src += (src_strd << 3);
492        }
493        pu4_horz_bs = pu4_ctb_horz_bs;
494    }
495
496
497    /* Chroma Veritcal Edge */
498
499    if(0 == i4_is_last_ctb_x)
500    {
501
502        /* Top CTB's slice header */
503        slice_header_t *ps_slice_hdr_top;
504        {
505            WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
506            if(i4_is_last_ctb_y)
507                cur_ctb_indx += ps_sps->i2_pic_wd_in_ctb;
508            ps_slice_hdr_top = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - ps_sps->i2_pic_wd_in_ctb];
509        }
510
511        pu1_src = ps_deblk->pu1_cur_pic_chroma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size);
512        pu1_src += i4_is_last_ctb_y ? (ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size : 0;
513
514        /** Deblocking is done on a shifted CTB -
515         *  Vertical edge processing is done by shifting the CTB up by four pixels */
516        pu1_src -= 4 * src_strd;
517
518        for(col = 0; col < ctb_size / 16; col++)
519        {
520
521            /* BS for the column - Last row is excluded and the top row is included*/
522            u4_bs = pu4_vert_bs[0] << 2;
523
524            if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
525            {
526                /* Picking the last BS of the previous CTB corresponding to the same column */
527                UWORD32 *pu4_vert_bs_top = (UWORD32 *)((UWORD8 *)pu4_vert_bs - bs_strd);
528                UWORD32 u4_top_bs = (*pu4_vert_bs_top) >> ((1 << (log2_ctb_size - 1)) - 2);
529                u4_bs |= u4_top_bs & 3;
530            }
531
532            /* Every alternate boundary strength value is used for chroma */
533            u4_bs &= 0x22222222;
534
535            for(row = 0; row < ctb_size / 8;)
536            {
537                WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
538
539                bs_tz = CTZ(u4_bs) >> 2;
540                if(0 != bs_tz)
541                {
542                    if((row + bs_tz) >= (ctb_size / 8))
543                        pu1_src += 4 * (ctb_size / 8 - row) * src_strd;
544                    else
545                        pu1_src += 4 * bs_tz  * src_strd;
546                    row += bs_tz;
547                    u4_bs = u4_bs >> (bs_tz << 2);
548                    continue;
549                }
550
551                if(0 == row)
552                {
553                    i1_tc_offset_div2 = ps_slice_hdr_top->i1_tc_offset_div2;
554
555                    if(0 == col)
556                    {
557                        qp_p = u4_qp_const_in_ctb[0] ?
558                                        pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
559                                        pu1_qp[-qp_strd - 1];
560                    }
561                    else
562                    {
563                        qp_p = u4_qp_const_in_ctb[1] ?
564                                        pu1_qp[-ctb_size / 8 * qp_strd] :
565                                        pu1_qp[2 * col - 1 - qp_strd];
566                    }
567
568                    qp_q = u4_qp_const_in_ctb[1] ?
569                                    pu1_qp[-ctb_size / 8 * qp_strd] :
570                                    pu1_qp[2 * col - qp_strd];
571                }
572                else
573                {
574                    if(0 == col)
575                    {
576                        qp_p = u4_qp_const_in_ctb[2] ?
577                                        pu1_qp[-ctb_size / 8] :
578                                        pu1_qp[(row - 1) * qp_strd - 1];
579                    }
580                    else
581                    {
582                        qp_p = u4_qp_const_in_ctb[3] ?
583                                        pu1_qp[0] :
584                                        pu1_qp[(row - 1) * qp_strd + 2 * col - 1];
585                    }
586
587                    qp_q = u4_qp_const_in_ctb[3] ?
588                                    pu1_qp[0] :
589                                    pu1_qp[(row - 1) * qp_strd + 2 * col];
590                }
591
592                filter_p = (pu2_ctb_no_loop_filter_flag[row] >> (col << 1)) & 1;
593                filter_q = (pu2_ctb_no_loop_filter_flag[row] >> (col << 1)) & 2;
594                /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
595                filter_p = !filter_p;
596                filter_q = !filter_q;
597
598                if(filter_p || filter_q)
599                {
600                    ASSERT(1 == ((u4_bs & 3) >> 1));
601                    DUMP_DEBLK_CHROMA_VERT(pu1_src, src_strd,
602                                           u4_bs & 3, qp_p, qp_q,
603                                           ps_pps->i1_pic_cb_qp_offset,
604                                           ps_pps->i1_pic_cr_qp_offset,
605                                           ps_slice_hdr->i1_tc_offset_div2,
606                                           filter_p, filter_q);
607                    if(chroma_yuv420sp_vu)
608                    {
609                        ps_codec->s_func_selector.ihevc_deblk_chroma_vert_fptr(pu1_src,
610                                                                               src_strd,
611                                                                               qp_q,
612                                                                               qp_p,
613                                                                               ps_pps->i1_pic_cr_qp_offset,
614                                                                               ps_pps->i1_pic_cb_qp_offset,
615                                                                               i1_tc_offset_div2,
616                                                                               filter_q,
617                                                                               filter_p);
618                    }
619                    else
620                    {
621                        ps_codec->s_func_selector.ihevc_deblk_chroma_vert_fptr(pu1_src,
622                                                                               src_strd,
623                                                                               qp_p,
624                                                                               qp_q,
625                                                                               ps_pps->i1_pic_cb_qp_offset,
626                                                                               ps_pps->i1_pic_cr_qp_offset,
627                                                                               i1_tc_offset_div2,
628                                                                               filter_p,
629                                                                               filter_q);
630                    }
631                }
632
633                pu1_src += 4 * src_strd;
634                u4_bs = u4_bs >> 4;
635                row++;
636            }
637
638            pu4_vert_bs += (64 == ctb_size) ? 2 : 1;
639            pu1_src -= ((src_strd / 2) << log2_ctb_size);
640            pu1_src += 16;
641        }
642    }
643
644    /* Chroma Horizontal Edge */
645
646    if(0 == i4_is_last_ctb_y)
647    {
648
649        /* Left CTB's slice header */
650        slice_header_t *ps_slice_hdr_left;
651        {
652            WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
653            if(i4_is_last_ctb_x)
654                cur_ctb_indx += 1;
655            ps_slice_hdr_left = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - 1];
656        }
657
658        pu1_src = ps_deblk->pu1_cur_pic_chroma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size);
659        pu1_src += i4_is_last_ctb_x ? ctb_size : 0;
660
661        /** Deblocking is done on a shifted CTB -
662         * Vertical edge processing is done by shifting the CTB up by four pixels (8 here beacuse UV are interleaved) */
663        pu1_src -= 8;
664        for(row = 0; row < ctb_size / 16; row++)
665        {
666            /* BS for the row - Last column is excluded and the left column is included*/
667            u4_bs = pu4_horz_bs[0] << 2;
668
669            if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
670            {
671                /** Picking the last BS of the previous CTB corresponding to the same row
672                * UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (ctb_size / 8) * (ctb_size / 4) / 8 * 2);
673                */
674                UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (1 << (2 * log2_ctb_size - 7)));
675                UWORD32 u4_left_bs = (*pu4_horz_bs_left) >> ((1 << (log2_ctb_size - 1)) - 2);
676                u4_bs |= u4_left_bs & 3;
677            }
678
679            /* Every alternate boundary strength value is used for chroma */
680            u4_bs &= 0x22222222;
681
682            for(col = 0; col < ctb_size / 8;)
683            {
684                WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
685
686                bs_tz = CTZ(u4_bs) >> 2;
687                if(0 != bs_tz)
688                {
689                    u4_bs = u4_bs >> (bs_tz << 2);
690
691                    if((col + bs_tz) >= (ctb_size / 8))
692                        pu1_src += 8 * (ctb_size / 8 - col);
693                    else
694                        pu1_src += 8 * bs_tz;
695
696                    col += bs_tz;
697                    continue;
698                }
699
700                if(0 == col)
701                {
702                    i1_tc_offset_div2 = ps_slice_hdr_left->i1_tc_offset_div2;
703
704                    if(0 == row)
705                    {
706                        qp_p = u4_qp_const_in_ctb[0] ?
707                                        pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
708                                        pu1_qp[-qp_strd - 1];
709                    }
710                    else
711                    {
712                        qp_p = u4_qp_const_in_ctb[2] ?
713                                        pu1_qp[-ctb_size / 8] :
714                                        pu1_qp[(2 * row - 1) * qp_strd - 1];
715                    }
716
717                    qp_q = u4_qp_const_in_ctb[2] ?
718                                    pu1_qp[-ctb_size / 8] :
719                                    pu1_qp[(2 * row) * qp_strd - 1];
720                }
721                else
722                {
723                    if(0 == row)
724                    {
725                        qp_p = u4_qp_const_in_ctb[1] ?
726                                        pu1_qp[-ctb_size / 8 * qp_strd] :
727                                        pu1_qp[col - 1 - qp_strd];
728                    }
729                    else
730                    {
731                        qp_p = u4_qp_const_in_ctb[3] ?
732                                        pu1_qp[0] :
733                                        pu1_qp[(col - 1) +  (2 * row - 1) * qp_strd];
734                    }
735
736                    qp_q = u4_qp_const_in_ctb[3] ?
737                                    pu1_qp[0] :
738                                    pu1_qp[(col - 1) + 2 * row * qp_strd];
739                }
740
741                filter_p = (pu2_ctb_no_loop_filter_flag[row << 1] >> col) & 1;
742                filter_q = (pu2_ctb_no_loop_filter_flag[(row << 1) + 1] >> col) & 1;
743                /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
744                filter_p = !filter_p;
745                filter_q = !filter_q;
746
747                if(filter_p || filter_q)
748                {
749                    ASSERT(1 == ((u4_bs & 3) >> 1));
750                    DUMP_DEBLK_CHROMA_HORZ(pu1_src, src_strd,
751                                           u4_bs & 3, qp_p, qp_q,
752                                           ps_pps->i1_pic_cb_qp_offset,
753                                           ps_pps->i1_pic_cr_qp_offset,
754                                           ps_slice_hdr->i1_tc_offset_div2,
755                                           filter_p, filter_q);
756                    if(chroma_yuv420sp_vu)
757                    {
758                        ps_codec->s_func_selector.ihevc_deblk_chroma_horz_fptr(pu1_src,
759                                                                               src_strd,
760                                                                               qp_q,
761                                                                               qp_p,
762                                                                               ps_pps->i1_pic_cr_qp_offset,
763                                                                               ps_pps->i1_pic_cb_qp_offset,
764                                                                               i1_tc_offset_div2,
765                                                                               filter_q,
766                                                                               filter_p);
767                    }
768                    else
769                    {
770                        ps_codec->s_func_selector.ihevc_deblk_chroma_horz_fptr(pu1_src,
771                                                                               src_strd,
772                                                                               qp_p,
773                                                                               qp_q,
774                                                                               ps_pps->i1_pic_cb_qp_offset,
775                                                                               ps_pps->i1_pic_cr_qp_offset,
776                                                                               i1_tc_offset_div2,
777                                                                               filter_p,
778                                                                               filter_q);
779                    }
780                }
781
782                pu1_src += 8;
783                u4_bs = u4_bs >> 4;
784                col++;
785            }
786
787            pu4_horz_bs += (64 == ctb_size) ? 2 : 1;
788            pu1_src -= ctb_size;
789            pu1_src += 8 * src_strd;
790
791        }
792    }
793}
794