1/******************************************************************************
2*
3* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4*
5* Licensed under the Apache License, Version 2.0 (the "License");
6* you may not use this file except in compliance with the License.
7* You may obtain a copy of the License at:
8*
9* http://www.apache.org/licenses/LICENSE-2.0
10*
11* Unless required by applicable law or agreed to in writing, software
12* distributed under the License is distributed on an "AS IS" BASIS,
13* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14* See the License for the specific language governing permissions and
15* limitations under the License.
16*
17******************************************************************************/
18/**
19 *******************************************************************************
20 * @file
21 *  ihevc_boundary_strength.c
22 *
23 * @brief
24 *  Contains functions for computing boundary strength
25 *
26 * @author
27 *  Harish
28 *
29 * @par List of Functions:
30 *
31 * @remarks
32 *  None
33 *
34 *******************************************************************************
35 */
36/*****************************************************************************/
37/* File Includes                                                             */
38/*****************************************************************************/
39#include <stdio.h>
40#include <stddef.h>
41#include <stdlib.h>
42#include <string.h>
43
44#include "ihevc_typedefs.h"
45#include "iv.h"
46#include "ivd.h"
47#include "ihevcd_cxa.h"
48#include "ithread.h"
49
50#include "ihevc_defs.h"
51#include "ihevc_debug.h"
52#include "ihevc_defs.h"
53#include "ihevc_structs.h"
54#include "ihevc_macros.h"
55#include "ihevc_platform_macros.h"
56#include "ihevc_cabac_tables.h"
57
58#include "ihevc_error.h"
59#include "ihevc_common_tables.h"
60
61#include "ihevcd_trace.h"
62#include "ihevcd_defs.h"
63#include "ihevcd_function_selector.h"
64#include "ihevcd_structs.h"
65#include "ihevcd_error.h"
66#include "ihevcd_nal.h"
67#include "ihevcd_bitstream.h"
68#include "ihevcd_job_queue.h"
69#include "ihevcd_utils.h"
70#include "ihevcd_profile.h"
71
72/*****************************************************************************/
73/* Function Prototypes                                                       */
74/*****************************************************************************/
75
76
/*
 * Neighbour availability helpers.
 *
 * The availability of the five neighbours is kept in the low five bits of
 * 'avail' (a set bit means "available"):
 *
 *      0x10 - bottom left
 *      0x08 - left
 *      0x04 - top left
 *      0x02 - top
 *      0x01 - top right
 *
 * 'avail' must be a modifiable lvalue. It is parenthesized in every expansion
 * so that expressions such as '*pu4_avail' or 'as_avail[i]' expand safely.
 * Each macro intentionally keeps its trailing ';' so that existing call sites
 * written with or without their own semicolon continue to compile.
 */

/* Mark all five neighbours as available */
#define SET_NGBHR_ALL_AVAIL(avail)          (avail) = 0x1F;

/* Clear the availability bit of a single neighbour */
#define SET_NGBHR_BOTLEFT_NOTAVAIL(avail)   (avail) &= ~0x10;
#define SET_NGBHR_LEFT_NOTAVAIL(avail)      (avail) &= ~0x8;
#define SET_NGBHR_TOPLEFT_NOTAVAIL(avail)   (avail) &= ~0x4;
#define SET_NGBHR_TOP_NOTAVAIL(avail)       (avail) &= ~0x2;
#define SET_NGBHR_TOPRIGHT_NOTAVAIL(avail)  (avail) &= ~0x1;
84
85WORD32 ihevcd_pu_boundary_strength(pu_t *ps_pu,
86                                   pu_t *ps_ngbr_pu)
87{
88    WORD32 i4_bs;
89    UWORD32 l0_ref_pic_buf_id, l1_ref_pic_buf_id;
90    UWORD32 ngbr_l0_ref_pic_buf_id, ngbr_l1_ref_pic_buf_id;
91
92    WORD16 i2_mv_x0, i2_mv_y0, i2_mv_x1, i2_mv_y1;
93    WORD16 i2_ngbr_mv_x0, i2_ngbr_mv_y0, i2_ngbr_mv_x1, i2_ngbr_mv_y1;
94
95    WORD32 num_mv, ngbr_num_mv;
96
97    num_mv = (PRED_BI == ps_pu->b2_pred_mode) ? 2 : 1;
98    ngbr_num_mv = (PRED_BI == ps_ngbr_pu->b2_pred_mode) ? 2 : 1;
99
100    l0_ref_pic_buf_id = ps_pu->mv.i1_l0_ref_pic_buf_id;
101    l1_ref_pic_buf_id = ps_pu->mv.i1_l1_ref_pic_buf_id;
102    ngbr_l0_ref_pic_buf_id = ps_ngbr_pu->mv.i1_l0_ref_pic_buf_id;
103    ngbr_l1_ref_pic_buf_id = ps_ngbr_pu->mv.i1_l1_ref_pic_buf_id;
104
105
106    i2_mv_x0 = ps_pu->mv.s_l0_mv.i2_mvx;
107    i2_mv_y0 = ps_pu->mv.s_l0_mv.i2_mvy;
108    i2_mv_x1 = ps_pu->mv.s_l1_mv.i2_mvx;
109    i2_mv_y1 = ps_pu->mv.s_l1_mv.i2_mvy;
110
111    i2_ngbr_mv_x0 = ps_ngbr_pu->mv.s_l0_mv.i2_mvx;
112    i2_ngbr_mv_y0 = ps_ngbr_pu->mv.s_l0_mv.i2_mvy;
113    i2_ngbr_mv_x1 = ps_ngbr_pu->mv.s_l1_mv.i2_mvx;
114    i2_ngbr_mv_y1 = ps_ngbr_pu->mv.s_l1_mv.i2_mvy;
115
116
117    /* If two motion vectors are used */
118    if((2 == num_mv) &&
119            (2 == ngbr_num_mv))
120    {
121        if((l0_ref_pic_buf_id == ngbr_l0_ref_pic_buf_id && l1_ref_pic_buf_id == ngbr_l1_ref_pic_buf_id) ||
122                (l0_ref_pic_buf_id == ngbr_l1_ref_pic_buf_id && l1_ref_pic_buf_id == ngbr_l0_ref_pic_buf_id))
123        {
124            if(l0_ref_pic_buf_id != l1_ref_pic_buf_id) /* Different L0 and L1 */
125            {
126                if(l0_ref_pic_buf_id == ngbr_l0_ref_pic_buf_id)
127                {
128                    i4_bs = (ABS(i2_mv_x0 - i2_ngbr_mv_x0) < 4) &&
129                            (ABS(i2_mv_y0 - i2_ngbr_mv_y0) < 4) &&
130                            (ABS(i2_mv_x1 - i2_ngbr_mv_x1) < 4) &&
131                            (ABS(i2_mv_y1 - i2_ngbr_mv_y1) < 4) ? 0 : 1;
132                }
133                else
134                {
135                    i4_bs = (ABS(i2_mv_x0 - i2_ngbr_mv_x1) < 4) &&
136                            (ABS(i2_mv_y0 - i2_ngbr_mv_y1) < 4) &&
137                            (ABS(i2_mv_x1 - i2_ngbr_mv_x0) < 4) &&
138                            (ABS(i2_mv_y1 - i2_ngbr_mv_y0) < 4) ? 0 : 1;
139                }
140            }
141            else /* Same L0 and L1 */
142            {
143                i4_bs = ((ABS(i2_mv_x0 - i2_ngbr_mv_x0) >= 4) ||
144                         (ABS(i2_mv_y0 - i2_ngbr_mv_y0) >= 4) ||
145                         (ABS(i2_mv_x1 - i2_ngbr_mv_x1) >= 4) ||
146                         (ABS(i2_mv_y1 - i2_ngbr_mv_y1) >= 4)) &&
147                                ((ABS(i2_mv_x0 - i2_ngbr_mv_x1) >= 4) ||
148                                 (ABS(i2_mv_y0 - i2_ngbr_mv_y1) >= 4) ||
149                                 (ABS(i2_mv_x1 - i2_ngbr_mv_x0) >= 4) ||
150                                 (ABS(i2_mv_y1 - i2_ngbr_mv_y0) >= 4)) ? 1 : 0;
151            }
152        }
153        else /* If the reference pictures used are different */
154        {
155            i4_bs = 1;
156        }
157    }
158
159    /* If one motion vector is used in both PUs */
160    else if((1 == num_mv) &&
161            (1 == ngbr_num_mv))
162    {
163        WORD16 i2_mv_x, i2_mv_y;
164        WORD16 i2_ngbr_mv_x, i2_ngbr_mv_y;
165        UWORD32 ref_pic_buf_id, ngbr_ref_pic_buf_id;
166
167        if(PRED_L0 == ps_pu->b2_pred_mode)
168        {
169            i2_mv_x = i2_mv_x0;
170            i2_mv_y = i2_mv_y0;
171            ref_pic_buf_id = l0_ref_pic_buf_id;
172        }
173        else
174        {
175            i2_mv_x = i2_mv_x1;
176            i2_mv_y = i2_mv_y1;
177            ref_pic_buf_id = l1_ref_pic_buf_id;
178        }
179
180        if(PRED_L0 == ps_ngbr_pu->b2_pred_mode)
181        {
182            i2_ngbr_mv_x = i2_ngbr_mv_x0;
183            i2_ngbr_mv_y = i2_ngbr_mv_y0;
184            ngbr_ref_pic_buf_id = ngbr_l0_ref_pic_buf_id;
185        }
186        else
187        {
188            i2_ngbr_mv_x = i2_ngbr_mv_x1;
189            i2_ngbr_mv_y = i2_ngbr_mv_y1;
190            ngbr_ref_pic_buf_id = ngbr_l1_ref_pic_buf_id;
191        }
192
193        i4_bs = (ref_pic_buf_id == ngbr_ref_pic_buf_id) &&
194                (ABS(i2_mv_x - i2_ngbr_mv_x) < 4)  &&
195                (ABS(i2_mv_y - i2_ngbr_mv_y) < 4) ? 0 : 1;
196    }
197
198    /* If the no. of motion vectors is not the same */
199    else
200    {
201        i4_bs = 1;
202    }
203
204
205    return i4_bs;
206}
207
208/* QP is also populated in the same function */
209WORD32 ihevcd_ctb_boundary_strength_islice(bs_ctxt_t *ps_bs_ctxt)
210{
211    pps_t *ps_pps;
212    sps_t *ps_sps;
213    tu_t *ps_tu;
214    UWORD32 *pu4_vert_bs;
215    UWORD32 *pu4_horz_bs;
216    WORD32 bs_strd;
217    WORD32 vert_bs0_tmp;
218    WORD32 horz_bs0_tmp;
219    UWORD8 *pu1_qp;
220    WORD32 qp_strd;
221    UWORD32 u4_qp_const_in_ctb;
222    WORD32 ctb_indx;
223    WORD32 i4_tu_cnt;
224    WORD32 log2_ctb_size;
225    WORD32 ctb_size;
226
227    WORD8 i1_loop_filter_across_tiles_enabled_flag;
228    WORD8 i1_loop_filter_across_slices_enabled_flag;
229
230    WORD32 i;
231
232    PROFILE_DISABLE_BOUNDARY_STRENGTH();
233
234    ps_pps = ps_bs_ctxt->ps_pps;
235    ps_sps = ps_bs_ctxt->ps_sps;
236    i1_loop_filter_across_tiles_enabled_flag = ps_pps->i1_loop_filter_across_tiles_enabled_flag;
237    i1_loop_filter_across_slices_enabled_flag = ps_bs_ctxt->ps_slice_hdr->i1_slice_loop_filter_across_slices_enabled_flag;
238    i4_tu_cnt = ps_bs_ctxt->i4_ctb_tu_cnt;
239
240    log2_ctb_size = ps_sps->i1_log2_ctb_size;
241    ctb_size = (1 << log2_ctb_size);
242
243    /* strides are in units of number of bytes */
244    /* ctb_size * ctb_size / 8 / 16 is the number of bytes needed per CTB */
245    bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) << (2 * log2_ctb_size - 7);
246
247    pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_vert_bs +
248                    (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) +
249                    ps_bs_ctxt->i4_ctb_y * bs_strd);
250    pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_horz_bs +
251                    (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) +
252                    ps_bs_ctxt->i4_ctb_y * bs_strd);
253
254    /* ctb_size/8 elements per CTB */
255    qp_strd = ps_sps->i2_pic_wd_in_ctb << (log2_ctb_size - 3);
256    pu1_qp = ps_bs_ctxt->pu1_pic_qp + ((ps_bs_ctxt->i4_ctb_x + ps_bs_ctxt->i4_ctb_y * qp_strd) << (log2_ctb_size - 3));
257
258    ctb_indx = ps_bs_ctxt->i4_ctb_x + ps_sps->i2_pic_wd_in_ctb * ps_bs_ctxt->i4_ctb_y;
259    u4_qp_const_in_ctb = ps_bs_ctxt->pu1_pic_qp_const_in_ctb[ctb_indx >> 3] & (1 << (ctb_indx & 7));
260
261    vert_bs0_tmp = pu4_vert_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2));
262    horz_bs0_tmp = pu4_horz_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2));
263
264    /* ctb_size/8 is the number of edges per CTB
265     * ctb_size/4 is the number of BS values needed per edge
266     * divided by 8 for the number of bytes
267     * 2 is the number of bits needed for each BS value */
268/*
269    memset(pu4_vert_bs, 0, (ctb_size / 8 + 1) * (ctb_size / 4) / 8 * 2 );
270    memset(pu4_horz_bs, 0, (ctb_size / 8) * (ctb_size / 4) / 8 * 2 );
271*/
272    memset(pu4_vert_bs, 0, (1 << (2 * log2_ctb_size - 7)) + ((ctb_size >> 5) << 1));
273    memset(pu4_horz_bs, 0, (1 << (2 * log2_ctb_size - 7)));
274
275    /* pu4_vert_bs[0] has information about the left CTB which is not required when ctb_x = 0 */
276    if(0 != ps_bs_ctxt->i4_ctb_x)
277    {
278        pu4_vert_bs[0] |= vert_bs0_tmp;
279    }
280
281    /* pu4_horz_bs[0] has information about the top CTB which is not required when ctb_y = 0 */
282    if(0 != ps_bs_ctxt->i4_ctb_y)
283    {
284        pu4_horz_bs[0] |= horz_bs0_tmp;
285    }
286
287    ps_tu = ps_bs_ctxt->ps_tu;
288
289    /* Populating the QP array - if const_qp_in_ctb flag is one, set only the first element */
290    if(u4_qp_const_in_ctb)
291        pu1_qp[0] = ps_tu->b7_qp;
292
293    for(i = 0; i < i4_tu_cnt; i++)
294    {
295        WORD32 start_pos_x;
296        WORD32 start_pos_y;
297        WORD32 tu_size;
298
299
300        UWORD32 u4_bs;
301        ps_tu = ps_bs_ctxt->ps_tu + i;
302
303        /* start_pos_x and start_pos_y are in units of min TU size (4x4) */
304        start_pos_x = ps_tu->b4_pos_x;
305        start_pos_y = ps_tu->b4_pos_y;
306
307        tu_size = 1 << (ps_tu->b3_size + 2);
308        tu_size >>= 2; /* TU size divided by 4 */
309
310        u4_bs = DUP_LSB_10(tu_size);
311
312        /* Only if the current edge falls on 8 pixel grid set BS */
313        if(0 == (start_pos_x & 1))
314        {
315            WORD32 shift;
316            shift = start_pos_y * 2;
317            /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
318             *  will reduce to the following assuming ctb size is one of 16, 32 and 64
319             *  and deblocking is done on 8x8 grid
320             */
321            if(6 != log2_ctb_size)
322                shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
323            pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
324        }
325        /* Only if the current edge falls on 8 pixel grid set BS */
326        if(0 == (start_pos_y & 1))
327        {
328            WORD32 shift;
329            shift = start_pos_x * 2;
330            /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
331             *  will reduce to the following assuming ctb size is one of 16, 32 and 64
332             *  and deblocking is done on 8x8 grid
333             */
334            if(6 != log2_ctb_size)
335                shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
336            pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
337        }
338
339        /* Populating the QP array */
340        if(0 == u4_qp_const_in_ctb)
341        {
342            if(0 == (start_pos_x & 1) && 0 == (start_pos_y & 1))
343            {
344                WORD32 row, col;
345                for(row = start_pos_y; row < start_pos_y + tu_size; row += 2)
346                {
347                    for(col = start_pos_x; col < start_pos_x + tu_size; col += 2)
348                    {
349                        pu1_qp[(row >> 1) * qp_strd + (col >> 1)] = ps_tu->b7_qp;
350                    }
351                }
352            }
353        }
354
355    }
356    {
357        /*Determine if the slice is dependent, and is its left neighbor belongs to the same slice, in a different tile*/
358        UWORD32 ctb_addr;
359        WORD32 slice_idx, left_slice_idx = -1, top_slice_idx = -1;
360        /* If left neighbor is not available, then set BS for entire first column to zero */
361        if(!ps_pps->i1_tiles_enabled_flag)
362        {
363            if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x) ||
364                            (0 == i1_loop_filter_across_slices_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_slice_y) ||
365                            (0 == ps_bs_ctxt->i4_ctb_x))
366            {
367                pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
368            }
369        }
370        else
371        {
372            //If across-tiles is disabled
373            if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x))
374            {
375                pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
376            }
377            else
378            {
379                ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
380                slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
381                if(ps_bs_ctxt->i4_ctb_x)
382                {
383                    ctb_addr = (ps_bs_ctxt->i4_ctb_x - 1) + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
384                    left_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
385                }
386                /*If the 1st slice in a new tile is a dependent slice*/
387                if(!((ps_bs_ctxt->ps_slice_hdr->i1_dependent_slice_flag == 1) && (slice_idx == left_slice_idx)))
388                {
389                    /* Removed reduntant checks */
390                    if((0 == i1_loop_filter_across_slices_enabled_flag && (
391                                    ((slice_idx != left_slice_idx) && 0 == ps_bs_ctxt->i4_ctb_slice_y) ||
392                                    ((0 == ps_bs_ctxt->i4_ctb_tile_x) && (slice_idx != left_slice_idx)))) ||
393                                    (0 == ps_bs_ctxt->i4_ctb_x))
394                    {
395                        pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
396                    }
397                }
398            }
399        }
400
401        ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
402        slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
403        if(ps_bs_ctxt->i4_ctb_y)
404        {
405            ctb_addr = (ps_bs_ctxt->i4_ctb_x) + ((ps_bs_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb);
406            top_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
407        }
408
409        /* If top neighbor is not available, then set BS for entire first row to zero */
410        /* Removed reduntant checks */
411        if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_y)
412                        || (0 == i1_loop_filter_across_slices_enabled_flag && ((slice_idx != top_slice_idx)))
413                        || (0 == ps_bs_ctxt->i4_ctb_y))
414        {
415            pu4_horz_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
416        }
417    }
418
419    /**
420     *  Set BS of bottom and right frame boundaries to zero if it is an incomplete CTB
421     *   (They might have been set to  non zero values because of CBF of the current CTB)
422     *   This block might not be needed for I slices*/
423    {
424        WORD32 num_rows_remaining = (ps_sps->i2_pic_height_in_luma_samples - (ps_bs_ctxt->i4_ctb_y << log2_ctb_size)) >> 3;
425        WORD32 num_cols_remaining = (ps_sps->i2_pic_width_in_luma_samples - (ps_bs_ctxt->i4_ctb_x << log2_ctb_size)) >> 3;
426        if(num_rows_remaining < (ctb_size >> 3))
427        {
428            /* WORD32 offset = (((num_rows_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4));
429             *  will reduce to the following assuming ctb size is one of 16, 32 and 64
430             *  and deblocking is done on 8x8 grid
431             */
432            WORD32 offset;
433            offset = (num_rows_remaining >> (6 - log2_ctb_size)) << 2;
434            if(6 != log2_ctb_size)
435                offset += (num_rows_remaining & 1) << (log2_ctb_size - 4);
436
437            memset(((UWORD8 *)pu4_horz_bs) + offset, 0, 1 << (log2_ctb_size - 4));
438        }
439
440        if(num_cols_remaining < (ctb_size >> 3))
441        {
442            /* WORD32 offset = (((num_cols_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4));
443             *  will reduce to the following assuming ctb size is one of 16, 32 and 64
444             *  and deblocking is done on 8x8 grid
445             */
446
447            WORD32 offset;
448            offset = (num_cols_remaining >> (6 - log2_ctb_size)) << 2;
449            if(6 != log2_ctb_size)
450                offset += (num_cols_remaining & 1) << (log2_ctb_size - 4);
451
452            memset(((UWORD8 *)pu4_vert_bs) + offset, 0, 1 << (log2_ctb_size - 4));
453        }
454    }
455
456    return 0;
457}
458WORD32 ihevcd_ctb_boundary_strength_pbslice(bs_ctxt_t *ps_bs_ctxt)
459{
460    sps_t *ps_sps;
461    pps_t *ps_pps;
462    WORD32 cur_ctb_idx, next_ctb_idx = 0;
463    WORD32 i4_tu_cnt;
464    WORD32 i4_pu_cnt;
465    tu_t *ps_tu;
466
467    UWORD32 *pu4_vert_bs;
468    UWORD32 *pu4_horz_bs;
469    WORD32 bs_strd;
470    WORD32 vert_bs0_tmp;
471    WORD32 horz_bs0_tmp;
472    UWORD8 *pu1_qp;
473    WORD32 qp_strd;
474    UWORD32 u4_qp_const_in_ctb;
475    WORD32 ctb_indx;
476    WORD32 log2_ctb_size;
477    WORD32 ctb_size;
478
479    WORD32 i;
480    WORD8 i1_loop_filter_across_tiles_enabled_flag;
481    WORD8 i1_loop_filter_across_slices_enabled_flag;
482
483    PROFILE_DISABLE_BOUNDARY_STRENGTH();
484
485    ps_sps = ps_bs_ctxt->ps_sps;
486    ps_pps = ps_bs_ctxt->ps_pps;
487
488    log2_ctb_size = ps_sps->i1_log2_ctb_size;
489    ctb_size = (1 << log2_ctb_size);
490
491    /* strides are in units of number of bytes */
492    /* ctb_size * ctb_size / 8 / 16 is the number of bytes needed per CTB */
493    bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) << (2 * log2_ctb_size - 7);
494
495    pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_vert_bs +
496                    (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) +
497                    ps_bs_ctxt->i4_ctb_y * bs_strd);
498    pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_horz_bs +
499                    (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) +
500                    ps_bs_ctxt->i4_ctb_y * bs_strd);
501
502    vert_bs0_tmp = pu4_vert_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2));
503    horz_bs0_tmp = pu4_horz_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2));
504
505    ps_tu = ps_bs_ctxt->ps_tu;
506
507    /* ctb_size/8 elements per CTB */
508    qp_strd = ps_sps->i2_pic_wd_in_ctb << (log2_ctb_size - 3);
509    pu1_qp = ps_bs_ctxt->pu1_pic_qp + ((ps_bs_ctxt->i4_ctb_x + ps_bs_ctxt->i4_ctb_y * qp_strd) << (log2_ctb_size - 3));
510
511    ctb_indx = ps_bs_ctxt->i4_ctb_x + ps_sps->i2_pic_wd_in_ctb * ps_bs_ctxt->i4_ctb_y;
512    u4_qp_const_in_ctb = ps_bs_ctxt->pu1_pic_qp_const_in_ctb[ctb_indx >> 3] & (1 << (ctb_indx & 7));
513
514    i1_loop_filter_across_tiles_enabled_flag = ps_pps->i1_loop_filter_across_tiles_enabled_flag;
515    i1_loop_filter_across_slices_enabled_flag = ps_bs_ctxt->ps_slice_hdr->i1_slice_loop_filter_across_slices_enabled_flag;
516
517    /* ctb_size/8 is the number of edges per CTB
518     * ctb_size/4 is the number of BS values needed per edge
519     * divided by 8 for the number of bytes
520     * 2 is the number of bits needed for each BS value */
521/*
522    memset(pu4_vert_bs, 0, (ctb_size / 8 + 1) * (ctb_size / 4) * 2 / 8 );
523    memset(pu4_horz_bs, 0, (ctb_size / 8) * (ctb_size / 4) * 2 / 8 );
524*/
525    memset(pu4_vert_bs, 0, (1 << (2 * log2_ctb_size - 7)) + (ctb_size >> 4));
526    memset(pu4_horz_bs, 0, (1 << (2 * log2_ctb_size - 7)));
527
528    /* pu4_vert_bs[0] has information about the left CTB which is not required when ctb_x = 0 */
529    if(0 != ps_bs_ctxt->i4_ctb_x)
530    {
531        pu4_vert_bs[0] |= vert_bs0_tmp;
532    }
533
534    /* pu4_horz_bs[0] has information about the top CTB which is not required when ctb_y = 0 */
535    if(0 != ps_bs_ctxt->i4_ctb_y)
536    {
537        pu4_horz_bs[0] |= horz_bs0_tmp;
538    }
539    /* pu4_horz_bs[bs_strd / 4] corresponds to pu4_horz_bs[0] of the bottom CTB */
540    *(UWORD32 *)((UWORD8 *)pu4_horz_bs + bs_strd) = 0;
541
542    cur_ctb_idx = ps_bs_ctxt->i4_ctb_x
543                    + ps_bs_ctxt->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb);
544    next_ctb_idx = ps_bs_ctxt->i4_next_tu_ctb_cnt;
545    if(1 == ps_bs_ctxt->ps_codec->i4_num_cores)
546    {
547        i4_tu_cnt = ps_bs_ctxt->pu4_pic_tu_idx[next_ctb_idx] - ps_bs_ctxt->pu4_pic_tu_idx[cur_ctb_idx % RESET_TU_BUF_NCTB];
548    }
549    else
550    {
551        i4_tu_cnt = ps_bs_ctxt->pu4_pic_tu_idx[next_ctb_idx] - ps_bs_ctxt->pu4_pic_tu_idx[cur_ctb_idx];
552    }
553
554    ps_tu = ps_bs_ctxt->ps_tu;
555    if(u4_qp_const_in_ctb)
556        pu1_qp[0] = ps_tu->b7_qp;
557
558    /* For all TUs in the CTB For left and top edges, check if there are coded coefficients on either sides of the edge */
559    for(i = 0; i < i4_tu_cnt; i++)
560    {
561        WORD32 start_pos_x;
562        WORD32 start_pos_y;
563        WORD32 end_pos_x;
564        WORD32 end_pos_y;
565        WORD32 tu_size;
566        UWORD32 u4_bs;
567        WORD32 intra_flag;
568        UWORD8 *pu1_pic_intra_flag;
569
570        ps_tu = ps_bs_ctxt->ps_tu + i;
571
572        start_pos_x = ps_tu->b4_pos_x;
573        start_pos_y = ps_tu->b4_pos_y;
574
575        tu_size = 1 << (ps_tu->b3_size + 2);
576        tu_size >>= 2;
577
578        end_pos_x = start_pos_x + tu_size;
579        end_pos_y = start_pos_y + tu_size;
580
581        {
582            WORD32 tu_abs_x = (ps_bs_ctxt->i4_ctb_x << log2_ctb_size) + (start_pos_x << 2);
583            WORD32 tu_abs_y = (ps_bs_ctxt->i4_ctb_y << log2_ctb_size) + (start_pos_y << 2);
584
585            WORD32 numbytes_row =  (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
586
587            pu1_pic_intra_flag = ps_bs_ctxt->ps_codec->pu1_pic_intra_flag;
588            pu1_pic_intra_flag += (tu_abs_y >> 3) * numbytes_row;
589            pu1_pic_intra_flag += (tu_abs_x >> 6);
590
591            intra_flag = *pu1_pic_intra_flag;
592            intra_flag &= (1 << ((tu_abs_x >> 3) % 8));
593        }
594        if(intra_flag)
595        {
596            u4_bs = DUP_LSB_10(tu_size);
597
598            /* Only if the current edge falls on 8 pixel grid set BS */
599            if(0 == (start_pos_x & 1))
600            {
601                WORD32 shift;
602                shift = start_pos_y * 2;
603                /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
604                 *  will reduce to the following assuming ctb size is one of 16, 32 and 64
605                 *  and deblocking is done on 8x8 grid
606                 */
607                if(6 != log2_ctb_size)
608                    shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
609                pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
610            }
611            /* Only if the current edge falls on 8 pixel grid set BS */
612            if(0 == (start_pos_y & 1))
613            {
614                WORD32 shift;
615                shift = start_pos_x * 2;
616                /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
617                 *  will reduce to the following assuming ctb size is one of 16, 32 and 64
618                 *  and deblocking is done on 8x8 grid
619                 */
620                if(6 != log2_ctb_size)
621                    shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
622                pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
623            }
624        }
625
626
627        /* If the current TU is coded then set both top edge and left edge BS to 1 and go to next TU */
628        if(ps_tu->b1_y_cbf)
629        {
630            u4_bs = DUP_LSB_01(tu_size);
631
632            /* Only if the current edge falls on 8 pixel grid set BS */
633            if(0 == (start_pos_x & 1))
634            {
635                WORD32 shift;
636                shift = start_pos_y * 2;
637                /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
638                 *  will reduce to the following assuming ctb size is one of 16, 32 and 64
639                 *  and deblocking is done on 8x8 grid
640                 */
641                if(6 != log2_ctb_size)
642                    shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
643                pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
644            }
645            /* Only if the current edge falls on 8 pixel grid set BS */
646            if(0 == (start_pos_y & 1))
647            {
648                WORD32 shift;
649                shift = start_pos_x * 2;
650                /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
651                 *  will reduce to the following assuming ctb size is one of 16, 32 and 64
652                 *  and deblocking is done on 8x8 grid
653                 */
654                if(6 != log2_ctb_size)
655                    shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
656                pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
657            }
658            /* Only if the current edge falls on 8 pixel grid set BS */
659            if(0 == (end_pos_x & 1))
660            {
661                if(!(ctb_size / 8 == (end_pos_x >> 1) && ps_bs_ctxt->i4_ctb_x == ps_sps->i2_pic_wd_in_ctb - 1))
662                {
663                    WORD32 shift;
664                    shift = start_pos_y * 2;
665                    shift += (((end_pos_x >> 1) & ((MAX_CTB_SIZE >> log2_ctb_size) - 1)) << (log2_ctb_size - 1));
666                    pu4_vert_bs[end_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
667                }
668            }
669            /* Only if the current edge falls on 8 pixel grid set BS */
670            if(0 == (end_pos_y & 1))
671            {
672                /* If end_pos_y corresponds to the bottom of the CTB, write to pu4_horz_bs[0] of the bottom CTB */
673                if(ctb_size / 8 == (end_pos_y >> 1))
674                {
675                    *(UWORD32 *)((UWORD8 *)pu4_horz_bs + bs_strd) |= (u4_bs << (start_pos_x * 2));
676                }
677                else
678                {
679                    WORD32 shift;
680                    shift = start_pos_x * 2;
681                    shift += (((end_pos_y >> 1) & ((MAX_CTB_SIZE >> log2_ctb_size) - 1)) << (log2_ctb_size - 1));
682                    pu4_horz_bs[end_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
683                }
684            }
685        }
686
687        if(0 == u4_qp_const_in_ctb)
688        {
689            if(0 == (start_pos_x & 1) && 0 == (start_pos_y & 1))
690            {
691                WORD32 row, col;
692                for(row = start_pos_y; row < start_pos_y + tu_size; row += 2)
693                {
694                    for(col = start_pos_x; col < start_pos_x + tu_size; col += 2)
695                    {
696                        pu1_qp[(row >> 1) * qp_strd + (col >> 1)] = ps_tu->b7_qp;
697                    }
698                }
699            }
700        }
701    }
702
703    /* For all PUs in the CTB,
704    For left and top edges, compute BS */
705
706    cur_ctb_idx = ps_bs_ctxt->i4_ctb_x
707                    + ps_bs_ctxt->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb);
708
709    {
710        WORD32 next_ctb_idx;
711        next_ctb_idx = ps_bs_ctxt->i4_next_pu_ctb_cnt;
712        i4_pu_cnt = ps_bs_ctxt->pu4_pic_pu_idx[next_ctb_idx] - ps_bs_ctxt->pu4_pic_pu_idx[cur_ctb_idx];
713    }
714
715    for(i = 0; i < i4_pu_cnt; i++)
716    {
717        WORD32 start_pos_x;
718        WORD32 start_pos_y;
719        WORD32 end_pos_x;
720        WORD32 end_pos_y;
721        WORD32 pu_wd, pu_ht;
722        UWORD32 u4_bs;
723        pu_t *ps_pu = ps_bs_ctxt->ps_pu + i;
724        pu_t *ps_ngbr_pu;
725        UWORD32 u4_ngbr_pu_indx;
726
727        start_pos_x = ps_pu->b4_pos_x;
728        start_pos_y = ps_pu->b4_pos_y;
729
730        pu_wd = (ps_pu->b4_wd + 1);
731        pu_ht = (ps_pu->b4_ht + 1);
732
733        end_pos_x = start_pos_x + pu_wd;
734        end_pos_y = start_pos_y + pu_ht;
735
736        /* If the current PU is intra, set Boundary strength as 2 for both top and left edge */
737        /* Need not mask the BS to zero even if it was set to 1 already since BS 2 and 3 are assumed to be the same in leaf level functions */
738        if(ps_pu->b1_intra_flag)
739        {
740            u4_bs = DUP_LSB_10(pu_ht);
741
742            /* Only if the current edge falls on 8 pixel grid set BS */
743            if(0 == (start_pos_x & 1))
744            {
745                WORD32 shift;
746                shift = start_pos_y * 2;
747                /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
748                 *  will reduce to the following assuming ctb size is one of 16, 32 and 64
749                 *  and deblocking is done on 8x8 grid
750                 */
751                if(6 != log2_ctb_size)
752                    shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
753                pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
754            }
755
756            u4_bs = DUP_LSB_10(pu_wd);
757
758            /* Only if the current edge falls on 8 pixel grid set BS */
759            if(0 == (start_pos_y & 1))
760            {
761                WORD32 shift;
762                shift = start_pos_x * 2;
763                /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
764                 *  will reduce to the following assuming ctb size is one of 16, 32 and 64
765                 *  and deblocking is done on 8x8 grid
766                 */
767                if(6 != log2_ctb_size)
768                    shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
769                pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
770            }
771        }
772
773        else
774        {
775            /* Vertical edge */
776            /* Process only if the edge is not a frame edge */
777            if(0 != ps_bs_ctxt->i4_ctb_x + start_pos_x)
778            {
779                do
780                {
781                    WORD32 pu_ngbr_ht;
782                    WORD32 min_pu_ht;
783                    WORD32 ngbr_end_pos_y;
784                    UWORD32 ngbr_pu_idx_strd;
785                    ngbr_pu_idx_strd = MAX_CTB_SIZE / MIN_PU_SIZE + 2;
786                    u4_ngbr_pu_indx = ps_bs_ctxt->pu4_pic_pu_idx_map[(start_pos_y + 1) * ngbr_pu_idx_strd + (start_pos_x)];
787                    ps_ngbr_pu = ps_bs_ctxt->ps_pic_pu + u4_ngbr_pu_indx;
788
789                    pu_ngbr_ht = ps_ngbr_pu->b4_ht + 1;
790                    ngbr_end_pos_y = ps_ngbr_pu->b4_pos_y + pu_ngbr_ht;
791
792                    min_pu_ht = MIN(ngbr_end_pos_y, end_pos_y) - start_pos_y;
793
794                    if(ps_ngbr_pu->b1_intra_flag)
795                    {
796                        u4_bs = DUP_LSB_10(min_pu_ht);
797
798                        /* Only if the current edge falls on 8 pixel grid set BS */
799                        if(0 == (start_pos_x & 1))
800                        {
801                            WORD32 shift;
802                            shift = start_pos_y * 2;
803                            /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
804                             *  will reduce to the following assuming ctb size is one of 16, 32 and 64
805                             *  and deblocking is done on 8x8 grid
806                             */
807                            if(6 != log2_ctb_size)
808                                shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
809                            pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
810                        }
811                    }
812                    else
813                    {
814                        u4_bs = ihevcd_pu_boundary_strength(ps_pu, ps_ngbr_pu);
815                        if(u4_bs)
816                        {
817                            u4_bs = DUP_LSB_01(min_pu_ht);
818                            if(0 == (start_pos_x & 1))
819                            {
820                                WORD32 shift;
821                                shift = start_pos_y * 2;
822                                /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
823                                 *  will reduce to the following assuming ctb size is one of 16, 32 and 64
824                                 *  and deblocking is done on 8x8 grid
825                                 */
826                                if(6 != log2_ctb_size)
827                                    shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
828                                pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
829                            }
830                        }
831                    }
832
833                    pu_ht -= min_pu_ht;
834                    start_pos_y += min_pu_ht;
835                }while(pu_ht > 0);
836
837                /* Reinitialising since the values are updated in the previous loop */
838                pu_ht = ps_pu->b4_ht + 1;
839                start_pos_y = ps_pu->b4_pos_y;
840            }
841
842            /* Horizontal edge */
843            /* Process only if the edge is not a frame edge */
844            if(0 != ps_bs_ctxt->i4_ctb_y + start_pos_y)
845            {
846                do
847                {
848                    WORD32 pu_ngbr_wd;
849                    WORD32 min_pu_wd;
850                    WORD32 ngbr_end_pos_x;
851                    UWORD32 ngbr_pu_idx_strd = MAX_CTB_SIZE / MIN_PU_SIZE + 2;
852                    u4_ngbr_pu_indx = ps_bs_ctxt->pu4_pic_pu_idx_map[(start_pos_y)*ngbr_pu_idx_strd + (start_pos_x + 1)];
853                    ps_ngbr_pu = ps_bs_ctxt->ps_pic_pu + u4_ngbr_pu_indx;
854
855                    pu_ngbr_wd = ps_ngbr_pu->b4_wd + 1;
856                    ngbr_end_pos_x = ps_ngbr_pu->b4_pos_x + pu_ngbr_wd;
857
858                    min_pu_wd = MIN(ngbr_end_pos_x, end_pos_x) - start_pos_x;
859
860                    if(ps_ngbr_pu->b1_intra_flag)
861                    {
862                        u4_bs = DUP_LSB_10(min_pu_wd);
863
864                        /* Only if the current edge falls on 8 pixel grid set BS */
865                        if(0 == (start_pos_y & 1))
866                        {
867                            WORD32 shift;
868                            shift = start_pos_x * 2;
869                            /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
870                             *  will reduce to the following assuming ctb size is one of 16, 32 and 64
871                             *  and deblocking is done on 8x8 grid
872                             */
873                            if(6 != log2_ctb_size)
874                                shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
875                            pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
876                        }
877                    }
878                    else
879                    {
880                        u4_bs = ihevcd_pu_boundary_strength(ps_pu, ps_ngbr_pu);
881                        if(u4_bs)
882                        {
883                            u4_bs = DUP_LSB_01(min_pu_wd);
884
885                            /* Only if the current edge falls on 8 pixel grid set BS */
886                            if(0 == (start_pos_y & 1))
887                            {
888                                WORD32 shift;
889                                shift = start_pos_x * 2;
890                                /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
891                                 *  will reduce to the following assuming ctb size is one of 16, 32 and 64
892                                 *  and deblocking is done on 8x8 grid
893                                 */
894                                if(6 != log2_ctb_size)
895                                    shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
896                                pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
897                            }
898                        }
899                    }
900
901                    pu_wd -= min_pu_wd;
902                    start_pos_x += min_pu_wd;
903                }while(pu_wd > 0);
904
905                /* Reinitialising since the values are updated in the previous loop */
906                pu_wd = ps_pu->b4_wd + 1;
907                start_pos_x = ps_pu->b4_pos_x;
908            }
909        }
910    }
911
912    {
913        /* If left neighbor is not available, then set BS for entire first column to zero */
914        UWORD32 ctb_addr;
915        WORD32 slice_idx, left_slice_idx = -1, top_slice_idx = -1;
916
917        if(!ps_pps->i1_tiles_enabled_flag)
918        {
919            if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x) ||
920                            (0 == i1_loop_filter_across_slices_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_slice_y) ||
921                            (0 == ps_bs_ctxt->i4_ctb_x))
922            {
923                pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
924            }
925        }
926        else
927        {
928            if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x))
929            {
930                pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
931            }
932            else
933            {
934
935                ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
936                slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
937
938                if(ps_bs_ctxt->i4_ctb_x)
939                {
940                    ctb_addr = (ps_bs_ctxt->i4_ctb_x - 1) + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
941                    left_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
942                }
943
944                if(!((ps_bs_ctxt->ps_slice_hdr->i1_dependent_slice_flag == 1) && (slice_idx == left_slice_idx)))
945                {
946                    /* Removed redundant checks */
947                    if((0 == i1_loop_filter_across_slices_enabled_flag && (
948                                    (0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_slice_y) ||
949                                    ((0 == ps_bs_ctxt->i4_ctb_tile_x) && (slice_idx != left_slice_idx)))) || (0 == ps_bs_ctxt->i4_ctb_x))
950                    {
951                        pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
952                    }
953                }
954            }
955        }
956
957        ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
958        slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
959        if(ps_bs_ctxt->i4_ctb_y)
960        {
961            ctb_addr = (ps_bs_ctxt->i4_ctb_x) + ((ps_bs_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb);
962            top_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
963        }
964        /* If top neighbor is not available, then set BS for entire first row to zero */
965        /* Removed redundant checks */
966        if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_y)
967                        || (0 == i1_loop_filter_across_slices_enabled_flag && ((slice_idx != top_slice_idx)))
968                        || (0 == ps_bs_ctxt->i4_ctb_y))
969        {
970            pu4_horz_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
971        }
972    }
973
974    /**
975     *  Set BS of bottom and right frame boundaries to zero if it is an incomplete CTB
976     *   (They might have been set to non-zero values because of CBF of the current CTB)*/
977    {
978        WORD32 num_rows_remaining = (ps_sps->i2_pic_height_in_luma_samples - (ps_bs_ctxt->i4_ctb_y << log2_ctb_size)) >> 3;
979        WORD32 num_cols_remaining = (ps_sps->i2_pic_width_in_luma_samples - (ps_bs_ctxt->i4_ctb_x << log2_ctb_size)) >> 3;
980        if(num_rows_remaining < (ctb_size >> 3))
981        {
982            /* WORD32 offset = (((num_rows_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4));
983             *  will reduce to the following assuming ctb size is one of 16, 32 and 64
984             *  and deblocking is done on 8x8 grid
985             */
986            WORD32 offset;
987            offset = (num_rows_remaining >> (6 - log2_ctb_size)) << 2;
988            if(6 != log2_ctb_size)
989                offset += (num_rows_remaining & 1) << (log2_ctb_size - 4);
990
991            memset(((UWORD8 *)pu4_horz_bs) + offset, 0, 1 << (log2_ctb_size - 4));
992        }
993
994        if(num_cols_remaining < (ctb_size >> 3))
995        {
996            /* WORD32 offset = (((num_cols_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4));
997             *  will reduce to the following assuming ctb size is one of 16, 32 and 64
998             *  and deblocking is done on 8x8 grid
999             */
1000
1001            WORD32 offset;
1002            offset = (num_cols_remaining >> (6 - log2_ctb_size)) << 2;
1003            if(6 != log2_ctb_size)
1004                offset += (num_cols_remaining & 1) << (log2_ctb_size - 4);
1005
1006            memset(((UWORD8 *)pu4_vert_bs) + offset, 0, 1 << (log2_ctb_size - 4));
1007        }
1008    }
1009    return 0;
1010}
1011