1/******************************************************************************
2*
3* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4*
5* Licensed under the Apache License, Version 2.0 (the "License");
6* you may not use this file except in compliance with the License.
7* You may obtain a copy of the License at:
8*
9* http://www.apache.org/licenses/LICENSE-2.0
10*
11* Unless required by applicable law or agreed to in writing, software
12* distributed under the License is distributed on an "AS IS" BASIS,
13* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14* See the License for the specific language governing permissions and
15* limitations under the License.
16*
17******************************************************************************/
18/**
19 *******************************************************************************
20 * @file
21 *  ihevc_sao.c
22 *
23 * @brief
24 *  Contains function definitions for sample adaptive offset process
25 *
26 * @author
27 *  Srinivas T
28 *
29 * @par List of Functions:
30 *
31 * @remarks
32 *  None
33 *
34 *******************************************************************************
35 */
36
37#include <stdio.h>
38#include <stddef.h>
39#include <stdlib.h>
40#include <string.h>
41#include <assert.h>
42
43#include "ihevc_typedefs.h"
44#include "iv.h"
45#include "ivd.h"
46#include "ihevcd_cxa.h"
47#include "ithread.h"
48
49#include "ihevc_defs.h"
50#include "ihevc_debug.h"
51#include "ihevc_defs.h"
52#include "ihevc_structs.h"
53#include "ihevc_macros.h"
54#include "ihevc_platform_macros.h"
55#include "ihevc_cabac_tables.h"
56#include "ihevc_sao.h"
57#include "ihevc_mem_fns.h"
58
59#include "ihevc_error.h"
60#include "ihevc_common_tables.h"
61
62#include "ihevcd_trace.h"
63#include "ihevcd_defs.h"
64#include "ihevcd_function_selector.h"
65#include "ihevcd_structs.h"
66#include "ihevcd_error.h"
67#include "ihevcd_nal.h"
68#include "ihevcd_bitstream.h"
69#include "ihevcd_job_queue.h"
70#include "ihevcd_utils.h"
71
72#include "ihevc_deblk.h"
73#include "ihevc_deblk_tables.h"
74#include "ihevcd_profile.h"
75#include "ihevcd_sao.h"
76#include "ihevcd_debug.h"
77
78#define SAO_SHIFT_CTB    8
79
80/**
81 * SAO at CTB level is implemented for a shifted CTB(8 pixels in x and y directions)
82 */
83void ihevcd_sao_ctb(sao_ctxt_t *ps_sao_ctxt)
84{
85    codec_t *ps_codec = ps_sao_ctxt->ps_codec;
86    UWORD8 *pu1_src_luma;
87    UWORD8 *pu1_src_chroma;
88    WORD32 src_strd;
89    WORD32 ctb_size;
90    WORD32 log2_ctb_size;
91    sps_t *ps_sps;
92    sao_t *ps_sao;
93    WORD32 row, col;
94    UWORD8 au1_avail_luma[8];
95    UWORD8 au1_avail_chroma[8];
96    WORD32 i;
97    UWORD8 *pu1_src_top_luma;
98    UWORD8 *pu1_src_top_chroma;
99    UWORD8 *pu1_src_left_luma;
100    UWORD8 *pu1_src_left_chroma;
101    UWORD8 au1_src_top_right[2];
102    UWORD8 au1_src_bot_left[2];
103    UWORD8 *pu1_no_loop_filter_flag;
104    WORD32 loop_filter_strd;
105
106    WORD8 ai1_offset_y[5];
107    WORD8 ai1_offset_cb[5];
108    WORD8 ai1_offset_cr[5];
109
110    PROFILE_DISABLE_SAO();
111
112    ai1_offset_y[0] = 0;
113    ai1_offset_cb[0] = 0;
114    ai1_offset_cr[0] = 0;
115
116    ps_sps = ps_sao_ctxt->ps_sps;
117    log2_ctb_size = ps_sps->i1_log2_ctb_size;
118    ctb_size = (1 << log2_ctb_size);
119    src_strd = ps_sao_ctxt->ps_codec->i4_strd;
120    pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
121    pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
122
123    ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
124    loop_filter_strd =  (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
125
126    /* Current CTB */
127    {
128        WORD32 sao_wd_luma;
129        WORD32 sao_wd_chroma;
130        WORD32 sao_ht_luma;
131        WORD32 sao_ht_chroma;
132
133        WORD32 remaining_rows;
134        WORD32 remaining_cols;
135
136        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
137        sao_wd_luma = MIN(ctb_size, remaining_cols);
138        sao_wd_chroma = MIN(ctb_size, remaining_cols);
139
140        remaining_rows = ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
141        sao_ht_luma = MIN(ctb_size, remaining_rows);
142        sao_ht_chroma = MIN(ctb_size, remaining_rows) / 2;
143
144        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
145        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
146        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
147        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
148
149        pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
150                        ((ps_sao_ctxt->i4_ctb_y * ctb_size) / 8) * loop_filter_strd +
151                        ((ps_sao_ctxt->i4_ctb_x * ctb_size) / 64);
152
153        ai1_offset_y[1] = ps_sao->b4_y_offset_1;
154        ai1_offset_y[2] = ps_sao->b4_y_offset_2;
155        ai1_offset_y[3] = ps_sao->b4_y_offset_3;
156        ai1_offset_y[4] = ps_sao->b4_y_offset_4;
157
158        ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
159        ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
160        ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
161        ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
162
163        ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
164        ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
165        ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
166        ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
167
168        for(i = 0; i < 8; i++)
169        {
170            au1_avail_luma[i] = 255;
171            au1_avail_chroma[i] = 255;
172        }
173
174
175        if(0 == ps_sao_ctxt->i4_ctb_x)
176        {
177            au1_avail_luma[0] = 0;
178            au1_avail_luma[4] = 0;
179            au1_avail_luma[6] = 0;
180
181            au1_avail_chroma[0] = 0;
182            au1_avail_chroma[4] = 0;
183            au1_avail_chroma[6] = 0;
184        }
185
186        if(ps_sps->i2_pic_wd_in_ctb - 1 == ps_sao_ctxt->i4_ctb_x)
187        {
188            au1_avail_luma[1] = 0;
189            au1_avail_luma[5] = 0;
190            au1_avail_luma[7] = 0;
191
192            au1_avail_chroma[1] = 0;
193            au1_avail_chroma[5] = 0;
194            au1_avail_chroma[7] = 0;
195        }
196
197        if(0 == ps_sao_ctxt->i4_ctb_y)
198        {
199            au1_avail_luma[2] = 0;
200            au1_avail_luma[4] = 0;
201            au1_avail_luma[5] = 0;
202
203            au1_avail_chroma[2] = 0;
204            au1_avail_chroma[4] = 0;
205            au1_avail_chroma[5] = 0;
206        }
207
208        if(ps_sps->i2_pic_ht_in_ctb - 1 == ps_sao_ctxt->i4_ctb_y)
209        {
210            au1_avail_luma[3] = 0;
211            au1_avail_luma[6] = 0;
212            au1_avail_luma[7] = 0;
213
214            au1_avail_chroma[3] = 0;
215            au1_avail_chroma[6] = 0;
216            au1_avail_chroma[7] = 0;
217        }
218
219
220        if(0 == ps_sao->b3_y_type_idx)
221        {
222            /* Update left, top and top-left */
223            for(row = 0; row < sao_ht_luma; row++)
224            {
225                pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
226            }
227            ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
228
229            ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
230
231        }
232        else
233        {
234            UWORD8 au1_src_copy[(MAX_CTB_SIZE + 2) * (MAX_CTB_SIZE + 2)];
235            UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 2) + 1;
236            WORD32 tmp_strd = MAX_CTB_SIZE + 2;
237            WORD32 no_loop_filter_enabled = 0;
238
239            /* Check the loop filter flags and copy the original values for back up */
240            {
241                UWORD32 u4_no_loop_filter_flag;
242                WORD32 min_cu = 8;
243                UWORD8 *pu1_src_tmp = pu1_src_luma;
244
245                for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
246                {
247                    u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
248                                    ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
249                    u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
250
251                    if(u4_no_loop_filter_flag)
252                    {
253                        WORD32 tmp_wd = sao_wd_luma;
254                        no_loop_filter_enabled = 1;
255                        while(tmp_wd > 0)
256                        {
257                            if(CTZ(u4_no_loop_filter_flag))
258                            {
259                                u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
260                                pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
261                                pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
262                                tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
263                            }
264                            else
265                            {
266                                for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
267                                {
268                                    for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
269                                    {
270                                        pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
271                                    }
272                                }
273
274                                u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
275                                pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
276                                pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
277                                tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
278                            }
279                        }
280
281                        pu1_src_tmp -= sao_wd_luma;
282                    }
283
284                    pu1_src_tmp += min_cu * src_strd;
285                    pu1_src_copy += min_cu * tmp_strd;
286                }
287            }
288
289            if(1 == ps_sao->b3_y_type_idx)
290            {
291                ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
292                                                                          src_strd,
293                                                                          pu1_src_left_luma,
294                                                                          pu1_src_top_luma,
295                                                                          ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
296                                                                          ps_sao->b5_y_band_pos,
297                                                                          ai1_offset_y,
298                                                                          sao_wd_luma,
299                                                                          sao_ht_luma);
300            }
301            else // if(2 <= ps_sao->b3_y_type_idx)
302            {
303                au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
304                au1_src_bot_left[0] = pu1_src_luma[sao_ht_luma * src_strd - 1];
305                ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
306                                                                  src_strd,
307                                                                  pu1_src_left_luma,
308                                                                  pu1_src_top_luma,
309                                                                  ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
310                                                                  au1_src_top_right,
311                                                                  au1_src_bot_left,
312                                                                  au1_avail_luma,
313                                                                  ai1_offset_y,
314                                                                  sao_wd_luma,
315                                                                  sao_ht_luma);
316            }
317
318            /* Check the loop filter flags and copy the original values back if they are set */
319            if(no_loop_filter_enabled)
320            {
321                UWORD32 u4_no_loop_filter_flag;
322                WORD32 min_cu = 8;
323                UWORD8 *pu1_src_tmp = pu1_src_luma;
324
325                for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
326                {
327                    u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
328                    u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
329
330                    if(u4_no_loop_filter_flag)
331                    {
332                        WORD32 tmp_wd = sao_wd_luma;
333                        while(tmp_wd > 0)
334                        {
335                            if(CTZ(u4_no_loop_filter_flag))
336                            {
337                                u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
338                                pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
339                                pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
340                                tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
341                            }
342                            else
343                            {
344                                for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
345                                {
346                                    for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
347                                    {
348                                        pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
349                                    }
350                                }
351
352                                u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
353                                pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
354                                pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
355                                tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
356                            }
357                        }
358
359                        pu1_src_tmp -= sao_wd_luma;
360                    }
361
362                    pu1_src_tmp += min_cu * src_strd;
363                    pu1_src_copy += min_cu * tmp_strd;
364                }
365            }
366
367        }
368
369        if(0 == ps_sao->b3_cb_type_idx)
370        {
371            for(row = 0; row < sao_ht_chroma; row++)
372            {
373                pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
374                pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
375            }
376            ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
377            ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
378
379            ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
380        }
381        else
382        {
383            UWORD8 au1_src_copy[(MAX_CTB_SIZE + 4) * (MAX_CTB_SIZE + 2)];
384            UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 4) + 2;
385            WORD32 tmp_strd = MAX_CTB_SIZE + 4;
386            WORD32 no_loop_filter_enabled = 0;
387
388            /* Check the loop filter flags and copy the original values for back up */
389            {
390                UWORD32 u4_no_loop_filter_flag;
391                WORD32 min_cu = 4;
392                UWORD8 *pu1_src_tmp = pu1_src_chroma;
393
394                for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
395                {
396                    u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
397                    u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
398
399                    if(u4_no_loop_filter_flag)
400                    {
401                        WORD32 tmp_wd = sao_wd_chroma;
402                        no_loop_filter_enabled = 1;
403                        while(tmp_wd > 0)
404                        {
405                            if(CTZ(u4_no_loop_filter_flag))
406                            {
407                                u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
408                                pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
409                                pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
410                                tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
411                            }
412                            else
413                            {
414                                for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
415                                {
416                                    for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
417                                    {
418                                        pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
419                                    }
420                                }
421
422                                u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
423                                pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
424                                pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
425                                tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
426                            }
427                        }
428
429                        pu1_src_tmp -= sao_wd_chroma;
430                    }
431
432                    pu1_src_tmp += min_cu * src_strd;
433                    pu1_src_copy += min_cu * tmp_strd;
434                }
435            }
436
437            if(1 == ps_sao->b3_cb_type_idx)
438            {
439                ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
440                                                                            src_strd,
441                                                                            pu1_src_left_chroma,
442                                                                            pu1_src_top_chroma,
443                                                                            ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
444                                                                            ps_sao->b5_cb_band_pos,
445                                                                            ps_sao->b5_cr_band_pos,
446                                                                            ai1_offset_cb,
447                                                                            ai1_offset_cr,
448                                                                            sao_wd_chroma,
449                                                                            sao_ht_chroma
450                                                                           );
451            }
452            else // if(2 <= ps_sao->b3_cb_type_idx)
453            {
454                au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
455                au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
456                au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
457                au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
458                ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
459                                                                     src_strd,
460                                                                     pu1_src_left_chroma,
461                                                                     pu1_src_top_chroma,
462                                                                     ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
463                                                                     au1_src_top_right,
464                                                                     au1_src_bot_left,
465                                                                     au1_avail_chroma,
466                                                                     ai1_offset_cb,
467                                                                     ai1_offset_cr,
468                                                                     sao_wd_chroma,
469                                                                     sao_ht_chroma);
470            }
471
472            /* Check the loop filter flags and copy the original values back if they are set */
473            if(no_loop_filter_enabled)
474            {
475                UWORD32 u4_no_loop_filter_flag;
476                WORD32 min_cu = 4;
477                UWORD8 *pu1_src_tmp = pu1_src_chroma;
478
479                for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
480                {
481                    u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
482                    u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
483
484                    if(u4_no_loop_filter_flag)
485                    {
486                        WORD32 tmp_wd = sao_wd_chroma;
487                        while(tmp_wd > 0)
488                        {
489                            if(CTZ(u4_no_loop_filter_flag))
490                            {
491                                u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
492                                pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
493                                pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
494                                tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
495                            }
496                            else
497                            {
498                                for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
499                                {
500                                    for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
501                                    {
502                                        pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
503                                    }
504                                }
505
506                                u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
507                                pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
508                                pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
509                                tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
510                            }
511                        }
512
513                        pu1_src_tmp -= sao_wd_chroma;
514                    }
515
516                    pu1_src_tmp += min_cu * src_strd;
517                    pu1_src_copy += min_cu * tmp_strd;
518                }
519            }
520
521        }
522
523    }
524}
525
526void ihevcd_sao_shift_ctb(sao_ctxt_t *ps_sao_ctxt)
527{
528    codec_t *ps_codec = ps_sao_ctxt->ps_codec;
529    UWORD8 *pu1_src_luma;
530    UWORD8 *pu1_src_chroma;
531    WORD32 src_strd;
532    WORD32 ctb_size;
533    WORD32 log2_ctb_size;
534    sps_t *ps_sps;
535    sao_t *ps_sao;
536    pps_t *ps_pps;
537    slice_header_t *ps_slice_hdr, *ps_slice_hdr_base;
538    tile_t *ps_tile;
539    UWORD16 *pu1_slice_idx;
540    UWORD16 *pu1_tile_idx;
541    WORD32 row, col;
542    UWORD8 au1_avail_luma[8];
543    UWORD8 au1_avail_chroma[8];
544    UWORD8 au1_tile_slice_boundary[8];
545    UWORD8 au4_ilf_across_tile_slice_enable[8];
546    WORD32 i;
547    UWORD8 *pu1_src_top_luma;
548    UWORD8 *pu1_src_top_chroma;
549    UWORD8 *pu1_src_left_luma;
550    UWORD8 *pu1_src_left_chroma;
551    UWORD8 au1_src_top_right[2];
552    UWORD8 au1_src_bot_left[2];
553    UWORD8 *pu1_no_loop_filter_flag;
554    UWORD8 *pu1_src_backup_luma;
555    UWORD8 *pu1_src_backup_chroma;
556    WORD32 backup_strd;
557    WORD32 loop_filter_strd;
558
559    WORD32 no_loop_filter_enabled_luma = 0;
560    WORD32 no_loop_filter_enabled_chroma = 0;
561    UWORD8 *pu1_sao_src_top_left_chroma_curr_ctb;
562    UWORD8 *pu1_sao_src_top_left_luma_curr_ctb;
563    UWORD8 *pu1_sao_src_luma_top_left_ctb;
564    UWORD8 *pu1_sao_src_chroma_top_left_ctb;
565    UWORD8 *pu1_sao_src_top_left_luma_top_right;
566    UWORD8 *pu1_sao_src_top_left_chroma_top_right;
567    UWORD8  u1_sao_src_top_left_luma_bot_left;
568    UWORD8  *pu1_sao_src_top_left_luma_bot_left;
569    UWORD8 *au1_sao_src_top_left_chroma_bot_left;
570    UWORD8 *pu1_sao_src_top_left_chroma_bot_left;
571
572    WORD8 ai1_offset_y[5];
573    WORD8 ai1_offset_cb[5];
574    WORD8 ai1_offset_cr[5];
575    WORD32  chroma_yuv420sp_vu = ps_sao_ctxt->is_chroma_yuv420sp_vu;
576
577    PROFILE_DISABLE_SAO();
578
579    ai1_offset_y[0] = 0;
580    ai1_offset_cb[0] = 0;
581    ai1_offset_cr[0] = 0;
582
583    ps_sps = ps_sao_ctxt->ps_sps;
584    ps_pps = ps_sao_ctxt->ps_pps;
585    ps_tile = ps_sao_ctxt->ps_tile;
586
587    log2_ctb_size = ps_sps->i1_log2_ctb_size;
588    ctb_size = (1 << log2_ctb_size);
589    src_strd = ps_sao_ctxt->ps_codec->i4_strd;
590    ps_slice_hdr_base = ps_sao_ctxt->ps_codec->ps_slice_hdr_base;
591    ps_slice_hdr = ps_slice_hdr_base + (ps_sao_ctxt->i4_cur_slice_idx & (MAX_SLICE_HDR_CNT - 1));
592
593    pu1_slice_idx = ps_sao_ctxt->pu1_slice_idx;
594    pu1_tile_idx = ps_sao_ctxt->pu1_tile_idx;
595    pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
596    pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
597
598    /*Stores the left value for each row ctbs- Needed for column tiles*/
599    pu1_sao_src_top_left_luma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb + ((ps_sao_ctxt->i4_ctb_y));
600    pu1_sao_src_top_left_chroma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb + (2 * (ps_sao_ctxt->i4_ctb_y));
601    pu1_sao_src_luma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_luma_top_left_ctb + ((ps_sao_ctxt->i4_ctb_y));
602    pu1_sao_src_chroma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_chroma_top_left_ctb + (2 * ps_sao_ctxt->i4_ctb_y);
603    u1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->u1_sao_src_top_left_luma_bot_left; // + ((ps_sao_ctxt->i4_ctb_y));
604    pu1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_luma_bot_left + ((ps_sao_ctxt->i4_ctb_y));
605    au1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->au1_sao_src_top_left_chroma_bot_left; // + (2 * ps_sao_ctxt->i4_ctb_y);
606    pu1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_chroma_bot_left + (2 * ps_sao_ctxt->i4_ctb_y);
607    pu1_sao_src_top_left_luma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_luma_top_right + ((ps_sao_ctxt->i4_ctb_x));
608    pu1_sao_src_top_left_chroma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_chroma_top_right + (2 * ps_sao_ctxt->i4_ctb_x);
609
610    ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
611    loop_filter_strd =  (ps_sps->i2_pic_width_in_luma_samples + 63) >> 6;
612    backup_strd = 2 * MAX_CTB_SIZE;
613
614    DEBUG_INIT_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
615
616    {
617        /* Check the loop filter flags and copy the original values for back up */
618        /* Luma */
619        if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
620        {
621            UWORD32 u4_no_loop_filter_flag;
622            WORD32 loop_filter_bit_pos;
623            WORD32 log2_min_cu = 3;
624            WORD32 min_cu = (1 << log2_min_cu);
625            UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
626            WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
627            WORD32 sao_blk_wd = ctb_size;
628            WORD32 remaining_rows;
629            WORD32 remaining_cols;
630
631            remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
632            remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
633            if(remaining_rows <= SAO_SHIFT_CTB)
634                sao_blk_ht += remaining_rows;
635            if(remaining_cols <= SAO_SHIFT_CTB)
636                sao_blk_wd += remaining_cols;
637
638            pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
639            pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
640
641            pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
642
643            loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
644                            (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
645            if(ps_sao_ctxt->i4_ctb_x > 0)
646                loop_filter_bit_pos -= 1;
647
648            pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
649                            (loop_filter_bit_pos >> 3);
650
651            for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
652                            i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
653            {
654                WORD32 tmp_wd = sao_blk_wd;
655
656                u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
657                                (loop_filter_bit_pos & 7);
658                u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
659
660                if(u4_no_loop_filter_flag)
661                {
662                    no_loop_filter_enabled_luma = 1;
663                    while(tmp_wd > 0)
664                    {
665                        if(CTZ(u4_no_loop_filter_flag))
666                        {
667                            pu1_src_tmp_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
668                            pu1_src_backup_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
669                            tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
670                            u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
671                        }
672                        else
673                        {
674                            for(row = 0; row < min_cu; row++)
675                            {
676                                for(col = 0; col < MIN((WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
677                                {
678                                    pu1_src_backup_luma[row * backup_strd + col] = pu1_src_tmp_luma[row * src_strd + col];
679                                }
680                            }
681                            pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
682                            pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
683                            tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
684                            u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
685                        }
686                    }
687
688                    pu1_src_tmp_luma -= sao_blk_wd;
689                    pu1_src_backup_luma -= sao_blk_wd;
690                }
691
692                pu1_src_tmp_luma += (src_strd << log2_min_cu);
693                pu1_src_backup_luma += (backup_strd << log2_min_cu);
694            }
695        }
696
697        /* Chroma */
698        if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
699        {
700            UWORD32 u4_no_loop_filter_flag;
701            WORD32 loop_filter_bit_pos;
702            WORD32 log2_min_cu = 3;
703            WORD32 min_cu = (1 << log2_min_cu);
704            UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
705            WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
706            WORD32 sao_blk_wd = ctb_size;
707            WORD32 remaining_rows;
708            WORD32 remaining_cols;
709
710            remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
711            remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
712            if(remaining_rows <= 2 * SAO_SHIFT_CTB)
713                sao_blk_ht += remaining_rows;
714            if(remaining_cols <= 2 * SAO_SHIFT_CTB)
715                sao_blk_wd += remaining_cols;
716
717            pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
718            pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
719
720            pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
721
722            loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
723                            (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
724            if(ps_sao_ctxt->i4_ctb_x > 0)
725                loop_filter_bit_pos -= 2;
726
727            pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
728                            (loop_filter_bit_pos >> 3);
729
730            for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
731                            i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
732            {
733                WORD32 tmp_wd = sao_blk_wd;
734
735                u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
736                                (loop_filter_bit_pos & 7);
737                u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
738
739                if(u4_no_loop_filter_flag)
740                {
741                    no_loop_filter_enabled_chroma = 1;
742                    while(tmp_wd > 0)
743                    {
744                        if(CTZ(u4_no_loop_filter_flag))
745                        {
746                            pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
747                            pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
748                            tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
749                            u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
750                        }
751                        else
752                        {
753                            for(row = 0; row < min_cu / 2; row++)
754                            {
755                                for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
756                                {
757                                    pu1_src_backup_chroma[row * backup_strd + col] = pu1_src_tmp_chroma[row * src_strd + col];
758                                }
759                            }
760
761                            pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
762                            pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
763                            tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
764                            u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
765                        }
766                    }
767
768                    pu1_src_tmp_chroma -= sao_blk_wd;
769                    pu1_src_backup_chroma -= sao_blk_wd;
770                }
771
772                pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
773                pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
774            }
775        }
776    }
777
778    DEBUG_PROCESS_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
779
780    /* Top-left CTB */
781    if(ps_sao_ctxt->i4_ctb_x > 0 && ps_sao_ctxt->i4_ctb_y > 0)
782    {
783        WORD32 sao_wd_luma = SAO_SHIFT_CTB;
784        WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
785        WORD32 sao_ht_luma = SAO_SHIFT_CTB;
786        WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
787
788        WORD32 ctbx_tl_t = 0, ctbx_tl_l = 0, ctbx_tl_r = 0, ctbx_tl_d = 0, ctbx_tl = 0;
789        WORD32 ctby_tl_t = 0, ctby_tl_l = 0, ctby_tl_r = 0, ctby_tl_d = 0, ctby_tl = 0;
790        WORD32 au4_idx_tl[8], idx_tl;
791
792
793        pu1_src_luma -= (sao_wd_luma + sao_ht_luma * src_strd);
794        pu1_src_chroma -= (sao_wd_chroma + sao_ht_chroma * src_strd);
795        ps_sao -= (1 + ps_sps->i2_pic_wd_in_ctb);
796        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
797        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
798        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma;
799        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
800
801        if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
802        {
803            if(0 == ps_sao->b3_y_type_idx)
804            {
805                /* Update left, top and top-left */
806                for(row = 0; row < sao_ht_luma; row++)
807                {
808                    pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
809                }
810                pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
811
812                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
813
814
815            }
816
817            else if(1 == ps_sao->b3_y_type_idx)
818            {
819                ai1_offset_y[1] = ps_sao->b4_y_offset_1;
820                ai1_offset_y[2] = ps_sao->b4_y_offset_2;
821                ai1_offset_y[3] = ps_sao->b4_y_offset_3;
822                ai1_offset_y[4] = ps_sao->b4_y_offset_4;
823
824                ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
825                                                                          src_strd,
826                                                                          pu1_src_left_luma,
827                                                                          pu1_src_top_luma,
828                                                                          pu1_sao_src_luma_top_left_ctb,
829                                                                          ps_sao->b5_y_band_pos,
830                                                                          ai1_offset_y,
831                                                                          sao_wd_luma,
832                                                                          sao_ht_luma
833                                                                         );
834            }
835
836            else // if(2 <= ps_sao->b3_y_type_idx)
837            {
838                ai1_offset_y[1] = ps_sao->b4_y_offset_1;
839                ai1_offset_y[2] = ps_sao->b4_y_offset_2;
840                ai1_offset_y[3] = ps_sao->b4_y_offset_3;
841                ai1_offset_y[4] = ps_sao->b4_y_offset_4;
842
843                for(i = 0; i < 8; i++)
844                {
845                    au1_avail_luma[i] = 255;
846                    au1_tile_slice_boundary[i] = 0;
847                    au4_idx_tl[i] = 0;
848                    au4_ilf_across_tile_slice_enable[i] = 1;
849                }
850
851                /******************************************************************
852                 * Derive the  Top-left CTB's neighbor pixel's slice indices.
853                 *
854                 *          TL_T
855                 *       4  _2__5________
856                 *     0   |    |       |
857                 *    TL_L | TL | 1 TL_R|
858                 *         |____|_______|____
859                 *        6|TL_D|7      |    |
860                 *         | 3  |       |    |
861                 *         |____|_______|    |
862                 *              |            |
863                 *              |            |
864                 *              |____________|
865                 *
866                 *****************************************************************/
867
868                /*In case of slices, unless we encounter multiple slice/tiled clips, don't enter*/
869                {
870                    if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
871                    {
872                        {
873                            /*Assuming that sao shift is uniform along x and y directions*/
874                            if((0 == (1 << log2_ctb_size) - sao_wd_luma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
875                            {
876                                ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
877                                ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
878                            }
879                            else if(!(0 == (1 << log2_ctb_size) - sao_wd_luma))
880                            {
881                                ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
882                                ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
883                            }
884                            ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
885                            ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
886
887                            ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
888                            ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
889
890                            ctbx_tl_d =  ps_sao_ctxt->i4_ctb_x - 1;
891                            ctby_tl_d =  ps_sao_ctxt->i4_ctb_y;
892
893                            ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
894                            ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
895                        }
896
897                        if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
898                        {
899                            /*Calculate slice indices for neighbor pixels*/
900                            idx_tl   = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
901                            au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
902                            au4_idx_tl[0] =  pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
903                            au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
904                            au4_idx_tl[3] = au4_idx_tl[6] =   pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
905                            au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
906
907                            if((0 == (1 << log2_ctb_size) - sao_wd_luma))
908                            {
909                                if(ps_sao_ctxt->i4_ctb_x == 1)
910                                {
911                                    au4_idx_tl[6] = -1;
912                                    au4_idx_tl[4] = -1;
913                                }
914                                else
915                                {
916                                    au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
917                                }
918                                if(ps_sao_ctxt->i4_ctb_y == 1)
919                                {
920                                    au4_idx_tl[5] = -1;
921                                    au4_idx_tl[4] = -1;
922                                }
923                                else
924                                {
925                                    au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
926                                    au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
927                                }
928                                au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
929                            }
930
                            /* Verify that the neighbor ctbs don't cross pic boundary.
932                             * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
933                             * of the pixel having a greater address is checked. Accordingly, set the availability flags.
934                             * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
935                             * the respective pixel's flags are checked
936                             */
937
938                            if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma))
939                            {
940                                au4_ilf_across_tile_slice_enable[4] = 0;
941                                au4_ilf_across_tile_slice_enable[6] = 0;
942                            }
943                            else
944                            {
945                                au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
946                            }
947                            if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
948                            {
949                                au4_ilf_across_tile_slice_enable[5] = 0;
950                                au4_ilf_across_tile_slice_enable[4] = 0;
951                            }
952                            else
953                            {
954                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
955                                au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
956                            }
957                            au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
958                            au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
959                            au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
960                            au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
961                            au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
962
963                            /*
964                             * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
965                             * of the pixel having a greater address is checked. Accordingly, set the availability flags.
966                             * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
967                             * the respective pixel's flags are checked
968                             */
969                            for(i = 0; i < 8; i++)
970                            {
971                                /*Sets the edges that lie on the slice/tile boundary*/
972                                if(au4_idx_tl[i] != idx_tl)
973                                {
974                                    au1_tile_slice_boundary[i] = 1;
975                                }
976                                else
977                                {
978                                    au4_ilf_across_tile_slice_enable[i] = 1;
979                                }
980                            }
981
982                            ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_tl, 0, 8 * sizeof(WORD32));
983                        }
984
985                        if(ps_pps->i1_tiles_enabled_flag)
986                        {
                            /* Calculate availability flags at tile boundary */
988                            if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
989                            {
990                                /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
991                                if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
992                                {
993                                    /*Set the boundary arrays*/
994                                    /*Calculate tile indices for neighbor pixels*/
995                                    idx_tl   = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
996                                    au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
997                                    au4_idx_tl[0] =  pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
998                                    au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
999                                    au4_idx_tl[3] = au4_idx_tl[6] =   pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1000                                    au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1001
1002                                    if((0 == (1 << log2_ctb_size) - sao_wd_luma))
1003                                    {
1004                                        if(ps_sao_ctxt->i4_ctb_x == 1)
1005                                        {
1006                                            au4_idx_tl[6] = -1;
1007                                            au4_idx_tl[4] = -1;
1008                                        }
1009                                        else
1010                                        {
1011                                            au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1012                                        }
1013                                        if(ps_sao_ctxt->i4_ctb_y == 1)
1014                                        {
1015                                            au4_idx_tl[5] = -1;
1016                                            au4_idx_tl[4] = -1;
1017                                        }
1018                                        else
1019                                        {
1020                                            au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1021                                            au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1022                                        }
1023                                        au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1024                                    }
1025                                    for(i = 0; i < 8; i++)
1026                                    {
1027                                        /*Sets the edges that lie on the tile boundary*/
1028                                        if(au4_idx_tl[i] != idx_tl)
1029                                        {
1030                                            au1_tile_slice_boundary[i] |= 1;
1031                                            au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
1032                                        }
1033                                    }
1034                                }
1035                            }
1036                        }
1037
1038
1039                        /*Set availability flags based on tile and slice boundaries*/
1040                        for(i = 0; i < 8; i++)
1041                        {
                            /*Mark a neighbor unavailable when it lies on a slice/tile boundary and filtering across that boundary is disabled*/
1043                            if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1044                            {
1045                                au1_avail_luma[i] = 0;
1046                            }
1047                        }
1048                    }
1049                }
1050
1051                if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
1052                {
1053                    au1_avail_luma[0] = 0;
1054                    au1_avail_luma[4] = 0;
1055                    au1_avail_luma[6] = 0;
1056                }
1057
1058                if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
1059                {
1060                    au1_avail_luma[1] = 0;
1061                    au1_avail_luma[5] = 0;
1062                    au1_avail_luma[7] = 0;
1063                }
1064                //y==1 case
1065                if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
1066                {
1067                    au1_avail_luma[2] = 0;
1068                    au1_avail_luma[4] = 0;
1069                    au1_avail_luma[5] = 0;
1070                }
1071                if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1072                {
1073                    au1_avail_luma[3] = 0;
1074                    au1_avail_luma[6] = 0;
1075                    au1_avail_luma[7] = 0;
1076                }
1077
1078                {
1079                    au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
1080                    u1_sao_src_top_left_luma_bot_left = pu1_src_left_luma[sao_ht_luma];
1081                    ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
1082                                                                      src_strd,
1083                                                                      pu1_src_left_luma,
1084                                                                      pu1_src_top_luma,
1085                                                                      pu1_sao_src_luma_top_left_ctb,
1086                                                                      au1_src_top_right,
1087                                                                      &u1_sao_src_top_left_luma_bot_left,
1088                                                                      au1_avail_luma,
1089                                                                      ai1_offset_y,
1090                                                                      sao_wd_luma,
1091                                                                      sao_ht_luma);
1092                }
1093            }
1094
1095        }
1096
1097        if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
1098        {
1099            if(0 == ps_sao->b3_cb_type_idx)
1100            {
1101                for(row = 0; row < sao_ht_chroma; row++)
1102                {
1103                    pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1104                    pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1105                }
1106                pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1107                pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1108
1109                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1110
1111            }
1112
1113            else if(1 == ps_sao->b3_cb_type_idx)
1114            {
1115                ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1116                ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1117                ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1118                ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1119
1120                ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1121                ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1122                ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1123                ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1124
1125                if(chroma_yuv420sp_vu)
1126                {
1127                    ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1128                                                                                src_strd,
1129                                                                                pu1_src_left_chroma,
1130                                                                                pu1_src_top_chroma,
1131                                                                                pu1_sao_src_chroma_top_left_ctb,
1132                                                                                ps_sao->b5_cr_band_pos,
1133                                                                                ps_sao->b5_cb_band_pos,
1134                                                                                ai1_offset_cr,
1135                                                                                ai1_offset_cb,
1136                                                                                sao_wd_chroma,
1137                                                                                sao_ht_chroma
1138                                                                               );
1139                }
1140                else
1141                {
1142                    ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1143                                                                                src_strd,
1144                                                                                pu1_src_left_chroma,
1145                                                                                pu1_src_top_chroma,
1146                                                                                pu1_sao_src_chroma_top_left_ctb,
1147                                                                                ps_sao->b5_cb_band_pos,
1148                                                                                ps_sao->b5_cr_band_pos,
1149                                                                                ai1_offset_cb,
1150                                                                                ai1_offset_cr,
1151                                                                                sao_wd_chroma,
1152                                                                                sao_ht_chroma
1153                                                                               );
1154                }
1155            }
1156
1157            else // if(2 <= ps_sao->b3_cb_type_idx)
1158            {
1159                ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1160                ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1161                ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1162                ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1163
1164                ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1165                ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1166                ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1167                ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1168                for(i = 0; i < 8; i++)
1169                {
1170                    au1_avail_chroma[i] = 255;
1171                    au1_tile_slice_boundary[i] = 0;
1172                    au4_idx_tl[i] = 0;
1173                    au4_ilf_across_tile_slice_enable[i] = 1;
1174                }
1175                /*In case of slices*/
1176                {
1177                    if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1178                    {
1179                        if((0 == (1 << log2_ctb_size) - sao_wd_chroma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
1180                        {
1181                            ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
1182                            ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
1183                        }
1184                        else if(!(0 == (1 << log2_ctb_size) - sao_wd_chroma))
1185                        {
1186                            ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
1187                            ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
1188                        }
1189                        ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
1190                        ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
1191
1192                        ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
1193                        ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
1194
1195                        ctbx_tl_d =  ps_sao_ctxt->i4_ctb_x - 1;
1196                        ctby_tl_d =  ps_sao_ctxt->i4_ctb_y;
1197
1198                        ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
1199                        ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
1200
1201                        if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1202                        {
1203
1204                            idx_tl   = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1205                            au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1206                            au4_idx_tl[0] =  pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1207                            au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1208                            au4_idx_tl[3] = au4_idx_tl[6] =   pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1209                            au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1210
1211                            if((0 == (1 << log2_ctb_size) - sao_wd_chroma))
1212                            {
1213                                if(ps_sao_ctxt->i4_ctb_x == 1)
1214                                {
1215                                    au4_idx_tl[6] = -1;
1216                                    au4_idx_tl[4] = -1;
1217                                }
1218                                else
1219                                {
1220                                    au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1221                                }
1222                                if(ps_sao_ctxt->i4_ctb_y == 1)
1223                                {
1224                                    au4_idx_tl[5] = -1;
1225                                    au4_idx_tl[4] = -1;
1226                                }
1227                                else
1228                                {
1229                                    au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1230                                    au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1231                                }
1232                                au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1233                            }
1234
1235                            /* Verify that the neighbor ctbs don't cross pic boundary
1236                             * Also, the ILF flag belonging to the higher pixel address (between neighbor and current pixels) must be assigned*/
1237                            if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma))
1238                            {
1239                                au4_ilf_across_tile_slice_enable[4] = 0;
1240                                au4_ilf_across_tile_slice_enable[6] = 0;
1241                            }
1242                            else
1243                            {
1244                                au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1245                            }
1246                            if((0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma))
1247                            {
1248                                au4_ilf_across_tile_slice_enable[5] = 0;
1249                                au4_ilf_across_tile_slice_enable[4] = 0;
1250                            }
1251                            else
1252                            {
1253                                au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1254                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1255                            }
1256                            au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1257                            au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1258                            au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1259                            au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1260                            au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1261                            /*
1262                             * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1263                             * of the pixel having a greater address is checked. Accordingly, set the availability flags
1264                             */
1265                            for(i = 0; i < 8; i++)
1266                            {
1267                                /*Sets the edges that lie on the slice/tile boundary*/
1268                                if(au4_idx_tl[i] != idx_tl)
1269                                {
1270                                    au1_tile_slice_boundary[i] = 1;
1271                                }
1272                                else
1273                                {
1274                                    au4_ilf_across_tile_slice_enable[i] = 1;
1275                                }
1276                            }
1277
1278                            /*Reset indices*/
1279                            for(i = 0; i < 8; i++)
1280                            {
1281                                au4_idx_tl[i] = 0;
1282                            }
1283                        }
1284                        if(ps_pps->i1_tiles_enabled_flag)
1285                        {
                            /* Calculate availability flags at tile boundary */
1287                            if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1288                            {
1289                                /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1290                                if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1291                                {
1292                                    /*Set the boundary arrays*/
1293                                    /*Calculate tile indices for neighbor pixels*/
1294                                    idx_tl   = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1295                                    au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1296                                    au4_idx_tl[0] =  pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1297                                    au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1298                                    au4_idx_tl[3] = au4_idx_tl[6] =   pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1299                                    au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1300
1301                                    if((0 == (1 << log2_ctb_size) - sao_wd_luma))
1302                                    {
1303                                        if(ps_sao_ctxt->i4_ctb_x == 1)
1304                                        {
1305                                            au4_idx_tl[6] = -1;
1306                                            au4_idx_tl[4] = -1;
1307                                        }
1308                                        else
1309                                        {
1310                                            au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1311                                        }
1312                                        if(ps_sao_ctxt->i4_ctb_y == 1)
1313                                        {
1314                                            au4_idx_tl[5] = -1;
1315                                            au4_idx_tl[4] = -1;
1316                                        }
1317                                        else
1318                                        {
1319                                            au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1320                                            au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1321                                        }
1322                                        au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1323                                    }
1324                                    for(i = 0; i < 8; i++)
1325                                    {
1326                                        /*Sets the edges that lie on the tile boundary*/
1327                                        if(au4_idx_tl[i] != idx_tl)
1328                                        {
1329                                            au1_tile_slice_boundary[i] |= 1;
1330                                            au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
1331                                        }
1332                                    }
1333                                }
1334                            }
1335                        }
1336
1337                        for(i = 0; i < 8; i++)
1338                        {
                            /*Mark a neighbor unavailable when its edge lies on a slice/tile boundary and filtering across that boundary is disabled*/
1340                            if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1341                            {
1342                                au1_avail_chroma[i] = 0;
1343                            }
1344                        }
1345                    }
1346                }
1347
1348                if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
1349                {
1350                    au1_avail_chroma[0] = 0;
1351                    au1_avail_chroma[4] = 0;
1352                    au1_avail_chroma[6] = 0;
1353                }
1354                if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
1355                {
1356                    au1_avail_chroma[1] = 0;
1357                    au1_avail_chroma[5] = 0;
1358                    au1_avail_chroma[7] = 0;
1359                }
1360
1361                if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
1362                {
1363                    au1_avail_chroma[2] = 0;
1364                    au1_avail_chroma[4] = 0;
1365                    au1_avail_chroma[5] = 0;
1366                }
1367                if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1368                {
1369                    au1_avail_chroma[3] = 0;
1370                    au1_avail_chroma[6] = 0;
1371                    au1_avail_chroma[7] = 0;
1372                }
1373
1374                {
1375                    au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
1376                    au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
1377                    au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_left_chroma[2 * sao_ht_chroma];
1378                    au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_left_chroma[2 * sao_ht_chroma + 1];
1379                    if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_y != ps_sps->i2_pic_ht_in_ctb - 1))
1380                    {
1381                        au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
1382                        au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
1383                    }
1384
1385                    if(chroma_yuv420sp_vu)
1386                    {
1387                        ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1388                                                                             src_strd,
1389                                                                             pu1_src_left_chroma,
1390                                                                             pu1_src_top_chroma,
1391                                                                             pu1_sao_src_chroma_top_left_ctb,
1392                                                                             au1_src_top_right,
1393                                                                             au1_sao_src_top_left_chroma_bot_left,
1394                                                                             au1_avail_chroma,
1395                                                                             ai1_offset_cr,
1396                                                                             ai1_offset_cb,
1397                                                                             sao_wd_chroma,
1398                                                                             sao_ht_chroma);
1399                    }
1400                    else
1401                    {
1402                        ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1403                                                                             src_strd,
1404                                                                             pu1_src_left_chroma,
1405                                                                             pu1_src_top_chroma,
1406                                                                             pu1_sao_src_chroma_top_left_ctb,
1407                                                                             au1_src_top_right,
1408                                                                             au1_sao_src_top_left_chroma_bot_left,
1409                                                                             au1_avail_chroma,
1410                                                                             ai1_offset_cb,
1411                                                                             ai1_offset_cr,
1412                                                                             sao_wd_chroma,
1413                                                                             sao_ht_chroma);
1414                    }
1415                }
1416            }
1417        }
1418
1419        pu1_src_luma += sao_wd_luma + sao_ht_luma * src_strd;
1420        pu1_src_chroma += sao_wd_chroma + sao_ht_chroma * src_strd;
1421        ps_sao += (1 + ps_sps->i2_pic_wd_in_ctb);
1422    }
1423
1424
1425    /* Top CTB */
1426    if((ps_sao_ctxt->i4_ctb_y > 0))
1427    {
1428        WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
1429        WORD32 sao_wd_chroma = ctb_size - 2 * SAO_SHIFT_CTB;
1430        WORD32 sao_ht_luma = SAO_SHIFT_CTB;
1431        WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
1432
1433        WORD32 ctbx_t_t = 0, ctbx_t_l = 0, ctbx_t_r = 0, ctbx_t_d = 0, ctbx_t = 0;
1434        WORD32 ctby_t_t = 0, ctby_t_l = 0, ctby_t_r = 0, ctby_t_d = 0, ctby_t = 0;
1435        WORD32 au4_idx_t[8], idx_t;
1436
1437        WORD32 remaining_cols;
1438
1439        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
1440        if(remaining_cols <= SAO_SHIFT_CTB)
1441        {
1442            sao_wd_luma += remaining_cols;
1443        }
1444        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
1445        if(remaining_cols <= 2 * SAO_SHIFT_CTB)
1446        {
1447            sao_wd_chroma += remaining_cols;
1448        }
1449
1450        pu1_src_luma -= (sao_ht_luma * src_strd);
1451        pu1_src_chroma -= (sao_ht_chroma * src_strd);
1452        ps_sao -= (ps_sps->i2_pic_wd_in_ctb);
1453        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
1454        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
1455        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_chroma;
1456        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
1457
1458        if(0 != sao_wd_luma)
1459        {
1460            if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
1461            {
1462                if(0 == ps_sao->b3_y_type_idx)
1463                {
1464                    /* Update left, top and top-left */
1465                    for(row = 0; row < sao_ht_luma; row++)
1466                    {
1467                        pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1468                    }
1469                    pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1470
1471                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1472
1473                }
1474
1475                else if(1 == ps_sao->b3_y_type_idx)
1476                {
1477                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1478                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1479                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1480                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1481
1482                    ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
1483                                                                              src_strd,
1484                                                                              pu1_src_left_luma,
1485                                                                              pu1_src_top_luma,
1486                                                                              pu1_sao_src_luma_top_left_ctb,
1487                                                                              ps_sao->b5_y_band_pos,
1488                                                                              ai1_offset_y,
1489                                                                              sao_wd_luma,
1490                                                                              sao_ht_luma
1491                                                                             );
1492                }
1493
1494                else // if(2 <= ps_sao->b3_y_type_idx)
1495                {
1496                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1497                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1498                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1499                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1500
1501                    ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_avail_luma, 255, 8);
1502                    ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_tile_slice_boundary, 0, 8);
1503                    ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_t, 0, 8 * sizeof(WORD32));
1504
1505                    for(i = 0; i < 8; i++)
1506                    {
1507
1508                        au4_ilf_across_tile_slice_enable[i] = 1;
1509                    }
1510                    /******************************************************************
                     * Derive the Top CTB's neighbor pixel's slice indices.
1512                     *
1513                     *               T_T
1514                     *          ____________
1515                     *         |    |       |
1516                     *         | T_L|  T    |T_R
1517                     *         |    | ______|____
1518                     *         |    |  T_D  |    |
1519                     *         |    |       |    |
1520                     *         |____|_______|    |
1521                     *              |            |
1522                     *              |            |
1523                     *              |____________|
1524                     *
1525                     *****************************************************************/
1526
1527                    /*In case of slices*/
1528                    {
1529                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1530                        {
1531
1532                            ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
1533                            ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
1534
1535                            ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
1536                            ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
1537
1538                            ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
1539                            ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
1540
1541                            ctbx_t_d =  ps_sao_ctxt->i4_ctb_x;
1542                            ctby_t_d =  ps_sao_ctxt->i4_ctb_y;
1543
1544                            ctbx_t = ps_sao_ctxt->i4_ctb_x;
1545                            ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
1546
1547                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1548                            {
1549                                /*Calculate neighbor ctb slice indices*/
1550                                if(0 == ps_sao_ctxt->i4_ctb_x)
1551                                {
1552                                    au4_idx_t[0] = -1;
1553                                    au4_idx_t[6] = -1;
1554                                    au4_idx_t[4] = -1;
1555                                }
1556                                else
1557                                {
1558                                    au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1559                                    au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1560                                }
1561                                idx_t   = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1562                                au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1563                                au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1564                                au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1565
1566                                /*Verify that the neighbor ctbs don't cross pic boundary.*/
1567                                if(0 == ps_sao_ctxt->i4_ctb_x)
1568                                {
1569                                    au4_ilf_across_tile_slice_enable[4] = 0;
1570                                    au4_ilf_across_tile_slice_enable[6] = 0;
1571                                    au4_ilf_across_tile_slice_enable[0] = 0;
1572                                }
1573                                else
1574                                {
1575                                    au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1576                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1577                                }
1578
1579
1580
1581                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1582                                au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1583                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1584                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1585                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1586                                /*
1587                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1588                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
1589                                 */
1590
1591                                for(i = 0; i < 8; i++)
1592                                {
1593                                    /*Sets the edges that lie on the slice/tile boundary*/
1594                                    if(au4_idx_t[i] != idx_t)
1595                                    {
1596                                        au1_tile_slice_boundary[i] = 1;
1597                                        /*Check for slice flag at such boundaries*/
1598                                    }
1599                                    else
1600                                    {
1601                                        au4_ilf_across_tile_slice_enable[i] = 1;
1602                                    }
1603                                }
1604                                /*Reset indices*/
1605                                for(i = 0; i < 8; i++)
1606                                {
1607                                    au4_idx_t[i] = 0;
1608                                }
1609                            }
1610
1611                            if(ps_pps->i1_tiles_enabled_flag)
1612                            {
                                /* Calculate availability flags at tile boundary */
1614                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1615                                {
1616                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1617                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1618                                    {
                                        /*Calculate neighbor ctb tile indices*/
1620                                        if(0 == ps_sao_ctxt->i4_ctb_x)
1621                                        {
1622                                            au4_idx_t[0] = -1;
1623                                            au4_idx_t[6] = -1;
1624                                            au4_idx_t[4] = -1;
1625                                        }
1626                                        else
1627                                        {
1628                                            au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1629                                            au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1630                                        }
1631                                        idx_t   = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1632                                        au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1633                                        au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1634                                        au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1635
1636                                        for(i = 0; i < 8; i++)
1637                                        {
1638                                            /*Sets the edges that lie on the tile boundary*/
1639                                            if(au4_idx_t[i] != idx_t)
1640                                            {
1641                                                au1_tile_slice_boundary[i] |= 1;
1642                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1643                                            }
1644                                        }
1645                                    }
1646                                }
1647                            }
1648
1649                            for(i = 0; i < 8; i++)
1650                            {
                                /*Mark a neighbor unavailable when its edge lies on a slice/tile boundary and filtering across that boundary is disabled*/
1652                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1653                                {
1654                                    au1_avail_luma[i] = 0;
1655                                }
1656                            }
1657                        }
1658                    }
1659
1660
1661                    if(0 == ps_sao_ctxt->i4_ctb_x)
1662                    {
1663                        au1_avail_luma[0] = 0;
1664                        au1_avail_luma[4] = 0;
1665                        au1_avail_luma[6] = 0;
1666                    }
1667
1668                    if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
1669                    {
1670                        au1_avail_luma[1] = 0;
1671                        au1_avail_luma[5] = 0;
1672                        au1_avail_luma[7] = 0;
1673                    }
1674
1675                    if(0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma)
1676                    {
1677                        au1_avail_luma[2] = 0;
1678                        au1_avail_luma[4] = 0;
1679                        au1_avail_luma[5] = 0;
1680                    }
1681
1682                    if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1683                    {
1684                        au1_avail_luma[3] = 0;
1685                        au1_avail_luma[6] = 0;
1686                        au1_avail_luma[7] = 0;
1687                    }
1688
1689                    {
1690                        au1_src_top_right[0] = pu1_sao_src_top_left_luma_top_right[0];
1691                        u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
1692                        ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
1693                                                                          src_strd,
1694                                                                          pu1_src_left_luma,
1695                                                                          pu1_src_top_luma,
1696                                                                          pu1_sao_src_luma_top_left_ctb,
1697                                                                          au1_src_top_right,
1698                                                                          &u1_sao_src_top_left_luma_bot_left,
1699                                                                          au1_avail_luma,
1700                                                                          ai1_offset_y,
1701                                                                          sao_wd_luma,
1702                                                                          sao_ht_luma);
1703                    }
1704                }
1705            }
1706        }
1707
1708        if(0 != sao_wd_chroma)
1709        {
1710            if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
1711            {
1712                if(0 == ps_sao->b3_cb_type_idx)
1713                {
1714
1715                    for(row = 0; row < sao_ht_chroma; row++)
1716                    {
1717                        pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1718                        pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1719                    }
1720                    pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1721                    pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1722
1723                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1724
1725                }
1726
1727                else if(1 == ps_sao->b3_cb_type_idx)
1728                {
1729                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1730                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1731                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1732                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1733
1734                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1735                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1736                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1737                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1738
1739                    if(chroma_yuv420sp_vu)
1740                    {
1741                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1742                                                                                    src_strd,
1743                                                                                    pu1_src_left_chroma,
1744                                                                                    pu1_src_top_chroma,
1745                                                                                    pu1_sao_src_chroma_top_left_ctb,
1746                                                                                    ps_sao->b5_cr_band_pos,
1747                                                                                    ps_sao->b5_cb_band_pos,
1748                                                                                    ai1_offset_cr,
1749                                                                                    ai1_offset_cb,
1750                                                                                    sao_wd_chroma,
1751                                                                                    sao_ht_chroma
1752                                                                                   );
1753                    }
1754                    else
1755                    {
1756                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1757                                                                                    src_strd,
1758                                                                                    pu1_src_left_chroma,
1759                                                                                    pu1_src_top_chroma,
1760                                                                                    pu1_sao_src_chroma_top_left_ctb,
1761                                                                                    ps_sao->b5_cb_band_pos,
1762                                                                                    ps_sao->b5_cr_band_pos,
1763                                                                                    ai1_offset_cb,
1764                                                                                    ai1_offset_cr,
1765                                                                                    sao_wd_chroma,
1766                                                                                    sao_ht_chroma
1767                                                                                   );
1768                    }
1769                }
1770                else // if(2 <= ps_sao->b3_cb_type_idx)
1771                {
1772                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1773                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1774                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1775                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1776
1777                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1778                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1779                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1780                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1781
1782                    for(i = 0; i < 8; i++)
1783                    {
1784                        au1_avail_chroma[i] = 255;
1785                        au1_tile_slice_boundary[i] = 0;
1786                        au4_idx_t[i] = 0;
1787                        au4_ilf_across_tile_slice_enable[i] = 1;
1788                    }
1789
1790                    {
1791                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1792                        {
1793                            ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
1794                            ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
1795
1796                            ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
1797                            ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
1798
1799                            ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
1800                            ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
1801
1802                            ctbx_t_d =  ps_sao_ctxt->i4_ctb_x;
1803                            ctby_t_d =  ps_sao_ctxt->i4_ctb_y;
1804
1805                            ctbx_t = ps_sao_ctxt->i4_ctb_x;
1806                            ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
1807
1808                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1809                            {
1810                                if(0 == ps_sao_ctxt->i4_ctb_x)
1811                                {
1812                                    au4_idx_t[0] = -1;
1813                                    au4_idx_t[6] = -1;
1814                                    au4_idx_t[4] = -1;
1815                                }
1816                                else
1817                                {
1818                                    au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1819                                    au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1820                                }
1821                                idx_t   = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1822                                au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1823                                au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1824                                au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1825
1826                                /*Verify that the neighbor ctbs don't cross pic boundary.*/
1827
1828                                if(0 == ps_sao_ctxt->i4_ctb_x)
1829                                {
1830                                    au4_ilf_across_tile_slice_enable[4] = 0;
1831                                    au4_ilf_across_tile_slice_enable[6] = 0;
1832                                    au4_ilf_across_tile_slice_enable[0] = 0;
1833                                }
1834                                else
1835                                {
1836                                    au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1837                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1838                                }
1839
1840                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1841                                au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1842                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1843                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1844                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1845                                /*
1846                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1847                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
1848                                 */
1849                                for(i = 0; i < 8; i++)
1850                                {
1851                                    /*Sets the edges that lie on the slice/tile boundary*/
1852                                    if(au4_idx_t[i] != idx_t)
1853                                    {
1854                                        au1_tile_slice_boundary[i] = 1;
1855                                    }
1856                                    else
1857                                    {
1858                                        /*Indicates that the neighbour belongs to same/dependent slice*/
1859                                        au4_ilf_across_tile_slice_enable[i] = 1;
1860                                    }
1861                                }
1862                                /*Reset indices*/
1863                                for(i = 0; i < 8; i++)
1864                                {
1865                                    au4_idx_t[i] = 0;
1866                                }
1867                            }
1868                            if(ps_pps->i1_tiles_enabled_flag)
1869                            {
                                /* Calculate availability flags at tile boundary */
1871                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1872                                {
1873                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1874                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1875                                    {
1876                                        /*Calculate neighbor ctb slice indices*/
1877                                        if(0 == ps_sao_ctxt->i4_ctb_x)
1878                                        {
1879                                            au4_idx_t[0] = -1;
1880                                            au4_idx_t[6] = -1;
1881                                            au4_idx_t[4] = -1;
1882                                        }
1883                                        else
1884                                        {
1885                                            au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1886                                            au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1887                                        }
1888                                        idx_t   = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1889                                        au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1890                                        au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1891                                        au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1892
1893                                        for(i = 0; i < 8; i++)
1894                                        {
1895                                            /*Sets the edges that lie on the tile boundary*/
1896                                            if(au4_idx_t[i] != idx_t)
1897                                            {
1898                                                au1_tile_slice_boundary[i] |= 1;
1899                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1900                                            }
1901                                        }
1902                                    }
1903                                }
1904                            }
1905                            for(i = 0; i < 8; i++)
1906                            {
1907                                /*Sets the edges that lie on the slice/tile boundary*/
1908                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1909                                {
1910                                    au1_avail_chroma[i] = 0;
1911                                }
1912                            }
1913
1914                        }
1915                    }
1916                    if(0 == ps_sao_ctxt->i4_ctb_x)
1917                    {
1918                        au1_avail_chroma[0] = 0;
1919                        au1_avail_chroma[4] = 0;
1920                        au1_avail_chroma[6] = 0;
1921                    }
1922
1923                    if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
1924                    {
1925                        au1_avail_chroma[1] = 0;
1926                        au1_avail_chroma[5] = 0;
1927                        au1_avail_chroma[7] = 0;
1928                    }
1929
1930                    if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
1931                    {
1932                        au1_avail_chroma[2] = 0;
1933                        au1_avail_chroma[4] = 0;
1934                        au1_avail_chroma[5] = 0;
1935                    }
1936
1937                    if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1938                    {
1939                        au1_avail_chroma[3] = 0;
1940                        au1_avail_chroma[6] = 0;
1941                        au1_avail_chroma[7] = 0;
1942                    }
1943
1944                    {
1945                        au1_src_top_right[0] = pu1_sao_src_top_left_chroma_top_right[0];
1946                        au1_src_top_right[1] = pu1_sao_src_top_left_chroma_top_right[1];
1947                        au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
1948                        au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
1949
1950                        if(chroma_yuv420sp_vu)
1951                        {
1952                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1953                                                                                 src_strd,
1954                                                                                 pu1_src_left_chroma,
1955                                                                                 pu1_src_top_chroma,
1956                                                                                 pu1_sao_src_chroma_top_left_ctb,
1957                                                                                 au1_src_top_right,
1958                                                                                 au1_sao_src_top_left_chroma_bot_left,
1959                                                                                 au1_avail_chroma,
1960                                                                                 ai1_offset_cr,
1961                                                                                 ai1_offset_cb,
1962                                                                                 sao_wd_chroma,
1963                                                                                 sao_ht_chroma);
1964                        }
1965                        else
1966                        {
1967                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1968                                                                                 src_strd,
1969                                                                                 pu1_src_left_chroma,
1970                                                                                 pu1_src_top_chroma,
1971                                                                                 pu1_sao_src_chroma_top_left_ctb,
1972                                                                                 au1_src_top_right,
1973                                                                                 au1_sao_src_top_left_chroma_bot_left,
1974                                                                                 au1_avail_chroma,
1975                                                                                 ai1_offset_cb,
1976                                                                                 ai1_offset_cr,
1977                                                                                 sao_wd_chroma,
1978                                                                                 sao_ht_chroma);
1979                        }
1980                    }
1981
1982                }
1983            }
1984        }
1985
1986        pu1_src_luma += sao_ht_luma * src_strd;
1987        pu1_src_chroma += sao_ht_chroma * src_strd;
1988        ps_sao += (ps_sps->i2_pic_wd_in_ctb);
1989    }
1990
1991    /* Left CTB */
1992    if(ps_sao_ctxt->i4_ctb_x > 0)
1993    {
1994        WORD32 sao_wd_luma = SAO_SHIFT_CTB;
1995        WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
1996        WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
1997        WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
1998
1999        WORD32 ctbx_l_t = 0, ctbx_l_l = 0, ctbx_l_r = 0, ctbx_l_d = 0, ctbx_l = 0;
2000        WORD32 ctby_l_t = 0, ctby_l_l = 0, ctby_l_r = 0, ctby_l_d = 0, ctby_l = 0;
2001        WORD32 au4_idx_l[8], idx_l;
2002
2003        WORD32 remaining_rows;
2004        remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
2005        if(remaining_rows <= SAO_SHIFT_CTB)
2006        {
2007            sao_ht_luma += remaining_rows;
2008        }
2009        remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
2010        if(remaining_rows <= SAO_SHIFT_CTB)
2011        {
2012            sao_ht_chroma += remaining_rows;
2013        }
2014
2015        pu1_src_luma -= sao_wd_luma;
2016        pu1_src_chroma -= sao_wd_chroma;
2017        ps_sao -= 1;
2018        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
2019        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
2020        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2021        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2022
2023
2024        if(0 != sao_ht_luma)
2025        {
2026            if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
2027            {
2028                if(0 == ps_sao->b3_y_type_idx)
2029                {
2030                    /* Update left, top and top-left */
2031                    for(row = 0; row < sao_ht_luma; row++)
2032                    {
2033                        pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2034                    }
2035                    /*Update in next location*/
2036                    pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2037
2038                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2039
2040                }
2041
2042                else if(1 == ps_sao->b3_y_type_idx)
2043                {
2044                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2045                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2046                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2047                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2048
2049                    ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
2050                                                                              src_strd,
2051                                                                              pu1_src_left_luma,
2052                                                                              pu1_src_top_luma,
2053                                                                              pu1_sao_src_top_left_luma_curr_ctb,
2054                                                                              ps_sao->b5_y_band_pos,
2055                                                                              ai1_offset_y,
2056                                                                              sao_wd_luma,
2057                                                                              sao_ht_luma
2058                                                                             );
2059                }
2060
2061                else // if(2 <= ps_sao->b3_y_type_idx)
2062                {
2063                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2064                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2065                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2066                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2067
2068                    for(i = 0; i < 8; i++)
2069                    {
2070                        au1_avail_luma[i] = 255;
2071                        au1_tile_slice_boundary[i] = 0;
2072                        au4_idx_l[i] = 0;
2073                        au4_ilf_across_tile_slice_enable[i] = 1;
2074                    }
2075                    /******************************************************************
                     * Derive the Left CTB's neighbour pixel's slice indices.
2077                     *
2078                     *
2079                     *          ____________
2080                     *         |    |       |
2081                     *         | L_T|       |
2082                     *         |____|_______|____
2083                     *         |    |       |    |
2084                     *     L_L |  L |  L_R  |    |
2085                     *         |____|_______|    |
2086                     *              |            |
2087                     *          L_D |            |
2088                     *              |____________|
2089                     *
2090                     *****************************************************************/
2091
2092                    /*In case of slices or tiles*/
2093                    {
2094                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2095                        {
2096                            ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
2097                            ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
2098
2099                            ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
2100                            ctby_l_l = ps_sao_ctxt->i4_ctb_y;
2101
2102                            ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
2103                            ctby_l_r = ps_sao_ctxt->i4_ctb_y;
2104
2105                            ctbx_l_d =  ps_sao_ctxt->i4_ctb_x - 1;
2106                            ctby_l_d =  ps_sao_ctxt->i4_ctb_y;
2107
2108                            ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
2109                            ctby_l = ps_sao_ctxt->i4_ctb_y;
2110
2111                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2112                            {
2113                                if(0 == ps_sao_ctxt->i4_ctb_y)
2114                                {
2115                                    au4_idx_l[2] = -1;
2116                                    au4_idx_l[4] = -1;
2117                                    au4_idx_l[5] = -1;
2118                                }
2119                                else
2120                                {
2121                                    au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2122                                    au4_idx_l[5] =  pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2123                                }
2124                                idx_l   = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2125                                au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2126                                au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2127                                au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2128
2129                                /*Verify that the neighbor ctbs don't cross pic boundary.*/
2130                                if(0 == ps_sao_ctxt->i4_ctb_y)
2131                                {
2132                                    au4_ilf_across_tile_slice_enable[2] = 0;
2133                                    au4_ilf_across_tile_slice_enable[4] = 0;
2134                                    au4_ilf_across_tile_slice_enable[5] = 0;
2135                                }
2136                                else
2137                                {
2138                                    au4_ilf_across_tile_slice_enable[2] =  (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2139                                    au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2140
2141                                }
2142                                //TODO: ILF flag checks for [0] and [6] is missing.
2143                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2144                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2145                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2146                                /*
2147                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2148                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2149                                 */
2150                                for(i = 0; i < 8; i++)
2151                                {
2152                                    /*Sets the edges that lie on the slice/tile boundary*/
2153                                    if(au4_idx_l[i] != idx_l)
2154                                    {
2155                                        au1_tile_slice_boundary[i] = 1;
2156                                    }
2157                                    else
2158                                    {
2159                                        au4_ilf_across_tile_slice_enable[i] = 1;
2160                                    }
2161                                }
2162                                /*Reset indices*/
2163                                for(i = 0; i < 8; i++)
2164                                {
2165                                    au4_idx_l[i] = 0;
2166                                }
2167                            }
2168
2169                            if(ps_pps->i1_tiles_enabled_flag)
2170                            {
                                /* Calculate availability flags at tile boundary */
2172                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2173                                {
2174                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2175                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2176                                    {
2177                                        if(0 == ps_sao_ctxt->i4_ctb_y)
2178                                        {
2179                                            au4_idx_l[2] = -1;
2180                                            au4_idx_l[4] = -1;
2181                                            au4_idx_l[5] = -1;
2182                                        }
2183                                        else
2184                                        {
2185                                            au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2186                                            au4_idx_l[5] =  pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2187                                        }
2188
2189                                        idx_l   = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2190                                        au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2191                                        au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2192                                        au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2193
2194                                        for(i = 0; i < 8; i++)
2195                                        {
                                            /*Sets the edges that lie on the tile boundary*/
2197                                            if(au4_idx_l[i] != idx_l)
2198                                            {
2199                                                au1_tile_slice_boundary[i] |= 1;
2200                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
2201                                            }
2202                                        }
2203                                    }
2204                                }
2205                            }
2206
2207                            for(i = 0; i < 8; i++)
2208                            {
2209                                /*Sets the edges that lie on the slice/tile boundary*/
2210                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2211                                {
2212                                    au1_avail_luma[i] = 0;
2213                                }
2214                            }
2215                        }
2216                    }
2217                    if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
2218                    {
2219                        au1_avail_luma[0] = 0;
2220                        au1_avail_luma[4] = 0;
2221                        au1_avail_luma[6] = 0;
2222                    }
2223                    if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
2224                    {
2225                        au1_avail_luma[1] = 0;
2226                        au1_avail_luma[5] = 0;
2227                        au1_avail_luma[7] = 0;
2228                    }
2229
2230                    if(0 == ps_sao_ctxt->i4_ctb_y)
2231                    {
2232                        au1_avail_luma[2] = 0;
2233                        au1_avail_luma[4] = 0;
2234                        au1_avail_luma[5] = 0;
2235                    }
2236
2237                    if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y  << log2_ctb_size) <= sao_ht_luma)
2238                    {
2239                        au1_avail_luma[3] = 0;
2240                        au1_avail_luma[6] = 0;
2241                        au1_avail_luma[7] = 0;
2242                    }
2243
2244                    {
2245                        au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
2246                        u1_sao_src_top_left_luma_bot_left = pu1_sao_src_top_left_luma_bot_left[0];
2247                        ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
2248                                                                          src_strd,
2249                                                                          pu1_src_left_luma,
2250                                                                          pu1_src_top_luma,
2251                                                                          pu1_sao_src_top_left_luma_curr_ctb,
2252                                                                          au1_src_top_right,
2253                                                                          &u1_sao_src_top_left_luma_bot_left,
2254                                                                          au1_avail_luma,
2255                                                                          ai1_offset_y,
2256                                                                          sao_wd_luma,
2257                                                                          sao_ht_luma);
2258                    }
2259
2260                }
2261            }
2262        }
2263
2264        if(0 != sao_ht_chroma)
2265        {
2266            if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
2267            {
2268                if(0 == ps_sao->b3_cb_type_idx)
2269                {
2270                    for(row = 0; row < sao_ht_chroma; row++)
2271                    {
2272                        pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2273                        pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2274                    }
2275                    pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2276                    pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2277
2278                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2279                }
2280
2281                else if(1 == ps_sao->b3_cb_type_idx)
2282                {
2283                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2284                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2285                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2286                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2287
2288                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2289                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2290                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2291                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2292
2293                    if(chroma_yuv420sp_vu)
2294                    {
2295                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2296                                                                                    src_strd,
2297                                                                                    pu1_src_left_chroma,
2298                                                                                    pu1_src_top_chroma,
2299                                                                                    pu1_sao_src_top_left_chroma_curr_ctb,
2300                                                                                    ps_sao->b5_cr_band_pos,
2301                                                                                    ps_sao->b5_cb_band_pos,
2302                                                                                    ai1_offset_cr,
2303                                                                                    ai1_offset_cb,
2304                                                                                    sao_wd_chroma,
2305                                                                                    sao_ht_chroma
2306                                                                                   );
2307                    }
2308                    else
2309                    {
2310                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2311                                                                                    src_strd,
2312                                                                                    pu1_src_left_chroma,
2313                                                                                    pu1_src_top_chroma,
2314                                                                                    pu1_sao_src_top_left_chroma_curr_ctb,
2315                                                                                    ps_sao->b5_cb_band_pos,
2316                                                                                    ps_sao->b5_cr_band_pos,
2317                                                                                    ai1_offset_cb,
2318                                                                                    ai1_offset_cr,
2319                                                                                    sao_wd_chroma,
2320                                                                                    sao_ht_chroma
2321                                                                                   );
2322                    }
2323                }
2324
2325                else // if(2 <= ps_sao->b3_cb_type_idx)
2326                {
2327                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2328                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2329                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2330                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2331
2332                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2333                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2334                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2335                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2336
2337                    for(i = 0; i < 8; i++)
2338                    {
2339                        au1_avail_chroma[i] = 255;
2340                        au1_tile_slice_boundary[i] = 0;
2341                        au4_idx_l[i] = 0;
2342                        au4_ilf_across_tile_slice_enable[i] = 1;
2343                    }
2344                    /*In case of slices*/
2345                    {
2346                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2347                        {
2348                            ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
2349                            ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
2350
2351                            ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
2352                            ctby_l_l = ps_sao_ctxt->i4_ctb_y;
2353
2354                            ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
2355                            ctby_l_r = ps_sao_ctxt->i4_ctb_y;
2356
2357                            ctbx_l_d =  ps_sao_ctxt->i4_ctb_x - 1;
2358                            ctby_l_d =  ps_sao_ctxt->i4_ctb_y;
2359
2360                            ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
2361                            ctby_l = ps_sao_ctxt->i4_ctb_y;
2362
2363                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2364                            {
2365                                if(0 == ps_sao_ctxt->i4_ctb_y)
2366                                {
2367                                    au4_idx_l[2] = -1;
2368                                    au4_idx_l[4] = -1;
2369                                    au4_idx_l[5] = -1;
2370                                }
2371                                else
2372                                {
2373                                    au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2374                                    au4_idx_l[5] =  pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2375                                }
2376                                idx_l   = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2377                                au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2378                                au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2379                                au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2380
2381                                /*Verify that the neighbour ctbs don't cross pic boundary.*/
2382                                if(0 == ps_sao_ctxt->i4_ctb_y)
2383                                {
2384                                    au4_ilf_across_tile_slice_enable[2] = 0;
2385                                    au4_ilf_across_tile_slice_enable[4] = 0;
2386                                    au4_ilf_across_tile_slice_enable[5] = 0;
2387                                }
2388                                else
2389                                {
2390                                    au4_ilf_across_tile_slice_enable[2] =  (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2391                                    au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2392                                }
2393                                //  au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2394                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2395                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2396                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2397                                /*
2398                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2399                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2400                                 */
2401                                for(i = 0; i < 8; i++)
2402                                {
2403                                    /*Sets the edges that lie on the slice/tile boundary*/
2404                                    if(au4_idx_l[i] != idx_l)
2405                                    {
2406                                        au1_tile_slice_boundary[i] = 1;
2407                                    }
2408                                    else
2409                                    {
2410                                        au4_ilf_across_tile_slice_enable[i] = 1;
2411                                    }
2412                                }
2413                                /*Reset indices*/
2414                                for(i = 0; i < 8; i++)
2415                                {
2416                                    au4_idx_l[i] = 0;
2417                                }
2418                            }
2419                            if(ps_pps->i1_tiles_enabled_flag)
2420                            {
2421                                /* Calculate availability flags at tile boundary */
2422                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2423                                {
2424                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2425                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2426                                    {
2427                                        if(0 == ps_sao_ctxt->i4_ctb_y)
2428                                        {
2429                                            au4_idx_l[2] = -1;
2430                                            au4_idx_l[4] = -1;
2431                                            au4_idx_l[5] = -1;
2432                                        }
2433                                        else
2434                                        {
2435                                            au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2436                                            au4_idx_l[5] =  pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2437                                        }
2438
2439                                        idx_l   = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2440                                        au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2441                                        au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2442                                        au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2443
2444                                        for(i = 0; i < 8; i++)
2445                                        {
2446                                            /*Sets the edges that lie on the slice/tile boundary*/
2447                                            if(au4_idx_l[i] != idx_l)
2448                                            {
2449                                                au1_tile_slice_boundary[i] |= 1;
2450                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2451                                            }
2452                                        }
2453                                    }
2454                                }
2455                            }
2456                            for(i = 0; i < 8; i++)
2457                            {
2458                                /*Marks a neighbour unavailable when it lies across a slice/tile boundary and filtering across that boundary is disabled*/
2459                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2460                                {
2461                                    au1_avail_chroma[i] = 0;
2462                                }
2463                            }
2464                        }
2465                    }
2466                    if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
2467                    {
2468                        au1_avail_chroma[0] = 0;
2469                        au1_avail_chroma[4] = 0;
2470                        au1_avail_chroma[6] = 0;
2471                    }
2472
2473                    if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
2474                    {
2475                        au1_avail_chroma[1] = 0;
2476                        au1_avail_chroma[5] = 0;
2477                        au1_avail_chroma[7] = 0;
2478                    }
2479
2480                    if(0 == ps_sao_ctxt->i4_ctb_y)
2481                    {
2482                        au1_avail_chroma[2] = 0;
2483                        au1_avail_chroma[4] = 0;
2484                        au1_avail_chroma[5] = 0;
2485                    }
2486
2487                    if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y  << (log2_ctb_size - 1)) <= sao_ht_chroma)
2488                    {
2489                        au1_avail_chroma[3] = 0;
2490                        au1_avail_chroma[6] = 0;
2491                        au1_avail_chroma[7] = 0;
2492                    }
2493
2494                    {
2495                        au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
2496                        au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
2497                        au1_src_bot_left[0] = pu1_sao_src_top_left_chroma_bot_left[0];
2498                        au1_src_bot_left[1] = pu1_sao_src_top_left_chroma_bot_left[1];
2499                        //au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
2500                        //au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
2501                        if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_x != ps_sps->i2_pic_wd_in_ctb - 1))
2502                        {
2503                            au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
2504                            au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
2505                        }
2506
2507
2508                        if(chroma_yuv420sp_vu)
2509                        {
2510                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2511                                                                                 src_strd,
2512                                                                                 pu1_src_left_chroma,
2513                                                                                 pu1_src_top_chroma,
2514                                                                                 pu1_sao_src_top_left_chroma_curr_ctb,
2515                                                                                 au1_src_top_right,
2516                                                                                 au1_src_bot_left,
2517                                                                                 au1_avail_chroma,
2518                                                                                 ai1_offset_cr,
2519                                                                                 ai1_offset_cb,
2520                                                                                 sao_wd_chroma,
2521                                                                                 sao_ht_chroma);
2522                        }
2523                        else
2524                        {
2525                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2526                                                                                 src_strd,
2527                                                                                 pu1_src_left_chroma,
2528                                                                                 pu1_src_top_chroma,
2529                                                                                 pu1_sao_src_top_left_chroma_curr_ctb,
2530                                                                                 au1_src_top_right,
2531                                                                                 au1_src_bot_left,
2532                                                                                 au1_avail_chroma,
2533                                                                                 ai1_offset_cb,
2534                                                                                 ai1_offset_cr,
2535                                                                                 sao_wd_chroma,
2536                                                                                 sao_ht_chroma);
2537                        }
2538                    }
2539
2540                }
2541            }
2542
2543        }
2544        pu1_src_luma += sao_wd_luma;
2545        pu1_src_chroma += sao_wd_chroma;
2546        ps_sao += 1;
2547    }
2548
2549
2550    /* Current CTB */
2551    {
2552        WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
2553        WORD32 sao_wd_chroma = ctb_size - SAO_SHIFT_CTB * 2;
2554        WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
2555        WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
2556        WORD32 ctbx_c_t = 0, ctbx_c_l = 0, ctbx_c_r = 0, ctbx_c_d = 0, ctbx_c = 0;
2557        WORD32 ctby_c_t = 0, ctby_c_l = 0, ctby_c_r = 0, ctby_c_d = 0, ctby_c = 0;
2558        WORD32 au4_idx_c[8], idx_c;
2559
2560        WORD32 remaining_rows;
2561        WORD32 remaining_cols;
2562
2563        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
2564        if(remaining_cols <= SAO_SHIFT_CTB)
2565        {
2566            sao_wd_luma += remaining_cols;
2567        }
2568        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
2569        if(remaining_cols <= 2 * SAO_SHIFT_CTB)
2570        {
2571            sao_wd_chroma += remaining_cols;
2572        }
2573
2574        remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
2575        if(remaining_rows <= SAO_SHIFT_CTB)
2576        {
2577            sao_ht_luma += remaining_rows;
2578        }
2579        remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
2580        if(remaining_rows <= SAO_SHIFT_CTB)
2581        {
2582            sao_ht_chroma += remaining_rows;
2583        }
2584
2585        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
2586        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
2587        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2588        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2589
2590        if((0 != sao_wd_luma) && (0 != sao_ht_luma))
2591        {
2592            if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
2593            {
2594                if(0 == ps_sao->b3_y_type_idx)
2595                {
2596                    /* Update left, top and top-left */
2597                    for(row = 0; row < sao_ht_luma; row++)
2598                    {
2599                        pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2600                    }
2601                    pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2602
2603                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2604
2605                    pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2606
2607                }
2608
2609                else if(1 == ps_sao->b3_y_type_idx)
2610                {
2611                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2612                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2613                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2614                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2615
2616                    ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
2617                                                                              src_strd,
2618                                                                              pu1_src_left_luma,
2619                                                                              pu1_src_top_luma,
2620                                                                              pu1_sao_src_top_left_luma_curr_ctb,
2621                                                                              ps_sao->b5_y_band_pos,
2622                                                                              ai1_offset_y,
2623                                                                              sao_wd_luma,
2624                                                                              sao_ht_luma
2625                                                                             );
2626                }
2627
2628                else // if(2 <= ps_sao->b3_y_type_idx)
2629                {
2630                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2631                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2632                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2633                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2634
2635                    for(i = 0; i < 8; i++)
2636                    {
2637                        au1_avail_luma[i] = 255;
2638                        au1_tile_slice_boundary[i] = 0;
2639                        au4_idx_c[i] = 0;
2640                        au4_ilf_across_tile_slice_enable[i] = 1;
2641                    }
2642                    /******************************************************************
2643                     * Derive the slice indices of the current CTB's neighbours.
2644                     *
2645                     *
2646                     *          ____________
2647                     *         |    |       |
2648                     *         |    | C_T   |
2649                     *         |____|_______|____
2650                     *         |    |       |    |
2651                     *         | C_L|   C   | C_R|
2652                     *         |____|_______|    |
2653                     *              |  C_D       |
2654                     *              |            |
2655                     *              |____________|
2656                     *
2657                     *****************************************************************/
2658
2659                    /*In case of slices*/
2660                    {
2661                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2662                        {
2663                            ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
2664                            ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
2665
2666                            ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
2667                            ctby_c_l = ps_sao_ctxt->i4_ctb_y;
2668
2669                            ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
2670                            ctby_c_r = ps_sao_ctxt->i4_ctb_y;
2671
2672                            ctbx_c_d =  ps_sao_ctxt->i4_ctb_x;
2673                            ctby_c_d =  ps_sao_ctxt->i4_ctb_y;
2674
2675                            ctbx_c = ps_sao_ctxt->i4_ctb_x;
2676                            ctby_c = ps_sao_ctxt->i4_ctb_y;
2677
2678                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2679                            {
2680                                if(0 == ps_sao_ctxt->i4_ctb_x)
2681                                {
2682                                    au4_idx_c[6] = -1;
2683                                    au4_idx_c[0] = -1;
2684                                    au4_idx_c[4] = -1;
2685                                }
2686                                else
2687                                {
2688                                    au4_idx_c[0] =  au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2689                                }
2690
2691                                if(0 == ps_sao_ctxt->i4_ctb_y)
2692                                {
2693                                    au4_idx_c[2] = -1;
2694                                    au4_idx_c[5] = -1;
2695                                    au4_idx_c[4] = -1;
2696                                }
2697                                else
2698                                {
2699                                    au4_idx_c[4] =  pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
2700                                    au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2701                                }
2702                                idx_c   = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2703                                au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2704                                au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2705
2706                                if(0 == ps_sao_ctxt->i4_ctb_x)
2707                                {
2708                                    au4_ilf_across_tile_slice_enable[6] = 0;
2709                                    au4_ilf_across_tile_slice_enable[0] = 0;
2710                                    au4_ilf_across_tile_slice_enable[4] = 0;
2711                                }
2712                                else
2713                                {
2714                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
2715                                    au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;;
2716                                }
2717                                if(0 == ps_sao_ctxt->i4_ctb_y)
2718                                {
2719                                    au4_ilf_across_tile_slice_enable[2] = 0;
2720                                    au4_ilf_across_tile_slice_enable[4] = 0;
2721                                    au4_ilf_across_tile_slice_enable[5] = 0;
2722                                }
2723                                else
2724                                {
2725                                    au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
2726                                    au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2727                                }
2728                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2729                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2730                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2731
2732                                /*
2733                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2734                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2735                                 */
2736                                for(i = 0; i < 8; i++)
2737                                {
2738                                    /*Sets the edges that lie on the slice/tile boundary*/
2739                                    if(au4_idx_c[i] != idx_c)
2740                                    {
2741                                        au1_tile_slice_boundary[i] = 1;
2742                                    }
2743                                    else
2744                                    {
2745                                        au4_ilf_across_tile_slice_enable[i] = 1;
2746                                    }
2747                                }
2748                                /*Reset indices*/
2749                                for(i = 0; i < 8; i++)
2750                                {
2751                                    au4_idx_c[i] = 0;
2752                                }
2753                            }
2754
2755                            if(ps_pps->i1_tiles_enabled_flag)
2756                            {
                                /* Calculate availability flags at tile boundary */
2758                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2759                                {
2760                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2761                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2762                                    {
2763                                        if(0 == ps_sao_ctxt->i4_ctb_x)
2764                                        {
2765                                            au4_idx_c[6] = -1;
2766                                            au4_idx_c[0] = -1;
2767                                            au4_idx_c[4] = -1;
2768                                        }
2769                                        else
2770                                        {
2771                                            au4_idx_c[0] =  au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2772                                        }
2773
2774                                        if(0 == ps_sao_ctxt->i4_ctb_y)
2775                                        {
2776                                            au4_idx_c[2] = -1;
2777                                            au4_idx_c[5] = -1;
2778                                            au4_idx_c[4] = -1;
2779                                        }
2780                                        else
2781                                        {
2782                                            au4_idx_c[4] =  pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
2783                                            au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2784                                        }
2785                                        idx_c   = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2786                                        au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2787                                        au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2788
2789                                        for(i = 0; i < 8; i++)
2790                                        {
2791                                            /*Sets the edges that lie on the slice/tile boundary*/
2792                                            if(au4_idx_c[i] != idx_c)
2793                                            {
2794                                                au1_tile_slice_boundary[i] |= 1;
2795                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2796                                            }
2797                                        }
2798                                    }
2799                                }
2800                            }
2801
2802                            for(i = 0; i < 8; i++)
2803                            {
                                /* Mark a neighbour unavailable when its edge lies on a slice/tile boundary and filtering across that boundary is disabled */
2805                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2806                                {
2807                                    au1_avail_luma[i] = 0;
2808                                }
2809                            }
2810
2811                        }
2812                    }
2813                    if(0 == ps_sao_ctxt->i4_ctb_x)
2814                    {
2815                        au1_avail_luma[0] = 0;
2816                        au1_avail_luma[4] = 0;
2817                        au1_avail_luma[6] = 0;
2818                    }
2819
2820                    if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
2821                    {
2822                        au1_avail_luma[1] = 0;
2823                        au1_avail_luma[5] = 0;
2824                        au1_avail_luma[7] = 0;
2825                    }
2826
2827                    if(0 == ps_sao_ctxt->i4_ctb_y)
2828                    {
2829                        au1_avail_luma[2] = 0;
2830                        au1_avail_luma[4] = 0;
2831                        au1_avail_luma[5] = 0;
2832                    }
2833
2834                    if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y  << log2_ctb_size) <= sao_ht_luma)
2835                    {
2836                        au1_avail_luma[3] = 0;
2837                        au1_avail_luma[6] = 0;
2838                        au1_avail_luma[7] = 0;
2839                    }
2840
2841                    {
2842                        au1_src_top_right[0] = pu1_src_luma[sao_wd_luma - src_strd];
2843                        u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
2844
2845                        ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
2846                                                                          src_strd,
2847                                                                          pu1_src_left_luma,
2848                                                                          pu1_src_top_luma,
2849                                                                          pu1_sao_src_top_left_luma_curr_ctb,
2850                                                                          au1_src_top_right,
2851                                                                          &u1_sao_src_top_left_luma_bot_left,
2852                                                                          au1_avail_luma,
2853                                                                          ai1_offset_y,
2854                                                                          sao_wd_luma,
2855                                                                          sao_ht_luma);
2856                    }
2857                    pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2858                    pu1_sao_src_top_left_luma_bot_left[0] = pu1_src_luma[(sao_ht_luma)*src_strd + sao_wd_luma - 1];
2859                }
2860            }
2861        }
2862
2863        if((0 != sao_wd_chroma) && (0 != sao_ht_chroma))
2864        {
2865            if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
2866            {
2867                if(0 == ps_sao->b3_cb_type_idx)
2868                {
2869                    for(row = 0; row < sao_ht_chroma; row++)
2870                    {
2871                        pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2872                        pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2873                    }
2874                    pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2875                    pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2876
2877                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2878
2879                    pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
2880                    pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
2881                }
2882
2883                else if(1 == ps_sao->b3_cb_type_idx)
2884                {
2885                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2886                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2887                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2888                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2889
2890                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2891                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2892                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2893                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2894
2895                    if(chroma_yuv420sp_vu)
2896                    {
2897                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2898                                                                                    src_strd,
2899                                                                                    pu1_src_left_chroma,
2900                                                                                    pu1_src_top_chroma,
2901                                                                                    pu1_sao_src_top_left_chroma_curr_ctb,
2902                                                                                    ps_sao->b5_cr_band_pos,
2903                                                                                    ps_sao->b5_cb_band_pos,
2904                                                                                    ai1_offset_cr,
2905                                                                                    ai1_offset_cb,
2906                                                                                    sao_wd_chroma,
2907                                                                                    sao_ht_chroma
2908                                                                                   );
2909                    }
2910                    else
2911                    {
2912                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2913                                                                                    src_strd,
2914                                                                                    pu1_src_left_chroma,
2915                                                                                    pu1_src_top_chroma,
2916                                                                                    pu1_sao_src_top_left_chroma_curr_ctb,
2917                                                                                    ps_sao->b5_cb_band_pos,
2918                                                                                    ps_sao->b5_cr_band_pos,
2919                                                                                    ai1_offset_cb,
2920                                                                                    ai1_offset_cr,
2921                                                                                    sao_wd_chroma,
2922                                                                                    sao_ht_chroma
2923                                                                                   );
2924                    }
2925                }
2926
2927                else // if(2 <= ps_sao->b3_cb_type_idx)
2928                {
2929                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2930                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2931                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2932                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2933
2934                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2935                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2936                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2937                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2938
2939                    for(i = 0; i < 8; i++)
2940                    {
2941                        au1_avail_chroma[i] = 255;
2942                        au1_tile_slice_boundary[i] = 0;
2943                        au4_idx_c[i] = 0;
2944                        au4_ilf_across_tile_slice_enable[i] = 1;
2945                    }
2946                    {
2947                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2948                        {
2949                            ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
2950                            ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
2951
2952                            ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
2953                            ctby_c_l = ps_sao_ctxt->i4_ctb_y;
2954
2955                            ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
2956                            ctby_c_r = ps_sao_ctxt->i4_ctb_y;
2957
2958                            ctbx_c_d =  ps_sao_ctxt->i4_ctb_x;
2959                            ctby_c_d =  ps_sao_ctxt->i4_ctb_y;
2960
2961                            ctbx_c = ps_sao_ctxt->i4_ctb_x;
2962                            ctby_c = ps_sao_ctxt->i4_ctb_y;
2963
2964                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2965                            {
2966                                if(0 == ps_sao_ctxt->i4_ctb_x)
2967                                {
2968                                    au4_idx_c[0] = -1;
2969                                    au4_idx_c[4] = -1;
2970                                    au4_idx_c[6] = -1;
2971                                }
2972                                else
2973                                {
2974                                    au4_idx_c[0] =  au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2975                                }
2976
2977                                if(0 == ps_sao_ctxt->i4_ctb_y)
2978                                {
2979                                    au4_idx_c[2] = -1;
2980                                    au4_idx_c[4] = -1;
2981                                    au4_idx_c[5] = -1;
2982                                }
2983                                else
2984                                {
2985                                    au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2986                                    au4_idx_c[4] =  pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
2987                                }
2988                                idx_c = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2989                                au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2990                                au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2991
2992                                if(0 == ps_sao_ctxt->i4_ctb_x)
2993                                {
2994                                    au4_ilf_across_tile_slice_enable[0] = 0;
2995                                    au4_ilf_across_tile_slice_enable[4] = 0;
2996                                    au4_ilf_across_tile_slice_enable[6] = 0;
2997                                }
2998                                else
2999                                {
3000                                    au4_ilf_across_tile_slice_enable[6] &= (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
3001                                    au4_ilf_across_tile_slice_enable[0] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3002                                }
3003
3004                                if(0 == ps_sao_ctxt->i4_ctb_y)
3005                                {
3006                                    au4_ilf_across_tile_slice_enable[2] = 0;
3007                                    au4_ilf_across_tile_slice_enable[4] = 0;
3008                                    au4_ilf_across_tile_slice_enable[5] = 0;
3009                                }
3010                                else
3011                                {
3012                                    au4_ilf_across_tile_slice_enable[2] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3013                                    au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
3014                                }
3015
3016                                au4_ilf_across_tile_slice_enable[1] &= (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
3017                                au4_ilf_across_tile_slice_enable[3] &= (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
3018                                au4_ilf_across_tile_slice_enable[7] &= (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
3019
3020                                /*
3021                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
3022                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
3023                                 */
3024                                for(i = 0; i < 8; i++)
3025                                {
3026                                    /*Sets the edges that lie on the slice/tile boundary*/
3027                                    if(au4_idx_c[i] != idx_c)
3028                                    {
3029                                        au1_tile_slice_boundary[i] = 1;
3030                                    }
3031                                    else
3032                                    {
3033                                        au4_ilf_across_tile_slice_enable[i] = 1;
3034                                    }
3035                                }
3036                                /*Reset indices*/
3037                                for(i = 0; i < 8; i++)
3038                                {
3039                                    au4_idx_c[i] = 0;
3040                                }
3041                            }
3042
3043                            if(ps_pps->i1_tiles_enabled_flag)
3044                            {
                                /* Calculate availability flags at tile boundary */
3046                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
3047                                {
3048                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
3049                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
3050                                    {
3051                                        if(0 == ps_sao_ctxt->i4_ctb_x)
3052                                        {
3053                                            au4_idx_c[6] = -1;
3054                                            au4_idx_c[0] = -1;
3055                                            au4_idx_c[4] = -1;
3056                                        }
3057                                        else
3058                                        {
3059                                            au4_idx_c[0] =  au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
3060                                        }
3061
3062                                        if(0 == ps_sao_ctxt->i4_ctb_y)
3063                                        {
3064                                            au4_idx_c[2] = -1;
3065                                            au4_idx_c[5] = -1;
3066                                            au4_idx_c[4] = -1;
3067                                        }
3068                                        else
3069                                        {
3070                                            au4_idx_c[4] =  pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
3071                                            au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3072                                        }
3073                                        idx_c   = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
3074                                        au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
3075                                        au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
3076
3077                                        for(i = 0; i < 8; i++)
3078                                        {
3079                                            /*Sets the edges that lie on the slice/tile boundary*/
3080                                            if(au4_idx_c[i] != idx_c)
3081                                            {
3082                                                au1_tile_slice_boundary[i] |= 1;
3083                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
3084                                            }
3085                                        }
3086                                    }
3087                                }
3088                            }
3089
3090                            for(i = 0; i < 8; i++)
3091                            {
                                /* Mark a neighbour unavailable when its edge lies on a slice/tile boundary and filtering across that boundary is disabled */
3093                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
3094                                {
3095                                    au1_avail_chroma[i] = 0;
3096                                }
3097                            }
3098                        }
3099                    }
3100
3101                    if(0 == ps_sao_ctxt->i4_ctb_x)
3102                    {
3103                        au1_avail_chroma[0] = 0;
3104                        au1_avail_chroma[4] = 0;
3105                        au1_avail_chroma[6] = 0;
3106                    }
3107
3108                    if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
3109                    {
3110                        au1_avail_chroma[1] = 0;
3111                        au1_avail_chroma[5] = 0;
3112                        au1_avail_chroma[7] = 0;
3113                    }
3114
3115                    if(0 == ps_sao_ctxt->i4_ctb_y)
3116                    {
3117                        au1_avail_chroma[2] = 0;
3118                        au1_avail_chroma[4] = 0;
3119                        au1_avail_chroma[5] = 0;
3120                    }
3121
3122                    if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y  << (log2_ctb_size - 1)) <= sao_ht_chroma)
3123                    {
3124                        au1_avail_chroma[3] = 0;
3125                        au1_avail_chroma[6] = 0;
3126                        au1_avail_chroma[7] = 0;
3127                    }
3128
3129                    {
3130                        au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
3131                        au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
3132
3133                        au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
3134                        au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
3135
3136                        if(chroma_yuv420sp_vu)
3137                        {
3138                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
3139                                                                                 src_strd,
3140                                                                                 pu1_src_left_chroma,
3141                                                                                 pu1_src_top_chroma,
3142                                                                                 pu1_sao_src_top_left_chroma_curr_ctb,
3143                                                                                 au1_src_top_right,
3144                                                                                 au1_sao_src_top_left_chroma_bot_left,
3145                                                                                 au1_avail_chroma,
3146                                                                                 ai1_offset_cr,
3147                                                                                 ai1_offset_cb,
3148                                                                                 sao_wd_chroma,
3149                                                                                 sao_ht_chroma);
3150                        }
3151                        else
3152                        {
3153                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
3154                                                                                 src_strd,
3155                                                                                 pu1_src_left_chroma,
3156                                                                                 pu1_src_top_chroma,
3157                                                                                 pu1_sao_src_top_left_chroma_curr_ctb,
3158                                                                                 au1_src_top_right,
3159                                                                                 au1_sao_src_top_left_chroma_bot_left,
3160                                                                                 au1_avail_chroma,
3161                                                                                 ai1_offset_cb,
3162                                                                                 ai1_offset_cr,
3163                                                                                 sao_wd_chroma,
3164                                                                                 sao_ht_chroma);
3165                        }
3166                    }
3167
3168                }
3169                pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3170                pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3171
3172                pu1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 2];
3173                pu1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 1];
3174            }
3175
3176        }
3177    }
3178
3179
3180
3181
/* For blocks whose no-loop-filter flag bit is set, copy the backed up values back over the source samples */
3183    {
3184        /* Luma */
3185        if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag && no_loop_filter_enabled_luma)
3186        {
3187            UWORD32 u4_no_loop_filter_flag;
3188            WORD32 loop_filter_bit_pos;
3189            WORD32 log2_min_cu = 3;
3190            WORD32 min_cu = (1 << log2_min_cu);
3191            UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
3192            WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
3193            WORD32 sao_blk_wd = ctb_size;
3194            WORD32 remaining_rows;
3195            WORD32 remaining_cols;
3196
3197            remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
3198            remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
3199            if(remaining_rows <= SAO_SHIFT_CTB)
3200                sao_blk_ht += remaining_rows;
3201            if(remaining_cols <= SAO_SHIFT_CTB)
3202                sao_blk_wd += remaining_cols;
3203
3204            pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
3205            pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
3206
3207            pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
3208
3209            loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
3210                            (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
3211            if(ps_sao_ctxt->i4_ctb_x > 0)
3212                loop_filter_bit_pos -= 1;
3213
3214            pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
3215                            (loop_filter_bit_pos >> 3);
3216
3217            for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
3218                            i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
3219            {
3220                WORD32 tmp_wd = sao_blk_wd;
3221
3222                u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
3223                                (loop_filter_bit_pos & 7);
3224                u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
3225
3226                if(u4_no_loop_filter_flag)
3227                {
3228                    while(tmp_wd > 0)
3229                    {
3230                        if(CTZ(u4_no_loop_filter_flag))
3231                        {
3232                            pu1_src_tmp_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3233                            pu1_src_backup_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3234                            tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
3235                            u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
3236                        }
3237                        else
3238                        {
3239                            for(row = 0; row < min_cu; row++)
3240                            {
3241                                for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
3242                                {
3243                                    pu1_src_tmp_luma[row * src_strd + col] = pu1_src_backup_luma[row * backup_strd + col];
3244                                }
3245                            }
3246                            pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3247                            pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3248                            tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
3249                            u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
3250                        }
3251                    }
3252
3253                    pu1_src_tmp_luma -= sao_blk_wd;
3254                    pu1_src_backup_luma -= sao_blk_wd;
3255                }
3256
3257                pu1_src_tmp_luma += (src_strd << log2_min_cu);
3258                pu1_src_backup_luma += (backup_strd << log2_min_cu);
3259            }
3260        }
3261
3262        /* Chroma */
3263        if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag && no_loop_filter_enabled_chroma)
3264        {
3265            UWORD32 u4_no_loop_filter_flag;
3266            WORD32 loop_filter_bit_pos;
3267            WORD32 log2_min_cu = 3;
3268            WORD32 min_cu = (1 << log2_min_cu);
3269            UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
3270            WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
3271            WORD32 sao_blk_wd = ctb_size;
3272            WORD32 remaining_rows;
3273            WORD32 remaining_cols;
3274
3275            remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
3276            remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
3277            if(remaining_rows <= 2 * SAO_SHIFT_CTB)
3278                sao_blk_ht += remaining_rows;
3279            if(remaining_cols <= 2 * SAO_SHIFT_CTB)
3280                sao_blk_wd += remaining_cols;
3281
3282            pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
3283            pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
3284
3285            pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
3286
3287            loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
3288                            (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
3289            if(ps_sao_ctxt->i4_ctb_x > 0)
3290                loop_filter_bit_pos -= 2;
3291
3292            pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
3293                            (loop_filter_bit_pos >> 3);
3294
3295            for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
3296                            i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
3297            {
3298                WORD32 tmp_wd = sao_blk_wd;
3299
3300                u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
3301                                (loop_filter_bit_pos & 7);
3302                u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
3303
3304                if(u4_no_loop_filter_flag)
3305                {
3306                    while(tmp_wd > 0)
3307                    {
3308                        if(CTZ(u4_no_loop_filter_flag))
3309                        {
3310                            pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3311                            pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3312                            tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
3313                            u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
3314                        }
3315                        else
3316                        {
3317                            for(row = 0; row < min_cu / 2; row++)
3318                            {
3319                                for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
3320                                {
3321                                    pu1_src_tmp_chroma[row * src_strd + col] = pu1_src_backup_chroma[row * backup_strd + col];
3322                                }
3323                            }
3324
3325                            pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3326                            pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3327                            tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
3328                            u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
3329                        }
3330                    }
3331
3332                    pu1_src_tmp_chroma -= sao_blk_wd;
3333                    pu1_src_backup_chroma -= sao_blk_wd;
3334                }
3335
3336                pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
3337                pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
3338            }
3339        }
3340    }
3341
3342}
3343
3344