1/******************************************************************************
2*
3* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4*
5* Licensed under the Apache License, Version 2.0 (the "License");
6* you may not use this file except in compliance with the License.
7* You may obtain a copy of the License at:
8*
9* http://www.apache.org/licenses/LICENSE-2.0
10*
11* Unless required by applicable law or agreed to in writing, software
12* distributed under the License is distributed on an "AS IS" BASIS,
13* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14* See the License for the specific language governing permissions and
15* limitations under the License.
16*
17******************************************************************************/
18/**
19*******************************************************************************
20* @file
21*  ihevc_sao.c
22*
23* @brief
24*  Contains leaf level function definitions for sample adaptive offset process
25*
26* @author
27*  Srinivas T
28*
29* @par List of Functions:
30*   - ihevc_sao_band_offset_luma()
31*   - ihevc_sao_band_offset_chroma()
32*   - ihevc_sao_edge_offset_class0()
33*   - ihevc_sao_edge_offset_class0_chroma()
34*   - ihevc_sao_edge_offset_class1()
35*   - ihevc_sao_edge_offset_class1_chroma()
36*   - ihevc_sao_edge_offset_class2()
37*   - ihevc_sao_edge_offset_class2_chroma()
38*   - ihevc_sao_edge_offset_class3()
39*   - ihevc_sao_edge_offset_class3_chroma()
40* @remarks
41*  None
42*
43*******************************************************************************
44*/
45#include <stdlib.h>
46#include <assert.h>
47#include <string.h>
48#include "ihevc_typedefs.h"
49#include "ihevc_macros.h"
50#include "ihevc_platform_macros.h"
51#include "ihevc_func_selector.h"
52#include "ihevc_defs.h"
53#include "ihevc_structs.h"
54#include "ihevc_sao.h"
55
/* Number of SAO bands: the band offset functions classify a sample by */
/* (sample >> (bit_depth - 5)), i.e. by its five most significant bits  */
#define NUM_BAND_TABLE  32

/* Maps the raw edge index (2 + sign_a + sign_b, range 0..4) computed by the */
/* edge offset functions to the index used into the SAO offset array.        */
/* A raw index of 2 maps to 0, which those functions treat as "no offset".   */
const WORD32 gi4_ihevc_table_edge_idx[5] = { 1, 2, 0, 3, 4 };
/**
 * pu1_avail (previously documented here as au4_avail) is the array of flags
 * passed to the edge offset functions - one flag for each neighboring block,
 * specifying if the block is available
 * pu1_avail[0] - left
 * pu1_avail[1] - right
 * pu1_avail[2] - top
 * pu1_avail[3] - bottom
 * pu1_avail[4] - top-left
 * pu1_avail[5] - top-right
 * pu1_avail[6] - bottom-left
 * pu1_avail[7] - bottom-right
 */
70
71
72void ihevc_sao_band_offset_luma(UWORD8 *pu1_src,
73                                WORD32 src_strd,
74                                UWORD8 *pu1_src_left,
75                                UWORD8 *pu1_src_top,
76                                UWORD8 *pu1_src_top_left,
77                                WORD32 sao_band_pos,
78                                WORD8 *pi1_sao_offset,
79                                WORD32 wd,
80                                WORD32 ht)
81{
82    WORD32 band_shift;
83    WORD32 band_table[NUM_BAND_TABLE];
84    WORD32 i;
85    WORD32 row, col;
86
87    /* Updating left and top and top-left */
88    for(row = 0; row < ht; row++)
89    {
90        pu1_src_left[row] = pu1_src[row * src_strd + (wd - 1)];
91    }
92    pu1_src_top_left[0] = pu1_src_top[wd - 1];
93    for(col = 0; col < wd; col++)
94    {
95        pu1_src_top[col] = pu1_src[(ht - 1) * src_strd + col];
96    }
97
98    band_shift = BIT_DEPTH_LUMA - 5;
99    for(i = 0; i < NUM_BAND_TABLE; i++)
100    {
101        band_table[i] = 0;
102    }
103    for(i = 0; i < 4; i++)
104    {
105        band_table[(i + sao_band_pos) & 31] = i + 1;
106    }
107
108    for(row = 0; row < ht; row++)
109    {
110        for(col = 0; col < wd; col++)
111        {
112            WORD32 band_idx;
113
114            band_idx = band_table[pu1_src[col] >> band_shift];
115            pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[band_idx], 0, (1 << (band_shift + 5)) - 1);
116        }
117        pu1_src += src_strd;
118    }
119}
120
121
122
123/* input 'wd' has to be for the interleaved block and not for each color component */
124void ihevc_sao_band_offset_chroma(UWORD8 *pu1_src,
125                                  WORD32 src_strd,
126                                  UWORD8 *pu1_src_left,
127                                  UWORD8 *pu1_src_top,
128                                  UWORD8 *pu1_src_top_left,
129                                  WORD32 sao_band_pos_u,
130                                  WORD32 sao_band_pos_v,
131                                  WORD8 *pi1_sao_offset_u,
132                                  WORD8 *pi1_sao_offset_v,
133                                  WORD32 wd,
134                                  WORD32 ht)
135{
136    WORD32 band_shift;
137    WORD32 band_table_u[NUM_BAND_TABLE];
138    WORD32 band_table_v[NUM_BAND_TABLE];
139    WORD32 i;
140    WORD32 row, col;
141
142    /* Updating left and top and top-left */
143    for(row = 0; row < ht; row++)
144    {
145        pu1_src_left[2 * row] = pu1_src[row * src_strd + (wd - 2)];
146        pu1_src_left[2 * row + 1] = pu1_src[row * src_strd + (wd - 1)];
147    }
148    pu1_src_top_left[0] = pu1_src_top[wd - 2];
149    pu1_src_top_left[1] = pu1_src_top[wd - 1];
150    for(col = 0; col < wd; col++)
151    {
152        pu1_src_top[col] = pu1_src[(ht - 1) * src_strd + col];
153    }
154
155
156    band_shift = BIT_DEPTH_CHROMA - 5;
157    for(i = 0; i < NUM_BAND_TABLE; i++)
158    {
159        band_table_u[i] = 0;
160        band_table_v[i] = 0;
161    }
162    for(i = 0; i < 4; i++)
163    {
164        band_table_u[(i + sao_band_pos_u) & 31] = i + 1;
165        band_table_v[(i + sao_band_pos_v) & 31] = i + 1;
166    }
167
168    for(row = 0; row < ht; row++)
169    {
170        for(col = 0; col < wd; col++)
171        {
172            WORD32 band_idx;
173            WORD8 *pi1_sao_offset;
174
175            pi1_sao_offset = (0 == col % 2) ? pi1_sao_offset_u : pi1_sao_offset_v;
176            band_idx = (0 == col % 2) ? band_table_u[pu1_src[col] >> band_shift] : band_table_v[pu1_src[col] >> band_shift];
177            pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[band_idx], 0, (1 << (band_shift + 5)) - 1);
178        }
179        pu1_src += src_strd;
180    }
181}
182
183
184
185/* Horizontal filtering */
186void ihevc_sao_edge_offset_class0(UWORD8 *pu1_src,
187                                  WORD32 src_strd,
188                                  UWORD8 *pu1_src_left,
189                                  UWORD8 *pu1_src_top,
190                                  UWORD8 *pu1_src_top_left,
191                                  UWORD8 *pu1_src_top_right,
192                                  UWORD8 *pu1_src_bot_left,
193                                  UWORD8 *pu1_avail,
194                                  WORD8 *pi1_sao_offset,
195                                  WORD32 wd,
196                                  WORD32 ht)
197{
198    WORD32 row, col;
199    UWORD8 au1_mask[MAX_CTB_SIZE];
200    UWORD8 au1_src_left_tmp[MAX_CTB_SIZE];
201    WORD8 u1_sign_left, u1_sign_right;
202    WORD32 bit_depth;
203    UNUSED(pu1_src_top_right);
204    UNUSED(pu1_src_bot_left);
205    bit_depth = BIT_DEPTH_LUMA;
206
207    /* Initialize the mask values */
208    memset(au1_mask, 0xFF, MAX_CTB_SIZE);
209
210    /* Update top and top-left arrays */
211    *pu1_src_top_left = pu1_src_top[wd - 1];
212    for(row = 0; row < ht; row++)
213    {
214        au1_src_left_tmp[row] = pu1_src[row * src_strd + wd - 1];
215    }
216    for(col = 0; col < wd; col++)
217    {
218        pu1_src_top[col] = pu1_src[(ht - 1) * src_strd + col];
219    }
220
221    /* Update masks based on the availability flags */
222    if(0 == pu1_avail[0])
223    {
224        au1_mask[0] = 0;
225    }
226    if(0 == pu1_avail[1])
227    {
228        au1_mask[wd - 1] = 0;
229    }
230
231    /* Processing is done on the intermediate buffer and the output is written to the source buffer */
232    {
233        for(row = 0; row < ht; row++)
234        {
235            u1_sign_left = SIGN(pu1_src[0] - pu1_src_left[row]);
236            for(col = 0; col < wd; col++)
237            {
238                WORD32 edge_idx;
239
240                u1_sign_right = SIGN(pu1_src[col] - pu1_src[col + 1]);
241                edge_idx = 2 + u1_sign_left + u1_sign_right;
242                u1_sign_left = -u1_sign_right;
243
244                edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col];
245
246                if(0 != edge_idx)
247                {
248                    pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
249                }
250            }
251
252            pu1_src += src_strd;
253        }
254    }
255
256    /* Update left array */
257    for(row = 0; row < ht; row++)
258    {
259        pu1_src_left[row] = au1_src_left_tmp[row];
260    }
261
262}
263
264
265
266
267/* input 'wd' has to be for the interleaved block and not for each color component */
268void ihevc_sao_edge_offset_class0_chroma(UWORD8 *pu1_src,
269                                         WORD32 src_strd,
270                                         UWORD8 *pu1_src_left,
271                                         UWORD8 *pu1_src_top,
272                                         UWORD8 *pu1_src_top_left,
273                                         UWORD8 *pu1_src_top_right,
274                                         UWORD8 *pu1_src_bot_left,
275                                         UWORD8 *pu1_avail,
276                                         WORD8 *pi1_sao_offset_u,
277                                         WORD8 *pi1_sao_offset_v,
278                                         WORD32 wd,
279                                         WORD32 ht)
280{
281    WORD32 row, col;
282    UWORD8 au1_mask[MAX_CTB_SIZE];
283    UWORD8 au1_src_left_tmp[2 * MAX_CTB_SIZE];
284    WORD8 u1_sign_left_u, u1_sign_right_u;
285    WORD8 u1_sign_left_v, u1_sign_right_v;
286    WORD32 bit_depth;
287    UNUSED(pu1_src_top_right);
288    UNUSED(pu1_src_bot_left);
289    bit_depth = BIT_DEPTH_CHROMA;
290
291    /* Initialize the mask values */
292    memset(au1_mask, 0xFF, MAX_CTB_SIZE);
293
294    /* Update left, top and top-left arrays */
295    pu1_src_top_left[0] = pu1_src_top[wd - 2];
296    pu1_src_top_left[1] = pu1_src_top[wd - 1];
297    for(row = 0; row < ht; row++)
298    {
299        au1_src_left_tmp[2 * row] = pu1_src[row * src_strd + wd - 2];
300        au1_src_left_tmp[2 * row + 1] = pu1_src[row * src_strd + wd - 1];
301    }
302    for(col = 0; col < wd; col++)
303    {
304        pu1_src_top[col] = pu1_src[(ht - 1) * src_strd + col];
305    }
306
307    /* Update masks based on the availability flags */
308    if(0 == pu1_avail[0])
309    {
310        au1_mask[0] = 0;
311    }
312    if(0 == pu1_avail[1])
313    {
314        au1_mask[(wd - 1) >> 1] = 0;
315    }
316
317    /* Processing is done on the intermediate buffer and the output is written to the source buffer */
318    {
319        for(row = 0; row < ht; row++)
320        {
321            u1_sign_left_u = SIGN(pu1_src[0] - pu1_src_left[2 * row]);
322            u1_sign_left_v = SIGN(pu1_src[1] - pu1_src_left[2 * row + 1]);
323            for(col = 0; col < wd; col++)
324            {
325                WORD32 edge_idx;
326                WORD8 *pi1_sao_offset;
327
328                if(0 == col % 2)
329                {
330                    pi1_sao_offset = pi1_sao_offset_u;
331                    u1_sign_right_u = SIGN(pu1_src[col] - pu1_src[col + 2]);
332                    edge_idx = 2 + u1_sign_left_u + u1_sign_right_u;
333                    u1_sign_left_u = -u1_sign_right_u;
334                }
335                else
336                {
337                    pi1_sao_offset = pi1_sao_offset_v;
338                    u1_sign_right_v = SIGN(pu1_src[col] - pu1_src[col + 2]);
339                    edge_idx = 2 + u1_sign_left_v + u1_sign_right_v;
340                    u1_sign_left_v = -u1_sign_right_v;
341                }
342
343                edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col >> 1];
344
345                if(0 != edge_idx)
346                {
347                    pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
348                }
349            }
350
351            pu1_src += src_strd;
352        }
353    }
354
355    for(row = 0; row < 2 * ht; row++)
356    {
357        pu1_src_left[row] = au1_src_left_tmp[row];
358    }
359
360}
361
362
363
364/* Vertical filtering */
365void ihevc_sao_edge_offset_class1(UWORD8 *pu1_src,
366                                  WORD32 src_strd,
367                                  UWORD8 *pu1_src_left,
368                                  UWORD8 *pu1_src_top,
369                                  UWORD8 *pu1_src_top_left,
370                                  UWORD8 *pu1_src_top_right,
371                                  UWORD8 *pu1_src_bot_left,
372                                  UWORD8 *pu1_avail,
373                                  WORD8 *pi1_sao_offset,
374                                  WORD32 wd,
375                                  WORD32 ht)
376{
377    WORD32 row, col;
378    UWORD8 au1_mask[MAX_CTB_SIZE];
379    UWORD8 au1_src_top_tmp[MAX_CTB_SIZE];
380    WORD8 au1_sign_up[MAX_CTB_SIZE];
381    WORD8 u1_sign_down;
382    WORD32 bit_depth;
383    UNUSED(pu1_src_top_right);
384    UNUSED(pu1_src_bot_left);
385
386    bit_depth = BIT_DEPTH_LUMA;
387
388    /* Initialize the mask values */
389    memset(au1_mask, 0xFF, MAX_CTB_SIZE);
390
391    /* Update left, top and top-left arrays */
392    *pu1_src_top_left = pu1_src_top[wd - 1];
393    for(row = 0; row < ht; row++)
394    {
395        pu1_src_left[row] = pu1_src[row * src_strd + wd - 1];
396    }
397    for(col = 0; col < wd; col++)
398    {
399        au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col];
400    }
401
402    /* Update height and source pointers based on the availability flags */
403    if(0 == pu1_avail[2])
404    {
405        pu1_src += src_strd;
406        ht--;
407        for(col = 0; col < wd; col++)
408        {
409            au1_sign_up[col] = SIGN(pu1_src[col] - pu1_src[col - src_strd]);
410        }
411    }
412    else
413    {
414        for(col = 0; col < wd; col++)
415        {
416            au1_sign_up[col] = SIGN(pu1_src[col] - pu1_src_top[col]);
417        }
418    }
419    if(0 == pu1_avail[3])
420    {
421        ht--;
422    }
423
424    /* Processing is done on the intermediate buffer and the output is written to the source buffer */
425    {
426        for(row = 0; row < ht; row++)
427        {
428            for(col = 0; col < wd; col++)
429            {
430                WORD32 edge_idx;
431
432                u1_sign_down = SIGN(pu1_src[col] - pu1_src[col + src_strd]);
433                edge_idx = 2 + au1_sign_up[col] + u1_sign_down;
434                au1_sign_up[col] = -u1_sign_down;
435
436                edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col];
437
438                if(0 != edge_idx)
439                {
440                    pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
441                }
442            }
443
444            pu1_src += src_strd;
445        }
446    }
447
448    for(col = 0; col < wd; col++)
449    {
450        pu1_src_top[col] = au1_src_top_tmp[col];
451    }
452
453}
454
455
456
457/* input 'wd' has to be for the interleaved block and not for each color component */
458void ihevc_sao_edge_offset_class1_chroma(UWORD8 *pu1_src,
459                                         WORD32 src_strd,
460                                         UWORD8 *pu1_src_left,
461                                         UWORD8 *pu1_src_top,
462                                         UWORD8 *pu1_src_top_left,
463                                         UWORD8 *pu1_src_top_right,
464                                         UWORD8 *pu1_src_bot_left,
465                                         UWORD8 *pu1_avail,
466                                         WORD8 *pi1_sao_offset_u,
467                                         WORD8 *pi1_sao_offset_v,
468                                         WORD32 wd,
469                                         WORD32 ht)
470{
471    WORD32 row, col;
472    UWORD8 au1_mask[MAX_CTB_SIZE];
473    UWORD8 au1_src_top_tmp[MAX_CTB_SIZE];
474    WORD8 au1_sign_up[MAX_CTB_SIZE];
475    WORD8 u1_sign_down;
476    WORD32 bit_depth;
477    UNUSED(pu1_src_top_right);
478    UNUSED(pu1_src_bot_left);
479
480    bit_depth = BIT_DEPTH_CHROMA;
481
482    /* Initialize the mask values */
483    memset(au1_mask, 0xFF, MAX_CTB_SIZE);
484
485    /* Update left, top and top-left arrays */
486    pu1_src_top_left[0] = pu1_src_top[wd - 2];
487    pu1_src_top_left[1] = pu1_src_top[wd - 1];
488    for(row = 0; row < ht; row++)
489    {
490        pu1_src_left[2 * row] = pu1_src[row * src_strd + wd - 2];
491        pu1_src_left[2 * row + 1] = pu1_src[row * src_strd + wd - 1];
492    }
493    for(col = 0; col < wd; col++)
494    {
495        au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col];
496    }
497
498    /* Update height and source pointers based on the availability flags */
499    if(0 == pu1_avail[2])
500    {
501        pu1_src += src_strd;
502        ht--;
503        for(col = 0; col < wd; col++)
504        {
505            au1_sign_up[col] = SIGN(pu1_src[col] - pu1_src[col - src_strd]);
506        }
507    }
508    else
509    {
510        for(col = 0; col < wd; col++)
511        {
512            au1_sign_up[col] = SIGN(pu1_src[col] - pu1_src_top[col]);
513        }
514    }
515    if(0 == pu1_avail[3])
516    {
517        ht--;
518    }
519
520    /* Processing is done on the intermediate buffer and the output is written to the source buffer */
521    {
522        for(row = 0; row < ht; row++)
523        {
524            for(col = 0; col < wd; col++)
525            {
526                WORD32 edge_idx;
527                WORD8 *pi1_sao_offset;
528
529                pi1_sao_offset = (0 == col % 2) ? pi1_sao_offset_u : pi1_sao_offset_v;
530
531                u1_sign_down = SIGN(pu1_src[col] - pu1_src[col + src_strd]);
532                edge_idx = 2 + au1_sign_up[col] + u1_sign_down;
533                au1_sign_up[col] = -u1_sign_down;
534
535                edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col >> 1];
536
537                if(0 != edge_idx)
538                {
539                    pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
540                }
541            }
542
543            pu1_src += src_strd;
544        }
545    }
546
547    for(col = 0; col < wd; col++)
548    {
549        pu1_src_top[col] = au1_src_top_tmp[col];
550    }
551
552}
553
554
555
556/* 135 degree filtering */
557void ihevc_sao_edge_offset_class2(UWORD8 *pu1_src,
558                                  WORD32 src_strd,
559                                  UWORD8 *pu1_src_left,
560                                  UWORD8 *pu1_src_top,
561                                  UWORD8 *pu1_src_top_left,
562                                  UWORD8 *pu1_src_top_right,
563                                  UWORD8 *pu1_src_bot_left,
564                                  UWORD8 *pu1_avail,
565                                  WORD8 *pi1_sao_offset,
566                                  WORD32 wd,
567                                  WORD32 ht)
568{
569    WORD32 row, col;
570    UWORD8 au1_mask[MAX_CTB_SIZE];
571    UWORD8 au1_src_left_tmp[MAX_CTB_SIZE], au1_src_top_tmp[MAX_CTB_SIZE];
572    UWORD8 u1_src_top_left_tmp;
573    WORD8 au1_sign_up[MAX_CTB_SIZE + 1], au1_sign_up_tmp[MAX_CTB_SIZE + 1];
574    WORD8 u1_sign_down;
575    WORD8 *pu1_sign_up;
576    WORD8 *pu1_sign_up_tmp;
577    UWORD8 *pu1_src_left_cpy;
578
579    WORD32 bit_depth;
580    UWORD8 u1_pos_0_0_tmp;
581    UWORD8 u1_pos_wd_ht_tmp;
582    UNUSED(pu1_src_top_right);
583    UNUSED(pu1_src_bot_left);
584
585    bit_depth = BIT_DEPTH_LUMA;
586    pu1_sign_up = au1_sign_up;
587    pu1_sign_up_tmp = au1_sign_up_tmp;
588    pu1_src_left_cpy = pu1_src_left;
589
590    /* Initialize the mask values */
591    memset(au1_mask, 0xFF, MAX_CTB_SIZE);
592
593    /* Update left, top and top-left arrays */
594    u1_src_top_left_tmp = pu1_src_top[wd - 1];
595    for(row = 0; row < ht; row++)
596    {
597        au1_src_left_tmp[row] = pu1_src[row * src_strd + wd - 1];
598    }
599    for(col = 0; col < wd; col++)
600    {
601        au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col];
602    }
603
604
605    /* If top-left is available, process separately */
606    if(0 != pu1_avail[4])
607    {
608        WORD32 edge_idx;
609
610        edge_idx = 2 + SIGN(pu1_src[0] - pu1_src_top_left[0]) +
611                        SIGN(pu1_src[0] - pu1_src[1 + src_strd]);
612
613        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
614
615        if(0 != edge_idx)
616        {
617            u1_pos_0_0_tmp = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
618        }
619        else
620        {
621            u1_pos_0_0_tmp = pu1_src[0];
622        }
623    }
624    else
625    {
626        u1_pos_0_0_tmp = pu1_src[0];
627    }
628
629    /* If bottom-right is available, process separately */
630    if(0 != pu1_avail[7])
631    {
632        WORD32 edge_idx;
633
634        edge_idx = 2 + SIGN(pu1_src[wd - 1 + (ht - 1) * src_strd] - pu1_src[wd - 1 + (ht - 1) * src_strd - 1 - src_strd]) +
635                        SIGN(pu1_src[wd - 1 + (ht - 1) * src_strd] - pu1_src[wd - 1 + (ht - 1) * src_strd + 1 + src_strd]);
636
637        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
638
639        if(0 != edge_idx)
640        {
641            u1_pos_wd_ht_tmp = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
642        }
643        else
644        {
645            u1_pos_wd_ht_tmp = pu1_src[wd - 1 + (ht - 1) * src_strd];
646        }
647    }
648    else
649    {
650        u1_pos_wd_ht_tmp = pu1_src[wd - 1 + (ht - 1) * src_strd];
651    }
652
653    /* If Left is not available */
654    if(0 == pu1_avail[0])
655    {
656        au1_mask[0] = 0;
657    }
658
659    /* If Top is not available */
660    if(0 == pu1_avail[2])
661    {
662        pu1_src += src_strd;
663        ht--;
664        pu1_src_left_cpy += 1;
665        for(col = 1; col < wd; col++)
666        {
667            pu1_sign_up[col] = SIGN(pu1_src[col] - pu1_src[col - 1 - src_strd]);
668        }
669    }
670    else
671    {
672        for(col = 1; col < wd; col++)
673        {
674            pu1_sign_up[col] = SIGN(pu1_src[col] - pu1_src_top[col - 1]);
675        }
676    }
677
678    /* If Right is not available */
679    if(0 == pu1_avail[1])
680    {
681        au1_mask[wd - 1] = 0;
682    }
683
684    /* If Bottom is not available */
685    if(0 == pu1_avail[3])
686    {
687        ht--;
688    }
689
690    /* Processing is done on the intermediate buffer and the output is written to the source buffer */
691    {
692        for(row = 0; row < ht; row++)
693        {
694            pu1_sign_up[0] = SIGN(pu1_src[0] - pu1_src_left_cpy[row - 1]);
695            for(col = 0; col < wd; col++)
696            {
697                WORD32 edge_idx;
698
699                u1_sign_down = SIGN(pu1_src[col] - pu1_src[col + 1 + src_strd]);
700                edge_idx = 2 + pu1_sign_up[col] + u1_sign_down;
701                pu1_sign_up_tmp[col + 1] = -u1_sign_down;
702
703                edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col];
704
705                if(0 != edge_idx)
706                {
707                    pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
708                }
709            }
710
711            /* Swapping pu1_sign_up_tmp and pu1_sign_up */
712            {
713                WORD8 *pu1_swap_tmp = pu1_sign_up;
714                pu1_sign_up = pu1_sign_up_tmp;
715                pu1_sign_up_tmp = pu1_swap_tmp;
716            }
717
718            pu1_src += src_strd;
719        }
720
721        pu1_src[-(pu1_avail[2] ? ht : ht + 1) * src_strd] = u1_pos_0_0_tmp;
722        pu1_src[(pu1_avail[3] ? wd - 1 - src_strd : wd - 1)] = u1_pos_wd_ht_tmp;
723    }
724
725    if(0 == pu1_avail[2])
726        ht++;
727    if(0 == pu1_avail[3])
728        ht++;
729    *pu1_src_top_left = u1_src_top_left_tmp;
730    for(row = 0; row < ht; row++)
731    {
732        pu1_src_left[row] = au1_src_left_tmp[row];
733    }
734    for(col = 0; col < wd; col++)
735    {
736        pu1_src_top[col] = au1_src_top_tmp[col];
737    }
738
739}
740
741
742
743
744/* 135 degree filtering */
745void ihevc_sao_edge_offset_class2_chroma(UWORD8 *pu1_src,
746                                         WORD32 src_strd,
747                                         UWORD8 *pu1_src_left,
748                                         UWORD8 *pu1_src_top,
749                                         UWORD8 *pu1_src_top_left,
750                                         UWORD8 *pu1_src_top_right,
751                                         UWORD8 *pu1_src_bot_left,
752                                         UWORD8 *pu1_avail,
753                                         WORD8 *pi1_sao_offset_u,
754                                         WORD8 *pi1_sao_offset_v,
755                                         WORD32 wd,
756                                         WORD32 ht)
757{
758    WORD32 row, col;
759    UWORD8 au1_mask[MAX_CTB_SIZE];
760    UWORD8 au1_src_left_tmp[2 * MAX_CTB_SIZE], au1_src_top_tmp[MAX_CTB_SIZE];
761    UWORD8 au1_src_top_left_tmp[2];
762    WORD8 au1_sign_up[MAX_CTB_SIZE + 2], au1_sign_up_tmp[MAX_CTB_SIZE + 2];
763    WORD8 u1_sign_down;
764    WORD8 *pu1_sign_up;
765    WORD8 *pu1_sign_up_tmp;
766    UWORD8 *pu1_src_left_cpy;
767
768    WORD32 bit_depth;
769
770    UWORD8 u1_pos_0_0_tmp_u;
771    UWORD8 u1_pos_0_0_tmp_v;
772    UWORD8 u1_pos_wd_ht_tmp_u;
773    UWORD8 u1_pos_wd_ht_tmp_v;
774    UNUSED(pu1_src_top_right);
775    UNUSED(pu1_src_bot_left);
776
777
778    bit_depth = BIT_DEPTH_CHROMA;
779    pu1_sign_up = au1_sign_up;
780    pu1_sign_up_tmp = au1_sign_up_tmp;
781    pu1_src_left_cpy = pu1_src_left;
782
783    /* Initialize the mask values */
784    memset(au1_mask, 0xFF, MAX_CTB_SIZE);
785
786    /* Update left, top and top-left arrays */
787    au1_src_top_left_tmp[0] = pu1_src_top[wd - 2];
788    au1_src_top_left_tmp[1] = pu1_src_top[wd - 1];
789    for(row = 0; row < ht; row++)
790    {
791        au1_src_left_tmp[2 * row] = pu1_src[row * src_strd + wd - 2];
792        au1_src_left_tmp[2 * row + 1] = pu1_src[row * src_strd + wd - 1];
793    }
794    for(col = 0; col < wd; col++)
795    {
796        au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col];
797    }
798
799
800    /* If top-left is available, process separately */
801    if(0 != pu1_avail[4])
802    {
803        WORD32 edge_idx;
804
805        /* U */
806        edge_idx = 2 + SIGN(pu1_src[0] - pu1_src_top_left[0]) +
807                        SIGN(pu1_src[0] - pu1_src[2 + src_strd]);
808
809        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
810
811        if(0 != edge_idx)
812        {
813            u1_pos_0_0_tmp_u = CLIP3(pu1_src[0] + pi1_sao_offset_u[edge_idx], 0, (1 << bit_depth) - 1);
814        }
815        else
816        {
817            u1_pos_0_0_tmp_u = pu1_src[0];
818        }
819
820        /* V */
821        edge_idx = 2 + SIGN(pu1_src[1] - pu1_src_top_left[1]) +
822                        SIGN(pu1_src[1] - pu1_src[1 + 2 + src_strd]);
823
824        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
825
826        if(0 != edge_idx)
827        {
828            u1_pos_0_0_tmp_v = CLIP3(pu1_src[1] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1);
829        }
830        else
831        {
832            u1_pos_0_0_tmp_v = pu1_src[1];
833        }
834    }
835    else
836    {
837        u1_pos_0_0_tmp_u = pu1_src[0];
838        u1_pos_0_0_tmp_v = pu1_src[1];
839    }
840
841    /* If bottom-right is available, process separately */
842    if(0 != pu1_avail[7])
843    {
844        WORD32 edge_idx;
845
846        /* U */
847        edge_idx = 2 + SIGN(pu1_src[wd - 2 + (ht - 1) * src_strd] - pu1_src[wd - 2 + (ht - 1) * src_strd - 2 - src_strd]) +
848                        SIGN(pu1_src[wd - 2 + (ht - 1) * src_strd] - pu1_src[wd - 2 + (ht - 1) * src_strd + 2 + src_strd]);
849
850        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
851
852        if(0 != edge_idx)
853        {
854            u1_pos_wd_ht_tmp_u = CLIP3(pu1_src[wd - 2 + (ht - 1) * src_strd] + pi1_sao_offset_u[edge_idx], 0, (1 << bit_depth) - 1);
855        }
856        else
857        {
858            u1_pos_wd_ht_tmp_u = pu1_src[wd - 2 + (ht - 1) * src_strd];
859        }
860
861        /* V */
862        edge_idx = 2 + SIGN(pu1_src[wd - 1 + (ht - 1) * src_strd] - pu1_src[wd - 1 + (ht - 1) * src_strd - 2 - src_strd]) +
863                        SIGN(pu1_src[wd - 1 + (ht - 1) * src_strd] - pu1_src[wd - 1 + (ht - 1) * src_strd + 2 + src_strd]);
864
865        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
866
867        if(0 != edge_idx)
868        {
869            u1_pos_wd_ht_tmp_v = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1);
870        }
871        else
872        {
873            u1_pos_wd_ht_tmp_v = pu1_src[wd - 1 + (ht - 1) * src_strd];
874        }
875    }
876    else
877    {
878        u1_pos_wd_ht_tmp_u = pu1_src[wd - 2 + (ht - 1) * src_strd];
879        u1_pos_wd_ht_tmp_v = pu1_src[wd - 1 + (ht - 1) * src_strd];
880    }
881
882    /* If Left is not available */
883    if(0 == pu1_avail[0])
884    {
885        au1_mask[0] = 0;
886    }
887
888    /* If Top is not available */
889    if(0 == pu1_avail[2])
890    {
891        pu1_src += src_strd;
892        pu1_src_left_cpy += 2;
893        ht--;
894        for(col = 2; col < wd; col++)
895        {
896            pu1_sign_up[col] = SIGN(pu1_src[col] - pu1_src[col - 2 - src_strd]);
897        }
898    }
899    else
900    {
901        for(col = 2; col < wd; col++)
902        {
903            pu1_sign_up[col] = SIGN(pu1_src[col] - pu1_src_top[col - 2]);
904        }
905    }
906
907    /* If Right is not available */
908    if(0 == pu1_avail[1])
909    {
910        au1_mask[(wd - 1) >> 1] = 0;
911    }
912
913    /* If Bottom is not available */
914    if(0 == pu1_avail[3])
915    {
916        ht--;
917    }
918
919    /* Processing is done on the intermediate buffer and the output is written to the source buffer */
920    {
921        for(row = 0; row < ht; row++)
922        {
923            pu1_sign_up[0] = SIGN(pu1_src[0] - pu1_src_left_cpy[2 * (row - 1)]);
924            pu1_sign_up[1] = SIGN(pu1_src[1] - pu1_src_left_cpy[2 * (row - 1) + 1]);
925            for(col = 0; col < wd; col++)
926            {
927                WORD32 edge_idx;
928                WORD8 *pi1_sao_offset;
929
930                pi1_sao_offset = (0 == col % 2) ? pi1_sao_offset_u : pi1_sao_offset_v;
931
932                u1_sign_down = SIGN(pu1_src[col] - pu1_src[col + 2 + src_strd]);
933                edge_idx = 2 + pu1_sign_up[col] + u1_sign_down;
934                pu1_sign_up_tmp[col + 2] = -u1_sign_down;
935
936                edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col >> 1];
937
938                if(0 != edge_idx)
939                {
940                    pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
941                }
942            }
943
944            /* Swapping pu1_sign_up_tmp and pu1_sign_up */
945            {
946                WORD8 *pu1_swap_tmp = pu1_sign_up;
947                pu1_sign_up = pu1_sign_up_tmp;
948                pu1_sign_up_tmp = pu1_swap_tmp;
949            }
950
951            pu1_src += src_strd;
952        }
953
954        pu1_src[-(pu1_avail[2] ? ht : ht + 1) * src_strd] = u1_pos_0_0_tmp_u;
955        pu1_src[-(pu1_avail[2] ? ht : ht + 1) * src_strd + 1] = u1_pos_0_0_tmp_v;
956        pu1_src[(pu1_avail[3] ? wd - 2 - src_strd : wd - 2)] = u1_pos_wd_ht_tmp_u;
957        pu1_src[(pu1_avail[3] ? wd - 1 - src_strd : wd - 1)] = u1_pos_wd_ht_tmp_v;
958    }
959
960    if(0 == pu1_avail[2])
961        ht++;
962    if(0 == pu1_avail[3])
963        ht++;
964    pu1_src_top_left[0] = au1_src_top_left_tmp[0];
965    pu1_src_top_left[1] = au1_src_top_left_tmp[1];
966    for(row = 0; row < 2 * ht; row++)
967    {
968        pu1_src_left[row] = au1_src_left_tmp[row];
969    }
970    for(col = 0; col < wd; col++)
971    {
972        pu1_src_top[col] = au1_src_top_tmp[col];
973    }
974
975}
976
977
978
979
980/* 45 degree filtering */
981void ihevc_sao_edge_offset_class3(UWORD8 *pu1_src,
982                                  WORD32 src_strd,
983                                  UWORD8 *pu1_src_left,
984                                  UWORD8 *pu1_src_top,
985                                  UWORD8 *pu1_src_top_left,
986                                  UWORD8 *pu1_src_top_right,
987                                  UWORD8 *pu1_src_bot_left,
988                                  UWORD8 *pu1_avail,
989                                  WORD8 *pi1_sao_offset,
990                                  WORD32 wd,
991                                  WORD32 ht)
992{
993    WORD32 row, col;
994    UWORD8 au1_mask[MAX_CTB_SIZE];
995    UWORD8 au1_src_top_tmp[MAX_CTB_SIZE];
996    UWORD8 au1_src_left_tmp[MAX_CTB_SIZE];
997    UWORD8 u1_src_top_left_tmp;
998    WORD8 au1_sign_up[MAX_CTB_SIZE];
999    UWORD8 *pu1_src_left_cpy;
1000    WORD8 u1_sign_down;
1001    WORD32 bit_depth;
1002
1003    UWORD8 u1_pos_0_ht_tmp;
1004    UWORD8 u1_pos_wd_0_tmp;
1005
1006    bit_depth = BIT_DEPTH_LUMA;
1007    pu1_src_left_cpy = pu1_src_left;
1008
1009    /* Initialize the mask values */
1010    memset(au1_mask, 0xFF, MAX_CTB_SIZE);
1011
1012    /* Update left, top and top-left arrays */
1013    u1_src_top_left_tmp = pu1_src_top[wd - 1];
1014    for(row = 0; row < ht; row++)
1015    {
1016        au1_src_left_tmp[row] = pu1_src[row * src_strd + wd - 1];
1017    }
1018    for(col = 0; col < wd; col++)
1019    {
1020        au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col];
1021    }
1022
1023    /* If top-right is available, process separately */
1024    if(0 != pu1_avail[5])
1025    {
1026        WORD32 edge_idx;
1027
1028        edge_idx = 2 + SIGN(pu1_src[wd - 1] - pu1_src_top_right[0]) +
1029                        SIGN(pu1_src[wd - 1] - pu1_src[wd - 1 - 1 + src_strd]);
1030
1031        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
1032
1033        if(0 != edge_idx)
1034        {
1035            u1_pos_wd_0_tmp = CLIP3(pu1_src[wd - 1] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
1036        }
1037        else
1038        {
1039            u1_pos_wd_0_tmp = pu1_src[wd - 1];
1040        }
1041    }
1042    else
1043    {
1044        u1_pos_wd_0_tmp = pu1_src[wd - 1];
1045    }
1046
1047    /* If bottom-left is available, process separately */
1048    if(0 != pu1_avail[6])
1049    {
1050        WORD32 edge_idx;
1051
1052        edge_idx = 2 + SIGN(pu1_src[(ht - 1) * src_strd] - pu1_src[(ht - 1) * src_strd + 1 - src_strd]) +
1053                        SIGN(pu1_src[(ht - 1) * src_strd] - pu1_src_bot_left[0]);
1054
1055        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
1056
1057        if(0 != edge_idx)
1058        {
1059            u1_pos_0_ht_tmp = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
1060        }
1061        else
1062        {
1063            u1_pos_0_ht_tmp = pu1_src[(ht - 1) * src_strd];
1064        }
1065    }
1066    else
1067    {
1068        u1_pos_0_ht_tmp = pu1_src[(ht - 1) * src_strd];
1069    }
1070
1071    /* If Left is not available */
1072    if(0 == pu1_avail[0])
1073    {
1074        au1_mask[0] = 0;
1075    }
1076
1077    /* If Top is not available */
1078    if(0 == pu1_avail[2])
1079    {
1080        pu1_src += src_strd;
1081        ht--;
1082        pu1_src_left_cpy += 1;
1083        for(col = 0; col < wd - 1; col++)
1084        {
1085            au1_sign_up[col] = SIGN(pu1_src[col] - pu1_src[col + 1 - src_strd]);
1086        }
1087    }
1088    else
1089    {
1090        for(col = 0; col < wd - 1; col++)
1091        {
1092            au1_sign_up[col] = SIGN(pu1_src[col] - pu1_src_top[col + 1]);
1093        }
1094    }
1095
1096    /* If Right is not available */
1097    if(0 == pu1_avail[1])
1098    {
1099        au1_mask[wd - 1] = 0;
1100    }
1101
1102    /* If Bottom is not available */
1103    if(0 == pu1_avail[3])
1104    {
1105        ht--;
1106    }
1107
1108    /* Processing is done on the intermediate buffer and the output is written to the source buffer */
1109    {
1110        for(row = 0; row < ht; row++)
1111        {
1112            au1_sign_up[wd - 1] = SIGN(pu1_src[wd - 1] - pu1_src[wd - 1 + 1 - src_strd]);
1113            for(col = 0; col < wd; col++)
1114            {
1115                WORD32 edge_idx;
1116
1117                u1_sign_down = SIGN(pu1_src[col] - ((col == 0) ? pu1_src_left_cpy[row + 1] :
1118                                                                 pu1_src[col - 1 + src_strd]));
1119                edge_idx = 2 + au1_sign_up[col] + u1_sign_down;
1120                if(col > 0)
1121                    au1_sign_up[col - 1] = -u1_sign_down;
1122
1123                edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col];
1124
1125                if(0 != edge_idx)
1126                {
1127                    pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
1128                }
1129            }
1130
1131            pu1_src += src_strd;
1132        }
1133
1134        pu1_src[-(pu1_avail[2] ? ht : ht + 1) * src_strd + wd - 1] = u1_pos_wd_0_tmp;
1135        pu1_src[(pu1_avail[3] ?  (-src_strd) : 0)] = u1_pos_0_ht_tmp;
1136    }
1137
1138    if(0 == pu1_avail[2])
1139        ht++;
1140    if(0 == pu1_avail[3])
1141        ht++;
1142    *pu1_src_top_left = u1_src_top_left_tmp;
1143    for(row = 0; row < ht; row++)
1144    {
1145        pu1_src_left[row] = au1_src_left_tmp[row];
1146    }
1147    for(col = 0; col < wd; col++)
1148    {
1149        pu1_src_top[col] = au1_src_top_tmp[col];
1150    }
1151
1152}
1153
1154
1155
1156
1157void ihevc_sao_edge_offset_class3_chroma(UWORD8 *pu1_src,
1158                                         WORD32 src_strd,
1159                                         UWORD8 *pu1_src_left,
1160                                         UWORD8 *pu1_src_top,
1161                                         UWORD8 *pu1_src_top_left,
1162                                         UWORD8 *pu1_src_top_right,
1163                                         UWORD8 *pu1_src_bot_left,
1164                                         UWORD8 *pu1_avail,
1165                                         WORD8 *pi1_sao_offset_u,
1166                                         WORD8 *pi1_sao_offset_v,
1167                                         WORD32 wd,
1168                                         WORD32 ht)
1169{
1170    WORD32 row, col;
1171    UWORD8 au1_mask[MAX_CTB_SIZE];
1172    UWORD8 au1_src_left_tmp[2 * MAX_CTB_SIZE], au1_src_top_tmp[MAX_CTB_SIZE];
1173    UWORD8 au1_src_top_left_tmp[2];
1174    WORD8 au1_sign_up[MAX_CTB_SIZE];
1175    UWORD8 *pu1_src_left_cpy;
1176    WORD8 u1_sign_down;
1177    WORD32 bit_depth;
1178
1179    UWORD8 u1_pos_wd_0_tmp_u;
1180    UWORD8 u1_pos_wd_0_tmp_v;
1181    UWORD8 u1_pos_0_ht_tmp_u;
1182    UWORD8 u1_pos_0_ht_tmp_v;
1183
1184    bit_depth = BIT_DEPTH_CHROMA;
1185    pu1_src_left_cpy = pu1_src_left;
1186
1187    /* Initialize the mask values */
1188    memset(au1_mask, 0xFF, MAX_CTB_SIZE);
1189
1190    /* Update left, top and top-left arrays */
1191    au1_src_top_left_tmp[0] = pu1_src_top[wd - 2];
1192    au1_src_top_left_tmp[1] = pu1_src_top[wd - 1];
1193    for(row = 0; row < ht; row++)
1194    {
1195        au1_src_left_tmp[2 * row] = pu1_src[row * src_strd + wd - 2];
1196        au1_src_left_tmp[2 * row + 1] = pu1_src[row * src_strd + wd - 1];
1197    }
1198    for(col = 0; col < wd; col++)
1199    {
1200        au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col];
1201    }
1202
1203
1204    /* If top-right is available, process separately */
1205    if(0 != pu1_avail[5])
1206    {
1207        WORD32 edge_idx;
1208
1209        /* U */
1210        edge_idx = 2 + SIGN(pu1_src[wd - 2] - pu1_src_top_right[0]) +
1211                        SIGN(pu1_src[wd - 2] - pu1_src[wd - 2 - 2 + src_strd]);
1212
1213        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
1214
1215        if(0 != edge_idx)
1216        {
1217            u1_pos_wd_0_tmp_u = CLIP3(pu1_src[wd - 2] + pi1_sao_offset_u[edge_idx], 0, (1 << bit_depth) - 1);
1218        }
1219        else
1220        {
1221            u1_pos_wd_0_tmp_u = pu1_src[wd - 2];
1222        }
1223
1224        /* V */
1225        edge_idx = 2 + SIGN(pu1_src[wd - 1] - pu1_src_top_right[1]) +
1226                        SIGN(pu1_src[wd - 1] - pu1_src[wd - 1 - 2 + src_strd]);
1227
1228        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
1229
1230        if(0 != edge_idx)
1231        {
1232            u1_pos_wd_0_tmp_v = CLIP3(pu1_src[wd - 1] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1);
1233        }
1234        else
1235        {
1236            u1_pos_wd_0_tmp_v = pu1_src[wd - 1];
1237        }
1238    }
1239    else
1240    {
1241        u1_pos_wd_0_tmp_u = pu1_src[wd - 2];
1242        u1_pos_wd_0_tmp_v = pu1_src[wd - 1];
1243    }
1244
1245    /* If bottom-left is available, process separately */
1246    if(0 != pu1_avail[6])
1247    {
1248        WORD32 edge_idx;
1249
1250        /* U */
1251        edge_idx = 2 + SIGN(pu1_src[(ht - 1) * src_strd] - pu1_src[(ht - 1) * src_strd + 2 - src_strd]) +
1252                        SIGN(pu1_src[(ht - 1) * src_strd] - pu1_src_bot_left[0]);
1253
1254        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
1255
1256        if(0 != edge_idx)
1257        {
1258            u1_pos_0_ht_tmp_u = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset_u[edge_idx], 0, (1 << bit_depth) - 1);
1259        }
1260        else
1261        {
1262            u1_pos_0_ht_tmp_u = pu1_src[(ht - 1) * src_strd];
1263        }
1264
1265        /* V */
1266        edge_idx = 2 + SIGN(pu1_src[(ht - 1) * src_strd + 1] - pu1_src[(ht - 1) * src_strd + 1 + 2 - src_strd]) +
1267                        SIGN(pu1_src[(ht - 1) * src_strd + 1] - pu1_src_bot_left[1]);
1268
1269        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
1270
1271        if(0 != edge_idx)
1272        {
1273            u1_pos_0_ht_tmp_v = CLIP3(pu1_src[(ht - 1) * src_strd + 1] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1);
1274        }
1275        else
1276        {
1277            u1_pos_0_ht_tmp_v = pu1_src[(ht - 1) * src_strd + 1];
1278        }
1279    }
1280    else
1281    {
1282        u1_pos_0_ht_tmp_u = pu1_src[(ht - 1) * src_strd];
1283        u1_pos_0_ht_tmp_v = pu1_src[(ht - 1) * src_strd + 1];
1284    }
1285
1286    /* If Left is not available */
1287    if(0 == pu1_avail[0])
1288    {
1289        au1_mask[0] = 0;
1290    }
1291
1292    /* If Top is not available */
1293    if(0 == pu1_avail[2])
1294    {
1295        pu1_src += src_strd;
1296        ht--;
1297        pu1_src_left_cpy += 2;
1298        for(col = 0; col < wd - 2; col++)
1299        {
1300            au1_sign_up[col] = SIGN(pu1_src[col] - pu1_src[col + 2 - src_strd]);
1301        }
1302    }
1303    else
1304    {
1305        for(col = 0; col < wd - 2; col++)
1306        {
1307            au1_sign_up[col] = SIGN(pu1_src[col] - pu1_src_top[col + 2]);
1308        }
1309    }
1310
1311    /* If Right is not available */
1312    if(0 == pu1_avail[1])
1313    {
1314        au1_mask[(wd - 1) >> 1] = 0;
1315    }
1316
1317    /* If Bottom is not available */
1318    if(0 == pu1_avail[3])
1319    {
1320        ht--;
1321    }
1322
1323    /* Processing is done on the intermediate buffer and the output is written to the source buffer */
1324    {
1325        for(row = 0; row < ht; row++)
1326        {
1327            au1_sign_up[wd - 2] = SIGN(pu1_src[wd - 2] - pu1_src[wd - 2 + 2 - src_strd]);
1328            au1_sign_up[wd - 1] = SIGN(pu1_src[wd - 1] - pu1_src[wd - 1 + 2 - src_strd]);
1329            for(col = 0; col < wd; col++)
1330            {
1331                WORD32 edge_idx;
1332                WORD8 *pi1_sao_offset;
1333
1334                pi1_sao_offset = (0 == col % 2) ? pi1_sao_offset_u : pi1_sao_offset_v;
1335
1336                u1_sign_down = SIGN(pu1_src[col] - ((col < 2) ? pu1_src_left_cpy[2 * (row + 1) + col] :
1337                                                                pu1_src[col - 2 + src_strd]));
1338                edge_idx = 2 + au1_sign_up[col] + u1_sign_down;
1339                if(col > 1)
1340                    au1_sign_up[col - 2] = -u1_sign_down;
1341
1342                edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col >> 1];
1343
1344                if(0 != edge_idx)
1345                {
1346                    pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
1347                }
1348            }
1349
1350            pu1_src += src_strd;
1351        }
1352
1353        pu1_src[-(pu1_avail[2] ? ht : ht + 1) * src_strd + wd - 2] = u1_pos_wd_0_tmp_u;
1354        pu1_src[-(pu1_avail[2] ? ht : ht + 1) * src_strd + wd - 1] = u1_pos_wd_0_tmp_v;
1355        pu1_src[(pu1_avail[3] ?  (-src_strd) : 0)] = u1_pos_0_ht_tmp_u;
1356        pu1_src[(pu1_avail[3] ?  (-src_strd) : 0) + 1] = u1_pos_0_ht_tmp_v;
1357    }
1358
1359    if(0 == pu1_avail[2])
1360        ht++;
1361    if(0 == pu1_avail[3])
1362        ht++;
1363    pu1_src_top_left[0] = au1_src_top_left_tmp[0];
1364    pu1_src_top_left[1] = au1_src_top_left_tmp[1];
1365    for(row = 0; row < 2 * ht; row++)
1366    {
1367        pu1_src_left[row] = au1_src_left_tmp[row];
1368    }
1369    for(col = 0; col < wd; col++)
1370    {
1371        pu1_src_top[col] = au1_src_top_tmp[col];
1372    }
1373
1374}
1375