1/******************************************************************************
2*
3* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4*
5* Licensed under the Apache License, Version 2.0 (the "License");
6* you may not use this file except in compliance with the License.
7* You may obtain a copy of the License at:
8*
9* http://www.apache.org/licenses/LICENSE-2.0
10*
11* Unless required by applicable law or agreed to in writing, software
12* distributed under the License is distributed on an "AS IS" BASIS,
13* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14* See the License for the specific language governing permissions and
15* limitations under the License.
16*
17******************************************************************************/
18/**
19*******************************************************************************
20* @file
21*  ihevc_deblk_edge_filter.c
22*
23* @brief
24*  Contains function definitions for deblocking filters
25*
26* @author
27*  Srinivas T
28*
29* @par List of Functions:
30*   - ihevc_deblk_luma_vert()
31*   - ihevc_deblk_luma_horz()
32*   - ihevc_deblk_chroma_vert()
33*   - ihevc_deblk_chroma_horz()
34*   - ihevc_hbd_deblk_luma_vert()
35*   - ihevc_hbd_deblk_luma_horz()
36*   - ihevc_hbd_deblk_chroma_vert()
37*   - ihevc_hbd_deblk_chroma_horz()
38* @remarks
39*  None
40*
41*******************************************************************************
42*/
43#include <stdlib.h>
44#include <stdio.h>
45#include <assert.h>
46#include "ihevc_typedefs.h"
47#include "ihevc_macros.h"
48#include "ihevc_platform_macros.h"
49#include "ihevc_func_selector.h"
50#include "ihevc_deblk.h"
51#include "ihevc_deblk_tables.h"
52#include "ihevc_debug.h"
53
54
55/**
56*******************************************************************************
57*
58* @brief
59*       Decision process and filtering for the luma block vertical edge.
60*
61* @par Description:
62*     The decision process for the luma block vertical edge is  carried out and
63*     an appropriate filter is applied. The  boundary filter strength, bs should
64*     be greater than 0.  The pcm flags and the transquant bypass flags should
65*     be  taken care of by the calling function.
66*
67* @param[in] pu1_src
68*  Pointer to the src sample q(0,0)
69*
70* @param[in] src_strd
71*  Source stride
72*
73* @param[in] bs
74*  Boundary filter strength of q(0,0)
75*
76* @param[in] quant_param_p
77*  quantization parameter of p block
78*
79* @param[in] quant_param_q
80*  quantization parameter of p block
81*
82* @param[in] beta_offset_div2
83*
84*
85* @param[in] tc_offset_div2
86*
87*
88* @param[in] filter_flag_p
89*  flag whether to filter the p block
90*
91* @param[in] filter_flag_q
92*  flag whether to filter the q block
93*
94* @returns
95*
96* @remarks
97*  None
98*
99*******************************************************************************
100*/
101
102void ihevc_deblk_luma_vert(UWORD8 *pu1_src,
103                           WORD32 src_strd,
104                           WORD32 bs,
105                           WORD32 quant_param_p,
106                           WORD32 quant_param_q,
107                           WORD32 beta_offset_div2,
108                           WORD32 tc_offset_div2,
109                           WORD32 filter_flag_p,
110                           WORD32 filter_flag_q)
111{
112    WORD32 qp_luma, beta_indx, tc_indx;
113    WORD32 beta, tc;
114    WORD32 dp0, dp3, dq0, dq3, d0, d3, dp, dq, d;
115    WORD32 d_sam0, d_sam3;
116    WORD32 de, dep, deq;
117    WORD32 row;
118    WORD32 tmp_p0, tmp_p1, tmp_p2, tmp_q0, tmp_q1, tmp_q2;
119    WORD32 delta, delta_p, delta_q;
120
121    ASSERT((bs > 0) && (bs <= 3));
122    ASSERT(filter_flag_p || filter_flag_q);
123
124    qp_luma = (quant_param_p + quant_param_q + 1) >> 1;
125    beta_indx = CLIP3(qp_luma + (beta_offset_div2 << 1), 0, 51);
126
127    /* BS based on implementation can take value 3 if it is intra/inter egde          */
128    /* based on BS, tc index is calcuated by adding 2 * ( bs - 1) to QP and tc_offset */
129    /* for BS = 1 adding factor is (0*2), BS = 2 or 3 adding factor is (1*2)          */
130    /* the above desired functionallity is achieved by doing (2*(bs>>1))              */
131
132    tc_indx = CLIP3(qp_luma + (2 * (bs >> 1)) + (tc_offset_div2 << 1), 0, 53);
133
134    beta = gai4_ihevc_beta_table[beta_indx];
135    tc = gai4_ihevc_tc_table[tc_indx];
136    if(0 == tc)
137    {
138        return;
139    }
140
141    dq0 = ABS(pu1_src[2] - 2 * pu1_src[1] + pu1_src[0]);
142    dq3 = ABS(pu1_src[3 * src_strd + 2] - 2 * pu1_src[3 * src_strd + 1]
143                    + pu1_src[3 * src_strd + 0]);
144    dp0 = ABS(pu1_src[-3] - 2 * pu1_src[-2] + pu1_src[-1]);
145    dp3 = ABS(pu1_src[3 * src_strd - 3] - 2 * pu1_src[3 * src_strd - 2]
146                    + pu1_src[3 * src_strd - 1]);
147
148    d0 = dp0 + dq0;
149    d3 = dp3 + dq3;
150
151    dp = dp0 + dp3;
152    dq = dq0 + dq3;
153
154    d = d0 + d3;
155
156    de = 0;
157    dep = 0;
158    deq = 0;
159
160    if(d < beta)
161    {
162        d_sam0 = 0;
163        if((2 * d0 < (beta >> 2))
164                        && (ABS(pu1_src[3] - pu1_src[0]) + ABS(pu1_src[-1] - pu1_src[-4])
165                                        < (beta >> 3))
166                        && ABS(pu1_src[0] - pu1_src[-1]) < ((5 * tc + 1) >> 1))
167        {
168            d_sam0 = 1;
169        }
170
171        pu1_src += 3 * src_strd;
172        d_sam3 = 0;
173        if((2 * d3 < (beta >> 2))
174                        && (ABS(pu1_src[3] - pu1_src[0]) + ABS(pu1_src[-1] - pu1_src[-4])
175                                        < (beta >> 3))
176                        && ABS(pu1_src[0] - pu1_src[-1]) < ((5 * tc + 1) >> 1))
177        {
178            d_sam3 = 1;
179        }
180        pu1_src -= 3 * src_strd;
181
182        de = (d_sam0 == 1 && d_sam3 == 1) ? 2 : 1;
183        dep = (dp < (beta + (beta >> 1)) >> 3) ? 1 : 0;
184        deq = (dq < (beta + (beta >> 1)) >> 3) ? 1 : 0;
185        if(tc <= 1)
186        {
187            dep = 0;
188            deq = 0;
189        }
190    }
191
192    if(de != 0)
193    {
194        for(row = 0; row < 4; row++)
195        {
196            tmp_p0 = pu1_src[-1];
197            tmp_p1 = pu1_src[-2];
198            tmp_p2 = pu1_src[-3];
199
200            tmp_q0 = pu1_src[0];
201            tmp_q1 = pu1_src[1];
202            tmp_q2 = pu1_src[2];
203
204            if(de == 2)
205            {
206                tmp_q0 = CLIP3((pu1_src[2] + 2 * pu1_src[1] +
207                                2 * pu1_src[0] + 2 * pu1_src[-1] +
208                                pu1_src[-2] + 4) >> 3,
209                                pu1_src[0] - 2 * tc,
210                                pu1_src[0] + 2 * tc);
211
212                tmp_q1 = CLIP3((pu1_src[2] + pu1_src[1] + pu1_src[0] +
213                                pu1_src[-1] + 2) >> 2,
214                                pu1_src[1] - 2 * tc,
215                                pu1_src[1] + 2 * tc);
216
217                tmp_q2 = CLIP3((2 * pu1_src[3] + 3 * pu1_src[2] +
218                                pu1_src[1] + pu1_src[0] +
219                                pu1_src[-1] + 4) >> 3,
220                                pu1_src[2] - 2 * tc,
221                                pu1_src[2] + 2 * tc);
222
223                tmp_p0 = CLIP3((pu1_src[1] + 2 * pu1_src[0] +
224                                2 * pu1_src[-1] + 2 * pu1_src[-2] +
225                                pu1_src[-3] + 4) >> 3,
226                                pu1_src[-1] - 2 * tc,
227                                pu1_src[-1] + 2 * tc);
228
229                tmp_p1 = CLIP3((pu1_src[0] + pu1_src[-1] +
230                                pu1_src[-2] + pu1_src[-3] + 2) >> 2,
231                                pu1_src[-2] - 2 * tc,
232                                pu1_src[-2] + 2 * tc);
233
234                tmp_p2 = CLIP3((pu1_src[0] + pu1_src[-1] +
235                                pu1_src[-2] + 3 * pu1_src[-3] +
236                                2 * pu1_src[-4] + 4) >> 3,
237                                pu1_src[-3] - 2 * tc,
238                                pu1_src[-3] + 2 * tc);
239            }
240            else
241            {
242                delta = (9 * (pu1_src[0] - pu1_src[-1]) -
243                                3 * (pu1_src[1] - pu1_src[-2]) + 8) >> 4;
244                if(ABS(delta) < 10 * tc)
245                {
246                    delta = CLIP3(delta, -tc, tc);
247
248                    tmp_p0 = CLIP_U8(pu1_src[-1] + delta);
249                    tmp_q0 = CLIP_U8(pu1_src[0] - delta);
250
251                    if(dep == 1)
252                    {
253                        delta_p = CLIP3((((pu1_src[-3] + pu1_src[-1] + 1) >> 1)
254                                        - pu1_src[-2] + delta) >> 1,
255                                        -(tc >> 1),
256                                        (tc >> 1));
257                        tmp_p1 = CLIP_U8(pu1_src[-2] + delta_p);
258                    }
259
260                    if(deq == 1)
261                    {
262                        delta_q = CLIP3((((pu1_src[2] + pu1_src[0] + 1) >> 1)
263                                        - pu1_src[1] - delta) >> 1,
264                                        -(tc >> 1),
265                                        (tc >> 1));
266                        tmp_q1 = CLIP_U8(pu1_src[1] + delta_q);
267                    }
268                }
269            }
270
271            if(filter_flag_p != 0)
272            {
273                pu1_src[-3] = tmp_p2;
274                pu1_src[-2] = tmp_p1;
275                pu1_src[-1] = tmp_p0;
276            }
277
278            if(filter_flag_q != 0)
279            {
280                pu1_src[0] = tmp_q0;
281                pu1_src[1] = tmp_q1;
282                pu1_src[2] = tmp_q2;
283            }
284
285            pu1_src += src_strd;
286        }
287    }
288
289}
290
291
292/**
293*******************************************************************************
294*
295* @brief
296*       Decision process and filtering for the luma block vertical edge for high bit depth.
297*
298* @par Description:
299*     The decision process for the luma block vertical edge is  carried out and
300*     an appropriate filter is applied. The  boundary filter strength, bs should
301*     be greater than 0.  The pcm flags and the transquant bypass flags should
302*     be  taken care of by the calling function.
303*
304* @param[in] pu2_src
305*  Pointer to the src sample q(0,0)
306*
307* @param[in] src_strd
308*  Source stride
309*
310* @param[in] bs
311*  Boundary filter strength of q(0,0)
312*
313* @param[in] quant_param_p
314*  quantization parameter of p block
315*
316* @param[in] quant_param_q
317*  quantization parameter of p block
318*
319* @param[in] beta_offset_div2
320*
321*
322* @param[in] tc_offset_div2
323*
324*
325* @param[in] filter_flag_p
326*  flag whether to filter the p block
327*
328* @param[in] filter_flag_q
329*  flag whether to filter the q block
330*
331* @returns
332*
333* @remarks
334*  None
335*
336*******************************************************************************
337*/
338
339void ihevc_hbd_deblk_luma_vert(UWORD16 *pu2_src,
340                               WORD32 src_strd,
341                               WORD32 bs,
342                               WORD32 quant_param_p,
343                               WORD32 quant_param_q,
344                               WORD32 beta_offset_div2,
345                               WORD32 tc_offset_div2,
346                               WORD32 filter_flag_p,
347                               WORD32 filter_flag_q,
348                               UWORD8 bit_depth)
349{
350    WORD32 qp_luma, beta_indx, tc_indx;
351    WORD32 beta, tc;
352    WORD32 dp0, dp3, dq0, dq3, d0, d3, dp, dq, d;
353    WORD32 d_sam0, d_sam3;
354    WORD32 de, dep, deq;
355    WORD32 row;
356    WORD32 tmp_p0, tmp_p1, tmp_p2, tmp_q0, tmp_q1, tmp_q2;
357    WORD32 delta, delta_p, delta_q;
358
359    ASSERT((bs > 0) && (bs <= 3));
360    ASSERT(filter_flag_p || filter_flag_q);
361
362    qp_luma = (quant_param_p + quant_param_q + 1) >> 1;
363    beta_indx = CLIP3(qp_luma + (beta_offset_div2 << 1), 0, 51);
364
365    /* BS based on implementation can take value 3 if it is intra/inter egde          */
366    /* based on BS, tc index is calcuated by adding 2 * ( bs - 1) to QP and tc_offset */
367    /* for BS = 1 adding factor is (0*2), BS = 2 or 3 adding factor is (1*2)          */
368    /* the above desired functionallity is achieved by doing (2*(bs>>1))              */
369
370    tc_indx = CLIP3(qp_luma + (2 * (bs >> 1)) + (tc_offset_div2 << 1), 0, 53);
371
372    beta = gai4_ihevc_beta_table[beta_indx] * (1 << (bit_depth - 8));
373    tc = gai4_ihevc_tc_table[tc_indx] * (1 << (bit_depth - 8));
374    if(0 == tc)
375    {
376        return;
377    }
378
379    dq0 = ABS(pu2_src[2] - 2 * pu2_src[1] + pu2_src[0]);
380    dq3 = ABS(pu2_src[3 * src_strd + 2] - 2 * pu2_src[3 * src_strd + 1]
381                    + pu2_src[3 * src_strd + 0]);
382    dp0 = ABS(pu2_src[-3] - 2 * pu2_src[-2] + pu2_src[-1]);
383    dp3 = ABS(pu2_src[3 * src_strd - 3] - 2 * pu2_src[3 * src_strd - 2]
384                    + pu2_src[3 * src_strd - 1]);
385
386    d0 = dp0 + dq0;
387    d3 = dp3 + dq3;
388
389    dp = dp0 + dp3;
390    dq = dq0 + dq3;
391
392    d = d0 + d3;
393
394    de = 0;
395    dep = 0;
396    deq = 0;
397
398    if(d < beta)
399    {
400        d_sam0 = 0;
401        if((2 * d0 < (beta >> 2))
402                        && (ABS(pu2_src[3] - pu2_src[0]) + ABS(pu2_src[-1] - pu2_src[-4])
403                                        < (beta >> 3))
404                        && ABS(pu2_src[0] - pu2_src[-1]) < ((5 * tc + 1) >> 1))
405        {
406            d_sam0 = 1;
407        }
408
409        pu2_src += 3 * src_strd;
410        d_sam3 = 0;
411        if((2 * d3 < (beta >> 2))
412                        && (ABS(pu2_src[3] - pu2_src[0]) + ABS(pu2_src[-1] - pu2_src[-4])
413                                        < (beta >> 3))
414                        && ABS(pu2_src[0] - pu2_src[-1]) < ((5 * tc + 1) >> 1))
415        {
416            d_sam3 = 1;
417        }
418        pu2_src -= 3 * src_strd;
419
420        de = (d_sam0 == 1 && d_sam3 == 1) ? 2 : 1;
421        dep = (dp < (beta + (beta >> 1)) >> 3) ? 1 : 0;
422        deq = (dq < (beta + (beta >> 1)) >> 3) ? 1 : 0;
423        if(tc <= 1)
424        {
425            dep = 0;
426            deq = 0;
427        }
428    }
429
430    if(de != 0)
431    {
432        for(row = 0; row < 4; row++)
433        {
434            tmp_p0 = pu2_src[-1];
435            tmp_p1 = pu2_src[-2];
436            tmp_p2 = pu2_src[-3];
437
438            tmp_q0 = pu2_src[0];
439            tmp_q1 = pu2_src[1];
440            tmp_q2 = pu2_src[2];
441
442            if(de == 2)
443            {
444                tmp_q0 = CLIP3((pu2_src[2] + 2 * pu2_src[1] +
445                                2 * pu2_src[0] + 2 * pu2_src[-1] +
446                                pu2_src[-2] + 4) >> 3,
447                                pu2_src[0] - 2 * tc,
448                                pu2_src[0] + 2 * tc);
449
450                tmp_q1 = CLIP3((pu2_src[2] + pu2_src[1] + pu2_src[0] +
451                                pu2_src[-1] + 2) >> 2,
452                                pu2_src[1] - 2 * tc,
453                                pu2_src[1] + 2 * tc);
454
455                tmp_q2 = CLIP3((2 * pu2_src[3] + 3 * pu2_src[2] +
456                                pu2_src[1] + pu2_src[0] +
457                                pu2_src[-1] + 4) >> 3,
458                                pu2_src[2] - 2 * tc,
459                                pu2_src[2] + 2 * tc);
460
461                tmp_p0 = CLIP3((pu2_src[1] + 2 * pu2_src[0] +
462                                2 * pu2_src[-1] + 2 * pu2_src[-2] +
463                                pu2_src[-3] + 4) >> 3,
464                                pu2_src[-1] - 2 * tc,
465                                pu2_src[-1] + 2 * tc);
466
467                tmp_p1 = CLIP3((pu2_src[0] + pu2_src[-1] +
468                                pu2_src[-2] + pu2_src[-3] + 2) >> 2,
469                                pu2_src[-2] - 2 * tc,
470                                pu2_src[-2] + 2 * tc);
471
472                tmp_p2 = CLIP3((pu2_src[0] + pu2_src[-1] +
473                                pu2_src[-2] + 3 * pu2_src[-3] +
474                                2 * pu2_src[-4] + 4) >> 3,
475                                pu2_src[-3] - 2 * tc,
476                                pu2_src[-3] + 2 * tc);
477            }
478            else
479            {
480                delta = (9 * (pu2_src[0] - pu2_src[-1]) -
481                                3 * (pu2_src[1] - pu2_src[-2]) + 8) >> 4;
482                if(ABS(delta) < 10 * tc)
483                {
484                    delta = CLIP3(delta, -tc, tc);
485
486                    tmp_p0 = CLIP3(pu2_src[-1] + delta, 0, ((1 << bit_depth) - 1));
487                    tmp_q0 = CLIP3(pu2_src[0] - delta, 0, ((1 << bit_depth) - 1));
488                    if(dep == 1)
489                    {
490                        delta_p = CLIP3((((pu2_src[-3] + pu2_src[-1] + 1) >> 1)
491                                        - pu2_src[-2] + delta) >> 1,
492                                        -(tc >> 1),
493                                        (tc >> 1));
494                        tmp_p1 = CLIP3(pu2_src[-2] + delta_p, 0, ((1 << bit_depth) - 1));
495                    }
496
497                    if(deq == 1)
498                    {
499                        delta_q = CLIP3((((pu2_src[2] + pu2_src[0] + 1) >> 1)
500                                        - pu2_src[1] - delta) >> 1,
501                                        -(tc >> 1),
502                                        (tc >> 1));
503                        tmp_q1 = CLIP3(pu2_src[1] + delta_q, 0, ((1 << bit_depth) - 1));
504                    }
505                }
506            }
507
508            if(filter_flag_p != 0)
509            {
510                pu2_src[-3] = tmp_p2;
511                pu2_src[-2] = tmp_p1;
512                pu2_src[-1] = tmp_p0;
513            }
514
515            if(filter_flag_q != 0)
516            {
517                pu2_src[0] = tmp_q0;
518                pu2_src[1] = tmp_q1;
519                pu2_src[2] = tmp_q2;
520            }
521
522            pu2_src += src_strd;
523        }
524    }
525
526}
527
528
529/**
530*******************************************************************************
531*
532* @brief
533*
534*     Decision process and filtering for the luma block horizontal edge
535*
536* @par Description:
537*     The decision process for the luma block horizontal edge  is carried out
538*    and an appropriate filter is applied. The  boundary filter strength, bs
539*    should be greater than 0.  The pcm flags and the transquant bypass flags
540*    should be  taken care of by the calling function.
541*
542* @param[in] pu1_src
543*  Pointer to the src sample q(0,0)
544*
545* @param[in] src_strd
546*  Source stride
547*
548* @param[in] bs
549*  Boundary filter strength of q(0,0)
550*
551* @param[in] quant_param_p
552*  quantization parameter of p block
553*
554* @param[in] quant_param_q
555*  quantization parameter of p block
556*
557* @param[in] beta_offset_div2
558*
559*
560* @param[in] tc_offset_div2
561*
562*
563* @param[in] filter_flag_p
564*  flag whether to filter the p block
565*
566* @param[in] filter_flag_q
567*  flag whether to filter the q block
568*
569* @returns
570*
571* @remarks
572*  None
573*
574*******************************************************************************
575*/
576
577void ihevc_deblk_luma_horz(UWORD8 *pu1_src,
578                           WORD32 src_strd,
579                           WORD32 bs,
580                           WORD32 quant_param_p,
581                           WORD32 quant_param_q,
582                           WORD32 beta_offset_div2,
583                           WORD32 tc_offset_div2,
584                           WORD32 filter_flag_p,
585                           WORD32 filter_flag_q)
586{
587    WORD32 qp_luma, beta_indx, tc_indx;
588    WORD32 beta, tc;
589    WORD32 dp0, dp3, dq0, dq3, d0, d3, dp, dq, d;
590    WORD32 d_sam0, d_sam3;
591    WORD32 de, dep, deq;
592    WORD32 col;
593    WORD32 tmp_p0, tmp_p1, tmp_p2, tmp_q0, tmp_q1, tmp_q2;
594    WORD32 delta, delta_p, delta_q;
595
596    ASSERT((bs > 0));
597    ASSERT(filter_flag_p || filter_flag_q);
598
599    qp_luma = (quant_param_p + quant_param_q + 1) >> 1;
600    beta_indx = CLIP3(qp_luma + (beta_offset_div2 << 1), 0, 51);
601
602    /* BS based on implementation can take value 3 if it is intra/inter egde          */
603    /* based on BS, tc index is calcuated by adding 2 * ( bs - 1) to QP and tc_offset */
604    /* for BS = 1 adding factor is (0*2), BS = 2 or 3 adding factor is (1*2)          */
605    /* the above desired functionallity is achieved by doing (2*(bs>>1))              */
606
607    tc_indx = CLIP3(qp_luma + 2 * (bs >> 1) + (tc_offset_div2 << 1), 0, 53);
608
609    beta = gai4_ihevc_beta_table[beta_indx];
610    tc = gai4_ihevc_tc_table[tc_indx];
611    if(0 == tc)
612    {
613        return;
614    }
615
616    dq0 = ABS(pu1_src[2 * src_strd] - 2 * pu1_src[1 * src_strd] +
617                    pu1_src[0 * src_strd]);
618
619    dq3 = ABS(pu1_src[3 + 2 * src_strd] - 2 * pu1_src[3 + 1 * src_strd] +
620                    pu1_src[3 + 0 * src_strd]);
621
622    dp0 = ABS(pu1_src[-3 * src_strd] - 2 * pu1_src[-2 * src_strd] +
623                    pu1_src[-1 * src_strd]);
624
625    dp3 = ABS(pu1_src[3 - 3 * src_strd] - 2 * pu1_src[3 - 2 * src_strd] +
626                    pu1_src[3 - 1 * src_strd]);
627
628    d0 = dp0 + dq0;
629    d3 = dp3 + dq3;
630
631    dp = dp0 + dp3;
632    dq = dq0 + dq3;
633
634    d = d0 + d3;
635
636    de = 0;
637    dep = 0;
638    deq = 0;
639
640    if(d < beta)
641    {
642        d_sam0 = 0;
643        if((2 * d0 < (beta >> 2))
644                        && (ABS(pu1_src[3 * src_strd] - pu1_src[0 * src_strd]) +
645                                        ABS(pu1_src[-1 * src_strd] - pu1_src[-4 * src_strd])
646                                        < (beta >> 3))
647                        && ABS(pu1_src[0 * src_strd] - pu1_src[-1 * src_strd])
648                        < ((5 * tc + 1) >> 1))
649        {
650            d_sam0 = 1;
651        }
652
653        pu1_src += 3;
654        d_sam3 = 0;
655        if((2 * d3 < (beta >> 2))
656                        && (ABS(pu1_src[3 * src_strd] - pu1_src[0 * src_strd]) +
657                                        ABS(pu1_src[-1 * src_strd] - pu1_src[-4 * src_strd])
658                                        < (beta >> 3))
659                        && ABS(pu1_src[0 * src_strd] - pu1_src[-1 * src_strd])
660                        < ((5 * tc + 1) >> 1))
661        {
662            d_sam3 = 1;
663        }
664        pu1_src -= 3;
665
666        de = (d_sam0 == 1 && d_sam3 == 1) ? 2 : 1;
667        dep = (dp < ((beta + (beta >> 1)) >> 3)) ? 1 : 0;
668        deq = (dq < ((beta + (beta >> 1)) >> 3)) ? 1 : 0;
669        if(tc <= 1)
670        {
671            dep = 0;
672            deq = 0;
673        }
674    }
675
676    if(de != 0)
677    {
678        for(col = 0; col < 4; col++)
679        {
680            tmp_p0 = pu1_src[-1 * src_strd];
681            tmp_p1 = pu1_src[-2 * src_strd];
682            tmp_p2 = pu1_src[-3 * src_strd];
683
684            tmp_q0 = pu1_src[0 * src_strd];
685            tmp_q1 = pu1_src[1 * src_strd];
686            tmp_q2 = pu1_src[2 * src_strd];
687            if(de == 2)
688            {
689                tmp_q0 = CLIP3((pu1_src[2 * src_strd] +
690                                2 * pu1_src[1 * src_strd] +
691                                2 * pu1_src[0 * src_strd] +
692                                2 * pu1_src[-1 * src_strd] +
693                                pu1_src[-2 * src_strd] + 4) >> 3,
694                                pu1_src[0 * src_strd] - 2 * tc,
695                                pu1_src[0 * src_strd] + 2 * tc);
696
697                tmp_q1 = CLIP3((pu1_src[2 * src_strd] +
698                                pu1_src[1 * src_strd] +
699                                pu1_src[0 * src_strd] +
700                                pu1_src[-1 * src_strd] + 2) >> 2,
701                                pu1_src[1 * src_strd] - 2 * tc,
702                                pu1_src[1 * src_strd] + 2 * tc);
703
704                tmp_q2 = CLIP3((2 * pu1_src[3 * src_strd] +
705                                3 * pu1_src[2 * src_strd] +
706                                pu1_src[1 * src_strd] +
707                                pu1_src[0 * src_strd] +
708                                pu1_src[-1 * src_strd] + 4) >> 3,
709                                pu1_src[2 * src_strd] - 2 * tc,
710                                pu1_src[2 * src_strd] + 2 * tc);
711
712                tmp_p0 = CLIP3((pu1_src[1 * src_strd] +
713                                2 * pu1_src[0 * src_strd] +
714                                2 * pu1_src[-1 * src_strd] +
715                                2 * pu1_src[-2 * src_strd] +
716                                pu1_src[-3 * src_strd] + 4) >> 3,
717                                pu1_src[-1 * src_strd] - 2 * tc,
718                                pu1_src[-1 * src_strd] + 2 * tc);
719
720                tmp_p1 = CLIP3((pu1_src[0 * src_strd] +
721                                pu1_src[-1 * src_strd] +
722                                pu1_src[-2 * src_strd] +
723                                pu1_src[-3 * src_strd] + 2) >> 2,
724                                pu1_src[-2 * src_strd] - 2 * tc,
725                                pu1_src[-2 * src_strd] + 2 * tc);
726
727                tmp_p2 = CLIP3((pu1_src[0 * src_strd] +
728                                pu1_src[-1 * src_strd] +
729                                pu1_src[-2 * src_strd] +
730                                3 * pu1_src[-3 * src_strd] +
731                                2 * pu1_src[-4 * src_strd] + 4) >> 3,
732                                pu1_src[-3 * src_strd] - 2 * tc,
733                                pu1_src[-3 * src_strd] + 2 * tc);
734            }
735            else
736            {
737                delta = (9 * (pu1_src[0 * src_strd] - pu1_src[-1 * src_strd]) -
738                                3 * (pu1_src[1 * src_strd] - pu1_src[-2 * src_strd]) +
739                                8) >> 4;
740                if(ABS(delta) < 10 * tc)
741                {
742                    delta = CLIP3(delta, -tc, tc);
743
744                    tmp_p0 = CLIP_U8(pu1_src[-1 * src_strd] + delta);
745                    tmp_q0 = CLIP_U8(pu1_src[0 * src_strd] - delta);
746
747                    if(dep == 1)
748                    {
749                        delta_p = CLIP3((((pu1_src[-3 * src_strd] +
750                                        pu1_src[-1 * src_strd] + 1) >> 1) -
751                                        pu1_src[-2 * src_strd] + delta) >> 1,
752                                        -(tc >> 1),
753                                        (tc >> 1));
754                        tmp_p1 = CLIP_U8(pu1_src[-2 * src_strd] + delta_p);
755                    }
756
757                    if(deq == 1)
758                    {
759                        delta_q = CLIP3((((pu1_src[2 * src_strd] +
760                                        pu1_src[0 * src_strd] + 1) >> 1) -
761                                        pu1_src[1 * src_strd] - delta) >> 1,
762                                        -(tc >> 1),
763                                        (tc >> 1));
764                        tmp_q1 = CLIP_U8(pu1_src[1 * src_strd] + delta_q);
765                    }
766                }
767            }
768
769            if(filter_flag_p != 0)
770            {
771                pu1_src[-3 * src_strd] = tmp_p2;
772                pu1_src[-2 * src_strd] = tmp_p1;
773                pu1_src[-1 * src_strd] = tmp_p0;
774            }
775
776            if(filter_flag_q != 0)
777            {
778                pu1_src[0 * src_strd] = tmp_q0;
779                pu1_src[1 * src_strd] = tmp_q1;
780                pu1_src[2 * src_strd] = tmp_q2;
781            }
782
783            pu1_src += 1;
784        }
785    }
786
787}
788
789
790/**
791*******************************************************************************
792*
793* @brief
794*
795*     Decision process and filtering for the luma block horizontal edge for high bit depth
796*
797* @par Description:
798*     The decision process for the luma block horizontal edge  is carried out
799*    and an appropriate filter is applied. The  boundary filter strength, bs
800*    should be greater than 0.  The pcm flags and the transquant bypass flags
801*    should be  taken care of by the calling function.
802*
* @param[in] pu2_src
804*  Pointer to the src sample q(0,0)
805*
806* @param[in] src_strd
807*  Source stride
808*
809* @param[in] bs
810*  Boundary filter strength of q(0,0)
811*
812* @param[in] quant_param_p
813*  quantization parameter of p block
814*
* @param[in] quant_param_q
*  quantization parameter of q block
817*
* @param[in] beta_offset_div2
*  Half of the beta offset; it is doubled and added to the averaged QP to form the beta table index
820*
* @param[in] tc_offset_div2
*  Half of the tc offset; it is doubled and added to the averaged QP to form the tc table index
823*
824* @param[in] filter_flag_p
825*  flag whether to filter the p block
826*
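* @param[in] filter_flag_q
*  flag whether to filter the q block
*
* @param[in] bit_depth
*  bit depth of the samples; the table values of beta and tc are scaled by (1 << (bit_depth - 8))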
829*
830* @returns
831*
832* @remarks
833*  None
834*
835*******************************************************************************
836*/
837
838void ihevc_hbd_deblk_luma_horz(UWORD16 *pu2_src,
839                               WORD32 src_strd,
840                               WORD32 bs,
841                               WORD32 quant_param_p,
842                               WORD32 quant_param_q,
843                               WORD32 beta_offset_div2,
844                               WORD32 tc_offset_div2,
845                               WORD32 filter_flag_p,
846                               WORD32 filter_flag_q,
847                               UWORD8 bit_depth)
848{
849    WORD32 qp_luma, beta_indx, tc_indx;
850    WORD32 beta, tc;
851    WORD32 dp0, dp3, dq0, dq3, d0, d3, dp, dq, d;
852    WORD32 d_sam0, d_sam3;
853    WORD32 de, dep, deq;
854    WORD32 col;
855    WORD32 tmp_p0, tmp_p1, tmp_p2, tmp_q0, tmp_q1, tmp_q2;
856    WORD32 delta, delta_p, delta_q;
857
858    ASSERT((bs > 0));
859    ASSERT(filter_flag_p || filter_flag_q);
860
861    qp_luma = (quant_param_p + quant_param_q + 1) >> 1;
862    beta_indx = CLIP3(qp_luma + (beta_offset_div2 << 1), 0, 51);
863
864    /* BS based on implementation can take value 3 if it is intra/inter egde          */
865    /* based on BS, tc index is calcuated by adding 2 * ( bs - 1) to QP and tc_offset */
866    /* for BS = 1 adding factor is (0*2), BS = 2 or 3 adding factor is (1*2)          */
867    /* the above desired functionallity is achieved by doing (2*(bs>>1))              */
868
869    tc_indx = CLIP3(qp_luma + 2 * (bs >> 1) + (tc_offset_div2 << 1), 0, 53);
870
871    beta = gai4_ihevc_beta_table[beta_indx] * (1 << (bit_depth - 8));
872    tc = gai4_ihevc_tc_table[tc_indx] * (1 << (bit_depth - 8));
873    if(0 == tc)
874    {
875        return;
876    }
877
878    dq0 = ABS(pu2_src[2 * src_strd] - 2 * pu2_src[1 * src_strd] +
879                    pu2_src[0 * src_strd]);
880
881    dq3 = ABS(pu2_src[3 + 2 * src_strd] - 2 * pu2_src[3 + 1 * src_strd] +
882                    pu2_src[3 + 0 * src_strd]);
883
884    dp0 = ABS(pu2_src[-3 * src_strd] - 2 * pu2_src[-2 * src_strd] +
885                    pu2_src[-1 * src_strd]);
886
887    dp3 = ABS(pu2_src[3 - 3 * src_strd] - 2 * pu2_src[3 - 2 * src_strd] +
888                    pu2_src[3 - 1 * src_strd]);
889
890    d0 = dp0 + dq0;
891    d3 = dp3 + dq3;
892
893    dp = dp0 + dp3;
894    dq = dq0 + dq3;
895
896    d = d0 + d3;
897
898    de = 0;
899    dep = 0;
900    deq = 0;
901
902    if(d < beta)
903    {
904        d_sam0 = 0;
905        if((2 * d0 < (beta >> 2))
906                        && (ABS(pu2_src[3 * src_strd] - pu2_src[0 * src_strd]) +
907                                        ABS(pu2_src[-1 * src_strd] - pu2_src[-4 * src_strd])
908                                        < (beta >> 3))
909                        && ABS(pu2_src[0 * src_strd] - pu2_src[-1 * src_strd])
910                        < ((5 * tc + 1) >> 1))
911        {
912            d_sam0 = 1;
913        }
914
915        pu2_src += 3;
916        d_sam3 = 0;
917        if((2 * d3 < (beta >> 2))
918                        && (ABS(pu2_src[3 * src_strd] - pu2_src[0 * src_strd]) +
919                                        ABS(pu2_src[-1 * src_strd] - pu2_src[-4 * src_strd])
920                                        < (beta >> 3))
921                        && ABS(pu2_src[0 * src_strd] - pu2_src[-1 * src_strd])
922                        < ((5 * tc + 1) >> 1))
923        {
924            d_sam3 = 1;
925        }
926        pu2_src -= 3;
927
928        de = (d_sam0 == 1 && d_sam3 == 1) ? 2 : 1;
929        dep = (dp < ((beta + (beta >> 1)) >> 3)) ? 1 : 0;
930        deq = (dq < ((beta + (beta >> 1)) >> 3)) ? 1 : 0;
931        if(tc <= 1)
932        {
933            dep = 0;
934            deq = 0;
935        }
936    }
937
938    if(de != 0)
939    {
940        for(col = 0; col < 4; col++)
941        {
942            tmp_p0 = pu2_src[-1 * src_strd];
943            tmp_p1 = pu2_src[-2 * src_strd];
944            tmp_p2 = pu2_src[-3 * src_strd];
945
946            tmp_q0 = pu2_src[0 * src_strd];
947            tmp_q1 = pu2_src[1 * src_strd];
948            tmp_q2 = pu2_src[2 * src_strd];
949            if(de == 2)
950            {
951                tmp_q0 = CLIP3((pu2_src[2 * src_strd] +
952                                2 * pu2_src[1 * src_strd] +
953                                2 * pu2_src[0 * src_strd] +
954                                2 * pu2_src[-1 * src_strd] +
955                                pu2_src[-2 * src_strd] + 4) >> 3,
956                                pu2_src[0 * src_strd] - 2 * tc,
957                                pu2_src[0 * src_strd] + 2 * tc);
958
959                tmp_q1 = CLIP3((pu2_src[2 * src_strd] +
960                                pu2_src[1 * src_strd] +
961                                pu2_src[0 * src_strd] +
962                                pu2_src[-1 * src_strd] + 2) >> 2,
963                                pu2_src[1 * src_strd] - 2 * tc,
964                                pu2_src[1 * src_strd] + 2 * tc);
965
966                tmp_q2 = CLIP3((2 * pu2_src[3 * src_strd] +
967                                3 * pu2_src[2 * src_strd] +
968                                pu2_src[1 * src_strd] +
969                                pu2_src[0 * src_strd] +
970                                pu2_src[-1 * src_strd] + 4) >> 3,
971                                pu2_src[2 * src_strd] - 2 * tc,
972                                pu2_src[2 * src_strd] + 2 * tc);
973
974                tmp_p0 = CLIP3((pu2_src[1 * src_strd] +
975                                2 * pu2_src[0 * src_strd] +
976                                2 * pu2_src[-1 * src_strd] +
977                                2 * pu2_src[-2 * src_strd] +
978                                pu2_src[-3 * src_strd] + 4) >> 3,
979                                pu2_src[-1 * src_strd] - 2 * tc,
980                                pu2_src[-1 * src_strd] + 2 * tc);
981
982                tmp_p1 = CLIP3((pu2_src[0 * src_strd] +
983                                pu2_src[-1 * src_strd] +
984                                pu2_src[-2 * src_strd] +
985                                pu2_src[-3 * src_strd] + 2) >> 2,
986                                pu2_src[-2 * src_strd] - 2 * tc,
987                                pu2_src[-2 * src_strd] + 2 * tc);
988
989                tmp_p2 = CLIP3((pu2_src[0 * src_strd] +
990                                pu2_src[-1 * src_strd] +
991                                pu2_src[-2 * src_strd] +
992                                3 * pu2_src[-3 * src_strd] +
993                                2 * pu2_src[-4 * src_strd] + 4) >> 3,
994                                pu2_src[-3 * src_strd] - 2 * tc,
995                                pu2_src[-3 * src_strd] + 2 * tc);
996            }
997            else
998            {
999                delta = (9 * (pu2_src[0 * src_strd] - pu2_src[-1 * src_strd]) -
1000                                3 * (pu2_src[1 * src_strd] - pu2_src[-2 * src_strd]) +
1001                                8) >> 4;
1002                if(ABS(delta) < 10 * tc)
1003                {
1004                    delta = CLIP3(delta, -tc, tc);
1005                    tmp_p0 = CLIP3(pu2_src[-1 * src_strd] + delta, 0, ((1 << bit_depth) - 1));
1006                    tmp_q0 = CLIP3(pu2_src[0 * src_strd] - delta, 0, ((1 << bit_depth) - 1));
1007                    if(dep == 1)
1008                    {
1009                        delta_p = CLIP3((((pu2_src[-3 * src_strd] +
1010                                        pu2_src[-1 * src_strd] + 1) >> 1) -
1011                                        pu2_src[-2 * src_strd] + delta) >> 1,
1012                                        -(tc >> 1),
1013                                        (tc >> 1));
1014                        tmp_p1 = CLIP3(pu2_src[-2 * src_strd] + delta_p, 0, ((1 << bit_depth) - 1));
1015                    }
1016
1017                    if(deq == 1)
1018                    {
1019                        delta_q = CLIP3((((pu2_src[2 * src_strd] +
1020                                        pu2_src[0 * src_strd] + 1) >> 1) -
1021                                        pu2_src[1 * src_strd] - delta) >> 1,
1022                                        -(tc >> 1),
1023                                        (tc >> 1));
1024                        tmp_q1 = CLIP3(pu2_src[1 * src_strd] + delta_q, 0, ((1 << bit_depth) - 1));
1025                    }
1026                }
1027            }
1028
1029            if(filter_flag_p != 0)
1030            {
1031                pu2_src[-3 * src_strd] = tmp_p2;
1032                pu2_src[-2 * src_strd] = tmp_p1;
1033                pu2_src[-1 * src_strd] = tmp_p0;
1034            }
1035
1036            if(filter_flag_q != 0)
1037            {
1038                pu2_src[0 * src_strd] = tmp_q0;
1039                pu2_src[1 * src_strd] = tmp_q1;
1040                pu2_src[2 * src_strd] = tmp_q2;
1041            }
1042
1043            pu2_src += 1;
1044        }
1045    }
1046}
1047
1048
1049/**
1050*******************************************************************************
1051*
1052* @brief
1053*     Filtering for the chroma block vertical edge.
1054*
1055* @par Description:
1056*     Filter for chroma vertical edge. The  boundary filter strength, bs
1057*    should be greater than 1.  The pcm flags and the transquant bypass flags
1058*    should be  taken care of by the calling function.
1059*
1060* @param[in] pu1_src
1061*  Pointer to the src sample q(0,0)
1062*
1063* @param[in] src_strd
1064*  Source stride
1065*
1066* @param[in] bs
1067*  Boundary filter strength of q(0,0)
1068*
1069* @param[in] quant_param_p
1070*  quantization parameter of p block
1071*
1072* @param[in] quant_param_q
1073*  quantization parameter of p block
1074*
1075* @param[in] beta_offset_div2
1076*
1077*
1078* @param[in] tc_offset_div2
1079*
1080*
1081* @param[in] filter_flag_p
1082*  flag whether to filter the p block
1083*
1084* @param[in] filter_flag_q
1085*  flag whether to filter the q block
1086*
1087* @returns
1088*
1089* @remarks
1090*  None
1091*
1092*******************************************************************************
1093*/
1094
1095void ihevc_deblk_chroma_vert(UWORD8 *pu1_src,
1096                             WORD32 src_strd,
1097                             WORD32 quant_param_p,
1098                             WORD32 quant_param_q,
1099                             WORD32 qp_offset_u,
1100                             WORD32 qp_offset_v,
1101                             WORD32 tc_offset_div2,
1102                             WORD32 filter_flag_p,
1103                             WORD32 filter_flag_q)
1104{
1105    WORD32 qp_indx_u, qp_chroma_u;
1106    WORD32 qp_indx_v, qp_chroma_v;
1107    WORD32 tc_indx_u, tc_u;
1108    WORD32 tc_indx_v, tc_v;
1109    WORD32 delta_u, tmp_p0_u, tmp_q0_u;
1110    WORD32 delta_v, tmp_p0_v, tmp_q0_v;
1111    WORD32 row;
1112
1113    ASSERT(filter_flag_p || filter_flag_q);
1114
1115    /* chroma processing is done only if BS is 2             */
1116    /* this function is assumed to be called only if BS is 2 */
1117    qp_indx_u = qp_offset_u + ((quant_param_p + quant_param_q + 1) >> 1);
1118    qp_chroma_u = qp_indx_u < 0 ? qp_indx_u : (qp_indx_u > 57 ? qp_indx_u - 6 : gai4_ihevc_qp_table[qp_indx_u]);
1119
1120    qp_indx_v = qp_offset_v + ((quant_param_p + quant_param_q + 1) >> 1);
1121    qp_chroma_v = qp_indx_v < 0 ? qp_indx_v : (qp_indx_v > 57 ? qp_indx_v - 6 : gai4_ihevc_qp_table[qp_indx_v]);
1122
1123    tc_indx_u = CLIP3(qp_chroma_u + 2 + (tc_offset_div2 << 1), 0, 53);
1124    tc_u = gai4_ihevc_tc_table[tc_indx_u];
1125
1126    tc_indx_v = CLIP3(qp_chroma_v + 2 + (tc_offset_div2 << 1), 0, 53);
1127    tc_v = gai4_ihevc_tc_table[tc_indx_v];
1128
1129    if(0 == tc_u && 0 == tc_v)
1130    {
1131        return;
1132    }
1133
1134    for(row = 0; row < 4; row++)
1135    {
1136        delta_u = CLIP3((((pu1_src[0] - pu1_src[-2]) << 2) +
1137                        pu1_src[-4] - pu1_src[2] + 4) >> 3,
1138                        -tc_u, tc_u);
1139
1140        tmp_p0_u = CLIP_U8(pu1_src[-2] + delta_u);
1141        tmp_q0_u = CLIP_U8(pu1_src[0] - delta_u);
1142
1143        delta_v = CLIP3((((pu1_src[1] - pu1_src[-1]) << 2) +
1144                        pu1_src[-3] - pu1_src[3] + 4) >> 3,
1145                        -tc_v, tc_v);
1146
1147        tmp_p0_v = CLIP_U8(pu1_src[-1] + delta_v);
1148        tmp_q0_v = CLIP_U8(pu1_src[1] - delta_v);
1149
1150        if(filter_flag_p != 0)
1151        {
1152            pu1_src[-2] = tmp_p0_u;
1153            pu1_src[-1] = tmp_p0_v;
1154        }
1155
1156        if(filter_flag_q != 0)
1157        {
1158            pu1_src[0] = tmp_q0_u;
1159            pu1_src[1] = tmp_q0_v;
1160        }
1161
1162        pu1_src += src_strd;
1163    }
1164
1165}
1166
1167
1168/**
1169*******************************************************************************
1170*
1171* @brief
1172*     Filtering for the chroma block vertical edge.
1173*
1174* @par Description:
1175*     Filter for chroma vertical edge. The  boundary filter strength, bs
1176*    should be greater than 1.  The pcm flags and the transquant bypass flags
1177*    should be  taken care of by the calling function.
1178*
1179* @param[in] pu2_src
1180*  Pointer to the src sample q(0,0)
1181*
1182* @param[in] src_strd
1183*  Source stride
1184*
1185* @param[in] bs
1186*  Boundary filter strength of q(0,0)
1187*
1188* @param[in] quant_param_p
1189*  quantization parameter of p block
1190*
1191* @param[in] quant_param_q
1192*  quantization parameter of p block
1193*
1194* @param[in] beta_offset_div2
1195*
1196*
1197* @param[in] tc_offset_div2
1198*
1199*
1200* @param[in] filter_flag_p
1201*  flag whether to filter the p block
1202*
1203* @param[in] filter_flag_q
1204*  flag whether to filter the q block
1205*
1206* @returns
1207*
1208* @remarks
1209*  None
1210*
1211*******************************************************************************
1212*/
1213
1214void ihevc_hbd_deblk_chroma_vert(UWORD16 *pu2_src,
1215                                 WORD32 src_strd,
1216                                 WORD32 quant_param_p,
1217                                 WORD32 quant_param_q,
1218                                 WORD32 qp_offset_u,
1219                                 WORD32 qp_offset_v,
1220                                 WORD32 tc_offset_div2,
1221                                 WORD32 filter_flag_p,
1222                                 WORD32 filter_flag_q,
1223                                 UWORD8 bit_depth)
1224{
1225    WORD32 qp_indx_u, qp_chroma_u;
1226    WORD32 qp_indx_v, qp_chroma_v;
1227    WORD32 tc_indx_u, tc_u;
1228    WORD32 tc_indx_v, tc_v;
1229    WORD32 delta_u, tmp_p0_u, tmp_q0_u;
1230    WORD32 delta_v, tmp_p0_v, tmp_q0_v;
1231    WORD32 row;
1232
1233    ASSERT(filter_flag_p || filter_flag_q);
1234
1235    /* chroma processing is done only if BS is 2             */
1236    /* this function is assumed to be called only if BS is 2 */
1237    qp_indx_u = qp_offset_u + ((quant_param_p + quant_param_q + 1) >> 1);
1238    qp_chroma_u = qp_indx_u < 0 ? qp_indx_u : (qp_indx_u > 57 ? qp_indx_u - 6 : gai4_ihevc_qp_table[qp_indx_u]);
1239
1240    qp_indx_v = qp_offset_v + ((quant_param_p + quant_param_q + 1) >> 1);
1241    qp_chroma_v = qp_indx_v < 0 ? qp_indx_v : (qp_indx_v > 57 ? qp_indx_v - 6 : gai4_ihevc_qp_table[qp_indx_v]);
1242
1243    tc_indx_u = CLIP3(qp_chroma_u + 2 + (tc_offset_div2 << 1), 0, 53);
1244    tc_u = gai4_ihevc_tc_table[tc_indx_u] * (1 << (bit_depth - 8));
1245
1246    tc_indx_v = CLIP3(qp_chroma_v + 2 + (tc_offset_div2 << 1), 0, 53);
1247    tc_v = gai4_ihevc_tc_table[tc_indx_v] * (1 << (bit_depth - 8));
1248
1249    if(0 == tc_u && 0 == tc_v)
1250    {
1251        return;
1252    }
1253
1254    for(row = 0; row < 4; row++)
1255    {
1256        delta_u = CLIP3((((pu2_src[0] - pu2_src[-2]) << 2) +
1257                        pu2_src[-4] - pu2_src[2] + 4) >> 3,
1258                        -tc_u, tc_u);
1259        tmp_p0_u = CLIP3(pu2_src[-2] + delta_u, 0, ((1 << bit_depth) - 1));
1260        tmp_q0_u = CLIP3(pu2_src[0] - delta_u, 0, ((1 << bit_depth) - 1));
1261
1262        delta_v = CLIP3((((pu2_src[1] - pu2_src[-1]) << 2) +
1263                        pu2_src[-3] - pu2_src[3] + 4) >> 3,
1264                        -tc_v, tc_v);
1265        tmp_p0_v = CLIP3(pu2_src[-1] + delta_v, 0, ((1 << bit_depth) - 1));
1266        tmp_q0_v = CLIP3(pu2_src[1] - delta_v, 0, ((1 << bit_depth) - 1));
1267        if(filter_flag_p != 0)
1268        {
1269            pu2_src[-2] = tmp_p0_u;
1270            pu2_src[-1] = tmp_p0_v;
1271        }
1272
1273        if(filter_flag_q != 0)
1274        {
1275            pu2_src[0] = tmp_q0_u;
1276            pu2_src[1] = tmp_q0_v;
1277        }
1278
1279        pu2_src += src_strd;
1280    }
1281
1282}
1283
1284
1285/**
1286*******************************************************************************
1287*
1288* @brief
1289*   Filtering for the chroma block horizontal edge.
1290*
1291* @par Description:
1292*     Filter for chroma horizontal edge. The  boundary filter strength, bs
1293*    should be greater than 1.  The pcm flags and the transquant bypass flags
1294*    should be  taken care of by the calling function.
1295*
1296* @param[in] pu1_src
1297*  Pointer to the src sample q(0,0)
1298*
1299* @param[in] src_strd
1300*  Source stride
1301*
1302* @param[in] bs
1303*  Boundary filter strength of q(0,0)
1304*
1305* @param[in] quant_param_p
1306*  quantization parameter of p block
1307*
1308* @param[in] quant_param_q
1309*  quantization parameter of p block
1310*
1311* @param[in] beta_offset_div2
1312*
1313*
1314* @param[in] tc_offset_div2
1315*
1316*
1317* @param[in] filter_flag_p
1318*  flag whether to filter the p block
1319*
1320* @param[in] filter_flag_q
1321*  flag whether to filter the q block
1322*
1323* @returns
1324*
1325* @remarks
1326*  None
1327*
1328*******************************************************************************
1329*/
1330
1331void ihevc_deblk_chroma_horz(UWORD8 *pu1_src,
1332                             WORD32 src_strd,
1333                             WORD32 quant_param_p,
1334                             WORD32 quant_param_q,
1335                             WORD32 qp_offset_u,
1336                             WORD32 qp_offset_v,
1337                             WORD32 tc_offset_div2,
1338                             WORD32 filter_flag_p,
1339                             WORD32 filter_flag_q)
1340{
1341    WORD32 qp_indx_u, qp_chroma_u;
1342    WORD32 qp_indx_v, qp_chroma_v;
1343    WORD32 tc_indx_u, tc_u;
1344    WORD32 tc_indx_v, tc_v;
1345    WORD32 tc;
1346
1347    WORD32 delta, tmp_p0, tmp_q0;
1348    WORD32 col;
1349
1350    ASSERT(filter_flag_p || filter_flag_q);
1351
1352    /* chroma processing is done only if BS is 2             */
1353    /* this function is assumed to be called only if BS is 2 */
1354    qp_indx_u = qp_offset_u + ((quant_param_p + quant_param_q + 1) >> 1);
1355    qp_chroma_u = qp_indx_u < 0 ? qp_indx_u : (qp_indx_u > 57 ? qp_indx_u - 6 : gai4_ihevc_qp_table[qp_indx_u]);
1356
1357    qp_indx_v = qp_offset_v + ((quant_param_p + quant_param_q + 1) >> 1);
1358    qp_chroma_v = qp_indx_v < 0 ? qp_indx_v : (qp_indx_v > 57 ? qp_indx_v - 6 : gai4_ihevc_qp_table[qp_indx_v]);
1359
1360    tc_indx_u = CLIP3(qp_chroma_u + 2 + (tc_offset_div2 << 1), 0, 53);
1361    tc_u = gai4_ihevc_tc_table[tc_indx_u];
1362
1363    tc_indx_v = CLIP3(qp_chroma_v + 2 + (tc_offset_div2 << 1), 0, 53);
1364    tc_v = gai4_ihevc_tc_table[tc_indx_v];
1365
1366    if(0 == tc_u && 0 == tc_v)
1367    {
1368        return;
1369    }
1370
1371    for(col = 0; col < 8; col++)
1372    {
1373        tc = (col & 1) ? tc_v : tc_u;
1374        delta = CLIP3((((pu1_src[0 * src_strd] -
1375                      pu1_src[-1 * src_strd]) << 2) +
1376                      pu1_src[-2 * src_strd] -
1377                      pu1_src[1 * src_strd] + 4) >> 3,
1378                      -tc, tc);
1379
1380        tmp_p0 = CLIP_U8(pu1_src[-1 * src_strd] + delta);
1381        tmp_q0 = CLIP_U8(pu1_src[0 * src_strd] - delta);
1382
1383        if(filter_flag_p != 0)
1384        {
1385            pu1_src[-1 * src_strd] = tmp_p0;
1386        }
1387
1388        if(filter_flag_q != 0)
1389        {
1390            pu1_src[0 * src_strd] = tmp_q0;
1391        }
1392
1393        pu1_src += 1;
1394    }
1395
1396}
1397
1398
1399/**
1400*******************************************************************************
1401*
1402* @brief
1403*   Filtering for the chroma block horizontal edge.
1404*
1405* @par Description:
1406*     Filter for chroma horizontal edge. The  boundary filter strength, bs
1407*    should be greater than 1.  The pcm flags and the transquant bypass flags
1408*    should be  taken care of by the calling function.
1409*
1410* @param[in] pu2_src
1411*  Pointer to the src sample q(0,0)
1412*
1413* @param[in] src_strd
1414*  Source stride
1415*
1416* @param[in] bs
1417*  Boundary filter strength of q(0,0)
1418*
1419* @param[in] quant_param_p
1420*  quantization parameter of p block
1421*
1422* @param[in] quant_param_q
1423*  quantization parameter of p block
1424*
1425* @param[in] beta_offset_div2
1426*
1427*
1428* @param[in] tc_offset_div2
1429*
1430*
1431* @param[in] filter_flag_p
1432*  flag whether to filter the p block
1433*
1434* @param[in] filter_flag_q
1435*  flag whether to filter the q block
1436*
1437* @returns
1438*
1439* @remarks
1440*  None
1441*
1442*******************************************************************************
1443*/
1444
1445void ihevc_hbd_deblk_chroma_horz(UWORD16 *pu2_src,
1446                                 WORD32 src_strd,
1447                                 WORD32 quant_param_p,
1448                                 WORD32 quant_param_q,
1449                                 WORD32 qp_offset_u,
1450                                 WORD32 qp_offset_v,
1451                                 WORD32 tc_offset_div2,
1452                                 WORD32 filter_flag_p,
1453                                 WORD32 filter_flag_q,
1454                                 UWORD8 bit_depth)
1455{
1456    WORD32 qp_indx_u, qp_chroma_u;
1457    WORD32 qp_indx_v, qp_chroma_v;
1458    WORD32 tc_indx_u, tc_u;
1459    WORD32 tc_indx_v, tc_v;
1460    WORD32 tc;
1461
1462    WORD32 delta, tmp_p0, tmp_q0;
1463    WORD32 col;
1464
1465    ASSERT(filter_flag_p || filter_flag_q);
1466
1467    /* chroma processing is done only if BS is 2             */
1468    /* this function is assumed to be called only if BS is 2 */
1469    qp_indx_u = qp_offset_u + ((quant_param_p + quant_param_q + 1) >> 1);
1470    qp_chroma_u = qp_indx_u < 0 ? qp_indx_u : (qp_indx_u > 57 ? qp_indx_u - 6 : gai4_ihevc_qp_table[qp_indx_u]);
1471
1472    qp_indx_v = qp_offset_v + ((quant_param_p + quant_param_q + 1) >> 1);
1473    qp_chroma_v = qp_indx_v < 0 ? qp_indx_v : (qp_indx_v > 57 ? qp_indx_v - 6 : gai4_ihevc_qp_table[qp_indx_v]);
1474
1475    tc_indx_u = CLIP3(qp_chroma_u + 2 + (tc_offset_div2 << 1), 0, 53);
1476    tc_u = gai4_ihevc_tc_table[tc_indx_u] * (1 << (bit_depth - 8));
1477
1478    tc_indx_v = CLIP3(qp_chroma_v + 2 + (tc_offset_div2 << 1), 0, 53);
1479    tc_v = gai4_ihevc_tc_table[tc_indx_v] * (1 << (bit_depth - 8));
1480
1481    if(0 == tc_u && 0 == tc_v)
1482    {
1483        return;
1484    }
1485
1486    for(col = 0; col < 8; col++)
1487    {
1488        tc = (col & 1) ? tc_v : tc_u;
1489        delta = CLIP3((((pu2_src[0 * src_strd] -
1490                      pu2_src[-1 * src_strd]) << 2) +
1491                      pu2_src[-2 * src_strd] -
1492                      pu2_src[1 * src_strd] + 4) >> 3,
1493                      -tc, tc);
1494        tmp_p0 = CLIP3(pu2_src[-1 * src_strd] + delta, 0, ((1 << bit_depth) - 1));
1495        tmp_q0 = CLIP3(pu2_src[0 * src_strd] - delta, 0, ((1 << bit_depth) - 1));
1496
1497        if(filter_flag_p != 0)
1498        {
1499            pu2_src[-1 * src_strd] = tmp_p0;
1500        }
1501
1502        if(filter_flag_q != 0)
1503        {
1504            pu2_src[0 * src_strd] = tmp_q0;
1505        }
1506
1507        pu2_src += 1;
1508    }
1509
1510}
1511