1/* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 * express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
17 */
18
19#include <string.h>
20
21#include "avclib_common.h"
22
#define MAX_QP 51
#define MB_BLOCK_SIZE 16

// NOTE: this table is for the GetStrength_*() functions only.
// Initial boundary strengths for intra macroblocks, four strengths packed one per byte:
// entry 0 (strength 4) is stored for the macroblock-boundary edge,
// entries 1..3 (strength 3) are stored for the three inner edges.
static const int ININT_STRENGTH[4] = {0x04040404, 0x03030303, 0x03030303, 0x03030303};


// NOTE: these 3 tables provide the Alpha, Beta and clipping values handed to the EdgeLoop_*() functions.
// NOTE: they are indexed by the clipped index in [0, MAX_QP] and have to be changed if, for instance,
//       the QP doubling is changed from 6 to 8 values.

static const int ALPHA_TABLE[MAX_QP + 1] =
{
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    4, 4, 5, 6, 7, 8, 9, 10, 12, 13, 15, 17,
    20, 22, 25, 28, 32, 36, 40, 45,
    50, 56, 63, 71, 80, 90, 101, 113,
    127, 144, 162, 182, 203, 226, 255, 255
};

static const int BETA_TABLE[MAX_QP + 1] =
{
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 6, 6,
    7, 7, 8, 8, 9, 9, 10, 10,
    11, 11, 12, 12, 13, 13, 14, 14,
    15, 15, 16, 16, 17, 17, 18, 18
};

// One row per index; DeblockMb() hands the selected row to the EdgeLoop_*() functions as clipTable.
static const int CLIP_TAB[MAX_QP + 1][5] =
{
    { 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0},
    { 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0},
    { 0, 0, 0, 0, 0}, { 0, 0, 0, 1, 1}, { 0, 0, 0, 1, 1}, { 0, 0, 0, 1, 1}, { 0, 0, 0, 1, 1}, { 0, 0, 1, 1, 1}, { 0, 0, 1, 1, 1}, { 0, 1, 1, 1, 1},
    { 0, 1, 1, 1, 1}, { 0, 1, 1, 1, 1}, { 0, 1, 1, 1, 1}, { 0, 1, 1, 2, 2}, { 0, 1, 1, 2, 2}, { 0, 1, 1, 2, 2}, { 0, 1, 1, 2, 2}, { 0, 1, 2, 3, 3},
    { 0, 1, 2, 3, 3}, { 0, 2, 2, 3, 3}, { 0, 2, 2, 4, 4}, { 0, 2, 3, 4, 4}, { 0, 2, 3, 4, 4}, { 0, 3, 3, 5, 5}, { 0, 3, 4, 6, 6}, { 0, 3, 4, 6, 6},
    { 0, 4, 5, 7, 7}, { 0, 4, 5, 8, 8}, { 0, 4, 6, 9, 9}, { 0, 5, 7, 10, 10}, { 0, 6, 8, 11, 11}, { 0, 6, 8, 13, 13}, { 0, 7, 10, 14, 14}, { 0, 8, 11, 16, 16},
    { 0, 9, 12, 18, 18}, { 0, 10, 13, 20, 20}, { 0, 11, 15, 23, 23}, { 0, 13, 17, 25, 25}
};

// NOTE: this table only performs QP clipping: entry [12 + x] holds x clipped to [0, MAX_QP],
//       where x = QP + video->FilterOffsetA/B.
//       video->FilterOffsetA/B is in [-12, 12], so x ranges over [-12, 63].
static const int QP_CLIP_TAB[12 + (MAX_QP + 1) + 12] =
{
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,              // [-12, 0]
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
    13, 14, 15, 16, 17, 18, 19, 20, 21,
    22, 23, 24, 25, 26, 27, 28, 29, 30,
    31, 32, 33, 34, 35, 36, 37, 38, 39,
    40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,     // [1, 51]
    51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51      // [52, 63]
};
58
59static void DeblockMb(AVCCommonObj *video, int mb_x, int mb_y, uint8 *SrcY, uint8 *SrcU, uint8 *SrcV);
60//static void GetStrength(AVCCommonObj *video, uint8 *Strength, AVCMacroblock* MbP, AVCMacroblock* MbQ, int dir, int edge);
61static void GetStrength_Edge0(uint8 *Strength, AVCMacroblock* MbP, AVCMacroblock* MbQ, int dir);
62static void GetStrength_VerticalEdges(uint8 *Strength, AVCMacroblock* MbQ);
63static void GetStrength_HorizontalEdges(uint8 Strength[12], AVCMacroblock* MbQ);
64static void EdgeLoop_Luma_vertical(uint8* SrcPtr, uint8 *Strength, int Alpha, int Beta, int *clipTable, int pitch);
65static void EdgeLoop_Luma_horizontal(uint8* SrcPtr, uint8 *Strength, int Alpha, int Beta, int *clipTable, int pitch);
66static void EdgeLoop_Chroma_vertical(uint8* SrcPtr, uint8 *Strength, int Alpha, int Beta, int *clipTable, int pitch);
67static void EdgeLoop_Chroma_horizontal(uint8* SrcPtr, uint8 *Strength, int Alpha, int Beta, int *clipTable, int pitch);
68
69/*
70 *****************************************************************************************
71 * \brief Filter all macroblocks in order of increasing macroblock address.
72 *****************************************************************************************
73*/
74
75OSCL_EXPORT_REF AVCStatus DeblockPicture(AVCCommonObj *video)
76{
77    uint   i, j;
78    int   pitch = video->currPic->pitch, pitch_c, width;
79    uint8 *SrcY, *SrcU, *SrcV;
80
81    SrcY = video->currPic->Sl;      // pointers to source
82    SrcU = video->currPic->Scb;
83    SrcV = video->currPic->Scr;
84    pitch_c = pitch >> 1;
85    width = video->currPic->width;
86
87    for (i = 0; i < video->PicHeightInMbs; i++)
88    {
89        for (j = 0; j < video->PicWidthInMbs; j++)
90        {
91            DeblockMb(video, j, i, SrcY, SrcU, SrcV);
92            // update SrcY, SrcU, SrcV
93            SrcY += MB_BLOCK_SIZE;
94            SrcU += (MB_BLOCK_SIZE >> 1);
95            SrcV += (MB_BLOCK_SIZE >> 1);
96        }
97
98        SrcY += ((pitch << 4) - width);
99        SrcU += ((pitch_c << 3) - (width >> 1));
100        SrcV += ((pitch_c << 3) - (width >> 1));
101    }
102
103    return AVC_SUCCESS;
104}
105
106#ifdef MB_BASED_DEBLOCK
107/*
108 *****************************************************************************************
109 * \brief Filter one macroblocks in a fast macroblock memory and copy it to frame
110 *****************************************************************************************
111*/
112void MBInLoopDeblock(AVCCommonObj *video)
113{
114    AVCPictureData *currPic = video->currPic;
115#ifdef USE_PRED_BLOCK
116    uint8 *predCb, *predCr, *pred_block;
117    int i, j, dst_width, dst_height, dst_widthc, dst_heightc;
118#endif
119    int pitch = currPic->pitch;
120    int x_pos = video->mb_x;
121    int y_pos = video->mb_y;
122    uint8 *curL, *curCb, *curCr;
123    int offset;
124
125    offset = (y_pos << 4) * pitch;
126
127    curL = currPic->Sl + offset + (x_pos << 4);
128
129    offset >>= 2;
130    offset += (x_pos << 3);
131
132    curCb = currPic->Scb + offset;
133    curCr = currPic->Scr + offset;
134
135#ifdef USE_PRED_BLOCK
136    pred_block = video->pred;
137
138    /* 1. copy neighboring pixels from frame to the video->pred_block */
139    if (y_pos) /* not the 0th row */
140    {
141        /* copy to the top 4 lines of the macroblock */
142        curL -= (pitch << 2); /* go back 4 lines */
143
144        memcpy(pred_block + 4, curL, 16);
145        curL += pitch;
146        memcpy(pred_block + 24, curL, 16);
147        curL += pitch;
148        memcpy(pred_block + 44, curL, 16);
149        curL += pitch;
150        memcpy(pred_block + 64, curL, 16);
151        curL += pitch;
152
153        curCb -= (pitch << 1); /* go back 4 lines chroma */
154        curCr -= (pitch << 1);
155
156        pred_block += 400;
157
158        memcpy(pred_block + 4, curCb, 8);
159        curCb += (pitch >> 1);
160        memcpy(pred_block + 16, curCb, 8);
161        curCb += (pitch >> 1);
162        memcpy(pred_block + 28, curCb, 8);
163        curCb += (pitch >> 1);
164        memcpy(pred_block + 40, curCb, 8);
165        curCb += (pitch >> 1);
166
167        pred_block += 144;
168        memcpy(pred_block + 4, curCr, 8);
169        curCr += (pitch >> 1);
170        memcpy(pred_block + 16, curCr, 8);
171        curCr += (pitch >> 1);
172        memcpy(pred_block + 28, curCr, 8);
173        curCr += (pitch >> 1);
174        memcpy(pred_block + 40, curCr, 8);
175        curCr += (pitch >> 1);
176
177        pred_block = video->pred;
178    }
179
180    /* 2. perform deblocking. */
181    DeblockMb(video, x_pos, y_pos, pred_block + 84, pred_block + 452, pred_block + 596);
182
183    /* 3. copy it back to the frame and update pred_block */
184    predCb = pred_block + 400;
185    predCr = predCb + 144;
186
187    /* find the range of the block inside pred_block to be copied back */
188    if (y_pos)  /* the first row */
189    {
190        curL -= (pitch << 2);
191        curCb -= (pitch << 1);
192        curCr -= (pitch << 1);
193
194        dst_height = 20;
195        dst_heightc = 12;
196    }
197    else
198    {
199        pred_block += 80;
200        predCb += 48;
201        predCr += 48;
202        dst_height = 16;
203        dst_heightc = 8;
204    }
205
206    if (x_pos) /* find the width */
207    {
208        curL -= 4;
209        curCb -= 4;
210        curCr -= 4;
211        if (x_pos == (int)(video->PicWidthInMbs - 1))
212        {
213            dst_width = 20;
214            dst_widthc = 12;
215        }
216        else
217        {
218            dst_width = 16;
219            dst_widthc = 8;
220        }
221    }
222    else
223    {
224        pred_block += 4;
225        predCb += 4;
226        predCr += 4;
227        dst_width = 12;
228        dst_widthc = 4;
229    }
230
231    /* perform copy */
232    for (j = 0; j < dst_height; j++)
233    {
234        memcpy(curL, pred_block, dst_width);
235        curL += pitch;
236        pred_block += 20;
237    }
238    for (j = 0; j < dst_heightc; j++)
239    {
240        memcpy(curCb, predCb, dst_widthc);
241        memcpy(curCr, predCr, dst_widthc);
242        curCb += (pitch >> 1);
243        curCr += (pitch >> 1);
244        predCb += 12;
245        predCr += 12;
246    }
247
248    if (x_pos != (int)(video->PicWidthInMbs - 1)) /* now copy from the right-most 4 columns to the left-most 4 columns */
249    {
250        pred_block = video->pred;
251        for (i = 0; i < 20; i += 4)
252        {
253            *((uint32*)pred_block) = *((uint32*)(pred_block + 16));
254            pred_block += 20;
255            *((uint32*)pred_block) = *((uint32*)(pred_block + 16));
256            pred_block += 20;
257            *((uint32*)pred_block) = *((uint32*)(pred_block + 16));
258            pred_block += 20;
259            *((uint32*)pred_block) = *((uint32*)(pred_block + 16));
260            pred_block += 20;
261        }
262
263        for (i = 0; i < 24; i += 4)
264        {
265            *((uint32*)pred_block) = *((uint32*)(pred_block + 8));
266            pred_block += 12;
267            *((uint32*)pred_block) = *((uint32*)(pred_block + 8));
268            pred_block += 12;
269            *((uint32*)pred_block) = *((uint32*)(pred_block + 8));
270            pred_block += 12;
271            *((uint32*)pred_block) = *((uint32*)(pred_block + 8));
272            pred_block += 12;
273        }
274
275    }
276#else
277    DeblockMb(video, x_pos, y_pos, curL, curCb, curCr);
278#endif
279
280    return ;
281}
282#endif
283
284/*
285 *****************************************************************************************
286 * \brief Deblocking filter for one macroblock.
287 *****************************************************************************************
288 */
289
290void DeblockMb(AVCCommonObj *video, int mb_x, int mb_y, uint8 *SrcY, uint8 *SrcU, uint8 *SrcV)
291{
292    AVCMacroblock *MbP, *MbQ;
293    int     edge, QP, QPC;
294    int     filterLeftMbEdgeFlag = (mb_x != 0);
295    int     filterTopMbEdgeFlag  = (mb_y != 0);
296    int     pitch = video->currPic->pitch;
297    int     indexA, indexB;
298    int     *tmp;
299    int     Alpha, Beta, Alpha_c, Beta_c;
300    int     mbNum = mb_y * video->PicWidthInMbs + mb_x;
301    int     *clipTable, *clipTable_c, *qp_clip_tab;
302    uint8   Strength[16];
303    void*     str;
304
305    MbQ = &(video->mblock[mbNum]);      // current Mb
306
307
308    // If filter is disabled, return
309    if (video->sliceHdr->disable_deblocking_filter_idc == 1) return;
310
311    if (video->sliceHdr->disable_deblocking_filter_idc == 2)
312    {
313        // don't filter at slice boundaries
314        filterLeftMbEdgeFlag = mb_is_available(video->mblock, video->PicSizeInMbs, mbNum - 1, mbNum);
315        filterTopMbEdgeFlag  = mb_is_available(video->mblock, video->PicSizeInMbs, mbNum - video->PicWidthInMbs, mbNum);
316    }
317
318    /* NOTE: edge=0 and edge=1~3 are separate cases because of the difference of MbP, index A and indexB calculation */
319    /*       for edge = 1~3, MbP, indexA and indexB remain the same, and thus there is no need to re-calculate them for each edge */
320
321    qp_clip_tab = (int *)QP_CLIP_TAB + 12;
322
323    /* 1.VERTICAL EDGE + MB BOUNDARY (edge = 0) */
324    if (filterLeftMbEdgeFlag)
325    {
326        MbP = MbQ - 1;
327        //GetStrength(video, Strength, MbP, MbQ, 0, 0); // Strength for 4 blks in 1 stripe, 0 => vertical edge
328        GetStrength_Edge0(Strength, MbP, MbQ, 0);
329
330        str = (void*)Strength; //de-ref type-punned pointer fix
331        if (*((uint32*)str))    // only if one of the 4 Strength bytes is != 0
332        {
333            QP = (MbP->QPy + MbQ->QPy + 1) >> 1; // Average QP of the two blocks;
334            indexA = QP + video->FilterOffsetA;
335            indexB = QP + video->FilterOffsetB;
336            indexA = qp_clip_tab[indexA]; // IClip(0, MAX_QP, QP+video->FilterOffsetA)
337            indexB = qp_clip_tab[indexB]; // IClip(0, MAX_QP, QP+video->FilterOffsetB)
338
339            Alpha  = ALPHA_TABLE[indexA];
340            Beta = BETA_TABLE[indexB];
341            clipTable = (int *) CLIP_TAB[indexA];
342
343            if (Alpha > 0 && Beta > 0)
344#ifdef USE_PRED_BLOCK
345                EdgeLoop_Luma_vertical(SrcY, Strength,  Alpha, Beta, clipTable, 20);
346#else
347                EdgeLoop_Luma_vertical(SrcY, Strength,  Alpha, Beta, clipTable, pitch);
348#endif
349
350            QPC = (MbP->QPc + MbQ->QPc + 1) >> 1;
351            indexA = QPC + video->FilterOffsetA;
352            indexB = QPC + video->FilterOffsetB;
353            indexA = qp_clip_tab[indexA]; // IClip(0, MAX_QP, QP+video->FilterOffsetA)
354            indexB = qp_clip_tab[indexB]; // IClip(0, MAX_QP, QP+video->FilterOffsetB)
355
356            Alpha  = ALPHA_TABLE[indexA];
357            Beta = BETA_TABLE[indexB];
358            clipTable = (int *) CLIP_TAB[indexA];
359            if (Alpha > 0 && Beta > 0)
360            {
361#ifdef USE_PRED_BLOCK
362                EdgeLoop_Chroma_vertical(SrcU, Strength, Alpha, Beta, clipTable, 12);
363                EdgeLoop_Chroma_vertical(SrcV, Strength, Alpha, Beta, clipTable, 12);
364#else
365                EdgeLoop_Chroma_vertical(SrcU, Strength, Alpha, Beta, clipTable, pitch >> 1);
366                EdgeLoop_Chroma_vertical(SrcV, Strength, Alpha, Beta, clipTable, pitch >> 1);
367#endif
368            }
369        }
370
371    } /* end of: if(filterLeftMbEdgeFlag) */
372
373    /* 2.VERTICAL EDGE (no boundary), the edges are all inside a MB */
374    /* First calculate the necesary parameters all at once, outside the loop */
375    MbP = MbQ;
376
377    indexA = MbQ->QPy + video->FilterOffsetA;
378    indexB = MbQ->QPy + video->FilterOffsetB;
379    //  index
380    indexA = qp_clip_tab[indexA]; // IClip(0, MAX_QP, QP+video->FilterOffsetA)
381    indexB = qp_clip_tab[indexB]; // IClip(0, MAX_QP, QP+video->FilterOffsetB)
382
383    Alpha = ALPHA_TABLE[indexA];
384    Beta = BETA_TABLE[indexB];
385    clipTable = (int *)CLIP_TAB[indexA];
386
387    /* Save Alpha,  Beta and clipTable for future use, with the obselete variables filterLeftMbEdgeFlag, mbNum amd tmp */
388    filterLeftMbEdgeFlag = Alpha;
389    mbNum = Beta;
390    tmp = clipTable;
391
392    indexA = MbQ->QPc + video->FilterOffsetA;
393    indexB = MbQ->QPc + video->FilterOffsetB;
394    indexA = qp_clip_tab[indexA]; // IClip(0, MAX_QP, QP+video->FilterOffsetA)
395    indexB = qp_clip_tab[indexB]; // IClip(0, MAX_QP, QP+video->FilterOffsetB)
396
397    Alpha_c  = ALPHA_TABLE[indexA];
398    Beta_c = BETA_TABLE[indexB];
399    clipTable_c = (int *)CLIP_TAB[indexA];
400
401    GetStrength_VerticalEdges(Strength + 4, MbQ); // Strength for 4 blks in 1 stripe, 0 => vertical edge
402
403    for (edge = 1; edge < 4; edge++)  // 4 vertical strips of 16 pel
404    {
405        //GetStrength_VerticalEdges(video, Strength, MbP, MbQ, 0, edge); // Strength for 4 blks in 1 stripe, 0 => vertical edge
406        if (*((int*)(Strength + (edge << 2))))   // only if one of the 4 Strength bytes is != 0
407        {
408            if (Alpha > 0 && Beta > 0)
409#ifdef USE_PRED_BLOCK
410                EdgeLoop_Luma_vertical(SrcY + (edge << 2), Strength + (edge << 2),  Alpha, Beta, clipTable, 20);
411#else
412                EdgeLoop_Luma_vertical(SrcY + (edge << 2), Strength + (edge << 2),  Alpha, Beta, clipTable, pitch);
413#endif
414
415            if (!(edge & 1) && Alpha_c > 0 && Beta_c > 0)
416            {
417#ifdef USE_PRED_BLOCK
418                EdgeLoop_Chroma_vertical(SrcU + (edge << 1), Strength + (edge << 2), Alpha_c, Beta_c, clipTable_c, 12);
419                EdgeLoop_Chroma_vertical(SrcV + (edge << 1), Strength + (edge << 2), Alpha_c, Beta_c, clipTable_c, 12);
420#else
421                EdgeLoop_Chroma_vertical(SrcU + (edge << 1), Strength + (edge << 2), Alpha_c, Beta_c, clipTable_c, pitch >> 1);
422                EdgeLoop_Chroma_vertical(SrcV + (edge << 1), Strength + (edge << 2), Alpha_c, Beta_c, clipTable_c, pitch >> 1);
423#endif
424            }
425        }
426
427    } //end edge
428
429
430
431    /* 3.HORIZONTAL EDGE + MB BOUNDARY (edge = 0) */
432    if (filterTopMbEdgeFlag)
433    {
434        MbP = MbQ - video->PicWidthInMbs;
435        //GetStrength(video, Strength, MbP, MbQ, 1, 0); // Strength for 4 blks in 1 stripe, 0 => vertical edge
436        GetStrength_Edge0(Strength, MbP, MbQ, 1);
437        str = (void*)Strength; //de-ref type-punned pointer fix
438        if (*((uint32*)str))    // only if one of the 4 Strength bytes is != 0
439        {
440            QP = (MbP->QPy + MbQ->QPy + 1) >> 1; // Average QP of the two blocks;
441            indexA = QP + video->FilterOffsetA;
442            indexB = QP + video->FilterOffsetB;
443            indexA = qp_clip_tab[indexA]; // IClip(0, MAX_QP, QP+video->FilterOffsetA)
444            indexB = qp_clip_tab[indexB]; // IClip(0, MAX_QP, QP+video->FilterOffsetB)
445
446            Alpha  = ALPHA_TABLE[indexA];
447            Beta = BETA_TABLE[indexB];
448            clipTable = (int *)CLIP_TAB[indexA];
449
450            if (Alpha > 0 && Beta > 0)
451            {
452#ifdef USE_PRED_BLOCK
453                EdgeLoop_Luma_horizontal(SrcY, Strength,  Alpha, Beta, clipTable, 20);
454#else
455                EdgeLoop_Luma_horizontal(SrcY, Strength,  Alpha, Beta, clipTable, pitch);
456#endif
457            }
458
459            QPC = (MbP->QPc + MbQ->QPc + 1) >> 1;
460            indexA = QPC + video->FilterOffsetA;
461            indexB = QPC + video->FilterOffsetB;
462            indexA = qp_clip_tab[indexA]; // IClip(0, MAX_QP, QP+video->FilterOffsetA)
463            indexB = qp_clip_tab[indexB]; // IClip(0, MAX_QP, QP+video->FilterOffsetB)
464
465            Alpha  = ALPHA_TABLE[indexA];
466            Beta = BETA_TABLE[indexB];
467            clipTable = (int *)CLIP_TAB[indexA];
468            if (Alpha > 0 && Beta > 0)
469            {
470#ifdef USE_PRED_BLOCK
471                EdgeLoop_Chroma_horizontal(SrcU, Strength, Alpha, Beta, clipTable, 12);
472                EdgeLoop_Chroma_horizontal(SrcV, Strength, Alpha, Beta, clipTable, 12);
473#else
474                EdgeLoop_Chroma_horizontal(SrcU, Strength, Alpha, Beta, clipTable, pitch >> 1);
475                EdgeLoop_Chroma_horizontal(SrcV, Strength, Alpha, Beta, clipTable, pitch >> 1);
476#endif
477            }
478        }
479
480    } /* end of: if(filterTopMbEdgeFlag) */
481
482
483    /* 4.HORIZONTAL EDGE (no boundary), the edges are inside a MB */
484    MbP = MbQ;
485
486    /* Recover Alpha,  Beta and clipTable for edge!=0 with the variables filterLeftMbEdgeFlag, mbNum and tmp */
487    /* Note that Alpha_c, Beta_c and clipTable_c for chroma is already calculated */
488    Alpha = filterLeftMbEdgeFlag;
489    Beta = mbNum;
490    clipTable = tmp;
491
492    GetStrength_HorizontalEdges(Strength + 4, MbQ); // Strength for 4 blks in 1 stripe, 0 => vertical edge
493
494    for (edge = 1; edge < 4; edge++)  // 4 horicontal strips of 16 pel
495    {
496        //GetStrength(video, Strength, MbP, MbQ, 1, edge); // Strength for 4 blks in 1 stripe   1 => horizontal edge
497        if (*((int*)(Strength + (edge << 2)))) // only if one of the 4 Strength bytes is != 0
498        {
499            if (Alpha > 0 && Beta > 0)
500            {
501#ifdef USE_PRED_BLOCK
502                EdgeLoop_Luma_horizontal(SrcY + (edge << 2)*20, Strength + (edge << 2),  Alpha, Beta, clipTable, 20);
503#else
504                EdgeLoop_Luma_horizontal(SrcY + (edge << 2)*pitch, Strength + (edge << 2),  Alpha, Beta, clipTable, pitch);
505#endif
506            }
507
508            if (!(edge & 1) && Alpha_c > 0 && Beta_c > 0)
509            {
510#ifdef USE_PRED_BLOCK
511                EdgeLoop_Chroma_horizontal(SrcU + (edge << 1)*12, Strength + (edge << 2), Alpha_c, Beta_c, clipTable_c, 12);
512                EdgeLoop_Chroma_horizontal(SrcV + (edge << 1)*12, Strength + (edge << 2), Alpha_c, Beta_c, clipTable_c, 12);
513#else
514                EdgeLoop_Chroma_horizontal(SrcU + (edge << 1)*(pitch >> 1), Strength + (edge << 2), Alpha_c, Beta_c, clipTable_c, pitch >> 1);
515                EdgeLoop_Chroma_horizontal(SrcV + (edge << 1)*(pitch >> 1), Strength + (edge << 2), Alpha_c, Beta_c, clipTable_c, pitch >> 1);
516#endif
517            }
518        }
519
520    } //end edge
521
522    return;
523}
524
525/*
526 *****************************************************************************************************
527 * \brief   returns a buffer of 4 Strength values for one stripe in a mb (for different Frame types)
528 *****************************************************************************************************
529*/
530
531void GetStrength_Edge0(uint8 *Strength, AVCMacroblock* MbP, AVCMacroblock* MbQ, int dir)
532{
533    int tmp;
534    int16 *ptrQ, *ptrP;
535    void* vptr;
536    uint8 *pStrength;
537    void* refIdx;
538
539    if (MbP->mbMode == AVC_I4 || MbP->mbMode == AVC_I16 ||
540            MbQ->mbMode == AVC_I4 || MbQ->mbMode == AVC_I16)
541    {
542
543        *((int*)Strength) = ININT_STRENGTH[0];      // Start with Strength=3. or Strength=4 for Mb-edge
544
545    }
546    else // if not intra or SP-frame
547    {
548        *((int*)Strength) = 0;
549
550        if (dir == 0)  // Vertical Edge 0
551        {
552
553            //1. Check the ref_frame_id
554            refIdx = (void*) MbQ->RefIdx; //de-ref type-punned pointer fix
555            ptrQ = (int16*)refIdx;
556            refIdx = (void*)MbP->RefIdx; //de-ref type-punned pointer fix
557            ptrP = (int16*)refIdx;
558            pStrength = Strength;
559            if (ptrQ[0] != ptrP[1]) pStrength[0] = 1;
560            if (ptrQ[2] != ptrP[3]) pStrength[2] = 1;
561            pStrength[1] = pStrength[0];
562            pStrength[3] = pStrength[2];
563
564            //2. Check the non-zero coeff blocks (4x4)
565            if (MbQ->nz_coeff[0] != 0 || MbP->nz_coeff[3] != 0) pStrength[0] = 2;
566            if (MbQ->nz_coeff[4] != 0 || MbP->nz_coeff[7] != 0) pStrength[1] = 2;
567            if (MbQ->nz_coeff[8] != 0 || MbP->nz_coeff[11] != 0) pStrength[2] = 2;
568            if (MbQ->nz_coeff[12] != 0 || MbP->nz_coeff[15] != 0) pStrength[3] = 2;
569
570            //3. Only need to check the mv difference
571            vptr = (void*)MbQ->mvL0;  // for deref type-punned pointer
572            ptrQ = (int16*)vptr;
573            ptrP = (int16*)(MbP->mvL0 + 3); // points to 4x4 block #3 (the 4th column)
574
575            // 1st blk
576            if (*pStrength == 0)
577            {
578                // check |mv difference| >= 4
579                tmp = *ptrQ++ - *ptrP++;
580                if (tmp < 0) tmp = -tmp;
581                if (tmp >= 4) *pStrength = 1;
582
583                tmp = *ptrQ-- - *ptrP--;
584                if (tmp < 0) tmp = -tmp;
585                if (tmp >= 4) *pStrength = 1;
586            }
587
588            pStrength++;
589            ptrQ += 8;
590            ptrP += 8;
591
592            // 2nd blk
593            if (*pStrength == 0)
594            {
595                // check |mv difference| >= 4
596                tmp = *ptrQ++ - *ptrP++;
597                if (tmp < 0) tmp = -tmp;
598                if (tmp >= 4) *pStrength = 1;
599
600                tmp = *ptrQ-- - *ptrP--;
601                if (tmp < 0) tmp = -tmp;
602                if (tmp >= 4) *pStrength = 1;
603            }
604
605            pStrength++;
606            ptrQ += 8;
607            ptrP += 8;
608
609            // 3rd blk
610            if (*pStrength == 0)
611            {
612                // check |mv difference| >= 4
613                tmp = *ptrQ++ - *ptrP++;
614                if (tmp < 0) tmp = -tmp;
615                if (tmp >= 4) *pStrength = 1;
616
617                tmp = *ptrQ-- - *ptrP--;
618                if (tmp < 0) tmp = -tmp;
619                if (tmp >= 4) *pStrength = 1;
620            }
621
622            pStrength++;
623            ptrQ += 8;
624            ptrP += 8;
625
626            // 4th blk
627            if (*pStrength == 0)
628            {
629                // check |mv difference| >= 4
630                tmp = *ptrQ++ - *ptrP++;
631                if (tmp < 0) tmp = -tmp;
632                if (tmp >= 4) *pStrength = 1;
633
634                tmp = *ptrQ-- - *ptrP--;
635                if (tmp < 0) tmp = -tmp;
636                if (tmp >= 4) *pStrength = 1;
637            }
638        }
639        else   // Horizontal Edge 0
640        {
641
642            //1. Check the ref_frame_id
643            refIdx = (void*)MbQ->RefIdx;  //de-ref type-punned pointer
644            ptrQ = (int16*)refIdx;
645            refIdx = (void*)MbP->RefIdx;  //de-ref type-punned pointer
646            ptrP = (int16*)refIdx;
647            pStrength = Strength;
648            if (ptrQ[0] != ptrP[2]) pStrength[0] = 1;
649            if (ptrQ[1] != ptrP[3]) pStrength[2] = 1;
650            pStrength[1] = pStrength[0];
651            pStrength[3] = pStrength[2];
652
653            //2. Check the non-zero coeff blocks (4x4)
654            if (MbQ->nz_coeff[0] != 0 || MbP->nz_coeff[12] != 0) pStrength[0] = 2;
655            if (MbQ->nz_coeff[1] != 0 || MbP->nz_coeff[13] != 0) pStrength[1] = 2;
656            if (MbQ->nz_coeff[2] != 0 || MbP->nz_coeff[14] != 0) pStrength[2] = 2;
657            if (MbQ->nz_coeff[3] != 0 || MbP->nz_coeff[15] != 0) pStrength[3] = 2;
658
659            //3. Only need to check the mv difference
660            vptr = (void*)MbQ->mvL0;
661            ptrQ = (int16*)vptr;
662            ptrP = (int16*)(MbP->mvL0 + 12); // points to 4x4 block #12 (the 4th row)
663
664            // 1st blk
665            if (*pStrength == 0)
666            {
667                // check |mv difference| >= 4
668                tmp = *ptrQ++ - *ptrP++;
669                if (tmp < 0) tmp = -tmp;
670                if (tmp >= 4) *pStrength = 1;
671
672                tmp = *ptrQ-- - *ptrP--;
673                if (tmp < 0) tmp = -tmp;
674                if (tmp >= 4) *pStrength = 1;
675            }
676
677            pStrength++;
678            ptrQ += 2;
679            ptrP += 2;
680
681            // 2nd blk
682            if (*pStrength  == 0)
683            {
684                // check |mv difference| >= 4
685                tmp = *ptrQ++ - *ptrP++;
686                if (tmp < 0) tmp = -tmp;
687                if (tmp >= 4) *pStrength = 1;
688
689                tmp = *ptrQ-- - *ptrP--;
690                if (tmp < 0) tmp = -tmp;
691                if (tmp >= 4) *pStrength = 1;
692            }
693
694            pStrength++;
695            ptrQ += 2;
696            ptrP += 2;
697
698            // 3rd blk
699            if (*pStrength  == 0)
700            {
701                // check |mv difference| >= 4
702                tmp = *ptrQ++ - *ptrP++;
703                if (tmp < 0) tmp = -tmp;
704                if (tmp >= 4) *pStrength = 1;
705
706                tmp = *ptrQ-- - *ptrP--;
707                if (tmp < 0) tmp = -tmp;
708                if (tmp >= 4) *pStrength = 1;
709            }
710
711            pStrength++;
712            ptrQ += 2;
713            ptrP += 2;
714
715            // 4th blk
716            if (*pStrength  == 0)
717            {
718                // check |mv difference| >= 4
719                tmp = *ptrQ++ - *ptrP++;
720                if (tmp < 0) tmp = -tmp;
721                if (tmp >= 4) *pStrength = 1;
722
723                tmp = *ptrQ-- - *ptrP--;
724                if (tmp < 0) tmp = -tmp;
725                if (tmp >= 4) *pStrength = 1;
726            }
727
728        } /* end of: else if(dir == 0) */
729
730    } /* end of: if( !(MbP->mbMode == AVC_I4 ...) */
731}
732
733
734void GetStrength_VerticalEdges(uint8 *Strength, AVCMacroblock* MbQ)
735{
736    int     idx, tmp;
737    int16   *ptr, *pmvx, *pmvy;
738    uint8   *pnz;
739    uint8   *pStrength, *pStr;
740    void* refIdx;
741
742    if (MbQ->mbMode == AVC_I4 || MbQ->mbMode == AVC_I16)
743    {
744        *((int*)Strength)     = ININT_STRENGTH[1];      // Start with Strength=3. or Strength=4 for Mb-edge
745        *((int*)(Strength + 4)) = ININT_STRENGTH[2];
746        *((int*)(Strength + 8)) = ININT_STRENGTH[3];
747    }
748    else   // Not intra or SP-frame
749    {
750
751        *((int*)Strength)     = 0; // for non-intra MB, strength = 0, 1 or 2.
752        *((int*)(Strength + 4)) = 0;
753        *((int*)(Strength + 8)) = 0;
754
755        //1. Check the ref_frame_id
756        refIdx = (void*)MbQ->RefIdx;  //de-ref type-punned pointer fix
757        ptr = (int16*)refIdx;
758        pStrength = Strength;
759        if (ptr[0] != ptr[1]) pStrength[4] = 1;
760        if (ptr[2] != ptr[3]) pStrength[6] = 1;
761        pStrength[5] = pStrength[4];
762        pStrength[7] = pStrength[6];
763
764        //2. Check the nz_coeff block and mv difference
765        pmvx = (int16*)(MbQ->mvL0 + 1); // points to 4x4 block #1,not #0
766        pmvy = pmvx + 1;
767        for (idx = 0; idx < 4; idx += 2) // unroll the loop, make 4 iterations to 2
768        {
769            // first/third row : 1,2,3 or 9,10,12
770            // Strength = 2 for a whole row
771            pnz = MbQ->nz_coeff + (idx << 2);
772            if (*pnz++ != 0) *pStrength = 2;
773            if (*pnz++ != 0)
774            {
775                *pStrength = 2;
776                *(pStrength + 4) = 2;
777            }
778            if (*pnz++ != 0)
779            {
780                *(pStrength + 4) = 2;
781                *(pStrength + 8) = 2;
782            }
783            if (*pnz != 0) *(pStrength + 8) = 2;
784
785            // Then Strength = 1
786            if (*pStrength == 0)
787            {
788                //within the same 8x8 block, no need to check the reference id
789                //only need to check the |mv difference| >= 4
790                tmp = *pmvx - *(pmvx - 2);
791                if (tmp < 0) tmp = -tmp;
792                if (tmp >= 4) *pStrength = 1;
793
794                tmp = *pmvy - *(pmvy - 2);
795                if (tmp < 0) tmp = -tmp;
796                if (tmp >= 4) *pStrength = 1;
797            }
798
799            pmvx += 2;
800            pmvy += 2;
801            pStr = pStrength + 4;
802
803            if (*pStr == 0)
804            {
805                //check the |mv difference| >= 4
806                tmp = *pmvx - *(pmvx - 2);
807                if (tmp < 0) tmp = -tmp;
808                if (tmp >= 4) *pStr = 1;
809
810                tmp = *pmvy - *(pmvy - 2);
811                if (tmp < 0) tmp = -tmp;
812                if (tmp >= 4) *pStr = 1;
813            }
814
815            pmvx += 2;
816            pmvy += 2;
817            pStr = pStrength + 8;
818
819            if (*pStr == 0)
820            {
821                //within the same 8x8 block, no need to check the reference id
822                //only need to check the |mv difference| >= 4
823                tmp = *pmvx - *(pmvx - 2);
824                if (tmp < 0) tmp = -tmp;
825                if (tmp >= 4) *pStr = 1;
826
827                tmp = *pmvy - *(pmvy - 2);
828                if (tmp < 0) tmp = -tmp;
829                if (tmp >= 4) *pStr = 1;
830            }
831
832            // Second/fourth row: 5,6,7 or 14,15,16
833            // Strength = 2 for a whole row
834            pnz = MbQ->nz_coeff + ((idx + 1) << 2);
835            if (*pnz++ != 0) *(pStrength + 1) = 2;
836            if (*pnz++ != 0)
837            {
838                *(pStrength + 1) = 2;
839                *(pStrength + 5) = 2;
840            }
841            if (*pnz++ != 0)
842            {
843                *(pStrength + 5) = 2;
844                *(pStrength + 9) = 2;
845            }
846            if (*pnz != 0) *(pStrength + 9) = 2;
847
848            // Then Strength = 1
849            pmvx += 4;
850            pmvy += 4;
851            pStr = pStrength + 1;
852            if (*pStr == 0)
853            {
854                //within the same 8x8 block, no need to check the reference id
855                //only need to check the |mv difference| >= 4
856                tmp = *pmvx - *(pmvx - 2);
857                if (tmp < 0) tmp = -tmp;
858                if (tmp >= 4) *pStr = 1;
859
860                tmp = *pmvy - *(pmvy - 2);
861                if (tmp < 0) tmp = -tmp;
862                if (tmp >= 4) *pStr = 1;
863            }
864
865            pmvx += 2;
866            pmvy += 2;
867            pStr = pStrength + 5;
868
869            if (*pStr == 0)
870            {
871                //check the |mv difference| >= 4
872                tmp = *pmvx - *(pmvx - 2);
873                if (tmp < 0) tmp = -tmp;
874                if (tmp >= 4) *pStr = 1;
875
876                tmp = *pmvy - *(pmvy - 2);
877                if (tmp < 0) tmp = -tmp;
878                if (tmp >= 4) *pStr = 1;
879            }
880
881            pmvx += 2;
882            pmvy += 2;
883            pStr = pStrength + 9;
884
885            if (*pStr == 0)
886            {
887                //within the same 8x8 block, no need to check the reference id
888                //only need to check the |mv difference| >= 4
889                tmp = *pmvx - *(pmvx - 2);
890                if (tmp < 0) tmp = -tmp;
891                if (tmp >= 4) *pStr = 1;
892
893                tmp = *pmvy - *(pmvy - 2);
894                if (tmp < 0) tmp = -tmp;
895                if (tmp >= 4) *pStr = 1;
896            }
897
898            // update some variables for the next two rows
899            pmvx += 4;
900            pmvy += 4;
901            pStrength += 2;
902
903        } /* end of: for(idx=0; idx<2; idx++) */
904
905    } /* end of: else if( MbQ->mbMode == AVC_I4 ...) */
906}
907
908
909void GetStrength_HorizontalEdges(uint8 Strength[12], AVCMacroblock* MbQ)
910{
911    int     idx, tmp;
912    int16   *ptr, *pmvx, *pmvy;
913    uint8   *pStrength, *pStr;
914    void* refIdx;
915
916    if (MbQ->mbMode == AVC_I4 || MbQ->mbMode == AVC_I16)
917    {
918        *((int*)Strength)     = ININT_STRENGTH[1];      // Start with Strength=3. or Strength=4 for Mb-edge
919        *((int*)(Strength + 4)) = ININT_STRENGTH[2];
920        *((int*)(Strength + 8)) = ININT_STRENGTH[3];
921    }
922    else   // Not intra or SP-frame
923    {
924
925        *((int*)Strength)     = 0; // for non-intra MB, strength = 0, 1 or 2.
926        *((int*)(Strength + 4)) = 0; // for non-intra MB, strength = 0, 1 or 2.
927        *((int*)(Strength + 8)) = 0; // for non-intra MB, strength = 0, 1 or 2.
928
929
930        //1. Check the ref_frame_id
931        refIdx = (void*) MbQ->RefIdx; // de-ref type-punned fix
932        ptr = (int16*) refIdx;
933        pStrength = Strength;
934        if (ptr[0] != ptr[2]) pStrength[4] = 1;
935        if (ptr[1] != ptr[3]) pStrength[6] = 1;
936        pStrength[5] = pStrength[4];
937        pStrength[7] = pStrength[6];
938
939        //2. Check the nz_coeff block and mv difference
940        pmvx = (int16*)(MbQ->mvL0 + 4); // points to 4x4 block #4,not #0
941        pmvy = pmvx + 1;
942        for (idx = 0; idx < 4; idx += 2) // unroll the loop, make 4 iterations to 2
943        {
944            // first/third row : 1,2,3 or 9,10,12
945            // Strength = 2 for a whole row
946            if (MbQ->nz_coeff[idx] != 0) *pStrength = 2;
947            if (MbQ->nz_coeff[4+idx] != 0)
948            {
949                *pStrength = 2;
950                *(pStrength + 4) = 2;
951            }
952            if (MbQ->nz_coeff[8+idx] != 0)
953            {
954                *(pStrength + 4) = 2;
955                *(pStrength + 8) = 2;
956            }
957            if (MbQ->nz_coeff[12+idx] != 0) *(pStrength + 8) = 2;
958
959            // Then Strength = 1
960            if (*pStrength == 0)
961            {
962                //within the same 8x8 block, no need to check the reference id
963                //only need to check the |mv difference| >= 4
964                tmp = *pmvx - *(pmvx - 8);
965                if (tmp < 0) tmp = -tmp;
966                if (tmp >= 4) *pStrength = 1;
967
968                tmp = *pmvy - *(pmvy - 8);
969                if (tmp < 0) tmp = -tmp;
970                if (tmp >= 4) *pStrength = 1;
971            }
972
973            pmvx += 8;
974            pmvy += 8;
975            pStr = pStrength + 4;
976
977            if (*pStr == 0)
978            {
979                //check the |mv difference| >= 4
980                tmp = *pmvx - *(pmvx - 8);
981                if (tmp < 0) tmp = -tmp;
982                if (tmp >= 4) *pStr = 1;
983
984                tmp = *pmvy - *(pmvy - 8);
985                if (tmp < 0) tmp = -tmp;
986                if (tmp >= 4) *pStr = 1;
987            }
988
989            pmvx += 8;
990            pmvy += 8;
991            pStr = pStrength + 8;
992
993            if (*pStr == 0)
994            {
995                //within the same 8x8 block, no need to check the reference id
996                //only need to check the |mv difference| >= 4
997                tmp = *pmvx - *(pmvx - 8);
998                if (tmp < 0) tmp = -tmp;
999                if (tmp >= 4) *pStr = 1;
1000
1001                tmp = *pmvy - *(pmvy - 8);
1002                if (tmp < 0) tmp = -tmp;
1003                if (tmp >= 4) *pStr = 1;
1004            }
1005
1006            // Second/fourth row: 5,6,7 or 14,15,16
1007            // Strength = 2 for a whole row
1008            if (MbQ->nz_coeff[idx+1] != 0) *(pStrength + 1) = 2;
1009            if (MbQ->nz_coeff[4+idx+1] != 0)
1010            {
1011                *(pStrength + 1) = 2;
1012                *(pStrength + 5) = 2;
1013            }
1014            if (MbQ->nz_coeff[8+idx+1] != 0)
1015            {
1016                *(pStrength + 5) = 2;
1017                *(pStrength + 9) = 2;
1018            }
1019            if (MbQ->nz_coeff[12+idx+1] != 0) *(pStrength + 9) = 2;
1020
1021            // Then Strength = 1
1022            pmvx -= 14;
1023            pmvy -= 14; // -14 = -16 + 2
1024            pStr = pStrength + 1;
1025            if (*pStr == 0)
1026            {
1027                //within the same 8x8 block, no need to check the reference id
1028                //only need to check the |mv difference| >= 4
1029                tmp = *pmvx - *(pmvx - 8);
1030                if (tmp < 0) tmp = -tmp;
1031                if (tmp >= 4) *pStr = 1;
1032
1033                tmp = *pmvy - *(pmvy - 8);
1034                if (tmp < 0) tmp = -tmp;
1035                if (tmp >= 4) *pStr = 1;
1036            }
1037
1038            pmvx += 8;
1039            pmvy += 8;
1040            pStr = pStrength + 5;
1041
1042            if (*pStr == 0)
1043            {
1044                //check the |mv difference| >= 4
1045                tmp = *pmvx - *(pmvx - 8);
1046                if (tmp < 0) tmp = -tmp;
1047                if (tmp >= 4) *pStr = 1;
1048
1049                tmp = *pmvy - *(pmvy - 8);
1050                if (tmp < 0) tmp = -tmp;
1051                if (tmp >= 4) *pStr = 1;
1052            }
1053
1054            pmvx += 8;
1055            pmvy += 8;
1056            pStr = pStrength + 9;
1057
1058            if (*pStr == 0)
1059            {
1060                //within the same 8x8 block, no need to check the reference id
1061                //only need to check the |mv difference| >= 4
1062                tmp = *pmvx - *(pmvx - 8);
1063                if (tmp < 0) tmp = -tmp;
1064                if (tmp >= 4) *pStr = 1;
1065
1066                tmp = *pmvy - *(pmvy - 8);
1067                if (tmp < 0) tmp = -tmp;
1068                if (tmp >= 4) *pStr = 1;
1069            }
1070
1071            // update some variables for the next two rows
1072            pmvx -= 14;
1073            pmvy -= 14; // -14 = -16 + 2
1074            pStrength += 2;
1075
1076        } /* end of: for(idx=0; idx<2; idx++) */
1077
1078    } /* end of: else if( MbQ->mbMode == AVC_I4 ...) */
1079}
1080
1081/*
1082 *****************************************************************************************
1083 * \brief  Filters one edge of 16 (luma) or 8 (chroma) pel
1084 *****************************************************************************************
1085*/
1086
/*
 * Deblocks one horizontal luma edge, 16 pixels wide.
 *
 * SrcPtr     first pixel of the row just below the edge; rows above the edge
 *            are reached with negative multiples of pitch.
 * Strength   one strength per group of 4 columns (Strength[col >> 2]).
 *            Strength[0] == 4 selects the strong filter for all 16 columns.
 * Alpha/Beta activity thresholds: a column is filtered only when
 *            |q0-p0| < Alpha, |q0-q1| < Beta and |p0-p1| < Beta.
 * clipTable  clipping bound looked up by strength (normal filter only).
 * pitch      distance in bytes between vertically adjacent pixels.
 *
 * Naming: p0..p3 are the pixels above the edge (p0 nearest), q0..q3 those
 * below it.
 */
void EdgeLoop_Luma_horizontal(uint8* SrcPtr, uint8 *Strength, int Alpha, int Beta, int *clipTable, int pitch)
{
    const int pitch2 = pitch << 1;
    const int pitch3 = pitch + pitch2;
    int  col, bS;
    int  p0, p1, p2, q0, q1, q2;
    int  d, gap, sum, half, delta;
    int  clipBase, clipMain;
    int  fixP, fixQ;

    if (Strength[0] == 4)  /* INTRA strong filtering */
    {
        const int strongLimit = (Alpha >> 2) + 2;

        for (col = 0; col < 16; col++, SrcPtr++)
        {
            q0 = SrcPtr[0];
            q1 = SrcPtr[pitch];
            p0 = SrcPtr[-pitch];
            p1 = SrcPtr[-pitch2];

            // Skip the column unless all three activity checks pass
            d = q0 - q1;
            if (d < 0) d = -d;
            if (d >= Beta) continue;

            d = p0 - p1;
            if (d < 0) d = -d;
            if (d >= Beta) continue;

            gap = q0 - p0;
            if (gap < 0) gap = -gap;
            if (gap >= Alpha) continue;

            q2 = SrcPtr[pitch2];
            p2 = SrcPtr[-pitch3];

            // Three-pixel smoothing needs a small step across the edge
            // and a flat side.
            d = q0 - q2;
            if (d < 0) d = -d;
            fixQ = (gap < strongLimit) && (d < Beta);

            d = p0 - p2;
            if (d < 0) d = -d;
            fixP = (gap < strongLimit) && (d < Beta);

            if (fixQ)
            {
                sum = q1 + q0 + p0;
                SrcPtr[0] = (p1 + (sum << 1) + q2 + 4) >> 3;
                sum += q2;
                SrcPtr[pitch] = (sum + 2) >> 2;
                SrcPtr[pitch2] = (((SrcPtr[pitch3] + q2) << 1) + sum + 4) >> 3;
            }
            else
            {
                SrcPtr[0] = ((q1 << 1) + q0 + p1 + 2) >> 2;
            }

            if (fixP)
            {
                sum = p1 + q0 + p0;
                SrcPtr[-pitch] = (q1 + (sum << 1) + p2 + 4) >> 3;
                sum += p2;
                SrcPtr[-pitch2] = (sum + 2) >> 2;
                SrcPtr[-pitch3] = (((SrcPtr[-(pitch << 2)] + p2) << 1) + sum + 4) >> 3;
            }
            else
            {
                SrcPtr[-pitch] = ((p1 << 1) + p0 + q1 + 2) >> 2;
            }
        }
        return;
    }

    /* Normal filtering */
    for (col = 0; col < 16; col++, SrcPtr++)
    {
        bS = Strength[col >> 2];
        if (bS == 0) continue;

        q0 = SrcPtr[0];
        q1 = SrcPtr[pitch];
        p0 = SrcPtr[-pitch];
        p1 = SrcPtr[-pitch2];

        d = q0 - p0;
        if (d < 0) d = -d;
        if (d >= Alpha) continue;

        d = q0 - q1;
        if (d < 0) d = -d;
        if (d >= Beta) continue;

        d = p0 - p1;
        if (d < 0) d = -d;
        if (d >= Beta) continue;

        q2 = SrcPtr[pitch2];
        p2 = SrcPtr[-pitch3];

        d = q0 - q2;
        if (d < 0) d = -d;
        fixQ = (d < Beta);

        d = p0 - p2;
        if (d < 0) d = -d;
        fixP = (d < Beta);

        // The bound for the edge pixels grows by one per flat side
        clipBase = clipTable[bS];
        clipMain = clipBase + fixP + fixQ;

        // delta = clip(-clipMain, clipMain, (4*(q0-p0) + (p1-q1) + 4) >> 3)
        delta = (((q0 - p0) << 2) + (p1 - q1) + 4) >> 3;
        if ((uint)(delta + clipMain) > ((uint)clipMain << 1))
        {
            delta = (delta + clipMain < 0) ? -clipMain : clipMain;
        }

        sum = q0 + p0;  // uses the unfiltered edge pixels, needed below

        q0 -= delta;
        p0 += delta;
        if ((uint)q0 > 255) q0 = (q0 < 0) ? 0 : 255;
        if ((uint)p0 > 255) p0 = (p0 < 0) ? 0 : 255;
        SrcPtr[-pitch] = p0;
        SrcPtr[0] = q0;

        // With a zero bound the second-pixel correction is always zero
        if (clipBase == 0) continue;

        half = (sum + 1) >> 1;

        if (fixQ)
        {
            d = (q2 + half - (q1 << 1)) >> 1;
            if ((uint)(d + clipBase) > ((uint)clipBase << 1))
            {
                d = (d + clipBase < 0) ? -clipBase : clipBase;
            }
            SrcPtr[pitch] += d;
        }

        if (fixP)
        {
            d = (p2 + half - (p1 << 1)) >> 1;
            if ((uint)(d + clipBase) > ((uint)clipBase << 1))
            {
                d = (d + clipBase < 0) ? -clipBase : clipBase;
            }
            SrcPtr[-pitch2] += d;
        }
    }
}
1275
/*
 * Deblocks one vertical luma edge, 16 rows tall.
 *
 * SrcPtr     first pixel to the right of the edge in the top row; pixels
 *            left of the edge are at negative offsets.
 * Strength   one strength per group of 4 rows (Strength[pel >> 2]).
 *            Strength[0] == 4 selects the strong filter for all 16 rows.
 * Alpha/Beta activity thresholds: a row is filtered only when
 *            |R0-L0| < Alpha, |R0-R1| < Beta and |L0-L1| < Beta.
 * clipTable  clipping bound looked up by strength (normal filter only).
 * pitch      distance in bytes between consecutive rows.
 *
 * Naming: L0..L3 are the pixels left of the edge (L0 nearest), R0..R3 those
 * right of it.
 *
 * Pixels are read and written one byte at a time. The results therefore do
 * not depend on host byte order or on the alignment of SrcPtr, and no pixel
 * memory is accessed through an incompatible pointer type.
 */
void EdgeLoop_Luma_vertical(uint8* SrcPtr, uint8 *Strength, int Alpha, int Beta, int *clipTable, int pitch)
{
    int  pel, Strng;
    int  ap, aq;
    int  C0, c0, dif, AbsDelta, tmp, RL0;
    int  L2, L1, L0, R0, R1, R2;

    if (Strength[0] == 4)  /* INTRA strong filtering */
    {
        for (pel = 0; pel < 16; pel++, SrcPtr += pitch)
        {
            R0 = SrcPtr[0];
            R1 = SrcPtr[1];
            L0 = SrcPtr[-1];
            L1 = SrcPtr[-2];

            // |R0 - R1| < Beta
            tmp = R0 - R1;
            if (tmp < 0) tmp = -tmp;
            if (tmp >= Beta) continue;

            // |L0 - L1| < Beta
            tmp = L0 - L1;
            if (tmp < 0) tmp = -tmp;
            if (tmp >= Beta) continue;

            // |R0 - L0| < Alpha
            AbsDelta = R0 - L0;
            if (AbsDelta < 0) AbsDelta = -AbsDelta;
            if (AbsDelta >= Alpha) continue;

            R2 = SrcPtr[2];
            L2 = SrcPtr[-3];

            // |R0 - R2| < Beta && |R0 - L0| < (Alpha/4 + 2)
            tmp = R0 - R2;
            if (tmp < 0) tmp = -tmp;
            aq = (AbsDelta < ((Alpha >> 2) + 2)) && (tmp < Beta);

            // |L0 - L2| < Beta && |R0 - L0| < (Alpha/4 + 2)
            tmp = L0 - L2;
            if (tmp < 0) tmp = -tmp;
            ap = (AbsDelta < ((Alpha >> 2) + 2)) && (tmp < Beta);

            // All inputs below are the unfiltered values captured above;
            // SrcPtr[3] and SrcPtr[-4] are never written by this filter.
            if (aq)
            {
                tmp = R0 + L0 + R1;
                SrcPtr[0] = ((tmp << 1) + L1 + R2 + 4) >> 3;
                tmp += R2;
                SrcPtr[1] = (tmp + 2) >> 2;
                SrcPtr[2] = (((SrcPtr[3] + R2) << 1) + tmp + 4) >> 3;
            }
            else
            {
                SrcPtr[0] = ((R1 << 1) + R0 + L1 + 2) >> 2;
            }

            if (ap)
            {
                tmp = R0 + L0 + L1;
                SrcPtr[-1] = ((tmp << 1) + R1 + L2 + 4) >> 3;
                tmp += L2;
                SrcPtr[-2] = (tmp + 2) >> 2;
                SrcPtr[-3] = (((SrcPtr[-4] + L2) << 1) + tmp + 4) >> 3;
            }
            else
            {
                SrcPtr[-1] = ((L1 << 1) + L0 + R1 + 2) >> 2;
            }
        }
        return;
    }

    /* Normal filtering */
    for (pel = 0; pel < 16; pel++, SrcPtr += pitch)
    {
        Strng = Strength[pel >> 2];
        if (Strng == 0) continue;

        R0 = SrcPtr[0];
        R1 = SrcPtr[1];
        L0 = SrcPtr[-1];
        L1 = SrcPtr[-2];

        // |R0 - L0| < Alpha
        tmp = R0 - L0;
        if (tmp < 0) tmp = -tmp;
        if (tmp >= Alpha) continue;

        // |R0 - R1| < Beta
        tmp = R0 - R1;
        if (tmp < 0) tmp = -tmp;
        if (tmp >= Beta) continue;

        // |L0 - L1| < Beta
        tmp = L0 - L1;
        if (tmp < 0) tmp = -tmp;
        if (tmp >= Beta) continue;

        L2 = SrcPtr[-3];
        R2 = SrcPtr[2];

        // |R0 - R2| < Beta
        tmp = R0 - R2;
        if (tmp < 0) tmp = -tmp;
        aq = (tmp < Beta);

        // |L0 - L2| < Beta
        tmp = L0 - L2;
        if (tmp < 0) tmp = -tmp;
        ap = (tmp < Beta);

        // The bound for the edge pixels grows by one per flat side
        c0 = C0 = clipTable[Strng];
        if (ap) c0++;
        if (aq) c0++;

        // dif = IClip(-c0, c0, ((Delta << 2) + (L1 - R1) + 4) >> 3)
        dif = (((R0 - L0) << 2) + (L1 - R1) + 4) >> 3;
        if ((uint)(dif + c0) > ((uint)c0 << 1))
        {
            dif = (dif + c0 < 0) ? -c0 : c0;
        }

        RL0 = R0 + L0;  // unfiltered sum, needed for the L1/R1 correction

        // SrcPtr[0] = IClip(0, 255, R0 - dif); SrcPtr[-1] = IClip(0, 255, L0 + dif)
        R0 -= dif;
        L0 += dif;
        if ((uint)R0 > 255) R0 = (R0 < 0) ? 0 : 255;
        if ((uint)L0 > 255) L0 = (L0 < 0) ? 0 : 255;
        SrcPtr[-1] = L0;
        SrcPtr[0]  = R0;

        // With a zero bound the second-pixel correction is always zero
        if (C0 == 0) continue;

        if (ap)  // SrcPtr[-2] += IClip(-C0, C0, (L2 + ((RL0 + 1) >> 1) - (L1 << 1)) >> 1)
        {
            tmp = (L2 + ((RL0 + 1) >> 1) - (L1 << 1)) >> 1;
            if ((uint)(tmp + C0) > ((uint)C0 << 1))
            {
                tmp = (tmp + C0 < 0) ? -C0 : C0;
            }
            SrcPtr[-2] += tmp;
        }

        if (aq)  // SrcPtr[1] += IClip(-C0, C0, (R2 + ((RL0 + 1) >> 1) - (R1 << 1)) >> 1)
        {
            tmp = (R2 + ((RL0 + 1) >> 1) - (R1 << 1)) >> 1;
            if ((uint)(tmp + C0) > ((uint)C0 << 1))
            {
                tmp = (tmp + C0 < 0) ? -C0 : C0;
            }
            SrcPtr[1] += tmp;
        }
    }
}
1492
/*
 * Deblocks one vertical chroma edge, 8 rows tall.
 *
 * SrcPtr     first pixel to the right of the edge in the top row; pixels
 *            left of the edge are at negative offsets.
 * Strength   four strengths, each covering two consecutive rows. A zero
 *            entry skips its two rows; 4 selects the strong filter for them.
 * Alpha/Beta activity thresholds: a row is filtered only when
 *            |R0-L0| < Alpha, |R0-R1| < Beta and |L0-L1| < Beta.
 * clipTable  clipping bound looked up by strength (normal filter only).
 * pitch      distance in bytes between consecutive rows.
 *
 * Only the two pixels adjacent to the edge (L0, R0) are modified. Pixels are
 * read one byte at a time, so the result does not depend on host byte order
 * or on the alignment of SrcPtr.
 */
void EdgeLoop_Chroma_vertical(uint8* SrcPtr, uint8 *Strength, int Alpha, int Beta, int *clipTable, int pitch)
{
    int     grp, row, Strng;
    int     c0, dif, tmp;
    int     L1, L0, R0, R1;

    for (grp = 0; grp < 4; grp++)
    {
        Strng = Strength[grp];
        if (Strng == 0)
        {
            SrcPtr += (pitch << 1);   // nothing to do for these two rows
            continue;
        }

        for (row = 0; row < 2; row++, SrcPtr += pitch)
        {
            R0 = SrcPtr[0];
            R1 = SrcPtr[1];
            L0 = SrcPtr[-1];
            L1 = SrcPtr[-2];

            // |R0 - R1| < Beta
            tmp = R0 - R1;
            if (tmp < 0) tmp = -tmp;
            if (tmp >= Beta) continue;

            // |L0 - L1| < Beta
            tmp = L0 - L1;
            if (tmp < 0) tmp = -tmp;
            if (tmp >= Beta) continue;

            // |R0 - L0| < Alpha
            tmp = R0 - L0;
            if (tmp < 0) tmp = -tmp;
            if (tmp >= Alpha) continue;

            if (Strng == 4) /* INTRA strong filtering */
            {
                SrcPtr[0]  = ((R1 << 1) + R0 + L1 + 2) >> 2;
                SrcPtr[-1] = ((L1 << 1) + L0 + R1 + 2) >> 2;
            }
            else  /* normal filtering */
            {
                c0 = clipTable[Strng] + 1;

                // dif = IClip(-c0, c0, ((Delta << 2) + (L1 - R1) + 4) >> 3)
                dif = (((R0 - L0) << 2) + (L1 - R1) + 4) >> 3;
                if ((uint)(dif + c0) > ((uint)c0 << 1))
                {
                    dif = (dif + c0 < 0) ? -c0 : c0;
                }

                // SrcPtr[0] = IClip(0, 255, R0 - dif); SrcPtr[-1] = IClip(0, 255, L0 + dif)
                L0 += dif;
                R0 -= dif;
                if ((uint)L0 > 255) L0 = (L0 < 0) ? 0 : 255;
                if ((uint)R0 > 255) R0 = (R0 < 0) ? 0 : 255;

                SrcPtr[0]  = R0;
                SrcPtr[-1] = L0;
            }
        }
    }
}
1581
1582
/*
 * Deblocks one horizontal chroma edge, 8 pixels wide.
 *
 * SrcPtr     first pixel of the row just below the edge; rows above the edge
 *            are reached with negative multiples of pitch.
 * Strength   four strengths, each covering two adjacent columns. A zero
 *            entry skips its two columns; 4 selects the strong filter.
 * Alpha/Beta activity thresholds: a column is filtered only when
 *            |q0-p0| < Alpha, |q0-q1| < Beta and |p0-p1| < Beta.
 * clipTable  clipping bound looked up by strength (normal filter only).
 * pitch      distance in bytes between vertically adjacent pixels.
 *
 * Only the two pixels adjacent to the edge (p0 above, q0 below) are modified.
 */
void EdgeLoop_Chroma_horizontal(uint8* SrcPtr, uint8 *Strength, int Alpha, int Beta, int *clipTable, int pitch)
{
    const int pitch2 = pitch << 1;
    int  pair, k, bS;
    int  p0, p1, q0, q1;
    int  d, bound, delta;

    for (pair = 0; pair < 4; pair++)
    {
        bS = Strength[pair];
        if (bS == 0)
        {
            SrcPtr += 2;    // both columns of this pair are left untouched
            continue;
        }

        for (k = 0; k < 2; k++, SrcPtr++)
        {
            q0 = SrcPtr[0];
            p0 = SrcPtr[-pitch];
            p1 = SrcPtr[-pitch2];
            q1 = SrcPtr[pitch];

            // Skip the column unless all three activity checks pass
            d = q0 - q1;
            if (d < 0) d = -d;
            if (d >= Beta) continue;

            d = p0 - p1;
            if (d < 0) d = -d;
            if (d >= Beta) continue;

            d = q0 - p0;
            if (d < 0) d = -d;
            if (d >= Alpha) continue;

            if (bS == 4) /* INTRA strong filtering */
            {
                SrcPtr[0]      = ((q1 << 1) + q0 + p1 + 2) >> 2;
                SrcPtr[-pitch] = ((p1 << 1) + p0 + q1 + 2) >> 2;
                continue;
            }

            /* normal filtering */
            bound = clipTable[bS] + 1;

            // delta = clip(-bound, bound, (4*(q0-p0) + (p1-q1) + 4) >> 3)
            delta = (((q0 - p0) << 2) + (p1 - q1) + 4) >> 3;
            if ((uint)(delta + bound) > ((uint)bound << 1))
            {
                delta = (delta + bound < 0) ? -bound : bound;
            }

            // Move both edge pixels toward each other, saturating to 0..255
            p0 += delta;
            q0 -= delta;
            if ((uint)p0 > 255) p0 = (p0 < 0) ? 0 : 255;
            if ((uint)q0 > 255) q0 = (q0 < 0) ? 0 : 255;

            SrcPtr[0] = q0;
            SrcPtr[-pitch] = p0;
        }
    }
}
1664
1665
1666
1667
1668