1/* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 * express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
17 */
18
19#include <string.h>
20
21#include "avclib_common.h"
22
#define MAX_QP 51
#define MB_BLOCK_SIZE 16

/* Boundary strengths for the four 4-pel segments of one edge of an intra macroblock,
 * one byte per segment packed into an int.
 * Entry 0 (strength 4) is used by GetStrength_Edge0() for the macroblock-boundary edge;
 * entries 1..3 (strength 3) are used by GetStrength_VerticalEdges() for the inner edges. */
static const int ININT_STRENGTH[4] = {0x04040404, 0x03030303, 0x03030303, 0x03030303};


/* The next three tables are indexed by a QP-derived index already clipped to [0, MAX_QP]
 * (see QP_CLIP_TAB below) and supply the Alpha, Beta and clipping arguments that
 * DeblockMb() passes to the EdgeLoop_*() filters.
 * NOTE: they must be regenerated if the QP doubling period is changed, e.g. from 6 to 8 values. */

static const int ALPHA_TABLE[52] =
{
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    4, 4, 5, 6,  7, 8, 9, 10, 12, 13, 15, 17,
    20, 22, 25, 28, 32, 36, 40, 45,
    50, 56, 63, 71, 80, 90, 101, 113,
    127, 144, 162, 182, 203, 226, 255, 255
};

static const int BETA_TABLE[52] =
{
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    2, 2, 2, 3,  3, 3, 3, 4, 4, 4, 6, 6,
    7, 7, 8, 8, 9, 9, 10, 10,
    11, 11, 12, 12, 13, 13, 14, 14,
    15, 15, 16, 16, 17, 17, 18, 18
};

/* One row of five clipping values per index; DeblockMb() hands a whole row to the edge filters. */
static const int CLIP_TAB[52][5] =
{
    { 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0},
    { 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0},
    { 0, 0, 0, 0, 0}, { 0, 0, 0, 1, 1}, { 0, 0, 0, 1, 1}, { 0, 0, 0, 1, 1}, { 0, 0, 0, 1, 1}, { 0, 0, 1, 1, 1}, { 0, 0, 1, 1, 1}, { 0, 1, 1, 1, 1},
    { 0, 1, 1, 1, 1}, { 0, 1, 1, 1, 1}, { 0, 1, 1, 1, 1}, { 0, 1, 1, 2, 2}, { 0, 1, 1, 2, 2}, { 0, 1, 1, 2, 2}, { 0, 1, 1, 2, 2}, { 0, 1, 2, 3, 3},
    { 0, 1, 2, 3, 3}, { 0, 2, 2, 3, 3}, { 0, 2, 2, 4, 4}, { 0, 2, 3, 4, 4}, { 0, 2, 3, 4, 4}, { 0, 3, 3, 5, 5}, { 0, 3, 4, 6, 6}, { 0, 3, 4, 6, 6},
    { 0, 4, 5, 7, 7}, { 0, 4, 5, 8, 8}, { 0, 4, 6, 9, 9}, { 0, 5, 7, 10, 10}, { 0, 6, 8, 11, 11}, { 0, 6, 8, 13, 13}, { 0, 7, 10, 14, 14}, { 0, 8, 11, 16, 16},
    { 0, 9, 12, 18, 18}, { 0, 10, 13, 20, 20}, { 0, 11, 15, 23, 23}, { 0, 13, 17, 25, 25}
};

/* QP clipping table: QP_CLIP_TAB[12 + x] == clip(x, 0, MAX_QP) for x in [-12, 63].
 * x is QP + video->FilterOffsetA/B, where the offset is expected to lie in [-12, 12].
 * Users index it through a pointer advanced by 12 so that x can be used directly. */
static const int QP_CLIP_TAB[76] =
{
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,              // [-12, 0]
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
    13, 14, 15, 16, 17, 18, 19, 20, 21,
    22, 23, 24, 25, 26, 27, 28, 29, 30,
    31, 32, 33, 34, 35, 36, 37, 38, 39,
    40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,     // [1, 51]
    51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51      // [52, 63]
};
58
/* Forward declarations of the file-local helpers used by the deblocking entry points. */

/* Deblocks one macroblock at (mb_x, mb_y); SrcY/SrcU/SrcV point at its top-left luma/Cb/Cr samples. */
static void DeblockMb(AVCCommonObj *video, int mb_x, int mb_y, uint8 *SrcY, uint8 *SrcU, uint8 *SrcV);
/* Superseded generic strength routine, kept for reference only: */
//static void GetStrength(AVCCommonObj *video, uint8 *Strength, AVCMacroblock* MbP, AVCMacroblock* MbQ, int dir, int edge);
/* Boundary strengths for the macroblock-boundary edge (dir 0: left edge, dir 1: top edge). */
static void GetStrength_Edge0(uint8 *Strength, AVCMacroblock* MbP, AVCMacroblock* MbQ, int dir);
/* Boundary strengths for the three edges inside macroblock MbQ (4 bytes per edge). */
static void GetStrength_VerticalEdges(uint8 *Strength, AVCMacroblock* MbQ);
static void GetStrength_HorizontalEdges(uint8 Strength[12], AVCMacroblock* MbQ);
/* Edge filters: apply the filter along one 16-pel luma / 8-pel chroma edge starting at SrcPtr. */
static void EdgeLoop_Luma_vertical(uint8* SrcPtr, uint8 *Strength, int Alpha, int Beta, int *clipTable, int pitch);
static void EdgeLoop_Luma_horizontal(uint8* SrcPtr, uint8 *Strength, int Alpha, int Beta, int *clipTable, int pitch);
static void EdgeLoop_Chroma_vertical(uint8* SrcPtr, uint8 *Strength, int Alpha, int Beta, int *clipTable, int pitch);
static void EdgeLoop_Chroma_horizontal(uint8* SrcPtr, uint8 *Strength, int Alpha, int Beta, int *clipTable, int pitch);
68
69/*
70 *****************************************************************************************
71 * \brief Filter all macroblocks in order of increasing macroblock address.
72 *****************************************************************************************
73*/
74
75OSCL_EXPORT_REF AVCStatus DeblockPicture(AVCCommonObj *video)
76{
77    uint   i, j;
78    int   pitch = video->currPic->pitch, pitch_c, width;
79    uint8 *SrcY, *SrcU, *SrcV;
80
81    SrcY = video->currPic->Sl;      // pointers to source
82    SrcU = video->currPic->Scb;
83    SrcV = video->currPic->Scr;
84    pitch_c = pitch >> 1;
85    width = video->currPic->width;
86
87    for (i = 0; i < video->PicHeightInMbs; i++)
88    {
89        for (j = 0; j < video->PicWidthInMbs; j++)
90        {
91            DeblockMb(video, j, i, SrcY, SrcU, SrcV);
92            // update SrcY, SrcU, SrcV
93            SrcY += MB_BLOCK_SIZE;
94            SrcU += (MB_BLOCK_SIZE >> 1);
95            SrcV += (MB_BLOCK_SIZE >> 1);
96        }
97
98        SrcY += ((pitch << 4) - width);
99        SrcU += ((pitch_c << 3) - (width >> 1));
100        SrcV += ((pitch_c << 3) - (width >> 1));
101    }
102
103    return AVC_SUCCESS;
104}
105
106#ifdef MB_BASED_DEBLOCK
107/*
108 *****************************************************************************************
109 * \brief Filter one macroblocks in a fast macroblock memory and copy it to frame
110 *****************************************************************************************
111*/
112void MBInLoopDeblock(AVCCommonObj *video)
113{
114    AVCPictureData *currPic = video->currPic;
115#ifdef USE_PRED_BLOCK
116    uint8 *predCb, *predCr, *pred_block;
117    int i, j, dst_width, dst_height, dst_widthc, dst_heightc;
118#endif
119    int pitch = currPic->pitch;
120    int x_pos = video->mb_x;
121    int y_pos = video->mb_y;
122    uint8 *curL, *curCb, *curCr;
123    int offset;
124
125    offset = (y_pos << 4) * pitch;
126
127    curL = currPic->Sl + offset + (x_pos << 4);
128
129    offset >>= 2;
130    offset += (x_pos << 3);
131
132    curCb = currPic->Scb + offset;
133    curCr = currPic->Scr + offset;
134
135#ifdef USE_PRED_BLOCK
136    pred_block = video->pred;
137
138    /* 1. copy neighboring pixels from frame to the video->pred_block */
139    if (y_pos) /* not the 0th row */
140    {
141        /* copy to the top 4 lines of the macroblock */
142        curL -= (pitch << 2); /* go back 4 lines */
143
144        memcpy(pred_block + 4, curL, 16);
145        curL += pitch;
146        memcpy(pred_block + 24, curL, 16);
147        curL += pitch;
148        memcpy(pred_block + 44, curL, 16);
149        curL += pitch;
150        memcpy(pred_block + 64, curL, 16);
151        curL += pitch;
152
153        curCb -= (pitch << 1); /* go back 4 lines chroma */
154        curCr -= (pitch << 1);
155
156        pred_block += 400;
157
158        memcpy(pred_block + 4, curCb, 8);
159        curCb += (pitch >> 1);
160        memcpy(pred_block + 16, curCb, 8);
161        curCb += (pitch >> 1);
162        memcpy(pred_block + 28, curCb, 8);
163        curCb += (pitch >> 1);
164        memcpy(pred_block + 40, curCb, 8);
165        curCb += (pitch >> 1);
166
167        pred_block += 144;
168        memcpy(pred_block + 4, curCr, 8);
169        curCr += (pitch >> 1);
170        memcpy(pred_block + 16, curCr, 8);
171        curCr += (pitch >> 1);
172        memcpy(pred_block + 28, curCr, 8);
173        curCr += (pitch >> 1);
174        memcpy(pred_block + 40, curCr, 8);
175        curCr += (pitch >> 1);
176
177        pred_block = video->pred;
178    }
179
180    /* 2. perform deblocking. */
181    DeblockMb(video, x_pos, y_pos, pred_block + 84, pred_block + 452, pred_block + 596);
182
183    /* 3. copy it back to the frame and update pred_block */
184    predCb = pred_block + 400;
185    predCr = predCb + 144;
186
187    /* find the range of the block inside pred_block to be copied back */
188    if (y_pos)  /* the first row */
189    {
190        curL -= (pitch << 2);
191        curCb -= (pitch << 1);
192        curCr -= (pitch << 1);
193
194        dst_height = 20;
195        dst_heightc = 12;
196    }
197    else
198    {
199        pred_block += 80;
200        predCb += 48;
201        predCr += 48;
202        dst_height = 16;
203        dst_heightc = 8;
204    }
205
206    if (x_pos) /* find the width */
207    {
208        curL -= 4;
209        curCb -= 4;
210        curCr -= 4;
211        if (x_pos == (int)(video->PicWidthInMbs - 1))
212        {
213            dst_width = 20;
214            dst_widthc = 12;
215        }
216        else
217        {
218            dst_width = 16;
219            dst_widthc = 8;
220        }
221    }
222    else
223    {
224        pred_block += 4;
225        predCb += 4;
226        predCr += 4;
227        dst_width = 12;
228        dst_widthc = 4;
229    }
230
231    /* perform copy */
232    for (j = 0; j < dst_height; j++)
233    {
234        memcpy(curL, pred_block, dst_width);
235        curL += pitch;
236        pred_block += 20;
237    }
238    for (j = 0; j < dst_heightc; j++)
239    {
240        memcpy(curCb, predCb, dst_widthc);
241        memcpy(curCr, predCr, dst_widthc);
242        curCb += (pitch >> 1);
243        curCr += (pitch >> 1);
244        predCb += 12;
245        predCr += 12;
246    }
247
248    if (x_pos != (int)(video->PicWidthInMbs - 1)) /* now copy from the right-most 4 columns to the left-most 4 columns */
249    {
250        pred_block = video->pred;
251        for (i = 0; i < 20; i += 4)
252        {
253            *((uint32*)pred_block) = *((uint32*)(pred_block + 16));
254            pred_block += 20;
255            *((uint32*)pred_block) = *((uint32*)(pred_block + 16));
256            pred_block += 20;
257            *((uint32*)pred_block) = *((uint32*)(pred_block + 16));
258            pred_block += 20;
259            *((uint32*)pred_block) = *((uint32*)(pred_block + 16));
260            pred_block += 20;
261        }
262
263        for (i = 0; i < 24; i += 4)
264        {
265            *((uint32*)pred_block) = *((uint32*)(pred_block + 8));
266            pred_block += 12;
267            *((uint32*)pred_block) = *((uint32*)(pred_block + 8));
268            pred_block += 12;
269            *((uint32*)pred_block) = *((uint32*)(pred_block + 8));
270            pred_block += 12;
271            *((uint32*)pred_block) = *((uint32*)(pred_block + 8));
272            pred_block += 12;
273        }
274
275    }
276#else
277    DeblockMb(video, x_pos, y_pos, curL, curCb, curCr);
278#endif
279
280    return ;
281}
282#endif
283
284/*
285 *****************************************************************************************
286 * \brief Deblocking filter for one macroblock.
287 *****************************************************************************************
288 */
289
290void DeblockMb(AVCCommonObj *video, int mb_x, int mb_y, uint8 *SrcY, uint8 *SrcU, uint8 *SrcV)
291{
292    AVCMacroblock *MbP, *MbQ;
293    int     edge, QP, QPC;
294    int     filterLeftMbEdgeFlag = (mb_x != 0);
295    int     filterTopMbEdgeFlag  = (mb_y != 0);
296    int     pitch = video->currPic->pitch;
297    int     indexA, indexB, tmp;
298    int     Alpha, Beta, Alpha_c, Beta_c;
299    int     mbNum = mb_y * video->PicWidthInMbs + mb_x;
300    int     *clipTable, *clipTable_c, *qp_clip_tab;
301    uint8   Strength[16];
302    void*     str;
303
304    MbQ = &(video->mblock[mbNum]);      // current Mb
305
306
307    // If filter is disabled, return
308    if (video->sliceHdr->disable_deblocking_filter_idc == 1) return;
309
310    if (video->sliceHdr->disable_deblocking_filter_idc == 2)
311    {
312        // don't filter at slice boundaries
313        filterLeftMbEdgeFlag = mb_is_available(video->mblock, video->PicSizeInMbs, mbNum - 1, mbNum);
314        filterTopMbEdgeFlag  = mb_is_available(video->mblock, video->PicSizeInMbs, mbNum - video->PicWidthInMbs, mbNum);
315    }
316
317    /* NOTE: edge=0 and edge=1~3 are separate cases because of the difference of MbP, index A and indexB calculation */
318    /*       for edge = 1~3, MbP, indexA and indexB remain the same, and thus there is no need to re-calculate them for each edge */
319
320    qp_clip_tab = (int *)QP_CLIP_TAB + 12;
321
322    /* 1.VERTICAL EDGE + MB BOUNDARY (edge = 0) */
323    if (filterLeftMbEdgeFlag)
324    {
325        MbP = MbQ - 1;
326        //GetStrength(video, Strength, MbP, MbQ, 0, 0); // Strength for 4 blks in 1 stripe, 0 => vertical edge
327        GetStrength_Edge0(Strength, MbP, MbQ, 0);
328
329        str = (void*)Strength; //de-ref type-punned pointer fix
330        if (*((uint32*)str))    // only if one of the 4 Strength bytes is != 0
331        {
332            QP = (MbP->QPy + MbQ->QPy + 1) >> 1; // Average QP of the two blocks;
333            indexA = QP + video->FilterOffsetA;
334            indexB = QP + video->FilterOffsetB;
335            indexA = qp_clip_tab[indexA]; // IClip(0, MAX_QP, QP+video->FilterOffsetA)
336            indexB = qp_clip_tab[indexB]; // IClip(0, MAX_QP, QP+video->FilterOffsetB)
337
338            Alpha  = ALPHA_TABLE[indexA];
339            Beta = BETA_TABLE[indexB];
340            clipTable = (int *) CLIP_TAB[indexA];
341
342            if (Alpha > 0 && Beta > 0)
343#ifdef USE_PRED_BLOCK
344                EdgeLoop_Luma_vertical(SrcY, Strength,  Alpha, Beta, clipTable, 20);
345#else
346                EdgeLoop_Luma_vertical(SrcY, Strength,  Alpha, Beta, clipTable, pitch);
347#endif
348
349            QPC = (MbP->QPc + MbQ->QPc + 1) >> 1;
350            indexA = QPC + video->FilterOffsetA;
351            indexB = QPC + video->FilterOffsetB;
352            indexA = qp_clip_tab[indexA]; // IClip(0, MAX_QP, QP+video->FilterOffsetA)
353            indexB = qp_clip_tab[indexB]; // IClip(0, MAX_QP, QP+video->FilterOffsetB)
354
355            Alpha  = ALPHA_TABLE[indexA];
356            Beta = BETA_TABLE[indexB];
357            clipTable = (int *) CLIP_TAB[indexA];
358            if (Alpha > 0 && Beta > 0)
359            {
360#ifdef USE_PRED_BLOCK
361                EdgeLoop_Chroma_vertical(SrcU, Strength, Alpha, Beta, clipTable, 12);
362                EdgeLoop_Chroma_vertical(SrcV, Strength, Alpha, Beta, clipTable, 12);
363#else
364                EdgeLoop_Chroma_vertical(SrcU, Strength, Alpha, Beta, clipTable, pitch >> 1);
365                EdgeLoop_Chroma_vertical(SrcV, Strength, Alpha, Beta, clipTable, pitch >> 1);
366#endif
367            }
368        }
369
370    } /* end of: if(filterLeftMbEdgeFlag) */
371
372    /* 2.VERTICAL EDGE (no boundary), the edges are all inside a MB */
373    /* First calculate the necesary parameters all at once, outside the loop */
374    MbP = MbQ;
375
376    indexA = MbQ->QPy + video->FilterOffsetA;
377    indexB = MbQ->QPy + video->FilterOffsetB;
378    //  index
379    indexA = qp_clip_tab[indexA]; // IClip(0, MAX_QP, QP+video->FilterOffsetA)
380    indexB = qp_clip_tab[indexB]; // IClip(0, MAX_QP, QP+video->FilterOffsetB)
381
382    Alpha = ALPHA_TABLE[indexA];
383    Beta = BETA_TABLE[indexB];
384    clipTable = (int *)CLIP_TAB[indexA];
385
386    /* Save Alpha,  Beta and clipTable for future use, with the obselete variables filterLeftMbEdgeFlag, mbNum amd tmp */
387    filterLeftMbEdgeFlag = Alpha;
388    mbNum = Beta;
389    tmp = (int)clipTable;
390
391    indexA = MbQ->QPc + video->FilterOffsetA;
392    indexB = MbQ->QPc + video->FilterOffsetB;
393    indexA = qp_clip_tab[indexA]; // IClip(0, MAX_QP, QP+video->FilterOffsetA)
394    indexB = qp_clip_tab[indexB]; // IClip(0, MAX_QP, QP+video->FilterOffsetB)
395
396    Alpha_c  = ALPHA_TABLE[indexA];
397    Beta_c = BETA_TABLE[indexB];
398    clipTable_c = (int *)CLIP_TAB[indexA];
399
400    GetStrength_VerticalEdges(Strength + 4, MbQ); // Strength for 4 blks in 1 stripe, 0 => vertical edge
401
402    for (edge = 1; edge < 4; edge++)  // 4 vertical strips of 16 pel
403    {
404        //GetStrength_VerticalEdges(video, Strength, MbP, MbQ, 0, edge); // Strength for 4 blks in 1 stripe, 0 => vertical edge
405        if (*((int*)(Strength + (edge << 2))))   // only if one of the 4 Strength bytes is != 0
406        {
407            if (Alpha > 0 && Beta > 0)
408#ifdef USE_PRED_BLOCK
409                EdgeLoop_Luma_vertical(SrcY + (edge << 2), Strength + (edge << 2),  Alpha, Beta, clipTable, 20);
410#else
411                EdgeLoop_Luma_vertical(SrcY + (edge << 2), Strength + (edge << 2),  Alpha, Beta, clipTable, pitch);
412#endif
413
414            if (!(edge & 1) && Alpha_c > 0 && Beta_c > 0)
415            {
416#ifdef USE_PRED_BLOCK
417                EdgeLoop_Chroma_vertical(SrcU + (edge << 1), Strength + (edge << 2), Alpha_c, Beta_c, clipTable_c, 12);
418                EdgeLoop_Chroma_vertical(SrcV + (edge << 1), Strength + (edge << 2), Alpha_c, Beta_c, clipTable_c, 12);
419#else
420                EdgeLoop_Chroma_vertical(SrcU + (edge << 1), Strength + (edge << 2), Alpha_c, Beta_c, clipTable_c, pitch >> 1);
421                EdgeLoop_Chroma_vertical(SrcV + (edge << 1), Strength + (edge << 2), Alpha_c, Beta_c, clipTable_c, pitch >> 1);
422#endif
423            }
424        }
425
426    } //end edge
427
428
429
430    /* 3.HORIZONTAL EDGE + MB BOUNDARY (edge = 0) */
431    if (filterTopMbEdgeFlag)
432    {
433        MbP = MbQ - video->PicWidthInMbs;
434        //GetStrength(video, Strength, MbP, MbQ, 1, 0); // Strength for 4 blks in 1 stripe, 0 => vertical edge
435        GetStrength_Edge0(Strength, MbP, MbQ, 1);
436        str = (void*)Strength; //de-ref type-punned pointer fix
437        if (*((uint32*)str))    // only if one of the 4 Strength bytes is != 0
438        {
439            QP = (MbP->QPy + MbQ->QPy + 1) >> 1; // Average QP of the two blocks;
440            indexA = QP + video->FilterOffsetA;
441            indexB = QP + video->FilterOffsetB;
442            indexA = qp_clip_tab[indexA]; // IClip(0, MAX_QP, QP+video->FilterOffsetA)
443            indexB = qp_clip_tab[indexB]; // IClip(0, MAX_QP, QP+video->FilterOffsetB)
444
445            Alpha  = ALPHA_TABLE[indexA];
446            Beta = BETA_TABLE[indexB];
447            clipTable = (int *)CLIP_TAB[indexA];
448
449            if (Alpha > 0 && Beta > 0)
450            {
451#ifdef USE_PRED_BLOCK
452                EdgeLoop_Luma_horizontal(SrcY, Strength,  Alpha, Beta, clipTable, 20);
453#else
454                EdgeLoop_Luma_horizontal(SrcY, Strength,  Alpha, Beta, clipTable, pitch);
455#endif
456            }
457
458            QPC = (MbP->QPc + MbQ->QPc + 1) >> 1;
459            indexA = QPC + video->FilterOffsetA;
460            indexB = QPC + video->FilterOffsetB;
461            indexA = qp_clip_tab[indexA]; // IClip(0, MAX_QP, QP+video->FilterOffsetA)
462            indexB = qp_clip_tab[indexB]; // IClip(0, MAX_QP, QP+video->FilterOffsetB)
463
464            Alpha  = ALPHA_TABLE[indexA];
465            Beta = BETA_TABLE[indexB];
466            clipTable = (int *)CLIP_TAB[indexA];
467            if (Alpha > 0 && Beta > 0)
468            {
469#ifdef USE_PRED_BLOCK
470                EdgeLoop_Chroma_horizontal(SrcU, Strength, Alpha, Beta, clipTable, 12);
471                EdgeLoop_Chroma_horizontal(SrcV, Strength, Alpha, Beta, clipTable, 12);
472#else
473                EdgeLoop_Chroma_horizontal(SrcU, Strength, Alpha, Beta, clipTable, pitch >> 1);
474                EdgeLoop_Chroma_horizontal(SrcV, Strength, Alpha, Beta, clipTable, pitch >> 1);
475#endif
476            }
477        }
478
479    } /* end of: if(filterTopMbEdgeFlag) */
480
481
482    /* 4.HORIZONTAL EDGE (no boundary), the edges are inside a MB */
483    MbP = MbQ;
484
485    /* Recover Alpha,  Beta and clipTable for edge!=0 with the variables filterLeftMbEdgeFlag, mbNum and tmp */
486    /* Note that Alpha_c, Beta_c and clipTable_c for chroma is already calculated */
487    Alpha = filterLeftMbEdgeFlag;
488    Beta = mbNum;
489    clipTable = (int *)tmp;
490
491    GetStrength_HorizontalEdges(Strength + 4, MbQ); // Strength for 4 blks in 1 stripe, 0 => vertical edge
492
493    for (edge = 1; edge < 4; edge++)  // 4 horicontal strips of 16 pel
494    {
495        //GetStrength(video, Strength, MbP, MbQ, 1, edge); // Strength for 4 blks in 1 stripe   1 => horizontal edge
496        if (*((int*)(Strength + (edge << 2)))) // only if one of the 4 Strength bytes is != 0
497        {
498            if (Alpha > 0 && Beta > 0)
499            {
500#ifdef USE_PRED_BLOCK
501                EdgeLoop_Luma_horizontal(SrcY + (edge << 2)*20, Strength + (edge << 2),  Alpha, Beta, clipTable, 20);
502#else
503                EdgeLoop_Luma_horizontal(SrcY + (edge << 2)*pitch, Strength + (edge << 2),  Alpha, Beta, clipTable, pitch);
504#endif
505            }
506
507            if (!(edge & 1) && Alpha_c > 0 && Beta_c > 0)
508            {
509#ifdef USE_PRED_BLOCK
510                EdgeLoop_Chroma_horizontal(SrcU + (edge << 1)*12, Strength + (edge << 2), Alpha_c, Beta_c, clipTable_c, 12);
511                EdgeLoop_Chroma_horizontal(SrcV + (edge << 1)*12, Strength + (edge << 2), Alpha_c, Beta_c, clipTable_c, 12);
512#else
513                EdgeLoop_Chroma_horizontal(SrcU + (edge << 1)*(pitch >> 1), Strength + (edge << 2), Alpha_c, Beta_c, clipTable_c, pitch >> 1);
514                EdgeLoop_Chroma_horizontal(SrcV + (edge << 1)*(pitch >> 1), Strength + (edge << 2), Alpha_c, Beta_c, clipTable_c, pitch >> 1);
515#endif
516            }
517        }
518
519    } //end edge
520
521    return;
522}
523
524/*
525 *****************************************************************************************************
526 * \brief   returns a buffer of 4 Strength values for one stripe in a mb (for different Frame types)
527 *****************************************************************************************************
528*/
529
530void GetStrength_Edge0(uint8 *Strength, AVCMacroblock* MbP, AVCMacroblock* MbQ, int dir)
531{
532    int tmp;
533    int16 *ptrQ, *ptrP;
534    void* vptr;
535    uint8 *pStrength;
536    void* refIdx;
537
538    if (MbP->mbMode == AVC_I4 || MbP->mbMode == AVC_I16 ||
539            MbQ->mbMode == AVC_I4 || MbQ->mbMode == AVC_I16)
540    {
541
542        *((int*)Strength) = ININT_STRENGTH[0];      // Start with Strength=3. or Strength=4 for Mb-edge
543
544    }
545    else // if not intra or SP-frame
546    {
547        *((int*)Strength) = 0;
548
549        if (dir == 0)  // Vertical Edge 0
550        {
551
552            //1. Check the ref_frame_id
553            refIdx = (void*) MbQ->RefIdx; //de-ref type-punned pointer fix
554            ptrQ = (int16*)refIdx;
555            refIdx = (void*)MbP->RefIdx; //de-ref type-punned pointer fix
556            ptrP = (int16*)refIdx;
557            pStrength = Strength;
558            if (ptrQ[0] != ptrP[1]) pStrength[0] = 1;
559            if (ptrQ[2] != ptrP[3]) pStrength[2] = 1;
560            pStrength[1] = pStrength[0];
561            pStrength[3] = pStrength[2];
562
563            //2. Check the non-zero coeff blocks (4x4)
564            if (MbQ->nz_coeff[0] != 0 || MbP->nz_coeff[3] != 0) pStrength[0] = 2;
565            if (MbQ->nz_coeff[4] != 0 || MbP->nz_coeff[7] != 0) pStrength[1] = 2;
566            if (MbQ->nz_coeff[8] != 0 || MbP->nz_coeff[11] != 0) pStrength[2] = 2;
567            if (MbQ->nz_coeff[12] != 0 || MbP->nz_coeff[15] != 0) pStrength[3] = 2;
568
569            //3. Only need to check the mv difference
570            vptr = (void*)MbQ->mvL0;  // for deref type-punned pointer
571            ptrQ = (int16*)vptr;
572            ptrP = (int16*)(MbP->mvL0 + 3); // points to 4x4 block #3 (the 4th column)
573
574            // 1st blk
575            if (*pStrength == 0)
576            {
577                // check |mv difference| >= 4
578                tmp = *ptrQ++ - *ptrP++;
579                if (tmp < 0) tmp = -tmp;
580                if (tmp >= 4) *pStrength = 1;
581
582                tmp = *ptrQ-- - *ptrP--;
583                if (tmp < 0) tmp = -tmp;
584                if (tmp >= 4) *pStrength = 1;
585            }
586
587            pStrength++;
588            ptrQ += 8;
589            ptrP += 8;
590
591            // 2nd blk
592            if (*pStrength == 0)
593            {
594                // check |mv difference| >= 4
595                tmp = *ptrQ++ - *ptrP++;
596                if (tmp < 0) tmp = -tmp;
597                if (tmp >= 4) *pStrength = 1;
598
599                tmp = *ptrQ-- - *ptrP--;
600                if (tmp < 0) tmp = -tmp;
601                if (tmp >= 4) *pStrength = 1;
602            }
603
604            pStrength++;
605            ptrQ += 8;
606            ptrP += 8;
607
608            // 3rd blk
609            if (*pStrength == 0)
610            {
611                // check |mv difference| >= 4
612                tmp = *ptrQ++ - *ptrP++;
613                if (tmp < 0) tmp = -tmp;
614                if (tmp >= 4) *pStrength = 1;
615
616                tmp = *ptrQ-- - *ptrP--;
617                if (tmp < 0) tmp = -tmp;
618                if (tmp >= 4) *pStrength = 1;
619            }
620
621            pStrength++;
622            ptrQ += 8;
623            ptrP += 8;
624
625            // 4th blk
626            if (*pStrength == 0)
627            {
628                // check |mv difference| >= 4
629                tmp = *ptrQ++ - *ptrP++;
630                if (tmp < 0) tmp = -tmp;
631                if (tmp >= 4) *pStrength = 1;
632
633                tmp = *ptrQ-- - *ptrP--;
634                if (tmp < 0) tmp = -tmp;
635                if (tmp >= 4) *pStrength = 1;
636            }
637        }
638        else   // Horizontal Edge 0
639        {
640
641            //1. Check the ref_frame_id
642            refIdx = (void*)MbQ->RefIdx;  //de-ref type-punned pointer
643            ptrQ = (int16*)refIdx;
644            refIdx = (void*)MbP->RefIdx;  //de-ref type-punned pointer
645            ptrP = (int16*)refIdx;
646            pStrength = Strength;
647            if (ptrQ[0] != ptrP[2]) pStrength[0] = 1;
648            if (ptrQ[1] != ptrP[3]) pStrength[2] = 1;
649            pStrength[1] = pStrength[0];
650            pStrength[3] = pStrength[2];
651
652            //2. Check the non-zero coeff blocks (4x4)
653            if (MbQ->nz_coeff[0] != 0 || MbP->nz_coeff[12] != 0) pStrength[0] = 2;
654            if (MbQ->nz_coeff[1] != 0 || MbP->nz_coeff[13] != 0) pStrength[1] = 2;
655            if (MbQ->nz_coeff[2] != 0 || MbP->nz_coeff[14] != 0) pStrength[2] = 2;
656            if (MbQ->nz_coeff[3] != 0 || MbP->nz_coeff[15] != 0) pStrength[3] = 2;
657
658            //3. Only need to check the mv difference
659            vptr = (void*)MbQ->mvL0;
660            ptrQ = (int16*)vptr;
661            ptrP = (int16*)(MbP->mvL0 + 12); // points to 4x4 block #12 (the 4th row)
662
663            // 1st blk
664            if (*pStrength == 0)
665            {
666                // check |mv difference| >= 4
667                tmp = *ptrQ++ - *ptrP++;
668                if (tmp < 0) tmp = -tmp;
669                if (tmp >= 4) *pStrength = 1;
670
671                tmp = *ptrQ-- - *ptrP--;
672                if (tmp < 0) tmp = -tmp;
673                if (tmp >= 4) *pStrength = 1;
674            }
675
676            pStrength++;
677            ptrQ += 2;
678            ptrP += 2;
679
680            // 2nd blk
681            if (*pStrength  == 0)
682            {
683                // check |mv difference| >= 4
684                tmp = *ptrQ++ - *ptrP++;
685                if (tmp < 0) tmp = -tmp;
686                if (tmp >= 4) *pStrength = 1;
687
688                tmp = *ptrQ-- - *ptrP--;
689                if (tmp < 0) tmp = -tmp;
690                if (tmp >= 4) *pStrength = 1;
691            }
692
693            pStrength++;
694            ptrQ += 2;
695            ptrP += 2;
696
697            // 3rd blk
698            if (*pStrength  == 0)
699            {
700                // check |mv difference| >= 4
701                tmp = *ptrQ++ - *ptrP++;
702                if (tmp < 0) tmp = -tmp;
703                if (tmp >= 4) *pStrength = 1;
704
705                tmp = *ptrQ-- - *ptrP--;
706                if (tmp < 0) tmp = -tmp;
707                if (tmp >= 4) *pStrength = 1;
708            }
709
710            pStrength++;
711            ptrQ += 2;
712            ptrP += 2;
713
714            // 4th blk
715            if (*pStrength  == 0)
716            {
717                // check |mv difference| >= 4
718                tmp = *ptrQ++ - *ptrP++;
719                if (tmp < 0) tmp = -tmp;
720                if (tmp >= 4) *pStrength = 1;
721
722                tmp = *ptrQ-- - *ptrP--;
723                if (tmp < 0) tmp = -tmp;
724                if (tmp >= 4) *pStrength = 1;
725            }
726
727        } /* end of: else if(dir == 0) */
728
729    } /* end of: if( !(MbP->mbMode == AVC_I4 ...) */
730}
731
732
733void GetStrength_VerticalEdges(uint8 *Strength, AVCMacroblock* MbQ)
734{
735    int     idx, tmp;
736    int16   *ptr, *pmvx, *pmvy;
737    uint8   *pnz;
738    uint8   *pStrength, *pStr;
739    void* refIdx;
740
741    if (MbQ->mbMode == AVC_I4 || MbQ->mbMode == AVC_I16)
742    {
743        *((int*)Strength)     = ININT_STRENGTH[1];      // Start with Strength=3. or Strength=4 for Mb-edge
744        *((int*)(Strength + 4)) = ININT_STRENGTH[2];
745        *((int*)(Strength + 8)) = ININT_STRENGTH[3];
746    }
747    else   // Not intra or SP-frame
748    {
749
750        *((int*)Strength)     = 0; // for non-intra MB, strength = 0, 1 or 2.
751        *((int*)(Strength + 4)) = 0;
752        *((int*)(Strength + 8)) = 0;
753
754        //1. Check the ref_frame_id
755        refIdx = (void*)MbQ->RefIdx;  //de-ref type-punned pointer fix
756        ptr = (int16*)refIdx;
757        pStrength = Strength;
758        if (ptr[0] != ptr[1]) pStrength[4] = 1;
759        if (ptr[2] != ptr[3]) pStrength[6] = 1;
760        pStrength[5] = pStrength[4];
761        pStrength[7] = pStrength[6];
762
763        //2. Check the nz_coeff block and mv difference
764        pmvx = (int16*)(MbQ->mvL0 + 1); // points to 4x4 block #1,not #0
765        pmvy = pmvx + 1;
766        for (idx = 0; idx < 4; idx += 2) // unroll the loop, make 4 iterations to 2
767        {
768            // first/third row : 1,2,3 or 9,10,12
769            // Strength = 2 for a whole row
770            pnz = MbQ->nz_coeff + (idx << 2);
771            if (*pnz++ != 0) *pStrength = 2;
772            if (*pnz++ != 0)
773            {
774                *pStrength = 2;
775                *(pStrength + 4) = 2;
776            }
777            if (*pnz++ != 0)
778            {
779                *(pStrength + 4) = 2;
780                *(pStrength + 8) = 2;
781            }
782            if (*pnz != 0) *(pStrength + 8) = 2;
783
784            // Then Strength = 1
785            if (*pStrength == 0)
786            {
787                //within the same 8x8 block, no need to check the reference id
788                //only need to check the |mv difference| >= 4
789                tmp = *pmvx - *(pmvx - 2);
790                if (tmp < 0) tmp = -tmp;
791                if (tmp >= 4) *pStrength = 1;
792
793                tmp = *pmvy - *(pmvy - 2);
794                if (tmp < 0) tmp = -tmp;
795                if (tmp >= 4) *pStrength = 1;
796            }
797
798            pmvx += 2;
799            pmvy += 2;
800            pStr = pStrength + 4;
801
802            if (*pStr == 0)
803            {
804                //check the |mv difference| >= 4
805                tmp = *pmvx - *(pmvx - 2);
806                if (tmp < 0) tmp = -tmp;
807                if (tmp >= 4) *pStr = 1;
808
809                tmp = *pmvy - *(pmvy - 2);
810                if (tmp < 0) tmp = -tmp;
811                if (tmp >= 4) *pStr = 1;
812            }
813
814            pmvx += 2;
815            pmvy += 2;
816            pStr = pStrength + 8;
817
818            if (*pStr == 0)
819            {
820                //within the same 8x8 block, no need to check the reference id
821                //only need to check the |mv difference| >= 4
822                tmp = *pmvx - *(pmvx - 2);
823                if (tmp < 0) tmp = -tmp;
824                if (tmp >= 4) *pStr = 1;
825
826                tmp = *pmvy - *(pmvy - 2);
827                if (tmp < 0) tmp = -tmp;
828                if (tmp >= 4) *pStr = 1;
829            }
830
831            // Second/fourth row: 5,6,7 or 14,15,16
832            // Strength = 2 for a whole row
833            pnz = MbQ->nz_coeff + ((idx + 1) << 2);
834            if (*pnz++ != 0) *(pStrength + 1) = 2;
835            if (*pnz++ != 0)
836            {
837                *(pStrength + 1) = 2;
838                *(pStrength + 5) = 2;
839            }
840            if (*pnz++ != 0)
841            {
842                *(pStrength + 5) = 2;
843                *(pStrength + 9) = 2;
844            }
845            if (*pnz != 0) *(pStrength + 9) = 2;
846
847            // Then Strength = 1
848            pmvx += 4;
849            pmvy += 4;
850            pStr = pStrength + 1;
851            if (*pStr == 0)
852            {
853                //within the same 8x8 block, no need to check the reference id
854                //only need to check the |mv difference| >= 4
855                tmp = *pmvx - *(pmvx - 2);
856                if (tmp < 0) tmp = -tmp;
857                if (tmp >= 4) *pStr = 1;
858
859                tmp = *pmvy - *(pmvy - 2);
860                if (tmp < 0) tmp = -tmp;
861                if (tmp >= 4) *pStr = 1;
862            }
863
864            pmvx += 2;
865            pmvy += 2;
866            pStr = pStrength + 5;
867
868            if (*pStr == 0)
869            {
870                //check the |mv difference| >= 4
871                tmp = *pmvx - *(pmvx - 2);
872                if (tmp < 0) tmp = -tmp;
873                if (tmp >= 4) *pStr = 1;
874
875                tmp = *pmvy - *(pmvy - 2);
876                if (tmp < 0) tmp = -tmp;
877                if (tmp >= 4) *pStr = 1;
878            }
879
880            pmvx += 2;
881            pmvy += 2;
882            pStr = pStrength + 9;
883
884            if (*pStr == 0)
885            {
886                //within the same 8x8 block, no need to check the reference id
887                //only need to check the |mv difference| >= 4
888                tmp = *pmvx - *(pmvx - 2);
889                if (tmp < 0) tmp = -tmp;
890                if (tmp >= 4) *pStr = 1;
891
892                tmp = *pmvy - *(pmvy - 2);
893                if (tmp < 0) tmp = -tmp;
894                if (tmp >= 4) *pStr = 1;
895            }
896
897            // update some variables for the next two rows
898            pmvx += 4;
899            pmvy += 4;
900            pStrength += 2;
901
902        } /* end of: for(idx=0; idx<2; idx++) */
903
904    } /* end of: else if( MbQ->mbMode == AVC_I4 ...) */
905}
906
907
908void GetStrength_HorizontalEdges(uint8 Strength[12], AVCMacroblock* MbQ)
909{
910    int     idx, tmp;
911    int16   *ptr, *pmvx, *pmvy;
912    uint8   *pStrength, *pStr;
913    void* refIdx;
914
915    if (MbQ->mbMode == AVC_I4 || MbQ->mbMode == AVC_I16)
916    {
917        *((int*)Strength)     = ININT_STRENGTH[1];      // Start with Strength=3. or Strength=4 for Mb-edge
918        *((int*)(Strength + 4)) = ININT_STRENGTH[2];
919        *((int*)(Strength + 8)) = ININT_STRENGTH[3];
920    }
921    else   // Not intra or SP-frame
922    {
923
924        *((int*)Strength)     = 0; // for non-intra MB, strength = 0, 1 or 2.
925        *((int*)(Strength + 4)) = 0; // for non-intra MB, strength = 0, 1 or 2.
926        *((int*)(Strength + 8)) = 0; // for non-intra MB, strength = 0, 1 or 2.
927
928
929        //1. Check the ref_frame_id
930        refIdx = (void*) MbQ->RefIdx; // de-ref type-punned fix
931        ptr = (int16*) refIdx;
932        pStrength = Strength;
933        if (ptr[0] != ptr[2]) pStrength[4] = 1;
934        if (ptr[1] != ptr[3]) pStrength[6] = 1;
935        pStrength[5] = pStrength[4];
936        pStrength[7] = pStrength[6];
937
938        //2. Check the nz_coeff block and mv difference
939        pmvx = (int16*)(MbQ->mvL0 + 4); // points to 4x4 block #4,not #0
940        pmvy = pmvx + 1;
941        for (idx = 0; idx < 4; idx += 2) // unroll the loop, make 4 iterations to 2
942        {
943            // first/third row : 1,2,3 or 9,10,12
944            // Strength = 2 for a whole row
945            if (MbQ->nz_coeff[idx] != 0) *pStrength = 2;
946            if (MbQ->nz_coeff[4+idx] != 0)
947            {
948                *pStrength = 2;
949                *(pStrength + 4) = 2;
950            }
951            if (MbQ->nz_coeff[8+idx] != 0)
952            {
953                *(pStrength + 4) = 2;
954                *(pStrength + 8) = 2;
955            }
956            if (MbQ->nz_coeff[12+idx] != 0) *(pStrength + 8) = 2;
957
958            // Then Strength = 1
959            if (*pStrength == 0)
960            {
961                //within the same 8x8 block, no need to check the reference id
962                //only need to check the |mv difference| >= 4
963                tmp = *pmvx - *(pmvx - 8);
964                if (tmp < 0) tmp = -tmp;
965                if (tmp >= 4) *pStrength = 1;
966
967                tmp = *pmvy - *(pmvy - 8);
968                if (tmp < 0) tmp = -tmp;
969                if (tmp >= 4) *pStrength = 1;
970            }
971
972            pmvx += 8;
973            pmvy += 8;
974            pStr = pStrength + 4;
975
976            if (*pStr == 0)
977            {
978                //check the |mv difference| >= 4
979                tmp = *pmvx - *(pmvx - 8);
980                if (tmp < 0) tmp = -tmp;
981                if (tmp >= 4) *pStr = 1;
982
983                tmp = *pmvy - *(pmvy - 8);
984                if (tmp < 0) tmp = -tmp;
985                if (tmp >= 4) *pStr = 1;
986            }
987
988            pmvx += 8;
989            pmvy += 8;
990            pStr = pStrength + 8;
991
992            if (*pStr == 0)
993            {
994                //within the same 8x8 block, no need to check the reference id
995                //only need to check the |mv difference| >= 4
996                tmp = *pmvx - *(pmvx - 8);
997                if (tmp < 0) tmp = -tmp;
998                if (tmp >= 4) *pStr = 1;
999
1000                tmp = *pmvy - *(pmvy - 8);
1001                if (tmp < 0) tmp = -tmp;
1002                if (tmp >= 4) *pStr = 1;
1003            }
1004
1005            // Second/fourth row: 5,6,7 or 14,15,16
1006            // Strength = 2 for a whole row
1007            if (MbQ->nz_coeff[idx+1] != 0) *(pStrength + 1) = 2;
1008            if (MbQ->nz_coeff[4+idx+1] != 0)
1009            {
1010                *(pStrength + 1) = 2;
1011                *(pStrength + 5) = 2;
1012            }
1013            if (MbQ->nz_coeff[8+idx+1] != 0)
1014            {
1015                *(pStrength + 5) = 2;
1016                *(pStrength + 9) = 2;
1017            }
1018            if (MbQ->nz_coeff[12+idx+1] != 0) *(pStrength + 9) = 2;
1019
1020            // Then Strength = 1
1021            pmvx -= 14;
1022            pmvy -= 14; // -14 = -16 + 2
1023            pStr = pStrength + 1;
1024            if (*pStr == 0)
1025            {
1026                //within the same 8x8 block, no need to check the reference id
1027                //only need to check the |mv difference| >= 4
1028                tmp = *pmvx - *(pmvx - 8);
1029                if (tmp < 0) tmp = -tmp;
1030                if (tmp >= 4) *pStr = 1;
1031
1032                tmp = *pmvy - *(pmvy - 8);
1033                if (tmp < 0) tmp = -tmp;
1034                if (tmp >= 4) *pStr = 1;
1035            }
1036
1037            pmvx += 8;
1038            pmvy += 8;
1039            pStr = pStrength + 5;
1040
1041            if (*pStr == 0)
1042            {
1043                //check the |mv difference| >= 4
1044                tmp = *pmvx - *(pmvx - 8);
1045                if (tmp < 0) tmp = -tmp;
1046                if (tmp >= 4) *pStr = 1;
1047
1048                tmp = *pmvy - *(pmvy - 8);
1049                if (tmp < 0) tmp = -tmp;
1050                if (tmp >= 4) *pStr = 1;
1051            }
1052
1053            pmvx += 8;
1054            pmvy += 8;
1055            pStr = pStrength + 9;
1056
1057            if (*pStr == 0)
1058            {
1059                //within the same 8x8 block, no need to check the reference id
1060                //only need to check the |mv difference| >= 4
1061                tmp = *pmvx - *(pmvx - 8);
1062                if (tmp < 0) tmp = -tmp;
1063                if (tmp >= 4) *pStr = 1;
1064
1065                tmp = *pmvy - *(pmvy - 8);
1066                if (tmp < 0) tmp = -tmp;
1067                if (tmp >= 4) *pStr = 1;
1068            }
1069
1070            // update some variables for the next two rows
1071            pmvx -= 14;
1072            pmvy -= 14; // -14 = -16 + 2
1073            pStrength += 2;
1074
1075        } /* end of: for(idx=0; idx<2; idx++) */
1076
1077    } /* end of: else if( MbQ->mbMode == AVC_I4 ...) */
1078}
1079
1080/*
1081 *****************************************************************************************
1082 * \brief  Filters one edge of 16 (luma) or 8 (chroma) pel
1083 *****************************************************************************************
1084*/
1085
1086void EdgeLoop_Luma_horizontal(uint8* SrcPtr, uint8 *Strength, int Alpha, int Beta, int *clipTable, int pitch)
1087{
1088    int  pel, ap = 0, aq = 0, Strng;
1089    int  C0, c0, dif, AbsDelta, tmp, tmp1;
1090    int  L2 = 0, L1, L0, R0, R1, R2 = 0, RL0;
1091
1092
1093    if (Strength[0] == 4)  /* INTRA strong filtering */
1094    {
1095        for (pel = 0; pel < 16; pel++)
1096        {
1097            R0  = SrcPtr[0];
1098            R1  = SrcPtr[pitch];
1099            L0  = SrcPtr[-pitch];
1100            L1  = SrcPtr[-(pitch<<1)];
1101
1102            // |R0 - R1| < Beta
1103            tmp1 = R0 - R1;
1104            if (tmp1 < 0) tmp1 = -tmp1;
1105            tmp = (tmp1 - Beta);
1106
1107            //|L0 - L1| < Beta
1108            tmp1 = L0 - L1;
1109            if (tmp1 < 0) tmp1 = -tmp1;
1110            tmp &= (tmp1 - Beta);
1111
1112            //|R0 - L0| < Alpha
1113            AbsDelta = R0 - L0;
1114            if (AbsDelta < 0) AbsDelta = -AbsDelta;
1115            tmp &= (AbsDelta - Alpha);
1116
1117            if (tmp < 0)
1118            {
1119                AbsDelta -= ((Alpha >> 2) + 2);
1120                R2 = SrcPtr[pitch<<1]; //inc2
1121                L2 = SrcPtr[-(pitch+(pitch<<1))]; // -inc3
1122
1123                // |R0 - R2| < Beta && |R0 - L0| < (Alpha/4 + 2)
1124                tmp = R0 - R2;
1125                if (tmp < 0) tmp = -tmp;
1126                aq = AbsDelta & (tmp - Beta);
1127
1128                // |L0 - L2| < Beta && |R0 - L0| < (Alpha/4 + 2)
1129                tmp = L0 - L2;
1130                if (tmp < 0) tmp = -tmp;
1131                ap = AbsDelta & (tmp - Beta);
1132
1133                if (aq < 0)
1134                {
1135                    tmp = R1 + R0 + L0;
1136                    SrcPtr[0] = (L1 + (tmp << 1) +  R2 + 4) >> 3;
1137                    tmp += R2;
1138                    SrcPtr[pitch]  = (tmp + 2) >> 2;
1139                    SrcPtr[pitch<<1] = (((SrcPtr[(pitch+(pitch<<1))] + R2) << 1) + tmp + 4) >> 3;
1140                }
1141                else
1142                    SrcPtr[0] = ((R1 << 1) + R0 + L1 + 2) >> 2;
1143
1144                if (ap < 0)
1145                {
1146                    tmp = L1 + R0 + L0;
1147                    SrcPtr[-pitch]  = (R1 + (tmp << 1) +  L2 + 4) >> 3;
1148                    tmp += L2;
1149                    SrcPtr[-(pitch<<1)] = (tmp + 2) >> 2;
1150                    SrcPtr[-(pitch+(pitch<<1))] = (((SrcPtr[-(pitch<<2)] + L2) << 1) + tmp + 4) >> 3;
1151                }
1152                else
1153                    SrcPtr[-pitch] = ((L1 << 1) + L0 + R1 + 2) >> 2;
1154
1155            } /* if(tmp < 0) */
1156
1157            SrcPtr ++; // Increment to next set of pixel
1158
1159        } /* end of: for(pel=0; pel<16; pel++) */
1160
1161    } /* if(Strength[0] == 4) */
1162
1163    else   /* Normal filtering */
1164    {
1165        for (pel = 0; pel < 16; pel++)
1166        {
1167            Strng = Strength[pel >> 2];
1168            if (Strng)
1169            {
1170                R0  = SrcPtr[0];
1171                R1  = SrcPtr[pitch];
1172                L0  = SrcPtr[-pitch];
1173                L1  = SrcPtr[-(pitch<<1)]; // inc2
1174
1175                //|R0 - L0| < Alpha
1176                tmp1 = R0 - L0;
1177                if (tmp1 < 0) tmp1 = -tmp1;
1178                tmp = (tmp1 - Alpha);
1179
1180                // |R0 - R1| < Beta
1181                tmp1 = R0 - R1;
1182                if (tmp1 < 0) tmp1 = -tmp1;
1183                tmp &= (tmp1 - Beta);
1184
1185                //|L0 - L1| < Beta
1186                tmp1 = L0 - L1;
1187                if (tmp1 < 0) tmp1 = -tmp1;
1188                tmp &= (tmp1 - Beta);
1189
1190                if (tmp < 0)
1191                {
1192                    R2 = SrcPtr[pitch<<1]; //inc2
1193                    L2 = SrcPtr[-(pitch+(pitch<<1))]; // -inc3
1194
1195                    // |R0 - R2| < Beta
1196                    tmp = R0 - R2;
1197                    if (tmp < 0) tmp = -tmp;
1198                    aq = tmp - Beta;
1199
1200                    // |L0 - L2| < Beta
1201                    tmp = L0 - L2;
1202                    if (tmp < 0) tmp = -tmp;
1203                    ap = tmp - Beta;
1204
1205
1206                    c0 = C0 = clipTable[Strng];
1207                    if (ap < 0) c0++;
1208                    if (aq < 0) c0++;
1209
1210                    //dif = IClip(-c0, c0, ((Delta << 2) + (L1 - R1) + 4) >> 3);
1211                    dif = (((R0 - L0) << 2) + (L1 - R1) + 4) >> 3;
1212                    tmp = dif + c0;
1213                    if ((uint)tmp > (uint)c0 << 1)
1214                    {
1215                        tmp = ~(tmp >> 31);
1216                        dif = (tmp & (c0 << 1)) - c0;
1217                    }
1218
1219                    //SrcPtr[0]    = (uint8)IClip(0, 255, R0 - dif);
1220                    //SrcPtr[-inc] = (uint8)IClip(0, 255, L0 + dif);
1221                    RL0 = R0 + L0;
1222                    R0 -= dif;
1223                    L0 += dif;
1224                    if ((uint)R0 > 255)
1225                    {
1226                        tmp = ~(R0 >> 31);
1227                        R0 = tmp & 255;
1228                    }
1229                    if ((uint)L0 > 255)
1230                    {
1231                        tmp = ~(L0 >> 31);
1232                        L0 = tmp & 255;
1233                    }
1234                    SrcPtr[-pitch] = L0;
1235                    SrcPtr[0] = R0;
1236
1237                    if (C0 != 0) /* Multiple zeros in the clip tables */
1238                    {
1239                        if (aq < 0)  // SrcPtr[inc]   += IClip(-C0, C0,(R2 + ((RL0 + 1) >> 1) - (R1<<1)) >> 1);
1240                        {
1241                            R2 = (R2 + ((RL0 + 1) >> 1) - (R1 << 1)) >> 1;
1242                            tmp = R2 + C0;
1243                            if ((uint)tmp > (uint)C0 << 1)
1244                            {
1245                                tmp = ~(tmp >> 31);
1246                                R2 = (tmp & (C0 << 1)) - C0;
1247                            }
1248                            SrcPtr[pitch] += R2;
1249                        }
1250
1251                        if (ap < 0)  //SrcPtr[-inc2] += IClip(-C0, C0,(L2 + ((RL0 + 1) >> 1) - (L1<<1)) >> 1);
1252                        {
1253                            L2 = (L2 + ((RL0 + 1) >> 1) - (L1 << 1)) >> 1;
1254                            tmp = L2 + C0;
1255                            if ((uint)tmp > (uint)C0 << 1)
1256                            {
1257                                tmp = ~(tmp >> 31);
1258                                L2 = (tmp & (C0 << 1)) - C0;
1259                            }
1260                            SrcPtr[-(pitch<<1)] += L2;
1261                        }
1262                    }
1263
1264                } /* if(tmp < 0) */
1265
1266            } /* end of:  if((Strng = Strength[pel >> 2])) */
1267
1268            SrcPtr ++; // Increment to next set of pixel
1269
1270        } /* for(pel=0; pel<16; pel++) */
1271
1272    } /* else if(Strength[0] == 4) */
1273}
1274
1275void EdgeLoop_Luma_vertical(uint8* SrcPtr, uint8 *Strength, int Alpha, int Beta, int *clipTable, int pitch)
1276{
1277    int  pel, ap = 1, aq = 1;
1278    int  C0, c0, dif, AbsDelta, Strng, tmp, tmp1;
1279    int  L2 = 0, L1, L0, R0, R1, R2 = 0;
1280    uint8 *ptr, *ptr1;
1281    register uint R_in, L_in;
1282    uint R_out, L_out;
1283
1284
1285    if (Strength[0] == 4)  /* INTRA strong filtering */
1286    {
1287
1288        for (pel = 0; pel < 16; pel++)
1289        {
1290
1291            // Read 8 pels
1292            R_in = *((uint *)SrcPtr);       // R_in = {R3, R2, R1, R0}
1293            L_in = *((uint *)(SrcPtr - 4)); // L_in = {L0, L1, L2, L3}
1294            R1   = (R_in >> 8) & 0xff;
1295            R0   = R_in & 0xff;
1296            L0   = L_in >> 24;
1297            L1   = (L_in >> 16) & 0xff;
1298
1299            // |R0 - R1| < Beta
1300            tmp1 = (R_in & 0xff) - R1;
1301            if (tmp1 < 0) tmp1 = -tmp1;
1302            tmp = (tmp1 - Beta);
1303
1304
1305            //|L0 - L1| < Beta
1306            tmp1 = (L_in >> 24) - L1;
1307            if (tmp1 < 0) tmp1 = -tmp1;
1308            tmp &= (tmp1 - Beta);
1309
1310            //|R0 - L0| < Alpha
1311            AbsDelta = (R_in & 0xff) - (L_in >> 24);
1312            if (AbsDelta < 0) AbsDelta = -AbsDelta;
1313            tmp &= (AbsDelta - Alpha);
1314
1315            if (tmp < 0)
1316            {
1317                AbsDelta -= ((Alpha >> 2) + 2);
1318                R2   = (R_in >> 16) & 0xff;
1319                L2   = (L_in >> 8) & 0xff;
1320
1321                // |R0 - R2| < Beta && |R0 - L0| < (Alpha/4 + 2)
1322                tmp1 = (R_in & 0xff) - R2;
1323                if (tmp1 < 0) tmp1 = -tmp1;
1324                aq = AbsDelta & (tmp1 - Beta);
1325
1326                // |L0 - L2| < Beta && |R0 - L0| < (Alpha/4 + 2)
1327                tmp1 = (L_in >> 24) - L2;
1328                if (tmp1 < 0) tmp1 = -tmp1;
1329                ap = AbsDelta & (tmp1 - Beta);
1330
1331
1332                ptr = SrcPtr;
1333                if (aq < 0)
1334                {
1335                    R_out = (R_in >> 24) << 24; // Keep R3 at the fourth byte
1336
1337                    tmp  = R0 + L0 + R1;
1338                    R_out |= (((tmp << 1) +  L1 + R2 + 4) >> 3);
1339                    tmp += R2;
1340                    R_out |= (((tmp + 2) >> 2) << 8);
1341                    tmp1 = ((R_in >> 24) + R2) << 1;
1342                    R_out |= (((tmp1 + tmp + 4) >> 3) << 16);
1343
1344                    *((uint *)SrcPtr) = R_out;
1345                }
1346                else
1347                    *ptr = ((R1 << 1) + R0 + L1 + 2) >> 2;
1348
1349
1350                if (ap < 0)
1351                {
1352                    L_out = (L_in << 24) >> 24; // Keep L3 at the first byte
1353
1354                    tmp  = R0 + L0 + L1;
1355                    L_out |= ((((tmp << 1) + R1 + L2 + 4) >> 3) << 24);
1356                    tmp += L2;
1357                    L_out |= (((tmp + 2) >> 2) << 16);
1358                    tmp1 = ((L_in & 0xff) + L2) << 1;
1359                    L_out |= (((tmp1 + tmp + 4) >> 3) << 8);
1360
1361                    *((uint *)(SrcPtr - 4)) = L_out;
1362                }
1363                else
1364                    *(--ptr) = ((L1 << 1) + L0 + R1 + 2) >> 2;
1365
1366            } /* if(tmp < 0) */
1367
1368            SrcPtr += pitch;    // Increment to next set of pixel
1369
1370        } /* end of: for(pel=0; pel<16; pel++) */
1371
1372    } /* if(Strength[0] == 4) */
1373
1374    else   /* Normal filtering */
1375    {
1376
1377        for (pel = 0; pel < 16; pel++)
1378        {
1379            Strng = Strength[pel >> 2];
1380            if (Strng)
1381            {
1382                // Read 8 pels
1383                R_in = *((uint *)SrcPtr);       // R_in = {R3, R2, R1, R0}
1384                L_in = *((uint *)(SrcPtr - 4)); // L_in = {L0, L1, L2, L3}
1385                R1   = (R_in >> 8) & 0xff;
1386                R0   = R_in & 0xff;
1387                L0   = L_in >> 24;
1388                L1   = (L_in >> 16) & 0xff;
1389
1390                //|R0 - L0| < Alpha
1391                tmp = R0 - L0;
1392                if (tmp < 0) tmp = -tmp;
1393                tmp -= Alpha;
1394
1395                // |R0 - R1| < Beta
1396                tmp1 = R0 - R1;
1397                if (tmp1 < 0) tmp1 = -tmp1;
1398                tmp &= (tmp1 - Beta);
1399
1400                //|L0 - L1| < Beta
1401                tmp1 = L0 - L1;
1402                if (tmp1 < 0) tmp1 = -tmp1;
1403                tmp &= (tmp1 - Beta);
1404
1405                if (tmp < 0)
1406                {
1407                    L2 = SrcPtr[-3];
1408                    R2 = SrcPtr[2];
1409
1410                    // |R0 - R2| < Beta
1411                    tmp = R0 - R2;
1412                    if (tmp < 0) tmp = -tmp;
1413                    aq = tmp - Beta;
1414
1415                    // |L0 - L2| < Beta
1416                    tmp = L0 - L2;
1417                    if (tmp < 0) tmp = -tmp;
1418                    ap = tmp - Beta;
1419
1420
1421                    c0 = C0 = clipTable[Strng];
1422                    if (ap < 0) c0++;
1423                    if (aq < 0) c0++;
1424
1425                    //dif = IClip(-c0, c0, ((Delta << 2) + (L1 - R1) + 4) >> 3);
1426                    dif = (((R0 - L0) << 2) + (L1 - R1) + 4) >> 3;
1427                    tmp = dif + c0;
1428                    if ((uint)tmp > (uint)c0 << 1)
1429                    {
1430                        tmp = ~(tmp >> 31);
1431                        dif = (tmp & (c0 << 1)) - c0;
1432                    }
1433
1434                    ptr = SrcPtr;
1435                    ptr1 = SrcPtr - 1;
1436                    //SrcPtr[0]    = (uint8)IClip(0, 255, R0 - dif);
1437                    //SrcPtr[-inc] = (uint8)IClip(0, 255, L0 + dif);
1438                    R_in = R0 - dif;
1439                    L_in = L0 + dif; /* cannot re-use R0 and L0 here */
1440                    if ((uint)R_in > 255)
1441                    {
1442                        tmp = ~((int)R_in >> 31);
1443                        R_in = tmp & 255;
1444                    }
1445                    if ((uint)L_in > 255)
1446                    {
1447                        tmp = ~((int)L_in >> 31);
1448                        L_in = tmp & 255;
1449                    }
1450                    *ptr1-- = L_in;
1451                    *ptr++  = R_in;
1452
1453                    if (C0 != 0) // Multiple zeros in the clip tables
1454                    {
1455                        if (ap < 0)  //SrcPtr[-inc2] += IClip(-C0, C0,(L2 + ((RL0 + 1) >> 1) - (L1<<1)) >> 1);
1456                        {
1457                            L2 = (L2 + ((R0 + L0 + 1) >> 1) - (L1 << 1)) >> 1;
1458                            tmp = L2 + C0;
1459                            if ((uint)tmp > (uint)C0 << 1)
1460                            {
1461                                tmp = ~(tmp >> 31);
1462                                L2 = (tmp & (C0 << 1)) - C0;
1463                            }
1464                            *ptr1 += L2;
1465                        }
1466
1467                        if (aq < 0)  // SrcPtr[inc] += IClip(-C0, C0,(R2 + ((RL0 + 1) >> 1) - (R1<<1)) >> 1);
1468                        {
1469                            R2 = (R2 + ((R0 + L0 + 1) >> 1) - (R1 << 1)) >> 1;
1470                            tmp = R2 + C0;
1471                            if ((uint)tmp > (uint)C0 << 1)
1472                            {
1473                                tmp = ~(tmp >> 31);
1474                                R2 = (tmp & (C0 << 1)) - C0;
1475                            }
1476                            *ptr += R2;
1477                        }
1478                    }
1479
1480                } /* if(tmp < 0) */
1481
1482            } /* end of:  if((Strng = Strength[pel >> 2])) */
1483
1484            SrcPtr += pitch;    // Increment to next set of pixel
1485
1486        } /* for(pel=0; pel<16; pel++) */
1487
1488    } /* else if(Strength[0] == 4) */
1489
1490}
1491
1492void EdgeLoop_Chroma_vertical(uint8* SrcPtr, uint8 *Strength, int Alpha, int Beta, int *clipTable, int pitch)
1493{
1494    int     pel, Strng;
1495    int     c0, dif;
1496    int     L1, L0, R0, R1, tmp, tmp1;
1497    uint8   *ptr;
1498    uint    R_in, L_in;
1499
1500
1501    for (pel = 0; pel < 16; pel++)
1502    {
1503        Strng = Strength[pel>>2];
1504        if (Strng)
1505        {
1506            // Read 8 pels
1507            R_in = *((uint *)SrcPtr);       // R_in = {R3, R2, R1, R0}
1508            L_in = *((uint *)(SrcPtr - 4)); // L_in = {L0, L1, L2, L3}
1509            R1   = (R_in >> 8) & 0xff;
1510            R0   = R_in & 0xff;
1511            L0   = L_in >> 24;
1512            L1   = (L_in >> 16) & 0xff;
1513
1514            // |R0 - R1| < Beta
1515            tmp1 = R0 - R1;
1516            if (tmp1 < 0) tmp1 = -tmp1;
1517            tmp = (tmp1 - Beta);
1518
1519            //|L0 - L1| < Beta
1520            tmp1 = L0 - L1;
1521            if (tmp1 < 0) tmp1 = -tmp1;
1522            tmp &= (tmp1 - Beta);
1523
1524            //|R0 - L0| < Alpha
1525            tmp1 = R0 - L0;
1526            if (tmp1 < 0) tmp1 = -tmp1;
1527            tmp &= (tmp1 - Alpha);
1528
1529            if (tmp < 0)
1530            {
1531                ptr = SrcPtr;
1532                if (Strng == 4) /* INTRA strong filtering */
1533                {
1534                    *ptr-- = ((R1 << 1) + R0 + L1 + 2) >> 2;
1535                    *ptr   = ((L1 << 1) + L0 + R1 + 2) >> 2;
1536                }
1537                else  /* normal filtering */
1538                {
1539                    c0  = clipTable[Strng] + 1;
1540                    //dif = IClip(-c0, c0, ((Delta << 2) + (L1 - R1) + 4) >> 3);
1541                    dif = (((R0 - L0) << 2) + (L1 - R1) + 4) >> 3;
1542                    tmp = dif + c0;
1543                    if ((uint)tmp > (uint)c0 << 1)
1544                    {
1545                        tmp = ~(tmp >> 31);
1546                        dif = (tmp & (c0 << 1)) - c0;
1547                    }
1548
1549                    //SrcPtr[0]    = (uint8)IClip(0, 255, R0 - dif);
1550                    //SrcPtr[-inc] = (uint8)IClip(0, 255, L0 + dif);
1551                    L0 += dif;
1552                    R0 -= dif;
1553                    if ((uint)L0 > 255)
1554                    {
1555                        tmp = ~(L0 >> 31);
1556                        L0 = tmp & 255;
1557                    }
1558                    if ((uint)R0 > 255)
1559                    {
1560                        tmp = ~(R0 >> 31);
1561                        R0 = tmp & 255;
1562                    }
1563
1564                    *ptr-- = R0;
1565                    *ptr = L0;
1566                }
1567            }
1568            pel ++;
1569            SrcPtr += pitch;   // Increment to next set of pixel
1570
1571        } /* end of: if((Strng = Strength[pel >> 2])) */
1572        else
1573        {
1574            pel += 3;
1575            SrcPtr += (pitch << 1); //PtrInc << 1;
1576        }
1577
1578    } /* end of: for(pel=0; pel<16; pel++) */
1579}
1580
1581
1582void EdgeLoop_Chroma_horizontal(uint8* SrcPtr, uint8 *Strength, int Alpha, int Beta, int *clipTable, int pitch)
1583{
1584    int  pel, Strng;
1585    int  c0, dif;
1586    int  L1, L0, R0, R1, tmp, tmp1;
1587
1588    for (pel = 0; pel < 16; pel++)
1589    {
1590        Strng = Strength[pel>>2];
1591        if (Strng)
1592        {
1593            R0  = SrcPtr[0];
1594            L0  = SrcPtr[-pitch];
1595            L1  = SrcPtr[-(pitch<<1)]; //inc2
1596            R1  = SrcPtr[pitch];
1597
1598            // |R0 - R1| < Beta
1599            tmp1 = R0 - R1;
1600            if (tmp1 < 0) tmp1 = -tmp1;
1601            tmp = (tmp1 - Beta);
1602
1603            //|L0 - L1| < Beta
1604            tmp1 = L0 - L1;
1605            if (tmp1 < 0) tmp1 = -tmp1;
1606            tmp &= (tmp1 - Beta);
1607
1608            //|R0 - L0| < Alpha
1609            tmp1 = R0 - L0;
1610            if (tmp1 < 0) tmp1 = -tmp1;
1611            tmp &= (tmp1 - Alpha);
1612
1613            if (tmp < 0)
1614            {
1615                if (Strng == 4) /* INTRA strong filtering */
1616                {
1617                    SrcPtr[0]      = ((R1 << 1) + R0 + L1 + 2) >> 2;
1618                    SrcPtr[-pitch] = ((L1 << 1) + L0 + R1 + 2) >> 2;
1619                }
1620                else  /* normal filtering */
1621                {
1622                    c0  = clipTable[Strng] + 1;
1623                    //dif = IClip(-c0, c0, ((Delta << 2) + (L1 - R1) + 4) >> 3);
1624                    dif = (((R0 - L0) << 2) + (L1 - R1) + 4) >> 3;
1625                    tmp = dif + c0;
1626                    if ((uint)tmp > (uint)c0 << 1)
1627                    {
1628                        tmp = ~(tmp >> 31);
1629                        dif = (tmp & (c0 << 1)) - c0;
1630                    }
1631
1632                    //SrcPtr[-inc] = (uint8)IClip(0, 255, L0 + dif);
1633                    //SrcPtr[0]    = (uint8)IClip(0, 255, R0 - dif);
1634                    L0 += dif;
1635                    R0 -= dif;
1636                    if ((uint)L0 > 255)
1637                    {
1638                        tmp = ~(L0 >> 31);
1639                        L0 = tmp & 255;
1640                    }
1641                    if ((uint)R0 > 255)
1642                    {
1643                        tmp = ~(R0 >> 31);
1644                        R0 = tmp & 255;
1645                    }
1646                    SrcPtr[0] = R0;
1647                    SrcPtr[-pitch] = L0;
1648                }
1649            }
1650
1651            pel ++;
1652            SrcPtr ++; // Increment to next set of pixel
1653
1654        } /* end of: if((Strng = Strength[pel >> 2])) */
1655        else
1656        {
1657            pel += 3;
1658            SrcPtr += 2;
1659        }
1660
1661    } /* end of: for(pel=0; pel<16; pel++) */
1662}
1663
1664
1665
1666
1667