159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/* ------------------------------------------------------------------
259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * Copyright (C) 1998-2009 PacketVideo
359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong *
459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * Licensed under the Apache License, Version 2.0 (the "License");
559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * you may not use this file except in compliance with the License.
659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * You may obtain a copy of the License at
759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong *
859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong *      http://www.apache.org/licenses/LICENSE-2.0
959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong *
1059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * Unless required by applicable law or agreed to in writing, software
1159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * distributed under the License is distributed on an "AS IS" BASIS,
1259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
1359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * express or implied.
1459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * See the License for the specific language governing permissions
1559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * and limitations under the License.
1659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * -------------------------------------------------------------------
1759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong */
/*********************************************************************************/
/*  Filename: sad_mb_offset.h                                                    */
/*  Description: In-line sad_mb_offset1/2/3 functions; which one is emitted is   */
/*               selected by the NUMBER macro at the point of inclusion.         */
/*               NOTE(review): this header previously said "used in dct.cpp";    */
/*               the functions here accumulate SAD values, so confirm which      */
/*               file actually includes it.                                      */
/*  Modified:                                                                    */
/*********************************************************************************/
2359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
2459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#if !defined(PV_ARM_GCC_V4) && !defined(PV_ARM_GCC_V5) /* ARM GNU COMPILER  */
2559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
2659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#if (NUMBER==3)
2759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong__inline int32 sad_mb_offset3(UChar *ref, UChar *blk, Int lx, Int dmin)
2859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==2)
2959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong__inline int32 sad_mb_offset2(UChar *ref, UChar *blk, Int lx, Int dmin)
3059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==1)
3159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong__inline int32 sad_mb_offset1(UChar *ref, UChar *blk, Int lx, Int dmin)
3259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#endif
3359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
3459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 x4, x5, x6, x8, x9, x10, x11, x12, x14;
3559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
3659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    //  x5 = (x4<<8) - x4;
3759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x4 = x5 = 0;
3859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x6 = 0xFFFF00FF;
3959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x9 = 0x80808080; /* const. */
4059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    ref -= NUMBER; /* bic ref, ref, #3 */
4159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    ref -= lx;
4259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk -= 16;
4359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x8 = 16;
4459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
4559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#if (NUMBER==3)
4659f566c4ec3dfc097ad8163523e522280b27e5c3James DongLOOP_SAD3:
4759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==2)
4859f566c4ec3dfc097ad8163523e522280b27e5c3James DongLOOP_SAD2:
4959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==1)
5059f566c4ec3dfc097ad8163523e522280b27e5c3James DongLOOP_SAD1:
5159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#endif
5259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /****** process 8 pixels ******/
5359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x10 = *((uint32*)(ref += lx)); /* D C B A */
5459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x11 = *((uint32*)(ref + 4));    /* H G F E */
5559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x12 = *((uint32*)(ref + 8));    /* L K J I */
5659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
5759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x10 = ((uint32)x10 >> SHIFT); /* 0 0 0 D */
5859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x10 = x10 | (x11 << (32 - SHIFT));        /* G F E D */
5959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x11 = ((uint32)x11 >> SHIFT); /* 0 0 0 H */
6059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x11 = x11 | (x12 << (32 - SHIFT));        /* K J I H */
6159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
6259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x12 = *((uint32*)(blk += 16));
6359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x14 = *((uint32*)(blk + 4));
6459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
6559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* process x11 & x14 */
6659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x11 = sad_4pixel(x11, x14, x9);
6759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
6859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* process x12 & x10 */
6959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x10 = sad_4pixel(x10, x12, x9);
7059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
7159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x5 = x5 + x10; /* accumulate low bytes */
7259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */
7359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x4 = x4 + ((uint32)x10 >> 8);  /* accumulate high bytes */
7459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x5 = x5 + x11;  /* accumulate low bytes */
7559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */
7659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x4 = x4 + ((uint32)x11 >> 8);  /* accumulate high bytes */
7759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
7859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /****** process 8 pixels ******/
7959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x10 = *((uint32*)(ref + 8)); /* D C B A */
8059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x11 = *((uint32*)(ref + 12));   /* H G F E */
8159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x12 = *((uint32*)(ref + 16));   /* L K J I */
8259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
8359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x10 = ((uint32)x10 >> SHIFT); /* mvn x10, x10, lsr #24  = 0xFF 0xFF 0xFF ~D */
8459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x10 = x10 | (x11 << (32 - SHIFT));        /* bic x10, x10, x11, lsl #8 = ~G ~F ~E ~D */
8559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x11 = ((uint32)x11 >> SHIFT); /* 0xFF 0xFF 0xFF ~H */
8659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x11 = x11 | (x12 << (32 - SHIFT));        /* ~K ~J ~I ~H */
8759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
8859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x12 = *((uint32*)(blk + 8));
8959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x14 = *((uint32*)(blk + 12));
9059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
9159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* process x11 & x14 */
9259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x11 = sad_4pixel(x11, x14, x9);
9359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
9459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* process x12 & x10 */
9559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x10 = sad_4pixel(x10, x12, x9);
9659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
9759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x5 = x5 + x10; /* accumulate low bytes */
9859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */
9959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x4 = x4 + ((uint32)x10 >> 8);  /* accumulate high bytes */
10059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x5 = x5 + x11;  /* accumulate low bytes */
10159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */
10259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x4 = x4 + ((uint32)x11 >> 8);  /* accumulate high bytes */
10359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
10459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /****************/
10559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x10 = x5 - (x4 << 8); /* extract low bytes */
10659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x10 = x10 + x4;     /* add with high bytes */
10759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x10 = x10 + (x10 << 16); /* add with lower half word */
10859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
10959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    if (((uint32)x10 >> 16) <= (uint32)dmin) /* compare with dmin */
11059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
11159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        if (--x8)
11259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        {
11359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#if (NUMBER==3)
11459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            goto         LOOP_SAD3;
11559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==2)
11659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            goto         LOOP_SAD2;
11759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==1)
11859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            goto         LOOP_SAD1;
11959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#endif
12059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        }
12159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
12259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
12359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
12459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ((uint32)x10 >> 16);
12559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
12659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
12759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif defined(__CC_ARM)  /* only work with arm v5 */
12859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
12959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#if (NUMBER==3)
13059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong__inline int32 sad_mb_offset3(UChar *ref, UChar *blk, Int lx, Int dmin, int32 x8)
13159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==2)
13259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong__inline int32 sad_mb_offset2(UChar *ref, UChar *blk, Int lx, Int dmin, int32 x8)
13359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==1)
13459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong__inline int32 sad_mb_offset1(UChar *ref, UChar *blk, Int lx, Int dmin, int32 x8)
13559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#endif
13659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
13759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 x4, x5, x6, x9, x10, x11, x12, x14;
13859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
13959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x9 = 0x80808080; /* const. */
14059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x4 = x5 = 0;
14159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
14259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    __asm{
14359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        MVN      x6, #0xff0000;
14459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        BIC      ref, ref, #3;
14559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
14659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#if (NUMBER==3)
14759f566c4ec3dfc097ad8163523e522280b27e5c3James DongLOOP_SAD3:
14859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==2)
14959f566c4ec3dfc097ad8163523e522280b27e5c3James DongLOOP_SAD2:
15059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==1)
15159f566c4ec3dfc097ad8163523e522280b27e5c3James DongLOOP_SAD1:
15259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#endif
15359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
15459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /****** process 8 pixels ******/
15559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x11 = *((int32*)(ref + 12));
15659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x12 = *((int32*)(ref + 16));
15759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x10 = *((int32*)(ref + 8));
15859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x14 = *((int32*)(blk + 12));
15959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
16059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    __asm{
16159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        MVN      x10, x10, lsr #SHIFT;
16259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        BIC      x10, x10, x11, lsl #(32-SHIFT);
16359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        MVN      x11, x11, lsr #SHIFT;
16459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        BIC      x11, x11, x12, lsl #(32-SHIFT);
16559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
16659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        LDR      x12, [blk, #8];
16759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
16859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
16959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* process x11 & x14 */
17059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x11 = sad_4pixelN(x11, x14, x9);
17159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
17259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* process x12 & x10 */
17359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x10 = sad_4pixelN(x10, x12, x9);
17459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
17559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    sum_accumulate;
17659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
17759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    __asm{
17859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /****** process 8 pixels ******/
17959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        LDR      x11, [ref, #4];
18059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        LDR      x12, [ref, #8];
18159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        LDR  x10, [ref], lx ;
18259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        LDR  x14, [blk, #4];
18359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
18459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        MVN      x10, x10, lsr #SHIFT;
18559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        BIC      x10, x10, x11, lsl #(32-SHIFT);
18659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        MVN      x11, x11, lsr #SHIFT;
18759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        BIC      x11, x11, x12, lsl #(32-SHIFT);
18859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
18959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        LDR      x12, [blk], #16;
19059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
19159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
19259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* process x11 & x14 */
19359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x11 = sad_4pixelN(x11, x14, x9);
19459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
19559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* process x12 & x10 */
19659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x10 = sad_4pixelN(x10, x12, x9);
19759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
19859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    sum_accumulate;
19959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
20059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /****************/
20159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x10 = x5 - (x4 << 8); /* extract low bytes */
20259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x10 = x10 + x4;     /* add with high bytes */
20359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x10 = x10 + (x10 << 16); /* add with lower half word */
20459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
20559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    __asm{
20659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        RSBS     x11, dmin, x10, lsr #16
20759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADDLSS   x8, x8, #INC_X8
20859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#if (NUMBER==3)
20959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        BLS      LOOP_SAD3;
21059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==2)
21159f566c4ec3dfc097ad8163523e522280b27e5c3James DongBLS      LOOP_SAD2;
21259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==1)
21359f566c4ec3dfc097ad8163523e522280b27e5c3James DongBLS      LOOP_SAD1;
21459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#endif
21559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
21659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
21759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ((uint32)x10 >> 16);
21859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
21959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
22059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif ( defined(PV_ARM_GCC_V5) || defined(PV_ARM_GCC_V4) ) /* ARM GNU COMPILER  */
22159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
22259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#if (NUMBER==3)
22359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong__inline int32 sad_mb_offset3(UChar *ref, UChar *blk, Int lx, Int dmin)
22459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==2)
22559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong__inline int32 sad_mb_offset2(UChar *ref, UChar *blk, Int lx, Int dmin)
22659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==1)
22759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong__inline int32 sad_mb_offset1(UChar *ref, UChar *blk, Int lx, Int dmin)
22859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#endif
22959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
23059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 x4, x5, x6, x8, x9, x10, x11, x12, x14;
23159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
23259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    //  x5 = (x4<<8) - x4;
23359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x4 = x5 = 0;
23459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x6 = 0xFFFF00FF;
23559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x9 = 0x80808080; /* const. */
23659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    ref -= NUMBER; /* bic ref, ref, #3 */
23759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    ref -= lx;
23859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x8 = 16;
23959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
24059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#if (NUMBER==3)
24159f566c4ec3dfc097ad8163523e522280b27e5c3James DongLOOP_SAD3:
24259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==2)
24359f566c4ec3dfc097ad8163523e522280b27e5c3James DongLOOP_SAD2:
24459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==1)
24559f566c4ec3dfc097ad8163523e522280b27e5c3James DongLOOP_SAD1:
24659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#endif
24759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /****** process 8 pixels ******/
24859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x10 = *((uint32*)(ref += lx)); /* D C B A */
24959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x11 = *((uint32*)(ref + 4));    /* H G F E */
25059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x12 = *((uint32*)(ref + 8));    /* L K J I */
25159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
25259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 shift = SHIFT;
25359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 shift2 = 32 - SHIFT;
25459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    asm volatile("ldr  %3, [%4, #4]\n\t"
25559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                 "mvn  %0, %0, lsr %5\n\t"
25659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                 "bic  %0, %0, %1, lsl %6\n\t"
25759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                 "mvn  %1, %1, lsr %5\n\t"
25859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                 "bic  %1, %1, %2, lsl %6\n\t"
25959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                 "ldr  %2, [%4, #8]"
26059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong             : "+r"(x10), "+r"(x11), "+r"(x12), "=r"(x14)
26159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                         : "r"(blk), "r"(shift), "r"(shift2));
26259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
26359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* process x11 & x14 */
26459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x11 = sad_4pixel(x11, x14, x9);
26559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
26659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* process x12 & x10 */
26759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x10 = sad_4pixel(x10, x12, x9);
26859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
26959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    sum_accumulate;
27059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
27159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /****** process 8 pixels ******/
27259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x10 = *((uint32*)(ref + 8)); /* D C B A */
27359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x11 = *((uint32*)(ref + 12));   /* H G F E */
27459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x12 = *((uint32*)(ref + 16));   /* L K J I */
27559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
27659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    asm volatile("ldr  %3, [%4, #4]\n\t"
27759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                 "mvn  %0, %0, lsr %5\n\t"
27859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                 "bic  %0, %0, %1, lsl %6\n\t"
27959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                 "mvn  %1, %1, lsr %5\n\t"
28059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                 "bic  %1, %1, %2, lsl %6\n\t"
28159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                 "ldr  %2, [%4, #8]"
28259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong             : "+r"(x10), "+r"(x11), "+r"(x12), "=r"(x14)
28359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                         : "r"(blk), "r"(shift), "r"(shift2));
28459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
28559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* process x11 & x14 */
28659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x11 = sad_4pixel(x11, x14, x9);
28759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
28859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* process x12 & x10 */
28959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x10 = sad_4pixel(x10, x12, x9);
29059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
29159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    sum_accumulate;
29259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
29359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /****************/
29459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x10 = x5 - (x4 << 8); /* extract low bytes */
29559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x10 = x10 + x4;     /* add with high bytes */
29659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x10 = x10 + (x10 << 16); /* add with lower half word */
29759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
29859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    if (((uint32)x10 >> 16) <= (uint32)dmin) /* compare with dmin */
29959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
30059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        if (--x8)
30159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        {
30259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#if (NUMBER==3)
30359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            goto         LOOP_SAD3;
30459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==2)
30559f566c4ec3dfc097ad8163523e522280b27e5c3James Donggoto         LOOP_SAD2;
30659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==1)
30759f566c4ec3dfc097ad8163523e522280b27e5c3James Donggoto         LOOP_SAD1;
30859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#endif
30959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        }
31059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
31159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
31259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
31359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ((uint32)x10 >> 16);
31459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
31559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
31659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#endif
31759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
318