129a84457aed4c45bc900998b5e11c03023264208James Dong/* ------------------------------------------------------------------
229a84457aed4c45bc900998b5e11c03023264208James Dong * Copyright (C) 1998-2009 PacketVideo
329a84457aed4c45bc900998b5e11c03023264208James Dong *
429a84457aed4c45bc900998b5e11c03023264208James Dong * Licensed under the Apache License, Version 2.0 (the "License");
529a84457aed4c45bc900998b5e11c03023264208James Dong * you may not use this file except in compliance with the License.
629a84457aed4c45bc900998b5e11c03023264208James Dong * You may obtain a copy of the License at
729a84457aed4c45bc900998b5e11c03023264208James Dong *
829a84457aed4c45bc900998b5e11c03023264208James Dong *      http://www.apache.org/licenses/LICENSE-2.0
929a84457aed4c45bc900998b5e11c03023264208James Dong *
1029a84457aed4c45bc900998b5e11c03023264208James Dong * Unless required by applicable law or agreed to in writing, software
1129a84457aed4c45bc900998b5e11c03023264208James Dong * distributed under the License is distributed on an "AS IS" BASIS,
1229a84457aed4c45bc900998b5e11c03023264208James Dong * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
1329a84457aed4c45bc900998b5e11c03023264208James Dong * express or implied.
1429a84457aed4c45bc900998b5e11c03023264208James Dong * See the License for the specific language governing permissions
1529a84457aed4c45bc900998b5e11c03023264208James Dong * and limitations under the License.
1629a84457aed4c45bc900998b5e11c03023264208James Dong * -------------------------------------------------------------------
1729a84457aed4c45bc900998b5e11c03023264208James Dong */
1829a84457aed4c45bc900998b5e11c03023264208James Dong#ifndef _SAD_INLINE_H_
1929a84457aed4c45bc900998b5e11c03023264208James Dong#define _SAD_INLINE_H_
2029a84457aed4c45bc900998b5e11c03023264208James Dong
2129a84457aed4c45bc900998b5e11c03023264208James Dong#ifdef __cplusplus
2229a84457aed4c45bc900998b5e11c03023264208James Dongextern "C"
2329a84457aed4c45bc900998b5e11c03023264208James Dong{
2429a84457aed4c45bc900998b5e11c03023264208James Dong#endif
2529a84457aed4c45bc900998b5e11c03023264208James Dong
2642d515121f11389df082dd02319904c99dd50cd6Martin Storsjo/* Intentionally not using the gcc asm version, since it is
27f5af6314db25ff3bef9bd2eeba201bc6cc60805dMartin Storsjo * slightly slower than the plain C version on modern GCC versions. */
28f5af6314db25ff3bef9bd2eeba201bc6cc60805dMartin Storsjo#if !defined(__CC_ARM) /* Generic C version */
2929a84457aed4c45bc900998b5e11c03023264208James Dong
3029a84457aed4c45bc900998b5e11c03023264208James Dong    __inline int32 SUB_SAD(int32 sad, int32 tmp, int32 tmp2)
3129a84457aed4c45bc900998b5e11c03023264208James Dong    {
3229a84457aed4c45bc900998b5e11c03023264208James Dong        tmp = tmp - tmp2;
3329a84457aed4c45bc900998b5e11c03023264208James Dong        if (tmp > 0) sad += tmp;
3429a84457aed4c45bc900998b5e11c03023264208James Dong        else sad -= tmp;
3529a84457aed4c45bc900998b5e11c03023264208James Dong
3629a84457aed4c45bc900998b5e11c03023264208James Dong        return sad;
3729a84457aed4c45bc900998b5e11c03023264208James Dong    }
3829a84457aed4c45bc900998b5e11c03023264208James Dong
3929a84457aed4c45bc900998b5e11c03023264208James Dong    __inline int32 sad_4pixel(int32 src1, int32 src2, int32 mask)
4029a84457aed4c45bc900998b5e11c03023264208James Dong    {
4129a84457aed4c45bc900998b5e11c03023264208James Dong        int32 x7;
4229a84457aed4c45bc900998b5e11c03023264208James Dong
4329a84457aed4c45bc900998b5e11c03023264208James Dong        x7 = src2 ^ src1;       /* check odd/even combination */
4429a84457aed4c45bc900998b5e11c03023264208James Dong        if ((uint32)src2 >= (uint32)src1)
4529a84457aed4c45bc900998b5e11c03023264208James Dong        {
4629a84457aed4c45bc900998b5e11c03023264208James Dong            src1 = src2 - src1;     /* subs */
4729a84457aed4c45bc900998b5e11c03023264208James Dong        }
4829a84457aed4c45bc900998b5e11c03023264208James Dong        else
4929a84457aed4c45bc900998b5e11c03023264208James Dong        {
5029a84457aed4c45bc900998b5e11c03023264208James Dong            src1 = src1 - src2;
5129a84457aed4c45bc900998b5e11c03023264208James Dong        }
5229a84457aed4c45bc900998b5e11c03023264208James Dong        x7 = x7 ^ src1;     /* only odd bytes need to add carry */
5329a84457aed4c45bc900998b5e11c03023264208James Dong        x7 = mask & ((uint32)x7 >> 1);
5429a84457aed4c45bc900998b5e11c03023264208James Dong        x7 = (x7 << 8) - x7;
5529a84457aed4c45bc900998b5e11c03023264208James Dong        src1 = src1 + (x7 >> 7); /* add 0xFF to the negative byte, add back carry */
5629a84457aed4c45bc900998b5e11c03023264208James Dong        src1 = src1 ^(x7 >> 7);   /* take absolute value of negative byte */
5729a84457aed4c45bc900998b5e11c03023264208James Dong
5829a84457aed4c45bc900998b5e11c03023264208James Dong        return src1;
5929a84457aed4c45bc900998b5e11c03023264208James Dong    }
6029a84457aed4c45bc900998b5e11c03023264208James Dong
6129a84457aed4c45bc900998b5e11c03023264208James Dong#define NUMBER 3
6229a84457aed4c45bc900998b5e11c03023264208James Dong#define SHIFT 24
6329a84457aed4c45bc900998b5e11c03023264208James Dong
6429a84457aed4c45bc900998b5e11c03023264208James Dong#include "sad_mb_offset.h"
6529a84457aed4c45bc900998b5e11c03023264208James Dong
6629a84457aed4c45bc900998b5e11c03023264208James Dong#undef NUMBER
6729a84457aed4c45bc900998b5e11c03023264208James Dong#define NUMBER 2
6829a84457aed4c45bc900998b5e11c03023264208James Dong#undef SHIFT
6929a84457aed4c45bc900998b5e11c03023264208James Dong#define SHIFT 16
7029a84457aed4c45bc900998b5e11c03023264208James Dong#include "sad_mb_offset.h"
7129a84457aed4c45bc900998b5e11c03023264208James Dong
7229a84457aed4c45bc900998b5e11c03023264208James Dong#undef NUMBER
7329a84457aed4c45bc900998b5e11c03023264208James Dong#define NUMBER 1
7429a84457aed4c45bc900998b5e11c03023264208James Dong#undef SHIFT
7529a84457aed4c45bc900998b5e11c03023264208James Dong#define SHIFT 8
7629a84457aed4c45bc900998b5e11c03023264208James Dong#include "sad_mb_offset.h"
7729a84457aed4c45bc900998b5e11c03023264208James Dong
7829a84457aed4c45bc900998b5e11c03023264208James Dong
7929a84457aed4c45bc900998b5e11c03023264208James Dong    __inline int32 simd_sad_mb(uint8 *ref, uint8 *blk, int dmin, int lx)
8029a84457aed4c45bc900998b5e11c03023264208James Dong    {
8129a84457aed4c45bc900998b5e11c03023264208James Dong        int32 x4, x5, x6, x8, x9, x10, x11, x12, x14;
8229a84457aed4c45bc900998b5e11c03023264208James Dong
8329a84457aed4c45bc900998b5e11c03023264208James Dong        x9 = 0x80808080; /* const. */
8429a84457aed4c45bc900998b5e11c03023264208James Dong
854b43b41eaf8c4c80f66185e13620cf94b8b2ef5bMartin Storsjo        x8 = (intptr_t)ref & 0x3;
8629a84457aed4c45bc900998b5e11c03023264208James Dong        if (x8 == 3)
8729a84457aed4c45bc900998b5e11c03023264208James Dong            goto SadMBOffset3;
8829a84457aed4c45bc900998b5e11c03023264208James Dong        if (x8 == 2)
8929a84457aed4c45bc900998b5e11c03023264208James Dong            goto SadMBOffset2;
9029a84457aed4c45bc900998b5e11c03023264208James Dong        if (x8 == 1)
9129a84457aed4c45bc900998b5e11c03023264208James Dong            goto SadMBOffset1;
9229a84457aed4c45bc900998b5e11c03023264208James Dong
9329a84457aed4c45bc900998b5e11c03023264208James Dong//  x5 = (x4<<8)-x4; /* x5 = x4*255; */
9429a84457aed4c45bc900998b5e11c03023264208James Dong        x4 = x5 = 0;
9529a84457aed4c45bc900998b5e11c03023264208James Dong
9629a84457aed4c45bc900998b5e11c03023264208James Dong        x6 = 0xFFFF00FF;
9729a84457aed4c45bc900998b5e11c03023264208James Dong
9829a84457aed4c45bc900998b5e11c03023264208James Dong        ref -= lx;
9929a84457aed4c45bc900998b5e11c03023264208James Dong        blk -= 16;
10029a84457aed4c45bc900998b5e11c03023264208James Dong
10129a84457aed4c45bc900998b5e11c03023264208James Dong        x8 = 16;
10229a84457aed4c45bc900998b5e11c03023264208James Dong
10329a84457aed4c45bc900998b5e11c03023264208James DongLOOP_SAD0:
10429a84457aed4c45bc900998b5e11c03023264208James Dong        /****** process 8 pixels ******/
10529a84457aed4c45bc900998b5e11c03023264208James Dong        x10 = *((uint32*)(ref += lx));
10629a84457aed4c45bc900998b5e11c03023264208James Dong        x11 = *((uint32*)(ref + 4));
10729a84457aed4c45bc900998b5e11c03023264208James Dong        x12 = *((uint32*)(blk += 16));
10829a84457aed4c45bc900998b5e11c03023264208James Dong        x14 = *((uint32*)(blk + 4));
10929a84457aed4c45bc900998b5e11c03023264208James Dong
11029a84457aed4c45bc900998b5e11c03023264208James Dong        /* process x11 & x14 */
11129a84457aed4c45bc900998b5e11c03023264208James Dong        x11 = sad_4pixel(x11, x14, x9);
11229a84457aed4c45bc900998b5e11c03023264208James Dong
11329a84457aed4c45bc900998b5e11c03023264208James Dong        /* process x12 & x10 */
11429a84457aed4c45bc900998b5e11c03023264208James Dong        x10 = sad_4pixel(x10, x12, x9);
11529a84457aed4c45bc900998b5e11c03023264208James Dong
11629a84457aed4c45bc900998b5e11c03023264208James Dong        x5 = x5 + x10; /* accumulate low bytes */
11729a84457aed4c45bc900998b5e11c03023264208James Dong        x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */
11829a84457aed4c45bc900998b5e11c03023264208James Dong        x4 = x4 + ((uint32)x10 >> 8);  /* accumulate high bytes */
11929a84457aed4c45bc900998b5e11c03023264208James Dong        x5 = x5 + x11;  /* accumulate low bytes */
12029a84457aed4c45bc900998b5e11c03023264208James Dong        x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */
12129a84457aed4c45bc900998b5e11c03023264208James Dong        x4 = x4 + ((uint32)x11 >> 8);  /* accumulate high bytes */
12229a84457aed4c45bc900998b5e11c03023264208James Dong
12329a84457aed4c45bc900998b5e11c03023264208James Dong        /****** process 8 pixels ******/
12429a84457aed4c45bc900998b5e11c03023264208James Dong        x10 = *((uint32*)(ref + 8));
12529a84457aed4c45bc900998b5e11c03023264208James Dong        x11 = *((uint32*)(ref + 12));
12629a84457aed4c45bc900998b5e11c03023264208James Dong        x12 = *((uint32*)(blk + 8));
12729a84457aed4c45bc900998b5e11c03023264208James Dong        x14 = *((uint32*)(blk + 12));
12829a84457aed4c45bc900998b5e11c03023264208James Dong
12929a84457aed4c45bc900998b5e11c03023264208James Dong        /* process x11 & x14 */
13029a84457aed4c45bc900998b5e11c03023264208James Dong        x11 = sad_4pixel(x11, x14, x9);
13129a84457aed4c45bc900998b5e11c03023264208James Dong
13229a84457aed4c45bc900998b5e11c03023264208James Dong        /* process x12 & x10 */
13329a84457aed4c45bc900998b5e11c03023264208James Dong        x10 = sad_4pixel(x10, x12, x9);
13429a84457aed4c45bc900998b5e11c03023264208James Dong
13529a84457aed4c45bc900998b5e11c03023264208James Dong        x5 = x5 + x10;  /* accumulate low bytes */
13629a84457aed4c45bc900998b5e11c03023264208James Dong        x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */
13729a84457aed4c45bc900998b5e11c03023264208James Dong        x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */
13829a84457aed4c45bc900998b5e11c03023264208James Dong        x5 = x5 + x11;  /* accumulate low bytes */
13929a84457aed4c45bc900998b5e11c03023264208James Dong        x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */
14029a84457aed4c45bc900998b5e11c03023264208James Dong        x4 = x4 + ((uint32)x11 >> 8);  /* accumulate high bytes */
14129a84457aed4c45bc900998b5e11c03023264208James Dong
14229a84457aed4c45bc900998b5e11c03023264208James Dong        /****************/
14329a84457aed4c45bc900998b5e11c03023264208James Dong        x10 = x5 - (x4 << 8); /* extract low bytes */
14429a84457aed4c45bc900998b5e11c03023264208James Dong        x10 = x10 + x4;     /* add with high bytes */
14529a84457aed4c45bc900998b5e11c03023264208James Dong        x10 = x10 + (x10 << 16); /* add with lower half word */
14629a84457aed4c45bc900998b5e11c03023264208James Dong
14729a84457aed4c45bc900998b5e11c03023264208James Dong        if ((int)((uint32)x10 >> 16) <= dmin) /* compare with dmin */
14829a84457aed4c45bc900998b5e11c03023264208James Dong        {
14929a84457aed4c45bc900998b5e11c03023264208James Dong            if (--x8)
15029a84457aed4c45bc900998b5e11c03023264208James Dong            {
15129a84457aed4c45bc900998b5e11c03023264208James Dong                goto LOOP_SAD0;
15229a84457aed4c45bc900998b5e11c03023264208James Dong            }
15329a84457aed4c45bc900998b5e11c03023264208James Dong
15429a84457aed4c45bc900998b5e11c03023264208James Dong        }
15529a84457aed4c45bc900998b5e11c03023264208James Dong
15629a84457aed4c45bc900998b5e11c03023264208James Dong        return ((uint32)x10 >> 16);
15729a84457aed4c45bc900998b5e11c03023264208James Dong
15829a84457aed4c45bc900998b5e11c03023264208James DongSadMBOffset3:
15929a84457aed4c45bc900998b5e11c03023264208James Dong
16029a84457aed4c45bc900998b5e11c03023264208James Dong        return sad_mb_offset3(ref, blk, lx, dmin);
16129a84457aed4c45bc900998b5e11c03023264208James Dong
16229a84457aed4c45bc900998b5e11c03023264208James DongSadMBOffset2:
16329a84457aed4c45bc900998b5e11c03023264208James Dong
16429a84457aed4c45bc900998b5e11c03023264208James Dong        return sad_mb_offset2(ref, blk, lx, dmin);
16529a84457aed4c45bc900998b5e11c03023264208James Dong
16629a84457aed4c45bc900998b5e11c03023264208James DongSadMBOffset1:
16729a84457aed4c45bc900998b5e11c03023264208James Dong
16829a84457aed4c45bc900998b5e11c03023264208James Dong        return sad_mb_offset1(ref, blk, lx, dmin);
16929a84457aed4c45bc900998b5e11c03023264208James Dong
17029a84457aed4c45bc900998b5e11c03023264208James Dong    }
17129a84457aed4c45bc900998b5e11c03023264208James Dong
17229a84457aed4c45bc900998b5e11c03023264208James Dong#elif defined(__CC_ARM)  /* only work with arm v5 */
17329a84457aed4c45bc900998b5e11c03023264208James Dong
17429a84457aed4c45bc900998b5e11c03023264208James Dong    __inline int32 SUB_SAD(int32 sad, int32 tmp, int32 tmp2)
17529a84457aed4c45bc900998b5e11c03023264208James Dong    {
17629a84457aed4c45bc900998b5e11c03023264208James Dong        __asm
17729a84457aed4c45bc900998b5e11c03023264208James Dong        {
17829a84457aed4c45bc900998b5e11c03023264208James Dong            rsbs    tmp, tmp, tmp2 ;
17929a84457aed4c45bc900998b5e11c03023264208James Dong            rsbmi   tmp, tmp, #0 ;
18029a84457aed4c45bc900998b5e11c03023264208James Dong            add     sad, sad, tmp ;
18129a84457aed4c45bc900998b5e11c03023264208James Dong        }
18229a84457aed4c45bc900998b5e11c03023264208James Dong
18329a84457aed4c45bc900998b5e11c03023264208James Dong        return sad;
18429a84457aed4c45bc900998b5e11c03023264208James Dong    }
18529a84457aed4c45bc900998b5e11c03023264208James Dong
18629a84457aed4c45bc900998b5e11c03023264208James Dong    __inline int32 sad_4pixel(int32 src1, int32 src2, int32 mask)
18729a84457aed4c45bc900998b5e11c03023264208James Dong    {
18829a84457aed4c45bc900998b5e11c03023264208James Dong        int32 x7;
18929a84457aed4c45bc900998b5e11c03023264208James Dong
19029a84457aed4c45bc900998b5e11c03023264208James Dong        __asm
19129a84457aed4c45bc900998b5e11c03023264208James Dong        {
19229a84457aed4c45bc900998b5e11c03023264208James Dong            EOR     x7, src2, src1;     /* check odd/even combination */
19329a84457aed4c45bc900998b5e11c03023264208James Dong            SUBS    src1, src2, src1;
19429a84457aed4c45bc900998b5e11c03023264208James Dong            EOR     x7, x7, src1;
19529a84457aed4c45bc900998b5e11c03023264208James Dong            AND     x7, mask, x7, lsr #1;
19629a84457aed4c45bc900998b5e11c03023264208James Dong            ORRCC   x7, x7, #0x80000000;
19729a84457aed4c45bc900998b5e11c03023264208James Dong            RSB     x7, x7, x7, lsl #8;
19829a84457aed4c45bc900998b5e11c03023264208James Dong            ADD     src1, src1, x7, asr #7;   /* add 0xFF to the negative byte, add back carry */
19929a84457aed4c45bc900998b5e11c03023264208James Dong            EOR     src1, src1, x7, asr #7;   /* take absolute value of negative byte */
20029a84457aed4c45bc900998b5e11c03023264208James Dong        }
20129a84457aed4c45bc900998b5e11c03023264208James Dong
20229a84457aed4c45bc900998b5e11c03023264208James Dong        return src1;
20329a84457aed4c45bc900998b5e11c03023264208James Dong    }
20429a84457aed4c45bc900998b5e11c03023264208James Dong
20529a84457aed4c45bc900998b5e11c03023264208James Dong    __inline int32 sad_4pixelN(int32 src1, int32 src2, int32 mask)
20629a84457aed4c45bc900998b5e11c03023264208James Dong    {
20729a84457aed4c45bc900998b5e11c03023264208James Dong        int32 x7;
20829a84457aed4c45bc900998b5e11c03023264208James Dong
20929a84457aed4c45bc900998b5e11c03023264208James Dong        __asm
21029a84457aed4c45bc900998b5e11c03023264208James Dong        {
21129a84457aed4c45bc900998b5e11c03023264208James Dong            EOR      x7, src2, src1;        /* check odd/even combination */
21229a84457aed4c45bc900998b5e11c03023264208James Dong            ADDS     src1, src2, src1;
21329a84457aed4c45bc900998b5e11c03023264208James Dong            EOR      x7, x7, src1;      /* only odd bytes need to add carry */
21429a84457aed4c45bc900998b5e11c03023264208James Dong            ANDS     x7, mask, x7, rrx;
21529a84457aed4c45bc900998b5e11c03023264208James Dong            RSB      x7, x7, x7, lsl #8;
21629a84457aed4c45bc900998b5e11c03023264208James Dong            SUB      src1, src1, x7, asr #7;  /* add 0xFF to the negative byte, add back carry */
21729a84457aed4c45bc900998b5e11c03023264208James Dong            EOR      src1, src1, x7, asr #7; /* take absolute value of negative byte */
21829a84457aed4c45bc900998b5e11c03023264208James Dong        }
21929a84457aed4c45bc900998b5e11c03023264208James Dong
22029a84457aed4c45bc900998b5e11c03023264208James Dong        return src1;
22129a84457aed4c45bc900998b5e11c03023264208James Dong    }
22229a84457aed4c45bc900998b5e11c03023264208James Dong
/*
 * Accumulation step written for the armcc embedded assembler.  It works on
 * the locals x4, x5, x6, x10 and x11 of the function that expands it:
 *   x5 -= x10 and x5 -= x11, each with borrow (SBC also subtracts NOT carry);
 *   x4 += (x6 & ~x10) >> 8 and x4 += (x6 & ~x11) >> 8.
 * x10 and x11 are overwritten.
 * NOTE(review): the first SBC consumes whatever carry flag the preceding
 * instruction left - presumably the ANDS in sad_4pixelN(); confirm against
 * the expansion sites in sad_mb_offset.h.
 */
#define sum_accumulate  __asm{      SBC      x5, x5, x10;  /* x5 = x5 - x10 - !carry */ \
        BIC      x10, x6, x10;   /* x10 = x6 & ~x10 */ \
        ADD      x4, x4, x10,lsr #8;   /* accumulate high bytes */ \
        SBC      x5, x5, x11;    /* x5 = x5 - x11 - !carry */ \
        BIC      x11, x6, x11;   /* x11 = x6 & ~x11 */ \
        ADD      x4, x4, x11,lsr #8; } /* accumulate high bytes */
22929a84457aed4c45bc900998b5e11c03023264208James Dong
23029a84457aed4c45bc900998b5e11c03023264208James Dong
23129a84457aed4c45bc900998b5e11c03023264208James Dong#define NUMBER 3
23229a84457aed4c45bc900998b5e11c03023264208James Dong#define SHIFT 24
23329a84457aed4c45bc900998b5e11c03023264208James Dong#define INC_X8 0x08000001
23429a84457aed4c45bc900998b5e11c03023264208James Dong
23529a84457aed4c45bc900998b5e11c03023264208James Dong#include "sad_mb_offset.h"
23629a84457aed4c45bc900998b5e11c03023264208James Dong
23729a84457aed4c45bc900998b5e11c03023264208James Dong#undef NUMBER
23829a84457aed4c45bc900998b5e11c03023264208James Dong#define NUMBER 2
23929a84457aed4c45bc900998b5e11c03023264208James Dong#undef SHIFT
24029a84457aed4c45bc900998b5e11c03023264208James Dong#define SHIFT 16
24129a84457aed4c45bc900998b5e11c03023264208James Dong#undef INC_X8
24229a84457aed4c45bc900998b5e11c03023264208James Dong#define INC_X8 0x10000001
24329a84457aed4c45bc900998b5e11c03023264208James Dong#include "sad_mb_offset.h"
24429a84457aed4c45bc900998b5e11c03023264208James Dong
24529a84457aed4c45bc900998b5e11c03023264208James Dong#undef NUMBER
24629a84457aed4c45bc900998b5e11c03023264208James Dong#define NUMBER 1
24729a84457aed4c45bc900998b5e11c03023264208James Dong#undef SHIFT
24829a84457aed4c45bc900998b5e11c03023264208James Dong#define SHIFT 8
24929a84457aed4c45bc900998b5e11c03023264208James Dong#undef INC_X8
25029a84457aed4c45bc900998b5e11c03023264208James Dong#define INC_X8 0x08000001
25129a84457aed4c45bc900998b5e11c03023264208James Dong#include "sad_mb_offset.h"
25229a84457aed4c45bc900998b5e11c03023264208James Dong
25329a84457aed4c45bc900998b5e11c03023264208James Dong
25429a84457aed4c45bc900998b5e11c03023264208James Dong    __inline int32 simd_sad_mb(uint8 *ref, uint8 *blk, int dmin, int lx)
25529a84457aed4c45bc900998b5e11c03023264208James Dong    {
25629a84457aed4c45bc900998b5e11c03023264208James Dong        int32 x4, x5, x6, x8, x9, x10, x11, x12, x14;
25729a84457aed4c45bc900998b5e11c03023264208James Dong
25829a84457aed4c45bc900998b5e11c03023264208James Dong        x9 = 0x80808080; /* const. */
25929a84457aed4c45bc900998b5e11c03023264208James Dong        x4 = x5 = 0;
26029a84457aed4c45bc900998b5e11c03023264208James Dong
26129a84457aed4c45bc900998b5e11c03023264208James Dong        __asm
26229a84457aed4c45bc900998b5e11c03023264208James Dong        {
26329a84457aed4c45bc900998b5e11c03023264208James Dong            MOVS    x8, ref, lsl #31 ;
26429a84457aed4c45bc900998b5e11c03023264208James Dong            BHI     SadMBOffset3;
26529a84457aed4c45bc900998b5e11c03023264208James Dong            BCS     SadMBOffset2;
26629a84457aed4c45bc900998b5e11c03023264208James Dong            BMI     SadMBOffset1;
26729a84457aed4c45bc900998b5e11c03023264208James Dong
26829a84457aed4c45bc900998b5e11c03023264208James Dong            MVN     x6, #0xFF00;
26929a84457aed4c45bc900998b5e11c03023264208James Dong        }
27029a84457aed4c45bc900998b5e11c03023264208James DongLOOP_SAD0:
27129a84457aed4c45bc900998b5e11c03023264208James Dong        /****** process 8 pixels ******/
27229a84457aed4c45bc900998b5e11c03023264208James Dong        x11 = *((int32*)(ref + 12));
27329a84457aed4c45bc900998b5e11c03023264208James Dong        x10 = *((int32*)(ref + 8));
27429a84457aed4c45bc900998b5e11c03023264208James Dong        x14 = *((int32*)(blk + 12));
27529a84457aed4c45bc900998b5e11c03023264208James Dong        x12 = *((int32*)(blk + 8));
27629a84457aed4c45bc900998b5e11c03023264208James Dong
27729a84457aed4c45bc900998b5e11c03023264208James Dong        /* process x11 & x14 */
27829a84457aed4c45bc900998b5e11c03023264208James Dong        x11 = sad_4pixel(x11, x14, x9);
27929a84457aed4c45bc900998b5e11c03023264208James Dong
28029a84457aed4c45bc900998b5e11c03023264208James Dong        /* process x12 & x10 */
28129a84457aed4c45bc900998b5e11c03023264208James Dong        x10 = sad_4pixel(x10, x12, x9);
28229a84457aed4c45bc900998b5e11c03023264208James Dong
28329a84457aed4c45bc900998b5e11c03023264208James Dong        x5 = x5 + x10;  /* accumulate low bytes */
28429a84457aed4c45bc900998b5e11c03023264208James Dong        x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */
28529a84457aed4c45bc900998b5e11c03023264208James Dong        x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */
28629a84457aed4c45bc900998b5e11c03023264208James Dong        x5 = x5 + x11;  /* accumulate low bytes */
28729a84457aed4c45bc900998b5e11c03023264208James Dong        x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */
28829a84457aed4c45bc900998b5e11c03023264208James Dong        x4 = x4 + ((uint32)x11 >> 8);  /* accumulate high bytes */
28929a84457aed4c45bc900998b5e11c03023264208James Dong
29029a84457aed4c45bc900998b5e11c03023264208James Dong        __asm
29129a84457aed4c45bc900998b5e11c03023264208James Dong        {
29229a84457aed4c45bc900998b5e11c03023264208James Dong            /****** process 8 pixels ******/
29329a84457aed4c45bc900998b5e11c03023264208James Dong            LDR     x11, [ref, #4];
29429a84457aed4c45bc900998b5e11c03023264208James Dong            LDR     x10, [ref], lx ;
29529a84457aed4c45bc900998b5e11c03023264208James Dong            LDR     x14, [blk, #4];
29629a84457aed4c45bc900998b5e11c03023264208James Dong            LDR     x12, [blk], #16 ;
29729a84457aed4c45bc900998b5e11c03023264208James Dong        }
29829a84457aed4c45bc900998b5e11c03023264208James Dong
29929a84457aed4c45bc900998b5e11c03023264208James Dong        /* process x11 & x14 */
30029a84457aed4c45bc900998b5e11c03023264208James Dong        x11 = sad_4pixel(x11, x14, x9);
30129a84457aed4c45bc900998b5e11c03023264208James Dong
30229a84457aed4c45bc900998b5e11c03023264208James Dong        /* process x12 & x10 */
30329a84457aed4c45bc900998b5e11c03023264208James Dong        x10 = sad_4pixel(x10, x12, x9);
30429a84457aed4c45bc900998b5e11c03023264208James Dong
30529a84457aed4c45bc900998b5e11c03023264208James Dong        x5 = x5 + x10;  /* accumulate low bytes */
30629a84457aed4c45bc900998b5e11c03023264208James Dong        x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */
30729a84457aed4c45bc900998b5e11c03023264208James Dong        x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */
30829a84457aed4c45bc900998b5e11c03023264208James Dong        x5 = x5 + x11;  /* accumulate low bytes */
30929a84457aed4c45bc900998b5e11c03023264208James Dong        x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */
31029a84457aed4c45bc900998b5e11c03023264208James Dong        x4 = x4 + ((uint32)x11 >> 8);  /* accumulate high bytes */
31129a84457aed4c45bc900998b5e11c03023264208James Dong
31229a84457aed4c45bc900998b5e11c03023264208James Dong        /****************/
31329a84457aed4c45bc900998b5e11c03023264208James Dong        x10 = x5 - (x4 << 8); /* extract low bytes */
31429a84457aed4c45bc900998b5e11c03023264208James Dong        x10 = x10 + x4;     /* add with high bytes */
31529a84457aed4c45bc900998b5e11c03023264208James Dong        x10 = x10 + (x10 << 16); /* add with lower half word */
31629a84457aed4c45bc900998b5e11c03023264208James Dong
31729a84457aed4c45bc900998b5e11c03023264208James Dong        __asm
31829a84457aed4c45bc900998b5e11c03023264208James Dong        {
31929a84457aed4c45bc900998b5e11c03023264208James Dong            /****************/
32029a84457aed4c45bc900998b5e11c03023264208James Dong            RSBS    x11, dmin, x10, lsr #16;
32129a84457aed4c45bc900998b5e11c03023264208James Dong            ADDLSS  x8, x8, #0x10000001;
32229a84457aed4c45bc900998b5e11c03023264208James Dong            BLS     LOOP_SAD0;
32329a84457aed4c45bc900998b5e11c03023264208James Dong        }
32429a84457aed4c45bc900998b5e11c03023264208James Dong
32529a84457aed4c45bc900998b5e11c03023264208James Dong        return ((uint32)x10 >> 16);
32629a84457aed4c45bc900998b5e11c03023264208James Dong
32729a84457aed4c45bc900998b5e11c03023264208James DongSadMBOffset3:
32829a84457aed4c45bc900998b5e11c03023264208James Dong
32929a84457aed4c45bc900998b5e11c03023264208James Dong        return sad_mb_offset3(ref, blk, lx, dmin, x8);
33029a84457aed4c45bc900998b5e11c03023264208James Dong
33129a84457aed4c45bc900998b5e11c03023264208James DongSadMBOffset2:
33229a84457aed4c45bc900998b5e11c03023264208James Dong
33329a84457aed4c45bc900998b5e11c03023264208James Dong        return sad_mb_offset2(ref, blk, lx, dmin, x8);
33429a84457aed4c45bc900998b5e11c03023264208James Dong
33529a84457aed4c45bc900998b5e11c03023264208James DongSadMBOffset1:
33629a84457aed4c45bc900998b5e11c03023264208James Dong
33729a84457aed4c45bc900998b5e11c03023264208James Dong        return sad_mb_offset1(ref, blk, lx, dmin, x8);
33829a84457aed4c45bc900998b5e11c03023264208James Dong    }
33929a84457aed4c45bc900998b5e11c03023264208James Dong
34029a84457aed4c45bc900998b5e11c03023264208James Dong
34129a84457aed4c45bc900998b5e11c03023264208James Dong#elif defined(__GNUC__) && defined(__arm__) /* ARM GNU COMPILER  */
34229a84457aed4c45bc900998b5e11c03023264208James Dong
34329a84457aed4c45bc900998b5e11c03023264208James Dong    __inline int32 SUB_SAD(int32 sad, int32 tmp, int32 tmp2)
34429a84457aed4c45bc900998b5e11c03023264208James Dong    {
345ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo        __asm__ volatile(
346ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo            "rsbs       %1, %1, %2\n\t"
347ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo            "rsbmi      %1, %1, #0\n\t"
348ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo            "add        %0, %0, %1"
3493fdb405597f0e062a9bb8af20199c5e67f0f764cMartin Storsjo            : "+r"(sad), "+r"(tmp)
3503fdb405597f0e062a9bb8af20199c5e67f0f764cMartin Storsjo            : "r"(tmp2)
351ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo        );
35229a84457aed4c45bc900998b5e11c03023264208James Dong        return sad;
35329a84457aed4c45bc900998b5e11c03023264208James Dong    }
35429a84457aed4c45bc900998b5e11c03023264208James Dong
35529a84457aed4c45bc900998b5e11c03023264208James Dong    __inline int32 sad_4pixel(int32 src1, int32 src2, int32 mask)
35629a84457aed4c45bc900998b5e11c03023264208James Dong    {
35729a84457aed4c45bc900998b5e11c03023264208James Dong        int32 x7;
35829a84457aed4c45bc900998b5e11c03023264208James Dong
359ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo        __asm__ volatile(
360ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo            "EOR        %1, %2, %0\n\t"
361ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo            "SUBS       %0, %2, %0\n\t"
362ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo            "EOR        %1, %1, %0\n\t"
363ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo            "AND        %1, %3, %1, lsr #1\n\t"
364ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo            "ORRCC      %1, %1, #0x80000000\n\t"
365ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo            "RSB        %1, %1, %1, lsl #8\n\t"
366ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo            "ADD        %0, %0, %1, asr #7\n\t"
367ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo            "EOR        %0, %0, %1, asr #7"
3683fdb405597f0e062a9bb8af20199c5e67f0f764cMartin Storsjo            : "+r"(src1), "=&r"(x7)
369ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo            : "r"(src2), "r"(mask)
370ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo        );
37129a84457aed4c45bc900998b5e11c03023264208James Dong
37229a84457aed4c45bc900998b5e11c03023264208James Dong        return src1;
37329a84457aed4c45bc900998b5e11c03023264208James Dong    }
37429a84457aed4c45bc900998b5e11c03023264208James Dong
37529a84457aed4c45bc900998b5e11c03023264208James Dong    __inline int32 sad_4pixelN(int32 src1, int32 src2, int32 mask)
37629a84457aed4c45bc900998b5e11c03023264208James Dong    {
37729a84457aed4c45bc900998b5e11c03023264208James Dong        int32 x7;
37829a84457aed4c45bc900998b5e11c03023264208James Dong
379ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo        __asm__ volatile(
380ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo            "EOR        %1, %2, %0\n\t"
381ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo            "ADDS       %0, %2, %0\n\t"
382ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo            "EOR        %1, %1, %0\n\t"
383ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo            "ANDS       %1, %3, %1, rrx\n\t"
384ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo            "RSB        %1, %1, %1, lsl #8\n\t"
385ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo            "SUB        %0, %0, %1, asr #7\n\t"
386ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo            "EOR        %0, %0, %1, asr #7"
3873fdb405597f0e062a9bb8af20199c5e67f0f764cMartin Storsjo            : "+r"(src1), "=&r"(x7)
388ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo            : "r"(src2), "r"(mask)
389ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo        );
39029a84457aed4c45bc900998b5e11c03023264208James Dong
39129a84457aed4c45bc900998b5e11c03023264208James Dong        return src1;
39229a84457aed4c45bc900998b5e11c03023264208James Dong    }
39329a84457aed4c45bc900998b5e11c03023264208James Dong
/*
 * Accumulation step for the ARM GNU build.  It works on the locals x4, x5,
 * x6, x10 and x11 of the function that expands it:
 *   x5 -= x10 and x5 -= x11, each with borrow (SBC also subtracts NOT carry);
 *   x4 += (x6 & ~x10) >> 8 and x4 += (x6 & ~x11) >> 8.
 * x10 and x11 are overwritten.  The expansion already ends in ';'.
 * NOTE(review): the first SBC consumes the carry flag left by an earlier,
 * separate asm statement - presumably the ANDS in sad_4pixelN().  Nothing
 * in the operand lists expresses that dependency to the compiler; confirm
 * the expansion sites in sad_mb_offset.h keep the two adjacent.
 */
#define sum_accumulate  __asm__ volatile(              \
    "SBC   %0, %0, %1\n\t"                             \
    "BIC   %1, %4, %1\n\t"                             \
    "ADD   %2, %2, %1, lsr #8\n\t"                     \
    "SBC   %0, %0, %3\n\t"                             \
    "BIC   %3, %4, %3\n\t"                             \
    "ADD   %2, %2, %3, lsr #8"                         \
    : "+r" (x5), "+r" (x10), "+r" (x4), "+r" (x11)     \
    : "r" (x6)                                         \
    );
40429a84457aed4c45bc900998b5e11c03023264208James Dong
40529a84457aed4c45bc900998b5e11c03023264208James Dong#define NUMBER 3
40629a84457aed4c45bc900998b5e11c03023264208James Dong#define SHIFT 24
40729a84457aed4c45bc900998b5e11c03023264208James Dong#define INC_X8 0x08000001
40829a84457aed4c45bc900998b5e11c03023264208James Dong
40929a84457aed4c45bc900998b5e11c03023264208James Dong#include "sad_mb_offset.h"
41029a84457aed4c45bc900998b5e11c03023264208James Dong
41129a84457aed4c45bc900998b5e11c03023264208James Dong#undef NUMBER
41229a84457aed4c45bc900998b5e11c03023264208James Dong#define NUMBER 2
41329a84457aed4c45bc900998b5e11c03023264208James Dong#undef SHIFT
41429a84457aed4c45bc900998b5e11c03023264208James Dong#define SHIFT 16
41529a84457aed4c45bc900998b5e11c03023264208James Dong#undef INC_X8
41629a84457aed4c45bc900998b5e11c03023264208James Dong#define INC_X8 0x10000001
41729a84457aed4c45bc900998b5e11c03023264208James Dong#include "sad_mb_offset.h"
41829a84457aed4c45bc900998b5e11c03023264208James Dong
41929a84457aed4c45bc900998b5e11c03023264208James Dong#undef NUMBER
42029a84457aed4c45bc900998b5e11c03023264208James Dong#define NUMBER 1
42129a84457aed4c45bc900998b5e11c03023264208James Dong#undef SHIFT
42229a84457aed4c45bc900998b5e11c03023264208James Dong#define SHIFT 8
42329a84457aed4c45bc900998b5e11c03023264208James Dong#undef INC_X8
42429a84457aed4c45bc900998b5e11c03023264208James Dong#define INC_X8 0x08000001
42529a84457aed4c45bc900998b5e11c03023264208James Dong#include "sad_mb_offset.h"
42629a84457aed4c45bc900998b5e11c03023264208James Dong
42729a84457aed4c45bc900998b5e11c03023264208James Dong
42829a84457aed4c45bc900998b5e11c03023264208James Dong    __inline int32 simd_sad_mb(uint8 *ref, uint8 *blk, int dmin, int lx)
42929a84457aed4c45bc900998b5e11c03023264208James Dong    {
43029a84457aed4c45bc900998b5e11c03023264208James Dong        int32 x4, x5, x6, x8, x9, x10, x11, x12, x14;
43129a84457aed4c45bc900998b5e11c03023264208James Dong
43229a84457aed4c45bc900998b5e11c03023264208James Dong        x9 = 0x80808080; /* const. */
43329a84457aed4c45bc900998b5e11c03023264208James Dong        x4 = x5 = 0;
43429a84457aed4c45bc900998b5e11c03023264208James Dong
43529a84457aed4c45bc900998b5e11c03023264208James Dong        x8 = (uint32)ref & 0x3;
43629a84457aed4c45bc900998b5e11c03023264208James Dong        if (x8 == 3)
43729a84457aed4c45bc900998b5e11c03023264208James Dong            goto SadMBOffset3;
43829a84457aed4c45bc900998b5e11c03023264208James Dong        if (x8 == 2)
43929a84457aed4c45bc900998b5e11c03023264208James Dong            goto SadMBOffset2;
44029a84457aed4c45bc900998b5e11c03023264208James Dong        if (x8 == 1)
44129a84457aed4c45bc900998b5e11c03023264208James Dong            goto SadMBOffset1;
44229a84457aed4c45bc900998b5e11c03023264208James Dong
44329a84457aed4c45bc900998b5e11c03023264208James Dong        x8 = 16;
44429a84457aed4c45bc900998b5e11c03023264208James Dong///
445ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo        __asm__ volatile("MVN   %0, #0xFF00": "=r"(x6));
44629a84457aed4c45bc900998b5e11c03023264208James Dong
44729a84457aed4c45bc900998b5e11c03023264208James DongLOOP_SAD0:
44829a84457aed4c45bc900998b5e11c03023264208James Dong        /****** process 8 pixels ******/
44929a84457aed4c45bc900998b5e11c03023264208James Dong        x11 = *((int32*)(ref + 12));
45029a84457aed4c45bc900998b5e11c03023264208James Dong        x10 = *((int32*)(ref + 8));
45129a84457aed4c45bc900998b5e11c03023264208James Dong        x14 = *((int32*)(blk + 12));
45229a84457aed4c45bc900998b5e11c03023264208James Dong        x12 = *((int32*)(blk + 8));
45329a84457aed4c45bc900998b5e11c03023264208James Dong
45429a84457aed4c45bc900998b5e11c03023264208James Dong        /* process x11 & x14 */
45529a84457aed4c45bc900998b5e11c03023264208James Dong        x11 = sad_4pixel(x11, x14, x9);
45629a84457aed4c45bc900998b5e11c03023264208James Dong
45729a84457aed4c45bc900998b5e11c03023264208James Dong        /* process x12 & x10 */
45829a84457aed4c45bc900998b5e11c03023264208James Dong        x10 = sad_4pixel(x10, x12, x9);
45929a84457aed4c45bc900998b5e11c03023264208James Dong
46029a84457aed4c45bc900998b5e11c03023264208James Dong        x5 = x5 + x10;  /* accumulate low bytes */
46129a84457aed4c45bc900998b5e11c03023264208James Dong        x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */
46229a84457aed4c45bc900998b5e11c03023264208James Dong        x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */
46329a84457aed4c45bc900998b5e11c03023264208James Dong        x5 = x5 + x11;  /* accumulate low bytes */
46429a84457aed4c45bc900998b5e11c03023264208James Dong        x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */
46529a84457aed4c45bc900998b5e11c03023264208James Dong        x4 = x4 + ((uint32)x11 >> 8);  /* accumulate high bytes */
46629a84457aed4c45bc900998b5e11c03023264208James Dong
46729a84457aed4c45bc900998b5e11c03023264208James Dong        /****** process 8 pixels ******/
46829a84457aed4c45bc900998b5e11c03023264208James Dong        x11 = *((int32*)(ref + 4));
4693fdb405597f0e062a9bb8af20199c5e67f0f764cMartin Storsjo        __asm__ volatile("LDR   %0, [%1], %2": "=&r"(x10), "+r"(ref): "r"(lx));
47029a84457aed4c45bc900998b5e11c03023264208James Dong        //x10 = *((int32*)ref); ref+=lx;
47129a84457aed4c45bc900998b5e11c03023264208James Dong        x14 = *((int32*)(blk + 4));
4723fdb405597f0e062a9bb8af20199c5e67f0f764cMartin Storsjo        __asm__ volatile("LDR   %0, [%1], #16": "=&r"(x12), "+r"(blk));
47329a84457aed4c45bc900998b5e11c03023264208James Dong
47429a84457aed4c45bc900998b5e11c03023264208James Dong        /* process x11 & x14 */
47529a84457aed4c45bc900998b5e11c03023264208James Dong        x11 = sad_4pixel(x11, x14, x9);
47629a84457aed4c45bc900998b5e11c03023264208James Dong
47729a84457aed4c45bc900998b5e11c03023264208James Dong        /* process x12 & x10 */
47829a84457aed4c45bc900998b5e11c03023264208James Dong        x10 = sad_4pixel(x10, x12, x9);
47929a84457aed4c45bc900998b5e11c03023264208James Dong
48029a84457aed4c45bc900998b5e11c03023264208James Dong        x5 = x5 + x10;  /* accumulate low bytes */
48129a84457aed4c45bc900998b5e11c03023264208James Dong        x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */
48229a84457aed4c45bc900998b5e11c03023264208James Dong        x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */
48329a84457aed4c45bc900998b5e11c03023264208James Dong        x5 = x5 + x11;  /* accumulate low bytes */
48429a84457aed4c45bc900998b5e11c03023264208James Dong        x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */
48529a84457aed4c45bc900998b5e11c03023264208James Dong        x4 = x4 + ((uint32)x11 >> 8);  /* accumulate high bytes */
48629a84457aed4c45bc900998b5e11c03023264208James Dong
48729a84457aed4c45bc900998b5e11c03023264208James Dong        /****************/
48829a84457aed4c45bc900998b5e11c03023264208James Dong        x10 = x5 - (x4 << 8); /* extract low bytes */
48929a84457aed4c45bc900998b5e11c03023264208James Dong        x10 = x10 + x4;     /* add with high bytes */
49029a84457aed4c45bc900998b5e11c03023264208James Dong        x10 = x10 + (x10 << 16); /* add with lower half word */
49129a84457aed4c45bc900998b5e11c03023264208James Dong
49229a84457aed4c45bc900998b5e11c03023264208James Dong        /****************/
49329a84457aed4c45bc900998b5e11c03023264208James Dong
49429a84457aed4c45bc900998b5e11c03023264208James Dong        if (((uint32)x10 >> 16) <= dmin) /* compare with dmin */
49529a84457aed4c45bc900998b5e11c03023264208James Dong        {
49629a84457aed4c45bc900998b5e11c03023264208James Dong            if (--x8)
49729a84457aed4c45bc900998b5e11c03023264208James Dong            {
49829a84457aed4c45bc900998b5e11c03023264208James Dong                goto LOOP_SAD0;
49929a84457aed4c45bc900998b5e11c03023264208James Dong            }
50029a84457aed4c45bc900998b5e11c03023264208James Dong
50129a84457aed4c45bc900998b5e11c03023264208James Dong        }
50229a84457aed4c45bc900998b5e11c03023264208James Dong
50329a84457aed4c45bc900998b5e11c03023264208James Dong        return ((uint32)x10 >> 16);
50429a84457aed4c45bc900998b5e11c03023264208James Dong
50529a84457aed4c45bc900998b5e11c03023264208James DongSadMBOffset3:
50629a84457aed4c45bc900998b5e11c03023264208James Dong
50729a84457aed4c45bc900998b5e11c03023264208James Dong        return sad_mb_offset3(ref, blk, lx, dmin);
50829a84457aed4c45bc900998b5e11c03023264208James Dong
50929a84457aed4c45bc900998b5e11c03023264208James DongSadMBOffset2:
51029a84457aed4c45bc900998b5e11c03023264208James Dong
51129a84457aed4c45bc900998b5e11c03023264208James Dong        return sad_mb_offset2(ref, blk, lx, dmin);
51229a84457aed4c45bc900998b5e11c03023264208James Dong
51329a84457aed4c45bc900998b5e11c03023264208James DongSadMBOffset1:
51429a84457aed4c45bc900998b5e11c03023264208James Dong
51529a84457aed4c45bc900998b5e11c03023264208James Dong        return sad_mb_offset1(ref, blk, lx, dmin);
51629a84457aed4c45bc900998b5e11c03023264208James Dong    }
51729a84457aed4c45bc900998b5e11c03023264208James Dong
51829a84457aed4c45bc900998b5e11c03023264208James Dong
51929a84457aed4c45bc900998b5e11c03023264208James Dong#endif
52029a84457aed4c45bc900998b5e11c03023264208James Dong
52129a84457aed4c45bc900998b5e11c03023264208James Dong#ifdef __cplusplus
52229a84457aed4c45bc900998b5e11c03023264208James Dong}
52329a84457aed4c45bc900998b5e11c03023264208James Dong#endif
52429a84457aed4c45bc900998b5e11c03023264208James Dong
52529a84457aed4c45bc900998b5e11c03023264208James Dong#endif // _SAD_INLINE_H_
52629a84457aed4c45bc900998b5e11c03023264208James Dong
527