sad_inline.h revision 3fdb405597f0e062a9bb8af20199c5e67f0f764c
1dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project/* ------------------------------------------------------------------
2dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * Copyright (C) 1998-2009 PacketVideo
3dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project *
4dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * Licensed under the Apache License, Version 2.0 (the "License");
5dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * you may not use this file except in compliance with the License.
6dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * You may obtain a copy of the License at
7dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project *
8dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project *      http://www.apache.org/licenses/LICENSE-2.0
9dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project *
10dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * Unless required by applicable law or agreed to in writing, software
11dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * distributed under the License is distributed on an "AS IS" BASIS,
12dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * express or implied.
14dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * See the License for the specific language governing permissions
15dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * and limitations under the License.
16dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * -------------------------------------------------------------------
17dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project */
18dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#ifndef _SAD_INLINE_H_
19dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define _SAD_INLINE_H_
20dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
21dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#ifdef __cplusplus
22dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Projectextern "C"
23dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project{
24dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#endif
25dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
26dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project/* Intentionally not using the gcc asm version, since it (if fixed so
27dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * as to not crash - the current register constraints are faulty) is
28dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * slightly slower than the plain C version on modern GCC versions. */
29dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#if !defined(__CC_ARM) /* Generic C version */
30dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
31dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project    __inline int32 SUB_SAD(int32 sad, int32 tmp, int32 tmp2)
32dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project    {
33dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        tmp = tmp - tmp2;
34cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        if (tmp > 0) sad += tmp;
35cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        else sad -= tmp;
36dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
37dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        return sad;
38dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project    }
39dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
40dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project    __inline int32 sad_4pixel(int32 src1, int32 src2, int32 mask)
41dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project    {
42dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        int32 x7;
43dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
44dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x7 = src2 ^ src1;       /* check odd/even combination */
45dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        if ((uint32)src2 >= (uint32)src1)
46dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        {
47dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            src1 = src2 - src1;     /* subs */
48dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        }
49dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        else
50dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        {
51dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            src1 = src1 - src2;
52dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        }
53dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x7 = x7 ^ src1;     /* only odd bytes need to add carry */
54dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x7 = mask & ((uint32)x7 >> 1);
55dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x7 = (x7 << 8) - x7;
56dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        src1 = src1 + (x7 >> 7); /* add 0xFF to the negative byte, add back carry */
57dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        src1 = src1 ^(x7 >> 7);   /* take absolute value of negative byte */
58dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
59dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        return src1;
60dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project    }
61dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
/*
 * sad_mb_offset.h is included three times, each time with a different
 * NUMBER / SHIFT pair: 3 / 24, 2 / 16 and 1 / 8.
 * NOTE(review): presumably each inclusion generates one sad_mb_offset<NUMBER>()
 * function (the ones simd_sad_mb() calls for a ref pointer that is NUMBER
 * bytes past a 4-byte boundary), with SHIFT being 8 * NUMBER -- confirm
 * against sad_mb_offset.h.
 */
62dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define NUMBER 3
63dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define SHIFT 24
64dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
65dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#include "sad_mb_offset.h"
66dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
67dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#undef NUMBER
68dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define NUMBER 2
69dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#undef SHIFT
70dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define SHIFT 16
71dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#include "sad_mb_offset.h"
72dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
73dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#undef NUMBER
74dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define NUMBER 1
75dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#undef SHIFT
76dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define SHIFT 8
77dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#include "sad_mb_offset.h"
78dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
79dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
80dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project    __inline int32 simd_sad_mb(uint8 *ref, uint8 *blk, int dmin, int lx)
81dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project    {
82dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        int32 x4, x5, x6, x8, x9, x10, x11, x12, x14;
83dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
84dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x9 = 0x80808080; /* const. */
85dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
86dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x8 = (intptr_t)ref & 0x3;
87dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        if (x8 == 3)
88dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            goto SadMBOffset3;
89dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        if (x8 == 2)
90dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            goto SadMBOffset2;
91dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        if (x8 == 1)
92dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            goto SadMBOffset1;
93dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
94dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project//  x5 = (x4<<8)-x4; /* x5 = x4*255; */
95dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x4 = x5 = 0;
96dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
97dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x6 = 0xFFFF00FF;
98dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
99dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        ref -= lx;
100dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        blk -= 16;
101dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
102dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x8 = 16;
103dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
104dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source ProjectLOOP_SAD0:
105dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        /****** process 8 pixels ******/
106dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x10 = *((uint32*)(ref += lx));
107dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x11 = *((uint32*)(ref + 4));
108e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project        x12 = *((uint32*)(blk += 16));
109e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project        x14 = *((uint32*)(blk + 4));
110e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project
111e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project        /* process x11 & x14 */
112e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project        x11 = sad_4pixel(x11, x14, x9);
113e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project
114e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project        /* process x12 & x10 */
115e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project        x10 = sad_4pixel(x10, x12, x9);
116e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project
117e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project        x5 = x5 + x10; /* accumulate low bytes */
118e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project        x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */
119e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project        x4 = x4 + ((uint32)x10 >> 8);  /* accumulate high bytes */
120e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project        x5 = x5 + x11;  /* accumulate low bytes */
121e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project        x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */
122e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project        x4 = x4 + ((uint32)x11 >> 8);  /* accumulate high bytes */
123e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project
124e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project        /****** process 8 pixels ******/
125e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project        x10 = *((uint32*)(ref + 8));
126e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project        x11 = *((uint32*)(ref + 12));
127e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project        x12 = *((uint32*)(blk + 8));
128e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project        x14 = *((uint32*)(blk + 12));
129e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project
130e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project        /* process x11 & x14 */
131e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project        x11 = sad_4pixel(x11, x14, x9);
132e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project
133e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project        /* process x12 & x10 */
134e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project        x10 = sad_4pixel(x10, x12, x9);
135e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project
136dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x5 = x5 + x10;  /* accumulate low bytes */
137dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */
138dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */
139dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x5 = x5 + x11;  /* accumulate low bytes */
140dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */
141dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x4 = x4 + ((uint32)x11 >> 8);  /* accumulate high bytes */
142dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
143dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        /****************/
144dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x10 = x5 - (x4 << 8); /* extract low bytes */
145dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x10 = x10 + x4;     /* add with high bytes */
146dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x10 = x10 + (x10 << 16); /* add with lower half word */
147dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
148dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        if ((int)((uint32)x10 >> 16) <= dmin) /* compare with dmin */
149dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        {
150dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            if (--x8)
151dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            {
152dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project                goto LOOP_SAD0;
153dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            }
154dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
155dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        }
156dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
157dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        return ((uint32)x10 >> 16);
158dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
159dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source ProjectSadMBOffset3:
160dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
161dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        return sad_mb_offset3(ref, blk, lx, dmin);
162dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
163dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source ProjectSadMBOffset2:
164dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
165dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        return sad_mb_offset2(ref, blk, lx, dmin);
166dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
167dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source ProjectSadMBOffset1:
168dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
169dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        return sad_mb_offset1(ref, blk, lx, dmin);
170dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
171dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project    }
172dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
173dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#elif defined(__CC_ARM)  /* only work with arm v5 */
174dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
/*
 * ARM RVCT inline-assembly version: adds the absolute difference of tmp and
 * tmp2 to sad and returns the new sad.
 *
 *   rsbs   tmp = tmp2 - tmp, updating the condition flags
 *   rsbmi  if that result is negative, tmp = 0 - tmp
 *   add    sad = sad + tmp
 */
175dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project    __inline int32 SUB_SAD(int32 sad, int32 tmp, int32 tmp2)
176dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project    {
177dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        __asm
178dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        {
179dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            rsbs    tmp, tmp, tmp2 ;
180dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            rsbmi   tmp, tmp, #0 ;
181dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            add     sad, sad, tmp ;
182dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        }
183dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
184dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        return sad;
185dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project    }
186dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
/*
 * ARM RVCT inline-assembly version of the packed four-byte difference.
 * src1 and src2 each hold four bytes; mask is ANDed with the shifted borrow
 * bits (simd_sad_mb() passes 0x80808080).  Per the inline comments the result
 * holds the absolute value of each byte difference.
 *
 * Step by step:
 *   EOR    x7 = src2 ^ src1
 *   SUBS   src1 = src2 - src1, setting the flags (carry clear means the
 *          32-bit subtraction borrowed)
 *   EOR    x7 ^= src1, leaving the bits where a borrow came in
 *   AND    x7 = mask & (x7 >> 1), logical shift
 *   ORRCC  if the subtraction borrowed, also set bit 31 of x7
 *   RSB    x7 = (x7 << 8) - x7, i.e. x7 * 255
 *   ADD    src1 += x7 >> 7 (arithmetic shift)
 *   EOR    src1 ^= x7 >> 7 (arithmetic shift)
 */
187dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project    __inline int32 sad_4pixel(int32 src1, int32 src2, int32 mask)
188dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project    {
189dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        int32 x7;
190dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
191dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        __asm
192dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        {
193dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            EOR     x7, src2, src1;     /* check odd/even combination */
194dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            SUBS    src1, src2, src1;
195dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            EOR     x7, x7, src1;
196dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            AND     x7, mask, x7, lsr #1;
197dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            ORRCC   x7, x7, #0x80000000;
198dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            RSB     x7, x7, x7, lsl #8;
199dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            ADD     src1, src1, x7, asr #7;   /* add 0xFF to the negative byte, add back carry */
200dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            EOR     src1, src1, x7, asr #7;   /* take absolute value of negative byte */
201cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        }
202dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
203dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        return src1;
204dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project    }
205dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
/*
 * ARM RVCT inline-assembly variant of sad_4pixel() that ADDS the two packed
 * words instead of subtracting them, and subtracts the fix-up instead of
 * adding it.
 *
 * Step by step:
 *   EOR    x7 = src2 ^ src1
 *   ADDS   src1 = src2 + src1, setting the flags
 *   EOR    x7 ^= src1, leaving the bits where a carry came in
 *   ANDS   x7 = mask & (x7 rotated right by one through the carry flag, so
 *          the carry out of the ADDS lands in bit 31)
 *   RSB    x7 = (x7 << 8) - x7, i.e. x7 * 255
 *   SUB    src1 -= x7 >> 7 (arithmetic shift)
 *   EOR    src1 ^= x7 >> 7 (arithmetic shift)
 *
 * NOTE(review): the last two inline comments read as if copied from
 * sad_4pixel(); here the first of them sits on a SUB, not an ADD.
 * NOTE(review): presumably callers pass one operand already complemented so
 * that the addition acts as a subtraction -- no caller is visible in this
 * file; confirm in sad_mb_offset.h.
 */
206dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project    __inline int32 sad_4pixelN(int32 src1, int32 src2, int32 mask)
207dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project    {
208dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        int32 x7;
209dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
210dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        __asm
211dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        {
212dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            EOR      x7, src2, src1;        /* check odd/even combination */
213dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            ADDS     src1, src2, src1;
214dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            EOR      x7, x7, src1;      /* only odd bytes need to add carry */
215dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            ANDS     x7, mask, x7, rrx;
216dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            RSB      x7, x7, x7, lsl #8;
217dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            SUB      src1, src1, x7, asr #7;  /* add 0xFF to the negative byte, add back carry */
218dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            EOR      src1, src1, x7, asr #7; /* take absolute value of negative byte */
219dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        }
220dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
221dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        return src1;
222dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project    }
223dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
/*
 * sum_accumulate: inline-assembly fragment that folds x10 and x11 into the
 * accumulators x5 and x4.  It takes no arguments; x4, x5, x6, x10 and x11
 * must be variables in scope where the macro is expanded.
 *
 *   SBC  x5 = x5 - x10 - (1 if the carry flag is clear, else 0); same for x11
 *   BIC  x10 = x6 & ~x10; same for x11
 *   ADD  x4 = x4 + (x10 >> 8), logical shift; same for x11
 *
 * NOTE(review): the inline comments say "x10 & 0xFF00FF00", but BIC clears
 * the bits of x6 that are set in x10; together with the SBC this presumably
 * matches the values produced by sad_4pixelN() -- the macro is not used in
 * this file, confirm against its users in sad_mb_offset.h.
 */
224dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define sum_accumulate  __asm{      SBC      x5, x5, x10;  /* accumulate low bytes */ \
225dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        BIC      x10, x6, x10;   /* x10 & 0xFF00FF00 */ \
226dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        ADD      x4, x4, x10,lsr #8;   /* accumulate high bytes */ \
227dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        SBC      x5, x5, x11;    /* accumulate low bytes */ \
228dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        BIC      x11, x6, x11;   /* x11 & 0xFF00FF00 */ \
229dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        ADD      x4, x4, x11,lsr #8; } /* accumulate high bytes */
230dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
231dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
/*
 * sad_mb_offset.h is included three times, each time with a different
 * NUMBER / SHIFT / INC_X8 set:
 *   NUMBER 3, SHIFT 24, INC_X8 0x08000001
 *   NUMBER 2, SHIFT 16, INC_X8 0x10000001
 *   NUMBER 1, SHIFT 8,  INC_X8 0x08000001
 * NOTE(review): presumably each inclusion generates one sad_mb_offset<NUMBER>()
 * function, as called by simd_sad_mb() for a misaligned ref, and INC_X8 is
 * the per-row increment of that function's x8 loop counter -- confirm
 * against sad_mb_offset.h.
 */
232dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define NUMBER 3
233dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define SHIFT 24
234dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define INC_X8 0x08000001
235dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
236dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#include "sad_mb_offset.h"
237dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
238dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#undef NUMBER
239dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define NUMBER 2
240dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#undef SHIFT
241dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define SHIFT 16
242dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#undef INC_X8
243dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define INC_X8 0x10000001
244dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#include "sad_mb_offset.h"
245dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
246249ad57a887680538d1dc0195e746b1d877ebd6aMike Lockwood#undef NUMBER
247249ad57a887680538d1dc0195e746b1d877ebd6aMike Lockwood#define NUMBER 1
248249ad57a887680538d1dc0195e746b1d877ebd6aMike Lockwood#undef SHIFT
249249ad57a887680538d1dc0195e746b1d877ebd6aMike Lockwood#define SHIFT 8
250249ad57a887680538d1dc0195e746b1d877ebd6aMike Lockwood#undef INC_X8
251249ad57a887680538d1dc0195e746b1d877ebd6aMike Lockwood#define INC_X8 0x08000001
252249ad57a887680538d1dc0195e746b1d877ebd6aMike Lockwood#include "sad_mb_offset.h"
253249ad57a887680538d1dc0195e746b1d877ebd6aMike Lockwood
254249ad57a887680538d1dc0195e746b1d877ebd6aMike Lockwood
/*
 * ARM RVCT version: SAD of 16-byte rows of blk against ref, with an early
 * exit once the running total goes above dmin.
 *
 * ref   reference pixels; advanced by lx after every row
 * blk   block pixels; advanced by 16 after every row
 * dmin  threshold compared (unsigned) against the running total
 * lx    number of bytes ref is advanced per row
 *
 * The two low bits of ref pick the path: 3, 2 or 1 returns the result of
 * sad_mb_offset3/2/1(ref, blk, lx, dmin, x8); 0 falls into LOOP_SAD0.
 *
 * Each pass of LOOP_SAD0 reads four 32-bit words from ref and four from blk,
 * runs each pair through sad_4pixel() and adds the results to two packed
 * running sums (x5: whole words, x4: the 0xFF00FF00 bytes shifted down by 8).
 * The total is then rebuilt in the upper 16 bits of x10, which is the value
 * returned.
 *
 * x8 doubles as the row counter: it is 0 when the loop is entered and has
 * 0x10000001 added per row, so the 16th addition carries out of bit 31 and
 * ends the loop.
 */
255249ad57a887680538d1dc0195e746b1d877ebd6aMike Lockwood    __inline int32 simd_sad_mb(uint8 *ref, uint8 *blk, int dmin, int lx)
256cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood    {
257dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        int32 x4, x5, x6, x8, x9, x10, x11, x12, x14;
258dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
259dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x9 = 0x80808080; /* const. */
260cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        x4 = x5 = 0;
261dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
262dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        __asm
263dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        {
            /* x8 = ref << 31: bit 1 of ref ends up in the carry flag and
             * bit 0 in bit 31 (the sign flag).  HI = both bits set,
             * CS = bit 1 set, MI = bit 0 set. */
264dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            MOVS    x8, ref, lsl #31 ;
265dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            BHI     SadMBOffset3;
266dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            BCS     SadMBOffset2;
267dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            BMI     SadMBOffset1;
268cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood
            /* x6 = ~0xFF00 = 0xFFFF00FF; the C code below uses x6 << 8 */
269cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood            MVN     x6, #0xFF00;
270cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        }
271cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike LockwoodLOOP_SAD0:
272cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        /****** process 8 pixels ******/
        /* bytes 8..15 of the current row */
273cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        x11 = *((int32*)(ref + 12));
274cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        x10 = *((int32*)(ref + 8));
275cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        x14 = *((int32*)(blk + 12));
276cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        x12 = *((int32*)(blk + 8));
277cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood
278cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        /* process x11 & x14 */
279cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        x11 = sad_4pixel(x11, x14, x9);
280cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood
281cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        /* process x12 & x10 */
282cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        x10 = sad_4pixel(x10, x12, x9);
283cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood
284cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        x5 = x5 + x10;  /* accumulate low bytes */
285cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */
286cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */
287cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        x5 = x5 + x11;  /* accumulate low bytes */
288cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */
289cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        x4 = x4 + ((uint32)x11 >> 8);  /* accumulate high bytes */
290cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood
291cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        __asm
292cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        {
293cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood            /****** process 8 pixels ******/
            /* bytes 0..7 of the current row; the two post-indexed loads
             * also step ref by lx and blk by 16 to the next row */
294cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood            LDR     x11, [ref, #4];
295cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood            LDR     x10, [ref], lx ;
296cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood            LDR     x14, [blk, #4];
297cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood            LDR     x12, [blk], #16 ;
298cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        }
299cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood
300cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        /* process x11 & x14 */
301cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        x11 = sad_4pixel(x11, x14, x9);
302cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood
303cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        /* process x12 & x10 */
304cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        x10 = sad_4pixel(x10, x12, x9);
305cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood
306cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        x5 = x5 + x10;  /* accumulate low bytes */
307cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */
308cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */
309cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        x5 = x5 + x11;  /* accumulate low bytes */
310cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */
311cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        x4 = x4 + ((uint32)x11 >> 8);  /* accumulate high bytes */
312cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood
313cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        /****************/
314cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        x10 = x5 - (x4 << 8); /* extract low bytes */
315cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        x10 = x10 + x4;     /* add with high bytes */
316cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        x10 = x10 + (x10 << 16); /* add with lower half word */
317cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood
318cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        __asm
319cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        {
320cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood            /****************/
            /* x11 = (x10 >> 16) - dmin; LS holds when the total <= dmin.
             * Only then is x8 bumped (updating the flags again), and the
             * branch is taken while that addition leaves LS true, i.e.
             * until it carries out of bit 31. */
321cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood            RSBS    x11, dmin, x10, lsr #16;
322cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood            ADDLSS  x8, x8, #0x10000001;
323cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood            BLS     LOOP_SAD0;
324cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        }
325cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood
326cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        return ((uint32)x10 >> 16);
327cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood
328cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike LockwoodSadMBOffset3:
329cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood
330cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        return sad_mb_offset3(ref, blk, lx, dmin, x8);
331cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood
332cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike LockwoodSadMBOffset2:
333cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood
334cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        return sad_mb_offset2(ref, blk, lx, dmin, x8);
335cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood
336cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike LockwoodSadMBOffset1:
337cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood
338dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        return sad_mb_offset1(ref, blk, lx, dmin, x8);
339dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project    }
340dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
341dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
342dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#elif defined(__GNUC__) && defined(__arm__) /* ARM GNU COMPILER  */
343dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
344dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project    __inline int32 SUB_SAD(int32 sad, int32 tmp, int32 tmp2)
345dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project    {
346dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        __asm__ volatile(
347dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            "rsbs       %1, %1, %2\n\t"
348dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            "rsbmi      %1, %1, #0\n\t"
349dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            "add        %0, %0, %1"
350dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            : "+r"(sad), "+r"(tmp)
351dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            : "r"(tmp2)
352dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        );
353dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        return sad;
354dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project    }
355dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
356dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project    __inline int32 sad_4pixel(int32 src1, int32 src2, int32 mask)
357dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project    {
358dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        int32 x7;
359dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
360dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        __asm__ volatile(
361dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            "EOR        %1, %2, %0\n\t"
362dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            "SUBS       %0, %2, %0\n\t"
363dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            "EOR        %1, %1, %0\n\t"
364dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            "AND        %1, %3, %1, lsr #1\n\t"
365dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            "ORRCC      %1, %1, #0x80000000\n\t"
366dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            "RSB        %1, %1, %1, lsl #8\n\t"
367dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            "ADD        %0, %0, %1, asr #7\n\t"
368dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            "EOR        %0, %0, %1, asr #7"
369dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            : "+r"(src1), "=&r"(x7)
370dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            : "r"(src2), "r"(mask)
371dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        );
372dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
373dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        return src1;
374dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project    }
375dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
376dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project    __inline int32 sad_4pixelN(int32 src1, int32 src2, int32 mask)
377dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project    {
378dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        int32 x7;
379dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
380dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        __asm__ volatile(
381dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            "EOR        %1, %2, %0\n\t"
382dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            "ADDS       %0, %2, %0\n\t"
383dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            "EOR        %1, %1, %0\n\t"
384dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            "ANDS       %1, %3, %1, rrx\n\t"
385dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            "RSB        %1, %1, %1, lsl #8\n\t"
386dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            "SUB        %0, %0, %1, asr #7\n\t"
387dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            "EOR        %0, %0, %1, asr #7"
388dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            : "+r"(src1), "=&r"(x7)
389cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood            : "r"(src2), "r"(mask)
390dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        );
391cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood
392cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood        return src1;
393dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project    }
394cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood
/*
 * sum_accumulate -- fold the packed words x10 and x11 into the running
 * accumulators x5 and x4.  The macro expands in place, so locals named
 * x4, x5, x6, x10 and x11 must be in scope where it is used.  The expansion
 * already ends in ';'.
 *
 * Operand map: %0 = x5, %1 = x10, %2 = x4, %3 = x11, %4 = x6.
 *
 *   SBC  x5,  x5, x10          x5 -= x10, minus one more when carry is clear
 *   BIC  x10, x6, x10          x10 = x6 & ~x10
 *   ADD  x4,  x4, x10 lsr #8   x4 += x10 >> 8
 *   ...followed by the same three steps with x11 in place of x10.
 *
 * x10 and x11 are overwritten as a side effect.
 *
 * NOTE(review): SBC consumes the carry flag, which nothing inside this asm
 * statement sets.  It presumably depends on the flag state left behind by a
 * preceding asm statement such as sad_4pixelN(); the compiler does not
 * promise to preserve flags between separate asm statements -- verify at
 * the expansion sites in sad_mb_offset.h.
 */
#define sum_accumulate  __asm__ volatile(              \
    "SBC   %0, %0, %1\n\t"                             \
    "BIC   %1, %4, %1\n\t"                             \
    "ADD   %2, %2, %1, lsr #8\n\t"                     \
    "SBC   %0, %0, %3\n\t"                             \
    "BIC   %3, %4, %3\n\t"                             \
    "ADD   %2, %2, %3, lsr #8"                         \
    : "+r" (x5), "+r" (x10), "+r" (x4), "+r" (x11)     \
    : "r" (x6)                                         \
    );
405dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
406dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define NUMBER 3
407dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define SHIFT 24
408dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define INC_X8 0x08000001
409dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
410dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#include "sad_mb_offset.h"
411dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
412dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#undef NUMBER
413dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define NUMBER 2
414dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#undef SHIFT
415dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define SHIFT 16
416dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#undef INC_X8
417dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define INC_X8 0x10000001
418dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#include "sad_mb_offset.h"
419dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
420dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#undef NUMBER
421dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define NUMBER 1
422dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#undef SHIFT
423dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define SHIFT 8
424dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#undef INC_X8
425dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define INC_X8 0x08000001
426dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#include "sad_mb_offset.h"
427dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
428dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
429dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project    __inline int32 simd_sad_mb(uint8 *ref, uint8 *blk, int dmin, int lx)
430dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project    {
431dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        int32 x4, x5, x6, x8, x9, x10, x11, x12, x14;
432dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
433dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x9 = 0x80808080; /* const. */
434dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x4 = x5 = 0;
435dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
436dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x8 = (uint32)ref & 0x3;
437dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        if (x8 == 3)
438dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            goto SadMBOffset3;
439dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        if (x8 == 2)
440dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            goto SadMBOffset2;
441dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        if (x8 == 1)
442dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project            goto SadMBOffset1;
443dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
444dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x8 = 16;
445dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project///
446dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        __asm__ volatile("MVN   %0, #0xFF00": "=r"(x6));
447dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
448dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source ProjectLOOP_SAD0:
449dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        /****** process 8 pixels ******/
450dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x11 = *((int32*)(ref + 12));
451dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x10 = *((int32*)(ref + 8));
452dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x14 = *((int32*)(blk + 12));
453dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x12 = *((int32*)(blk + 8));
454dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
455dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        /* process x11 & x14 */
456dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x11 = sad_4pixel(x11, x14, x9);
457dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
458dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        /* process x12 & x10 */
459dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x10 = sad_4pixel(x10, x12, x9);
460dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
461dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x5 = x5 + x10;  /* accumulate low bytes */
462dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */
463dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */
464dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x5 = x5 + x11;  /* accumulate low bytes */
465dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */
466dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x4 = x4 + ((uint32)x11 >> 8);  /* accumulate high bytes */
467dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
468dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        /****** process 8 pixels ******/
469dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x11 = *((int32*)(ref + 4));
470dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        __asm__ volatile("LDR   %0, [%1], %2": "=&r"(x10), "+r"(ref): "r"(lx));
471dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        //x10 = *((int32*)ref); ref+=lx;
472dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x14 = *((int32*)(blk + 4));
473dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        __asm__ volatile("LDR   %0, [%1], #16": "=&r"(x12), "+r"(blk));
474dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
475dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        /* process x11 & x14 */
476dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x11 = sad_4pixel(x11, x14, x9);
477dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project
478dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        /* process x12 & x10 */
479dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project        x10 = sad_4pixel(x10, x12, x9);
480
481        x5 = x5 + x10;  /* accumulate low bytes */
482        x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */
483        x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */
484        x5 = x5 + x11;  /* accumulate low bytes */
485        x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */
486        x4 = x4 + ((uint32)x11 >> 8);  /* accumulate high bytes */
487
488        /****************/
489        x10 = x5 - (x4 << 8); /* extract low bytes */
490        x10 = x10 + x4;     /* add with high bytes */
491        x10 = x10 + (x10 << 16); /* add with lower half word */
492
493        /****************/
494
495        if (((uint32)x10 >> 16) <= dmin) /* compare with dmin */
496        {
497            if (--x8)
498            {
499                goto LOOP_SAD0;
500            }
501
502        }
503
504        return ((uint32)x10 >> 16);
505
506SadMBOffset3:
507
508        return sad_mb_offset3(ref, blk, lx, dmin);
509
510SadMBOffset2:
511
512        return sad_mb_offset2(ref, blk, lx, dmin);
513
514SadMBOffset1:
515
516        return sad_mb_offset1(ref, blk, lx, dmin);
517    }
518
519
520#endif
521
522#ifdef __cplusplus
523}
524#endif
525
526#endif // _SAD_INLINE_H_
527
528