sad_inline.h revision 3fdb405597f0e062a9bb8af20199c5e67f0f764c
1dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project/* ------------------------------------------------------------------ 2dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * Copyright (C) 1998-2009 PacketVideo 3dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * 4dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * Licensed under the Apache License, Version 2.0 (the "License"); 5dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * you may not use this file except in compliance with the License. 6dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * You may obtain a copy of the License at 7dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * 8dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * http://www.apache.org/licenses/LICENSE-2.0 9dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * 10dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * Unless required by applicable law or agreed to in writing, software 11dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * distributed under the License is distributed on an "AS IS" BASIS, 12dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 13dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * express or implied. 14dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * See the License for the specific language governing permissions 15dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * and limitations under the License. 16dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * ------------------------------------------------------------------- 17dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project */ 18dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#ifndef _SAD_INLINE_H_ 19dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define _SAD_INLINE_H_ 20dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 21dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#ifdef __cplusplus 22dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Projectextern "C" 23dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project{ 24dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#endif 25dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 26dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project/* Intentionally not using the gcc asm version, since it (if fixed so 27dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * as to not crash - the current register constraints are faulty) is 28dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * slightly slower than the plain C version on modern GCC versions. */ 29dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#if !defined(__CC_ARM) /* Generic C version */ 30dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 31dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project __inline int32 SUB_SAD(int32 sad, int32 tmp, int32 tmp2) 32dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project { 33dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project tmp = tmp - tmp2; 34cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood if (tmp > 0) sad += tmp; 35cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood else sad -= tmp; 36dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 37dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project return sad; 38dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project } 39dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 40dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project __inline int32 sad_4pixel(int32 src1, int32 src2, int32 mask) 41dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project { 42dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project int32 x7; 43dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 44dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x7 = src2 ^ src1; /* check odd/even combination */ 45dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project if ((uint32)src2 >= (uint32)src1) 46dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project { 47dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project src1 = src2 - src1; /* subs */ 48dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project } 49dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project else 50dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project { 51dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project src1 = src1 - src2; 52dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project } 53dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x7 = x7 ^ src1; /* only odd bytes need to add carry */ 54dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x7 = mask & ((uint32)x7 >> 1); 55dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x7 = (x7 << 8) - x7; 56dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project src1 = src1 + (x7 >> 7); /* add 0xFF to the negative byte, add back carry */ 57dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project src1 = src1 ^(x7 >> 7); /* take absolute value of negative byte */ 58dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 59dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project return src1; 60dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project } 61dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 62dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define NUMBER 3 63dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define SHIFT 24 64dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 65dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#include "sad_mb_offset.h" 66dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 67dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#undef NUMBER 68dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define NUMBER 2 69dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#undef SHIFT 70dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define SHIFT 16 71dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#include "sad_mb_offset.h" 72dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 73dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#undef NUMBER 74dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define NUMBER 1 75dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#undef SHIFT 76dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define SHIFT 8 77dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#include "sad_mb_offset.h" 78dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 79dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 80dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project __inline int32 simd_sad_mb(uint8 *ref, uint8 *blk, int dmin, int lx) 81dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project { 82dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project int32 x4, x5, x6, x8, x9, x10, x11, x12, x14; 83dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 84dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x9 = 0x80808080; /* const. */ 85dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 86dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x8 = (intptr_t)ref & 0x3; 87dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project if (x8 == 3) 88dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project goto SadMBOffset3; 89dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project if (x8 == 2) 90dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project goto SadMBOffset2; 91dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project if (x8 == 1) 92dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project goto SadMBOffset1; 93dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 94dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project// x5 = (x4<<8)-x4; /* x5 = x4*255; */ 95dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x4 = x5 = 0; 96dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 97dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x6 = 0xFFFF00FF; 98dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 99dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project ref -= lx; 100dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project blk -= 16; 101dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 102dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x8 = 16; 103dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 104dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source ProjectLOOP_SAD0: 105dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project /****** process 8 pixels ******/ 106dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x10 = *((uint32*)(ref += lx)); 107dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x11 = *((uint32*)(ref + 4)); 108e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project x12 = *((uint32*)(blk += 16)); 109e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project x14 = *((uint32*)(blk + 4)); 110e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project 111e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project /* process x11 & x14 */ 112e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project x11 = sad_4pixel(x11, x14, x9); 113e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project 114e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project /* process x12 & x10 */ 115e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project x10 = sad_4pixel(x10, x12, x9); 116e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project 117e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project x5 = x5 + x10; /* accumulate low bytes */ 118e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ 119e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ 120e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project x5 = x5 + x11; /* accumulate low bytes */ 121e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ 122e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ 123e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project 124e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project /****** process 8 pixels ******/ 125e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project x10 = *((uint32*)(ref + 8)); 126e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project x11 = *((uint32*)(ref + 12)); 127e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project x12 = *((uint32*)(blk + 8)); 128e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project x14 = *((uint32*)(blk + 12)); 129e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project 130e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project /* process x11 & x14 */ 131e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project x11 = sad_4pixel(x11, x14, x9); 132e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project 133e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project /* process x12 & x10 */ 134e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project x10 = sad_4pixel(x10, x12, x9); 135e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project 136dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x5 = x5 + x10; /* accumulate low bytes */ 137dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ 138dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ 139dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x5 = x5 + x11; /* accumulate low bytes */ 140dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ 141dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ 142dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 143dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project /****************/ 144dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x10 = x5 - (x4 << 8); /* extract low bytes */ 145dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x10 = x10 + x4; /* add with high bytes */ 146dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x10 = x10 + (x10 << 16); /* add with lower half word */ 147dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 148dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project if ((int)((uint32)x10 >> 16) <= dmin) /* compare with dmin */ 149dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project { 150dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project if (--x8) 151dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project { 152dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project goto LOOP_SAD0; 153dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project } 154dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 155dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project } 156dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 157dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project return ((uint32)x10 >> 16); 158dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 159dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source ProjectSadMBOffset3: 160dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 161dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project return sad_mb_offset3(ref, blk, lx, dmin); 162dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 163dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source ProjectSadMBOffset2: 164dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 165dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project return sad_mb_offset2(ref, blk, lx, dmin); 166dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 167dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source ProjectSadMBOffset1: 168dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 169dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project return sad_mb_offset1(ref, blk, lx, dmin); 170dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 171dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project } 172dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 173dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#elif defined(__CC_ARM) /* only work with arm v5 */ 174dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 175dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project __inline int32 SUB_SAD(int32 sad, int32 tmp, int32 tmp2) 176dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project { 177dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project __asm 178dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project { 179dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project rsbs tmp, tmp, tmp2 ; 180dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project rsbmi tmp, tmp, #0 ; 181dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project add sad, sad, tmp ; 182dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project } 183dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 184dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project return sad; 185dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project } 186dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 187dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project __inline int32 sad_4pixel(int32 src1, int32 src2, int32 mask) 188dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project { 189dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project int32 x7; 190dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 191dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project __asm 192dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project { 193dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project EOR x7, src2, src1; /* check odd/even combination */ 194dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project SUBS src1, src2, src1; 195dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project EOR x7, x7, src1; 196dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project AND x7, mask, x7, lsr #1; 197dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project ORRCC x7, x7, #0x80000000; 198dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project RSB x7, x7, x7, lsl #8; 199dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project ADD src1, src1, x7, asr #7; /* add 0xFF to the negative byte, add back carry */ 200dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project EOR src1, src1, x7, asr #7; /* take absolute value of negative byte */ 201cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood } 202dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 203dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project return src1; 204dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project } 205dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 206dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project __inline int32 sad_4pixelN(int32 src1, int32 src2, int32 mask) 207dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project { 208dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project int32 x7; 209dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 210dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project __asm 211dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project { 212dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project EOR x7, src2, src1; /* check odd/even combination */ 213dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project ADDS src1, src2, src1; 214dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project EOR x7, x7, src1; /* only odd bytes need to add carry */ 215dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project ANDS x7, mask, x7, rrx; 216dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project RSB x7, x7, x7, lsl #8; 217dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project SUB src1, src1, x7, asr #7; /* add 0xFF to the negative byte, add back carry */ 218dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project EOR src1, src1, x7, asr #7; /* take absolute value of negative byte */ 219dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project } 220dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 221dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project return src1; 222dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project } 223dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 224dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define sum_accumulate __asm{ SBC x5, x5, x10; /* accumulate low bytes */ \ 225dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project BIC x10, x6, x10; /* x10 & 0xFF00FF00 */ \ 226dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project ADD x4, x4, x10,lsr #8; /* accumulate high bytes */ \ 227dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project SBC x5, x5, x11; /* accumulate low bytes */ \ 228dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project BIC x11, x6, x11; /* x11 & 0xFF00FF00 */ \ 229dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project ADD x4, x4, x11,lsr #8; } /* accumulate high bytes */ 230dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 231dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 232dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define NUMBER 3 233dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define SHIFT 24 234dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define INC_X8 0x08000001 235dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 236dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#include "sad_mb_offset.h" 237dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 238dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#undef NUMBER 239dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define NUMBER 2 240dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#undef SHIFT 241dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define SHIFT 16 242dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#undef INC_X8 243dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define INC_X8 0x10000001 244dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#include "sad_mb_offset.h" 245dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 246249ad57a887680538d1dc0195e746b1d877ebd6aMike Lockwood#undef NUMBER 247249ad57a887680538d1dc0195e746b1d877ebd6aMike Lockwood#define NUMBER 1 248249ad57a887680538d1dc0195e746b1d877ebd6aMike Lockwood#undef SHIFT 249249ad57a887680538d1dc0195e746b1d877ebd6aMike Lockwood#define SHIFT 8 250249ad57a887680538d1dc0195e746b1d877ebd6aMike Lockwood#undef INC_X8 251249ad57a887680538d1dc0195e746b1d877ebd6aMike Lockwood#define INC_X8 0x08000001 252249ad57a887680538d1dc0195e746b1d877ebd6aMike Lockwood#include "sad_mb_offset.h" 253249ad57a887680538d1dc0195e746b1d877ebd6aMike Lockwood 254249ad57a887680538d1dc0195e746b1d877ebd6aMike Lockwood 255249ad57a887680538d1dc0195e746b1d877ebd6aMike Lockwood __inline int32 simd_sad_mb(uint8 *ref, uint8 *blk, int dmin, int lx) 256cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood { 257dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project int32 x4, x5, x6, x8, x9, x10, x11, x12, x14; 258dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 259dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x9 = 0x80808080; /* const. */ 260cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood x4 = x5 = 0; 261dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 262dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project __asm 263dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project { 264dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project MOVS x8, ref, lsl #31 ; 265dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project BHI SadMBOffset3; 266dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project BCS SadMBOffset2; 267dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project BMI SadMBOffset1; 268cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood 269cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood MVN x6, #0xFF00; 270cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood } 271cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike LockwoodLOOP_SAD0: 272cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood /****** process 8 pixels ******/ 273cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood x11 = *((int32*)(ref + 12)); 274cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood x10 = *((int32*)(ref + 8)); 275cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood x14 = *((int32*)(blk + 12)); 276cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood x12 = *((int32*)(blk + 8)); 277cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood 278cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood /* process x11 & x14 */ 279cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood x11 = sad_4pixel(x11, x14, x9); 280cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood 281cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood /* process x12 & x10 */ 282cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood x10 = sad_4pixel(x10, x12, x9); 283cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood 284cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood x5 = x5 + x10; /* accumulate low bytes */ 285cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ 286cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ 287cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood x5 = x5 + x11; /* accumulate low bytes */ 288cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ 289cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ 290cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood 291cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood __asm 292cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood { 293cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood /****** process 8 pixels ******/ 294cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood LDR x11, [ref, #4]; 295cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood LDR x10, [ref], lx ; 296cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood LDR x14, [blk, #4]; 297cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood LDR x12, [blk], #16 ; 298cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood } 299cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood 300cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood /* process x11 & x14 */ 301cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood x11 = sad_4pixel(x11, x14, x9); 302cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood 303cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood /* process x12 & x10 */ 304cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood x10 = sad_4pixel(x10, x12, x9); 305cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood 306cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood x5 = x5 + x10; /* accumulate low bytes */ 307cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ 308cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ 309cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood x5 = x5 + x11; /* accumulate low bytes */ 310cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ 311cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ 312cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood 313cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood /****************/ 314cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood x10 = x5 - (x4 << 8); /* extract low bytes */ 315cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood x10 = x10 + x4; /* add with high bytes */ 316cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood x10 = x10 + (x10 << 16); /* add with lower half word */ 317cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood 318cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood __asm 319cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood { 320cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood /****************/ 321cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood RSBS x11, dmin, x10, lsr #16; 322cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood ADDLSS x8, x8, #0x10000001; 323cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood BLS LOOP_SAD0; 324cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood } 325cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood 326cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood return ((uint32)x10 >> 16); 327cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood 328cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike LockwoodSadMBOffset3: 329cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood 330cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood return sad_mb_offset3(ref, blk, lx, dmin, x8); 331cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood 332cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike LockwoodSadMBOffset2: 333cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood 334cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood return sad_mb_offset2(ref, blk, lx, dmin, x8); 335cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood 336cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike LockwoodSadMBOffset1: 337cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood 338dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project return sad_mb_offset1(ref, blk, lx, dmin, x8); 339dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project } 340dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 341dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 342dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#elif defined(__GNUC__) && defined(__arm__) /* ARM GNU COMPILER */ 343dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 344dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project __inline int32 SUB_SAD(int32 sad, int32 tmp, int32 tmp2) 345dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project { 346dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project __asm__ volatile( 347dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project "rsbs %1, %1, %2\n\t" 348dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project "rsbmi %1, %1, #0\n\t" 349dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project "add %0, %0, %1" 350dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project : "+r"(sad), "+r"(tmp) 351dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project : "r"(tmp2) 352dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project ); 353dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project return sad; 354dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project } 355dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 356dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project __inline int32 sad_4pixel(int32 src1, int32 src2, int32 mask) 357dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project { 358dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project int32 x7; 359dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 360dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project __asm__ volatile( 361dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project "EOR %1, %2, %0\n\t" 362dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project "SUBS %0, %2, %0\n\t" 363dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project "EOR %1, %1, %0\n\t" 364dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project "AND %1, %3, %1, lsr #1\n\t" 365dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project "ORRCC %1, %1, #0x80000000\n\t" 366dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project "RSB %1, %1, %1, lsl #8\n\t" 367dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project "ADD %0, %0, %1, asr #7\n\t" 368dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project "EOR %0, %0, %1, asr #7" 369dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project : "+r"(src1), "=&r"(x7) 370dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project : "r"(src2), "r"(mask) 371dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project ); 372dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 373dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project return src1; 374dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project } 375dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 376dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project __inline int32 sad_4pixelN(int32 src1, int32 src2, int32 mask) 377dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project { 378dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project int32 x7; 379dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 380dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project __asm__ volatile( 381dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project "EOR %1, %2, %0\n\t" 382dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project "ADDS %0, %2, %0\n\t" 383dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project "EOR %1, %1, %0\n\t" 384dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project "ANDS %1, %3, %1, rrx\n\t" 385dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project "RSB %1, %1, %1, lsl #8\n\t" 386dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project "SUB %0, %0, %1, asr #7\n\t" 387dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project "EOR %0, %0, %1, asr #7" 388dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project : "+r"(src1), "=&r"(x7) 389cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood : "r"(src2), "r"(mask) 390dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project ); 391cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood 392cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood return src1; 393dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project } 394cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood 395cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood#define sum_accumulate __asm__ volatile( \ 396dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project "SBC %0, %0, %1\n\t" \ 397cc1de48dcdf06c76ee14abbe2a237aa51b5b3badMike Lockwood "BIC %1, %4, %1\n\t" \ 398dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project "ADD %2, %2, %1, lsr #8\n\t" \ 399dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project "SBC %0, %0, %3\n\t" \ 400dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project "BIC %3, %4, %3\n\t" \ 401dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project "ADD %2, %2, %3, lsr #8" \ 402e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project : "+r" (x5), "+r" (x10), "+r" (x4), "+r" (x11) \ 403e037fd7e193ecccbb5c0888e49f6d58c224bc11dThe Android Open Source Project : "r" (x6) \ 404dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project ); 405dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 406dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define NUMBER 3 407dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define SHIFT 24 408dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define INC_X8 0x08000001 409dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 410dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#include "sad_mb_offset.h" 411dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 412dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#undef NUMBER 413dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define NUMBER 2 414dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#undef SHIFT 415dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define SHIFT 16 416dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#undef INC_X8 417dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define INC_X8 0x10000001 418dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#include "sad_mb_offset.h" 419dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 420dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#undef NUMBER 421dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define NUMBER 1 422dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#undef SHIFT 423dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define SHIFT 8 424dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#undef INC_X8 425dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#define INC_X8 0x08000001 426dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#include "sad_mb_offset.h" 427dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 428dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 429dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project __inline int32 simd_sad_mb(uint8 *ref, uint8 *blk, int dmin, int lx) 430dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project { 431dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project int32 x4, x5, x6, x8, x9, x10, x11, x12, x14; 432dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 433dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x9 = 0x80808080; /* const. */ 434dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x4 = x5 = 0; 435dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 436dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x8 = (uint32)ref & 0x3; 437dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project if (x8 == 3) 438dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project goto SadMBOffset3; 439dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project if (x8 == 2) 440dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project goto SadMBOffset2; 441dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project if (x8 == 1) 442dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project goto SadMBOffset1; 443dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 444dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x8 = 16; 445dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project/// 446dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project __asm__ volatile("MVN %0, #0xFF00": "=r"(x6)); 447dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 448dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source ProjectLOOP_SAD0: 449dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project /****** process 8 pixels ******/ 450dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x11 = *((int32*)(ref + 12)); 451dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x10 = *((int32*)(ref + 8)); 452dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x14 = *((int32*)(blk + 12)); 453dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x12 = *((int32*)(blk + 8)); 454dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 455dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project /* process x11 & x14 */ 456dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x11 = sad_4pixel(x11, x14, x9); 457dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 458dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project /* process x12 & x10 */ 459dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x10 = sad_4pixel(x10, x12, x9); 460dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 461dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x5 = x5 + x10; /* accumulate low bytes */ 462dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ 463dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ 464dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x5 = x5 + x11; /* accumulate low bytes */ 465dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ 466dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ 467dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 468dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project /****** process 8 pixels ******/ 469dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x11 = *((int32*)(ref + 4)); 470dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project __asm__ volatile("LDR %0, [%1], %2": "=&r"(x10), "+r"(ref): "r"(lx)); 471dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project //x10 = *((int32*)ref); ref+=lx; 472dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x14 = *((int32*)(blk + 4)); 473dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project __asm__ volatile("LDR %0, [%1], #16": "=&r"(x12), "+r"(blk)); 474dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 475dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project /* process x11 & x14 */ 476dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x11 = sad_4pixel(x11, x14, x9); 477dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 478dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project /* process x12 & x10 */ 479dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project x10 = sad_4pixel(x10, x12, x9); 480 481 x5 = x5 + x10; /* accumulate low bytes */ 482 x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ 483 x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ 484 x5 = x5 + x11; /* accumulate low bytes */ 485 x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ 486 x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ 487 488 /****************/ 489 x10 = x5 - (x4 << 8); /* extract low bytes */ 490 x10 = x10 + x4; /* add with high bytes */ 491 x10 = x10 + (x10 << 16); /* add with lower half word */ 492 493 /****************/ 494 495 if (((uint32)x10 >> 16) <= dmin) /* compare with dmin */ 496 { 497 if (--x8) 498 { 499 goto LOOP_SAD0; 500 } 501 502 } 503 504 return ((uint32)x10 >> 16); 505 506SadMBOffset3: 507 508 return sad_mb_offset3(ref, blk, lx, dmin); 509 510SadMBOffset2: 511 512 return sad_mb_offset2(ref, blk, lx, dmin); 513 514SadMBOffset1: 515 516 return sad_mb_offset1(ref, blk, lx, dmin); 517 } 518 519 520#endif 521 522#ifdef __cplusplus 523} 524#endif 525 526#endif // _SAD_INLINE_H_ 527 528