sad_inline.h revision 377b2ec9a2885f9b6405b07ba900a9e3f4349c38
159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/* ------------------------------------------------------------------ 259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * Copyright (C) 1998-2009 PacketVideo 359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * 459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * Licensed under the Apache License, Version 2.0 (the "License"); 559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * you may not use this file except in compliance with the License. 659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * You may obtain a copy of the License at 759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * 859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * http://www.apache.org/licenses/LICENSE-2.0 959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * 1059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * Unless required by applicable law or agreed to in writing, software 1159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * distributed under the License is distributed on an "AS IS" BASIS, 1259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 1359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * express or implied. 1459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * See the License for the specific language governing permissions 1559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * and limitations under the License. 1659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * ------------------------------------------------------------------- 1759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong */ 1859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/*********************************************************************************/ 1959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/* Filename: sad_inline.h */ 2059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/* Description: Implementation for in-line functions used in dct.cpp */ 2159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/* Modified: */ 2259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/*********************************************************************************/ 2359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#ifndef _SAD_INLINE_H_ 2459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#define _SAD_INLINE_H_ 2559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 2659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#ifdef __cplusplus 2759f566c4ec3dfc097ad8163523e522280b27e5c3James Dongextern "C" 2859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{ 2959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#endif 3059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 3159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#if !defined(PV_ARM_GCC_V5) && !defined(PV_ARM_GCC_V4) /* ARM GNU COMPILER */ 3259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 3359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong __inline int32 SUB_SAD(int32 sad, int32 tmp, int32 tmp2) 3459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong { 3559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong tmp = tmp - tmp2; 3659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong if (tmp > 0) sad += tmp; 3759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong else sad -= tmp; 3859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 3959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong return sad; 4059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong } 4159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 4259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong __inline int32 sad_4pixel(int32 src1, int32 src2, int32 mask) 4359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong { 4459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong int32 x7; 4559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 4659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x7 = src2 ^ src1; /* check odd/even combination */ 4759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong if ((uint32)src2 >= (uint32)src1) 4859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong { 4959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong src1 = src2 - src1; /* subs */ 5059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong } 5159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong else 5259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong { 5359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong src1 = src1 - src2; 5459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong } 5559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x7 = x7 ^ src1; /* only odd bytes need to add carry */ 5659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x7 = mask & ((uint32)x7 >> 1); 5759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x7 = (x7 << 8) - x7; 5859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong src1 = src1 + (x7 >> 7); /* add 0xFF to the negative byte, add back carry */ 5959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong src1 = src1 ^(x7 >> 7); /* take absolute value of negative byte */ 6059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 6159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong return src1; 6259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong } 6359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 6459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#define NUMBER 3 6559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#define SHIFT 24 6659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 6759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#include "sad_mb_offset.h" 6859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 6959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#undef NUMBER 7059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#define NUMBER 2 7159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#undef SHIFT 7259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#define SHIFT 16 7359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#include "sad_mb_offset.h" 7459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 7559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#undef NUMBER 7659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#define NUMBER 1 7759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#undef SHIFT 7859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#define SHIFT 8 7959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#include "sad_mb_offset.h" 8059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 8159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 8259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong __inline int32 simd_sad_mb(UChar *ref, UChar *blk, Int dmin, Int lx) 8359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong { 8459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong int32 x4, x5, x6, x8, x9, x10, x11, x12, x14; 8559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 8659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x9 = 0x80808080; /* const. */ 8759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 88377b2ec9a2885f9b6405b07ba900a9e3f4349c38Kévin PETIT x8 = (uintptr_t)ref & 0x3; 8959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong if (x8 == 3) 9059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong goto SadMBOffset3; 9159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong if (x8 == 2) 9259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong goto SadMBOffset2; 9359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong if (x8 == 1) 9459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong goto SadMBOffset1; 9559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 9659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong// x5 = (x4<<8)-x4; /* x5 = x4*255; */ 9759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x4 = x5 = 0; 9859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 9959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x6 = 0xFFFF00FF; 10059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 10159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong ref -= lx; 10259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong blk -= 16; 10359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 10459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x8 = 16; 10559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 10659f566c4ec3dfc097ad8163523e522280b27e5c3James DongLOOP_SAD0: 10759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /****** process 8 pixels ******/ 10859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = *((uint32*)(ref += lx)); 10959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = *((uint32*)(ref + 4)); 11059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x12 = *((uint32*)(blk += 16)); 11159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x14 = *((uint32*)(blk + 4)); 11259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 11359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /* process x11 & x14 */ 11459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = sad_4pixel(x11, x14, x9); 11559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 11659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /* process x12 & x10 */ 11759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = sad_4pixel(x10, x12, x9); 11859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 11959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x5 = x5 + x10; /* accumulate low bytes */ 12059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ 12159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ 12259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x5 = x5 + x11; /* accumulate low bytes */ 12359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ 12459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ 12559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 12659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /****** process 8 pixels ******/ 12759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = *((uint32*)(ref + 8)); 12859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = *((uint32*)(ref + 12)); 12959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x12 = *((uint32*)(blk + 8)); 13059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x14 = *((uint32*)(blk + 12)); 13159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 13259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /* process x11 & x14 */ 13359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = sad_4pixel(x11, x14, x9); 13459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 13559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /* process x12 & x10 */ 13659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = sad_4pixel(x10, x12, x9); 13759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 13859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x5 = x5 + x10; /* accumulate low bytes */ 13959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ 14059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ 14159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x5 = x5 + x11; /* accumulate low bytes */ 14259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ 14359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ 14459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 14559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /****************/ 14659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = x5 - (x4 << 8); /* extract low bytes */ 14759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = x10 + x4; /* add with high bytes */ 14859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = x10 + (x10 << 16); /* add with lower half word */ 14959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 15059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong if (((uint32)x10 >> 16) <= (uint32)dmin) /* compare with dmin */ 15159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong { 15259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong if (--x8) 15359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong { 15459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong goto LOOP_SAD0; 15559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong } 15659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 15759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong } 15859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 15959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong return ((uint32)x10 >> 16); 16059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 16159f566c4ec3dfc097ad8163523e522280b27e5c3James DongSadMBOffset3: 16259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 16359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong return sad_mb_offset3(ref, blk, lx, dmin); 16459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 16559f566c4ec3dfc097ad8163523e522280b27e5c3James DongSadMBOffset2: 16659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 16759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong return sad_mb_offset2(ref, blk, lx, dmin); 16859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 16959f566c4ec3dfc097ad8163523e522280b27e5c3James DongSadMBOffset1: 17059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 17159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong return sad_mb_offset1(ref, blk, lx, dmin); 17259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 17359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong } 17459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 17559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif defined(__CC_ARM) /* only work with arm v5 */ 17659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 17759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong __inline int32 SUB_SAD(int32 sad, int32 tmp, int32 tmp2) 17859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong { 17959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong __asm 18059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong { 18159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong rsbs tmp, tmp, tmp2 ; 18259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong rsbmi tmp, tmp, #0 ; 18359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong add sad, sad, tmp ; 18459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong } 18559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 18659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong return sad; 18759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong } 18859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 18959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong __inline int32 sad_4pixel(int32 src1, int32 src2, int32 mask) 19059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong { 19159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong int32 x7; 19259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 19359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong __asm 19459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong { 19559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong EOR x7, src2, src1; /* check odd/even combination */ 19659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong SUBS src1, src2, src1; 19759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong EOR x7, x7, src1; 19859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong AND x7, mask, x7, lsr #1; 19959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong ORRCC x7, x7, #0x80000000; 20059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong RSB x7, x7, x7, lsl #8; 20159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong ADD src1, src1, x7, asr #7; /* add 0xFF to the negative byte, add back carry */ 20259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong EOR src1, src1, x7, asr #7; /* take absolute value of negative byte */ 20359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong } 20459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 20559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong return src1; 20659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong } 20759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 20859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong __inline int32 sad_4pixelN(int32 src1, int32 src2, int32 mask) 20959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong { 21059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong int32 x7; 21159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 21259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong __asm 21359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong { 21459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong EOR x7, src2, src1; /* check odd/even combination */ 21559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong ADDS src1, src2, src1; 21659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong EOR x7, x7, src1; /* only odd bytes need to add carry */ 21759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong ANDS x7, mask, x7, rrx; 21859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong RSB x7, x7, x7, lsl #8; 21959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong SUB src1, src1, x7, asr #7; /* add 0xFF to the negative byte, add back carry */ 22059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong EOR src1, src1, x7, asr #7; /* take absolute value of negative byte */ 22159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong } 22259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 22359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong return src1; 22459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong } 22559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 22659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#define sum_accumulate __asm{ SBC x5, x5, x10; /* accumulate low bytes */ \ 22759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong BIC x10, x6, x10; /* x10 & 0xFF00FF00 */ \ 22859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong ADD x4, x4, x10,lsr #8; /* accumulate high bytes */ \ 22959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong SBC x5, x5, x11; /* accumulate low bytes */ \ 23059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong BIC x11, x6, x11; /* x11 & 0xFF00FF00 */ \ 23159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong ADD x4, x4, x11,lsr #8; } /* accumulate high bytes */ 23259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 23359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 23459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#define NUMBER 3 23559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#define SHIFT 24 23659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#define INC_X8 0x08000001 23759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 23859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#include "sad_mb_offset.h" 23959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 24059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#undef NUMBER 24159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#define NUMBER 2 24259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#undef SHIFT 24359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#define SHIFT 16 24459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#undef INC_X8 24559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#define INC_X8 0x10000001 24659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#include "sad_mb_offset.h" 24759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 24859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#undef NUMBER 24959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#define NUMBER 1 25059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#undef SHIFT 25159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#define SHIFT 8 25259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#undef INC_X8 25359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#define INC_X8 0x08000001 25459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#include "sad_mb_offset.h" 25559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 25659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 25759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong __inline int32 simd_sad_mb(UChar *ref, UChar *blk, Int dmin, Int lx) 25859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong { 25959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong int32 x4, x5, x6, x8, x9, x10, x11, x12, x14; 26059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 26159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x9 = 0x80808080; /* const. */ 26259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x4 = x5 = 0; 26359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 26459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong __asm 26559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong { 26659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong MOVS x8, ref, lsl #31 ; 26759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong BHI SadMBOffset3; 26859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong BCS SadMBOffset2; 26959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong BMI SadMBOffset1; 27059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 27159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong MVN x6, #0xFF00; 27259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong } 27359f566c4ec3dfc097ad8163523e522280b27e5c3James DongLOOP_SAD0: 27459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /****** process 8 pixels ******/ 27559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = *((int32*)(ref + 12)); 27659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = *((int32*)(ref + 8)); 27759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x14 = *((int32*)(blk + 12)); 27859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x12 = *((int32*)(blk + 8)); 27959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 28059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /* process x11 & x14 */ 28159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = sad_4pixel(x11, x14, x9); 28259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 28359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /* process x12 & x10 */ 28459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = sad_4pixel(x10, x12, x9); 28559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 28659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x5 = x5 + x10; /* accumulate low bytes */ 28759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ 28859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ 28959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x5 = x5 + x11; /* accumulate low bytes */ 29059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ 29159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ 29259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 29359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong __asm 29459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong { 29559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /****** process 8 pixels ******/ 29659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong LDR x11, [ref, #4]; 29759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong LDR x10, [ref], lx ; 29859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong LDR x14, [blk, #4]; 29959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong LDR x12, [blk], #16 ; 30059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong } 30159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 30259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /* process x11 & x14 */ 30359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = sad_4pixel(x11, x14, x9); 30459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 30559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /* process x12 & x10 */ 30659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = sad_4pixel(x10, x12, x9); 30759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 30859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x5 = x5 + x10; /* accumulate low bytes */ 30959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ 31059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ 31159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x5 = x5 + x11; /* accumulate low bytes */ 31259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ 31359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ 31459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 31559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /****************/ 31659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = x5 - (x4 << 8); /* extract low bytes */ 31759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = x10 + x4; /* add with high bytes */ 31859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = x10 + (x10 << 16); /* add with lower half word */ 31959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 32059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong __asm 32159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong { 32259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /****************/ 32359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong RSBS x11, dmin, x10, lsr #16; 32459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong ADDLSS x8, x8, #0x10000001; 32559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong BLS LOOP_SAD0; 32659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong } 32759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 32859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong return ((uint32)x10 >> 16); 32959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 33059f566c4ec3dfc097ad8163523e522280b27e5c3James DongSadMBOffset3: 33159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 33259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong return sad_mb_offset3(ref, blk, lx, dmin, x8); 33359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 33459f566c4ec3dfc097ad8163523e522280b27e5c3James DongSadMBOffset2: 33559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 33659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong return sad_mb_offset2(ref, blk, lx, dmin, x8); 33759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 33859f566c4ec3dfc097ad8163523e522280b27e5c3James DongSadMBOffset1: 33959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 34059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong return sad_mb_offset1(ref, blk, lx, dmin, x8); 34159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong } 34259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 34359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 34459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif ( defined(PV_ARM_GCC_V5) || defined(PV_ARM_GCC_V4) ) /* ARM GNU COMPILER */ 34559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 34659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong __inline int32 SUB_SAD(int32 sad, int32 tmp, int32 tmp2) 34759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong { 34859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong register int32 out; 34959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong register int32 temp1; 35059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong register int32 ss = sad; 35159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong register int32 tt = tmp; 35259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong register int32 uu = tmp2; 35359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 35459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong asm volatile("rsbs %1, %4, %3\n\t" 35559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "rsbmi %1, %1, #0\n\t" 35659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "add %0, %2, %1" 35759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong : "=&r"(out), 35859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "=&r"(temp1) 35959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong : "r"(ss), 36059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "r"(tt), 36159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "r"(uu)); 36259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong return out; 36359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong } 36459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 36559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong __inline int32 sad_4pixel(int32 src1, int32 src2, int32 mask) 36659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{ 36759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong register int32 out; 36859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong register int32 temp1; 36959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong register int32 s1 = src1; 37059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong register int32 s2 = src2; 37159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong register int32 mm = mask; 37259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 37359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong asm volatile("eor %0, %3, %2\n\t" 37459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "subs %1, %3, %2\n\t" 37559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "eor %0, %0, %1\n\t" 37659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "and %0, %4, %0, lsr #1\n\t" 37759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "orrcc %0, %0, #0x80000000\n\t" 37859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "rsb %0, %0, %0, lsl #8\n\t" 37959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "add %1, %1, %0, asr #7\n\t" 38059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "eor %1, %1, %0, asr #7" 38159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong : "=&r"(out), 38259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "=&r"(temp1) 38359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong : "r"(s1), 38459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "r"(s2), 38559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "r"(mm)); 38659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 38759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong return temp1; 38859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong } 38959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 39059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong __inline int32 sad_4pixelN(int32 src1, int32 src2, int32 mask) 39159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{ 39259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong register int32 out; 39359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong register int32 temp1; 39459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong register int32 s1 = src1; 39559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong register int32 s2 = src2; 39659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong register int32 mm = mask; 39759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 39859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong asm volatile("eor %1, %3, %2\n\t" 39959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "adds %0, %3, %2\n\t" 40059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "eor %1, %1, %0\n\t" 40159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "ands %1, %4, %1,rrx\n\t" 40259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "rsb %1, %1, %1, lsl #8\n\t" 40359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "sub %0, %0, %1, asr #7\n\t" 40459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "eor %0, %0, %1, asr #7" 40559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong : "=&r"(out), 40659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "=&r"(temp1) 40759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong : "r"(s1), 40859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "r"(s2), 40959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "r"(mm)); 41059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 41159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong return (out); 41259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong } 41359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 41459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#define sum_accumulate asm volatile("sbc %0, %0, %1\n\t" \ 41559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "bic %1, %4, %1\n\t" \ 41659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "add %2, %2, %1, lsr #8\n\t" \ 41759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "sbc %0, %0, %3\n\t" \ 41859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "bic %3, %4, %3\n\t" \ 41959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "add %2, %2, %3, lsr #8" \ 42059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong :"+r"(x5), "+r"(x10), "+r"(x4), "+r"(x11) \ 42159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong :"r"(x6)); 42259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 42359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#define NUMBER 3 42459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#define SHIFT 24 42559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#define INC_X8 0x08000001 42659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 42759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#include "sad_mb_offset.h" 42859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 42959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#undef NUMBER 43059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#define NUMBER 2 43159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#undef SHIFT 43259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#define SHIFT 16 43359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#undef INC_X8 43459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#define INC_X8 0x10000001 43559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#include "sad_mb_offset.h" 43659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 43759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#undef NUMBER 43859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#define NUMBER 1 43959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#undef SHIFT 44059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#define SHIFT 8 44159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#undef INC_X8 44259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#define INC_X8 0x08000001 44359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#include "sad_mb_offset.h" 44459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 44559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 44659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong __inline int32 simd_sad_mb(UChar *ref, UChar *blk, Int dmin, Int lx) 44759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{ 44859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong int32 x4, x5, x6, x8, x9, x10, x11, x12, x14; 44959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 45059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x9 = 0x80808080; /* const. */ 45159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x4 = x5 = 0; 45259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 45359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x8 = (uint32)ref & 0x3; 45459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong if (x8 == 3) 45559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong goto SadMBOffset3; 45659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong if (x8 == 2) 45759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong goto SadMBOffset2; 45859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong if (x8 == 1) 45959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong goto SadMBOffset1; 46059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 46159f566c4ec3dfc097ad8163523e522280b27e5c3James Dongasm volatile("mvn %0, #0xFF00": "=r"(x6)); 46259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 46359f566c4ec3dfc097ad8163523e522280b27e5c3James DongLOOP_SAD0: 46459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /****** process 8 pixels ******/ 46559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = *((int32*)(ref + 12)); 46659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = *((int32*)(ref + 8)); 46759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x14 = *((int32*)(blk + 12)); 46859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x12 = *((int32*)(blk + 8)); 46959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 47059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /* process x11 & x14 */ 47159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = sad_4pixel(x11, x14, x9); 47259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 47359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /* process x12 & x10 */ 47459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = sad_4pixel(x10, x12, x9); 47559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 47659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x5 = x5 + x10; /* accumulate low bytes */ 47759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ 47859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ 47959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x5 = x5 + x11; /* accumulate low bytes */ 48059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ 48159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ 48259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 48359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong asm volatile("ldr %0, [%4, #4]\n\t" 48459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "ldr %1, [%4], %6\n\t" 48559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "ldr %2, [%5, #4]\n\t" 48659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "ldr %3, [%5], #16" 48759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong : "=r"(x11), "=r"(x10), "=r"(x14), "=r"(x12), "+r"(ref), "+r"(blk) 48859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong : "r"(lx)); 48959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 49059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /* process x11 & x14 */ 49159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = sad_4pixel(x11, x14, x9); 49259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 49359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /* process x12 & x10 */ 49459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = sad_4pixel(x10, x12, x9); 49559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 49659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x5 = x5 + x10; /* accumulate low bytes */ 49759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ 49859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ 49959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x5 = x5 + x11; /* accumulate low bytes */ 50059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ 50159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ 50259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 50359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /****************/ 50459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = x5 - (x4 << 8); /* extract low bytes */ 50559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = x10 + x4; /* add with high bytes */ 50659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = x10 + (x10 << 16); /* add with lower half word */ 50759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 50859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong if (((uint32)x10 >> 16) <= (uint32)dmin) /* compare with dmin */ 50959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong { 51059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong if (--x8) 51159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong { 51259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong goto LOOP_SAD0; 51359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong } 51459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 51559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong } 51659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 51759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong return ((uint32)x10 >> 16); 51859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 51959f566c4ec3dfc097ad8163523e522280b27e5c3James DongSadMBOffset3: 52059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 52159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong return sad_mb_offset3(ref, blk, lx, dmin); 52259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 52359f566c4ec3dfc097ad8163523e522280b27e5c3James DongSadMBOffset2: 52459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 52559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong return sad_mb_offset2(ref, blk, lx, dmin); 52659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 52759f566c4ec3dfc097ad8163523e522280b27e5c3James DongSadMBOffset1: 52859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 52959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong return sad_mb_offset1(ref, blk, lx, dmin); 53059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong } 53159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 53259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#endif // OS 53359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 53459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#ifdef __cplusplus 53559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong} 53659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#endif 53759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 53859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#endif // _SAD_INLINE_H_ 53959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 540