159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/* ------------------------------------------------------------------ 259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * Copyright (C) 1998-2009 PacketVideo 359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * 459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * Licensed under the Apache License, Version 2.0 (the "License"); 559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * you may not use this file except in compliance with the License. 659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * You may obtain a copy of the License at 759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * 859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * http://www.apache.org/licenses/LICENSE-2.0 959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * 1059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * Unless required by applicable law or agreed to in writing, software 1159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * distributed under the License is distributed on an "AS IS" BASIS, 1259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 1359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * express or implied. 1459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * See the License for the specific language governing permissions 1559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * and limitations under the License. 1659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * ------------------------------------------------------------------- 1759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong */ 1859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/*********************************************************************************/ 1959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/* Filename: sad_mb_offset.h */ 2059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/* Description: Implementation for in-line functions used in dct.cpp */ 2159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/* Modified: */ 2259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/*********************************************************************************/ 2359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 2459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#if !defined(PV_ARM_GCC_V4) && !defined(PV_ARM_GCC_V5) /* ARM GNU COMPILER */ 2559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 2659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#if (NUMBER==3) 2759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong__inline int32 sad_mb_offset3(UChar *ref, UChar *blk, Int lx, Int dmin) 2859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==2) 2959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong__inline int32 sad_mb_offset2(UChar *ref, UChar *blk, Int lx, Int dmin) 3059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==1) 3159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong__inline int32 sad_mb_offset1(UChar *ref, UChar *blk, Int lx, Int dmin) 3259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#endif 3359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{ 3459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong int32 x4, x5, x6, x8, x9, x10, x11, x12, x14; 3559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 3659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong // x5 = (x4<<8) - x4; 3759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x4 = x5 = 0; 3859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x6 = 0xFFFF00FF; 3959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x9 = 0x80808080; /* const. */ 4059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong ref -= NUMBER; /* bic ref, ref, #3 */ 4159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong ref -= lx; 4259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong blk -= 16; 4359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x8 = 16; 4459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 4559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#if (NUMBER==3) 4659f566c4ec3dfc097ad8163523e522280b27e5c3James DongLOOP_SAD3: 4759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==2) 4859f566c4ec3dfc097ad8163523e522280b27e5c3James DongLOOP_SAD2: 4959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==1) 5059f566c4ec3dfc097ad8163523e522280b27e5c3James DongLOOP_SAD1: 5159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#endif 5259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /****** process 8 pixels ******/ 5359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = *((uint32*)(ref += lx)); /* D C B A */ 5459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = *((uint32*)(ref + 4)); /* H G F E */ 5559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x12 = *((uint32*)(ref + 8)); /* L K J I */ 5659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 5759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = ((uint32)x10 >> SHIFT); /* 0 0 0 D */ 5859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = x10 | (x11 << (32 - SHIFT)); /* G F E D */ 5959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = ((uint32)x11 >> SHIFT); /* 0 0 0 H */ 6059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = x11 | (x12 << (32 - SHIFT)); /* K J I H */ 6159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 6259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x12 = *((uint32*)(blk += 16)); 6359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x14 = *((uint32*)(blk + 4)); 6459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 6559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /* process x11 & x14 */ 6659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = sad_4pixel(x11, x14, x9); 6759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 6859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /* process x12 & x10 */ 6959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = sad_4pixel(x10, x12, x9); 7059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 7159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x5 = x5 + x10; /* accumulate low bytes */ 7259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ 7359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ 7459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x5 = x5 + x11; /* accumulate low bytes */ 7559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ 7659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ 7759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 7859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /****** process 8 pixels ******/ 7959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = *((uint32*)(ref + 8)); /* D C B A */ 8059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = *((uint32*)(ref + 12)); /* H G F E */ 8159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x12 = *((uint32*)(ref + 16)); /* L K J I */ 8259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 8359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = ((uint32)x10 >> SHIFT); /* mvn x10, x10, lsr #24 = 0xFF 0xFF 0xFF ~D */ 8459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = x10 | (x11 << (32 - SHIFT)); /* bic x10, x10, x11, lsl #8 = ~G ~F ~E ~D */ 8559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = ((uint32)x11 >> SHIFT); /* 0xFF 0xFF 0xFF ~H */ 8659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = x11 | (x12 << (32 - SHIFT)); /* ~K ~J ~I ~H */ 8759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 8859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x12 = *((uint32*)(blk + 8)); 8959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x14 = *((uint32*)(blk + 12)); 9059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 9159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /* process x11 & x14 */ 9259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = sad_4pixel(x11, x14, x9); 9359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 9459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /* process x12 & x10 */ 9559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = sad_4pixel(x10, x12, x9); 9659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 9759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x5 = x5 + x10; /* accumulate low bytes */ 9859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ 9959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ 10059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x5 = x5 + x11; /* accumulate low bytes */ 10159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ 10259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ 10359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 10459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /****************/ 10559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = x5 - (x4 << 8); /* extract low bytes */ 10659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = x10 + x4; /* add with high bytes */ 10759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = x10 + (x10 << 16); /* add with lower half word */ 10859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 10959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong if (((uint32)x10 >> 16) <= (uint32)dmin) /* compare with dmin */ 11059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong { 11159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong if (--x8) 11259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong { 11359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#if (NUMBER==3) 11459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong goto LOOP_SAD3; 11559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==2) 11659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong goto LOOP_SAD2; 11759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==1) 11859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong goto LOOP_SAD1; 11959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#endif 12059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong } 12159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 12259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong } 12359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 12459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong return ((uint32)x10 >> 16); 12559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong} 12659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 12759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif defined(__CC_ARM) /* only work with arm v5 */ 12859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 12959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#if (NUMBER==3) 13059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong__inline int32 sad_mb_offset3(UChar *ref, UChar *blk, Int lx, Int dmin, int32 x8) 13159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==2) 13259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong__inline int32 sad_mb_offset2(UChar *ref, UChar *blk, Int lx, Int dmin, int32 x8) 13359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==1) 13459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong__inline int32 sad_mb_offset1(UChar *ref, UChar *blk, Int lx, Int dmin, int32 x8) 13559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#endif 13659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{ 13759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong int32 x4, x5, x6, x9, x10, x11, x12, x14; 13859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 13959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x9 = 0x80808080; /* const. */ 14059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x4 = x5 = 0; 14159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 14259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong __asm{ 14359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong MVN x6, #0xff0000; 14459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong BIC ref, ref, #3; 14559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 14659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#if (NUMBER==3) 14759f566c4ec3dfc097ad8163523e522280b27e5c3James DongLOOP_SAD3: 14859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==2) 14959f566c4ec3dfc097ad8163523e522280b27e5c3James DongLOOP_SAD2: 15059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==1) 15159f566c4ec3dfc097ad8163523e522280b27e5c3James DongLOOP_SAD1: 15259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#endif 15359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong } 15459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /****** process 8 pixels ******/ 15559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = *((int32*)(ref + 12)); 15659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x12 = *((int32*)(ref + 16)); 15759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = *((int32*)(ref + 8)); 15859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x14 = *((int32*)(blk + 12)); 15959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 16059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong __asm{ 16159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong MVN x10, x10, lsr #SHIFT; 16259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong BIC x10, x10, x11, lsl #(32-SHIFT); 16359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong MVN x11, x11, lsr #SHIFT; 16459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong BIC x11, x11, x12, lsl #(32-SHIFT); 16559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 16659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong LDR x12, [blk, #8]; 16759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong } 16859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 16959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /* process x11 & x14 */ 17059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = sad_4pixelN(x11, x14, x9); 17159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 17259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /* process x12 & x10 */ 17359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = sad_4pixelN(x10, x12, x9); 17459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 17559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong sum_accumulate; 17659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 17759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong __asm{ 17859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /****** process 8 pixels ******/ 17959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong LDR x11, [ref, #4]; 18059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong LDR x12, [ref, #8]; 18159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong LDR x10, [ref], lx ; 18259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong LDR x14, [blk, #4]; 18359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 18459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong MVN x10, x10, lsr #SHIFT; 18559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong BIC x10, x10, x11, lsl #(32-SHIFT); 18659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong MVN x11, x11, lsr #SHIFT; 18759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong BIC x11, x11, x12, lsl #(32-SHIFT); 18859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 18959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong LDR x12, [blk], #16; 19059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong } 19159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 19259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /* process x11 & x14 */ 19359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = sad_4pixelN(x11, x14, x9); 19459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 19559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /* process x12 & x10 */ 19659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = sad_4pixelN(x10, x12, x9); 19759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 19859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong sum_accumulate; 19959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 20059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /****************/ 20159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = x5 - (x4 << 8); /* extract low bytes */ 20259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = x10 + x4; /* add with high bytes */ 20359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = x10 + (x10 << 16); /* add with lower half word */ 20459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 20559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong __asm{ 20659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong RSBS x11, dmin, x10, lsr #16 20759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong ADDLSS x8, x8, #INC_X8 20859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#if (NUMBER==3) 20959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong BLS LOOP_SAD3; 21059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==2) 21159f566c4ec3dfc097ad8163523e522280b27e5c3James DongBLS LOOP_SAD2; 21259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==1) 21359f566c4ec3dfc097ad8163523e522280b27e5c3James DongBLS LOOP_SAD1; 21459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#endif 21559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong } 21659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 21759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong return ((uint32)x10 >> 16); 21859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong} 21959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 22059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif ( defined(PV_ARM_GCC_V5) || defined(PV_ARM_GCC_V4) ) /* ARM GNU COMPILER */ 22159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 22259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#if (NUMBER==3) 22359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong__inline int32 sad_mb_offset3(UChar *ref, UChar *blk, Int lx, Int dmin) 22459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==2) 22559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong__inline int32 sad_mb_offset2(UChar *ref, UChar *blk, Int lx, Int dmin) 22659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==1) 22759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong__inline int32 sad_mb_offset1(UChar *ref, UChar *blk, Int lx, Int dmin) 22859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#endif 22959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{ 23059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong int32 x4, x5, x6, x8, x9, x10, x11, x12, x14; 23159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 23259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong // x5 = (x4<<8) - x4; 23359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x4 = x5 = 0; 23459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x6 = 0xFFFF00FF; 23559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x9 = 0x80808080; /* const. */ 23659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong ref -= NUMBER; /* bic ref, ref, #3 */ 23759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong ref -= lx; 23859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x8 = 16; 23959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 24059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#if (NUMBER==3) 24159f566c4ec3dfc097ad8163523e522280b27e5c3James DongLOOP_SAD3: 24259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==2) 24359f566c4ec3dfc097ad8163523e522280b27e5c3James DongLOOP_SAD2: 24459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==1) 24559f566c4ec3dfc097ad8163523e522280b27e5c3James DongLOOP_SAD1: 24659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#endif 24759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /****** process 8 pixels ******/ 24859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = *((uint32*)(ref += lx)); /* D C B A */ 24959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = *((uint32*)(ref + 4)); /* H G F E */ 25059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x12 = *((uint32*)(ref + 8)); /* L K J I */ 25159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 25259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong int32 shift = SHIFT; 25359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong int32 shift2 = 32 - SHIFT; 25459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong asm volatile("ldr %3, [%4, #4]\n\t" 25559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "mvn %0, %0, lsr %5\n\t" 25659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "bic %0, %0, %1, lsl %6\n\t" 25759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "mvn %1, %1, lsr %5\n\t" 25859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "bic %1, %1, %2, lsl %6\n\t" 25959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "ldr %2, [%4, #8]" 26059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong : "+r"(x10), "+r"(x11), "+r"(x12), "=r"(x14) 26159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong : "r"(blk), "r"(shift), "r"(shift2)); 26259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 26359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /* process x11 & x14 */ 26459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = sad_4pixel(x11, x14, x9); 26559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 26659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /* process x12 & x10 */ 26759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = sad_4pixel(x10, x12, x9); 26859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 26959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong sum_accumulate; 27059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 27159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /****** process 8 pixels ******/ 27259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = *((uint32*)(ref + 8)); /* D C B A */ 27359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = *((uint32*)(ref + 12)); /* H G F E */ 27459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x12 = *((uint32*)(ref + 16)); /* L K J I */ 27559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 27659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong asm volatile("ldr %3, [%4, #4]\n\t" 27759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "mvn %0, %0, lsr %5\n\t" 27859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "bic %0, %0, %1, lsl %6\n\t" 27959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "mvn %1, %1, lsr %5\n\t" 28059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "bic %1, %1, %2, lsl %6\n\t" 28159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong "ldr %2, [%4, #8]" 28259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong : "+r"(x10), "+r"(x11), "+r"(x12), "=r"(x14) 28359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong : "r"(blk), "r"(shift), "r"(shift2)); 28459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 28559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /* process x11 & x14 */ 28659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x11 = sad_4pixel(x11, x14, x9); 28759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 28859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /* process x12 & x10 */ 28959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = sad_4pixel(x10, x12, x9); 29059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 29159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong sum_accumulate; 29259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 29359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong /****************/ 29459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = x5 - (x4 << 8); /* extract low bytes */ 29559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = x10 + x4; /* add with high bytes */ 29659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong x10 = x10 + (x10 << 16); /* add with lower half word */ 29759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 29859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong if (((uint32)x10 >> 16) <= (uint32)dmin) /* compare with dmin */ 29959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong { 30059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong if (--x8) 30159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong { 30259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#if (NUMBER==3) 30359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong goto LOOP_SAD3; 30459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==2) 30559f566c4ec3dfc097ad8163523e522280b27e5c3James Donggoto LOOP_SAD2; 30659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#elif (NUMBER==1) 30759f566c4ec3dfc097ad8163523e522280b27e5c3James Donggoto LOOP_SAD1; 30859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#endif 30959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong } 31059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 31159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong } 31259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 31359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong return ((uint32)x10 >> 16); 31459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong} 31559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 31659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#endif 31759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong 318