sad_mb_offset.h revision f5af6314db25ff3bef9bd2eeba201bc6cc60805d
129a84457aed4c45bc900998b5e11c03023264208James Dong/* ------------------------------------------------------------------ 229a84457aed4c45bc900998b5e11c03023264208James Dong * Copyright (C) 1998-2009 PacketVideo 329a84457aed4c45bc900998b5e11c03023264208James Dong * 429a84457aed4c45bc900998b5e11c03023264208James Dong * Licensed under the Apache License, Version 2.0 (the "License"); 529a84457aed4c45bc900998b5e11c03023264208James Dong * you may not use this file except in compliance with the License. 629a84457aed4c45bc900998b5e11c03023264208James Dong * You may obtain a copy of the License at 729a84457aed4c45bc900998b5e11c03023264208James Dong * 829a84457aed4c45bc900998b5e11c03023264208James Dong * http://www.apache.org/licenses/LICENSE-2.0 929a84457aed4c45bc900998b5e11c03023264208James Dong * 1029a84457aed4c45bc900998b5e11c03023264208James Dong * Unless required by applicable law or agreed to in writing, software 1129a84457aed4c45bc900998b5e11c03023264208James Dong * distributed under the License is distributed on an "AS IS" BASIS, 1229a84457aed4c45bc900998b5e11c03023264208James Dong * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 1329a84457aed4c45bc900998b5e11c03023264208James Dong * express or implied. 1429a84457aed4c45bc900998b5e11c03023264208James Dong * See the License for the specific language governing permissions 1529a84457aed4c45bc900998b5e11c03023264208James Dong * and limitations under the License. 1629a84457aed4c45bc900998b5e11c03023264208James Dong * ------------------------------------------------------------------- 1729a84457aed4c45bc900998b5e11c03023264208James Dong */ 1829a84457aed4c45bc900998b5e11c03023264208James Dong 19f5af6314db25ff3bef9bd2eeba201bc6cc60805dMartin Storsjo/* Intentionally not using the gcc asm version, since it (if fixed so 20f5af6314db25ff3bef9bd2eeba201bc6cc60805dMartin Storsjo * as to not crash - the current register constraints are faulty) is 21f5af6314db25ff3bef9bd2eeba201bc6cc60805dMartin Storsjo * slightly slower than the plain C version on modern GCC versions. */ 22f5af6314db25ff3bef9bd2eeba201bc6cc60805dMartin Storsjo#if !defined(__CC_ARM) /* Generic C version */ 2329a84457aed4c45bc900998b5e11c03023264208James Dong 2429a84457aed4c45bc900998b5e11c03023264208James Dong#if (NUMBER==3) 2529a84457aed4c45bc900998b5e11c03023264208James Dong__inline int32 sad_mb_offset3(uint8 *ref, uint8 *blk, int lx, int dmin) 2629a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==2) 2729a84457aed4c45bc900998b5e11c03023264208James Dong__inline int32 sad_mb_offset2(uint8 *ref, uint8 *blk, int lx, int dmin) 2829a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==1) 2929a84457aed4c45bc900998b5e11c03023264208James Dong__inline int32 sad_mb_offset1(uint8 *ref, uint8 *blk, int lx, int dmin) 3029a84457aed4c45bc900998b5e11c03023264208James Dong#endif 3129a84457aed4c45bc900998b5e11c03023264208James Dong{ 3229a84457aed4c45bc900998b5e11c03023264208James Dong int32 x4, x5, x6, x8, x9, x10, x11, x12, x14; 3329a84457aed4c45bc900998b5e11c03023264208James Dong 3429a84457aed4c45bc900998b5e11c03023264208James Dong // x5 = (x4<<8) - x4; 3529a84457aed4c45bc900998b5e11c03023264208James Dong x4 = x5 = 0; 3629a84457aed4c45bc900998b5e11c03023264208James Dong x6 = 0xFFFF00FF; 3729a84457aed4c45bc900998b5e11c03023264208James Dong x9 = 0x80808080; /* const. */ 3829a84457aed4c45bc900998b5e11c03023264208James Dong ref -= NUMBER; /* bic ref, ref, #3 */ 3929a84457aed4c45bc900998b5e11c03023264208James Dong ref -= lx; 4029a84457aed4c45bc900998b5e11c03023264208James Dong blk -= 16; 4129a84457aed4c45bc900998b5e11c03023264208James Dong x8 = 16; 4229a84457aed4c45bc900998b5e11c03023264208James Dong 4329a84457aed4c45bc900998b5e11c03023264208James Dong#if (NUMBER==3) 4429a84457aed4c45bc900998b5e11c03023264208James DongLOOP_SAD3: 4529a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==2) 4629a84457aed4c45bc900998b5e11c03023264208James DongLOOP_SAD2: 4729a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==1) 4829a84457aed4c45bc900998b5e11c03023264208James DongLOOP_SAD1: 4929a84457aed4c45bc900998b5e11c03023264208James Dong#endif 5029a84457aed4c45bc900998b5e11c03023264208James Dong /****** process 8 pixels ******/ 5129a84457aed4c45bc900998b5e11c03023264208James Dong x10 = *((uint32*)(ref += lx)); /* D C B A */ 5229a84457aed4c45bc900998b5e11c03023264208James Dong x11 = *((uint32*)(ref + 4)); /* H G F E */ 5329a84457aed4c45bc900998b5e11c03023264208James Dong x12 = *((uint32*)(ref + 8)); /* L K J I */ 5429a84457aed4c45bc900998b5e11c03023264208James Dong 5529a84457aed4c45bc900998b5e11c03023264208James Dong x10 = ((uint32)x10 >> SHIFT); /* 0 0 0 D */ 5629a84457aed4c45bc900998b5e11c03023264208James Dong x10 = x10 | (x11 << (32 - SHIFT)); /* G F E D */ 5729a84457aed4c45bc900998b5e11c03023264208James Dong x11 = ((uint32)x11 >> SHIFT); /* 0 0 0 H */ 5829a84457aed4c45bc900998b5e11c03023264208James Dong x11 = x11 | (x12 << (32 - SHIFT)); /* K J I H */ 5929a84457aed4c45bc900998b5e11c03023264208James Dong 6029a84457aed4c45bc900998b5e11c03023264208James Dong x12 = *((uint32*)(blk += 16)); 6129a84457aed4c45bc900998b5e11c03023264208James Dong x14 = *((uint32*)(blk + 4)); 6229a84457aed4c45bc900998b5e11c03023264208James Dong 6329a84457aed4c45bc900998b5e11c03023264208James Dong /* process x11 & x14 */ 6429a84457aed4c45bc900998b5e11c03023264208James Dong x11 = sad_4pixel(x11, x14, x9); 6529a84457aed4c45bc900998b5e11c03023264208James Dong 6629a84457aed4c45bc900998b5e11c03023264208James Dong /* process x12 & x10 */ 6729a84457aed4c45bc900998b5e11c03023264208James Dong x10 = sad_4pixel(x10, x12, x9); 6829a84457aed4c45bc900998b5e11c03023264208James Dong 6929a84457aed4c45bc900998b5e11c03023264208James Dong x5 = x5 + x10; /* accumulate low bytes */ 7029a84457aed4c45bc900998b5e11c03023264208James Dong x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ 7129a84457aed4c45bc900998b5e11c03023264208James Dong x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ 7229a84457aed4c45bc900998b5e11c03023264208James Dong x5 = x5 + x11; /* accumulate low bytes */ 7329a84457aed4c45bc900998b5e11c03023264208James Dong x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ 7429a84457aed4c45bc900998b5e11c03023264208James Dong x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ 7529a84457aed4c45bc900998b5e11c03023264208James Dong 7629a84457aed4c45bc900998b5e11c03023264208James Dong /****** process 8 pixels ******/ 7729a84457aed4c45bc900998b5e11c03023264208James Dong x10 = *((uint32*)(ref + 8)); /* D C B A */ 7829a84457aed4c45bc900998b5e11c03023264208James Dong x11 = *((uint32*)(ref + 12)); /* H G F E */ 7929a84457aed4c45bc900998b5e11c03023264208James Dong x12 = *((uint32*)(ref + 16)); /* L K J I */ 8029a84457aed4c45bc900998b5e11c03023264208James Dong 8129a84457aed4c45bc900998b5e11c03023264208James Dong x10 = ((uint32)x10 >> SHIFT); /* mvn x10, x10, lsr #24 = 0xFF 0xFF 0xFF ~D */ 8229a84457aed4c45bc900998b5e11c03023264208James Dong x10 = x10 | (x11 << (32 - SHIFT)); /* bic x10, x10, x11, lsl #8 = ~G ~F ~E ~D */ 8329a84457aed4c45bc900998b5e11c03023264208James Dong x11 = ((uint32)x11 >> SHIFT); /* 0xFF 0xFF 0xFF ~H */ 8429a84457aed4c45bc900998b5e11c03023264208James Dong x11 = x11 | (x12 << (32 - SHIFT)); /* ~K ~J ~I ~H */ 8529a84457aed4c45bc900998b5e11c03023264208James Dong 8629a84457aed4c45bc900998b5e11c03023264208James Dong x12 = *((uint32*)(blk + 8)); 8729a84457aed4c45bc900998b5e11c03023264208James Dong x14 = *((uint32*)(blk + 12)); 8829a84457aed4c45bc900998b5e11c03023264208James Dong 8929a84457aed4c45bc900998b5e11c03023264208James Dong /* process x11 & x14 */ 9029a84457aed4c45bc900998b5e11c03023264208James Dong x11 = sad_4pixel(x11, x14, x9); 9129a84457aed4c45bc900998b5e11c03023264208James Dong 9229a84457aed4c45bc900998b5e11c03023264208James Dong /* process x12 & x10 */ 9329a84457aed4c45bc900998b5e11c03023264208James Dong x10 = sad_4pixel(x10, x12, x9); 9429a84457aed4c45bc900998b5e11c03023264208James Dong 9529a84457aed4c45bc900998b5e11c03023264208James Dong x5 = x5 + x10; /* accumulate low bytes */ 9629a84457aed4c45bc900998b5e11c03023264208James Dong x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ 9729a84457aed4c45bc900998b5e11c03023264208James Dong x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ 9829a84457aed4c45bc900998b5e11c03023264208James Dong x5 = x5 + x11; /* accumulate low bytes */ 9929a84457aed4c45bc900998b5e11c03023264208James Dong x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ 10029a84457aed4c45bc900998b5e11c03023264208James Dong x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ 10129a84457aed4c45bc900998b5e11c03023264208James Dong 10229a84457aed4c45bc900998b5e11c03023264208James Dong /****************/ 10329a84457aed4c45bc900998b5e11c03023264208James Dong x10 = x5 - (x4 << 8); /* extract low bytes */ 10429a84457aed4c45bc900998b5e11c03023264208James Dong x10 = x10 + x4; /* add with high bytes */ 10529a84457aed4c45bc900998b5e11c03023264208James Dong x10 = x10 + (x10 << 16); /* add with lower half word */ 10629a84457aed4c45bc900998b5e11c03023264208James Dong 10729a84457aed4c45bc900998b5e11c03023264208James Dong if ((int)((uint32)x10 >> 16) <= dmin) /* compare with dmin */ 10829a84457aed4c45bc900998b5e11c03023264208James Dong { 10929a84457aed4c45bc900998b5e11c03023264208James Dong if (--x8) 11029a84457aed4c45bc900998b5e11c03023264208James Dong { 11129a84457aed4c45bc900998b5e11c03023264208James Dong#if (NUMBER==3) 11229a84457aed4c45bc900998b5e11c03023264208James Dong goto LOOP_SAD3; 11329a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==2) 11429a84457aed4c45bc900998b5e11c03023264208James Dong goto LOOP_SAD2; 11529a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==1) 11629a84457aed4c45bc900998b5e11c03023264208James Dong goto LOOP_SAD1; 11729a84457aed4c45bc900998b5e11c03023264208James Dong#endif 11829a84457aed4c45bc900998b5e11c03023264208James Dong } 11929a84457aed4c45bc900998b5e11c03023264208James Dong 12029a84457aed4c45bc900998b5e11c03023264208James Dong } 12129a84457aed4c45bc900998b5e11c03023264208James Dong 12229a84457aed4c45bc900998b5e11c03023264208James Dong return ((uint32)x10 >> 16); 12329a84457aed4c45bc900998b5e11c03023264208James Dong} 12429a84457aed4c45bc900998b5e11c03023264208James Dong 12529a84457aed4c45bc900998b5e11c03023264208James Dong#elif defined(__CC_ARM) /* only work with arm v5 */ 12629a84457aed4c45bc900998b5e11c03023264208James Dong 12729a84457aed4c45bc900998b5e11c03023264208James Dong#if (NUMBER==3) 12829a84457aed4c45bc900998b5e11c03023264208James Dong__inline int32 sad_mb_offset3(uint8 *ref, uint8 *blk, int lx, int dmin, int32 x8) 12929a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==2) 13029a84457aed4c45bc900998b5e11c03023264208James Dong__inline int32 sad_mb_offset2(uint8 *ref, uint8 *blk, int lx, int dmin, int32 x8) 13129a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==1) 13229a84457aed4c45bc900998b5e11c03023264208James Dong__inline int32 sad_mb_offset1(uint8 *ref, uint8 *blk, int lx, int dmin, int32 x8) 13329a84457aed4c45bc900998b5e11c03023264208James Dong#endif 13429a84457aed4c45bc900998b5e11c03023264208James Dong{ 13529a84457aed4c45bc900998b5e11c03023264208James Dong int32 x4, x5, x6, x9, x10, x11, x12, x14; 13629a84457aed4c45bc900998b5e11c03023264208James Dong 13729a84457aed4c45bc900998b5e11c03023264208James Dong x9 = 0x80808080; /* const. */ 13829a84457aed4c45bc900998b5e11c03023264208James Dong x4 = x5 = 0; 13929a84457aed4c45bc900998b5e11c03023264208James Dong 14029a84457aed4c45bc900998b5e11c03023264208James Dong __asm{ 14129a84457aed4c45bc900998b5e11c03023264208James Dong MVN x6, #0xff0000; 14229a84457aed4c45bc900998b5e11c03023264208James Dong#if (NUMBER==3) 14329a84457aed4c45bc900998b5e11c03023264208James DongLOOP_SAD3: 14429a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==2) 14529a84457aed4c45bc900998b5e11c03023264208James DongLOOP_SAD2: 14629a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==1) 14729a84457aed4c45bc900998b5e11c03023264208James DongLOOP_SAD1: 14829a84457aed4c45bc900998b5e11c03023264208James Dong#endif 14929a84457aed4c45bc900998b5e11c03023264208James Dong BIC ref, ref, #3; 15029a84457aed4c45bc900998b5e11c03023264208James Dong } 15129a84457aed4c45bc900998b5e11c03023264208James Dong /****** process 8 pixels ******/ 15229a84457aed4c45bc900998b5e11c03023264208James Dong x11 = *((int32*)(ref + 12)); 15329a84457aed4c45bc900998b5e11c03023264208James Dong x12 = *((int32*)(ref + 16)); 15429a84457aed4c45bc900998b5e11c03023264208James Dong x10 = *((int32*)(ref + 8)); 15529a84457aed4c45bc900998b5e11c03023264208James Dong x14 = *((int32*)(blk + 12)); 15629a84457aed4c45bc900998b5e11c03023264208James Dong 15729a84457aed4c45bc900998b5e11c03023264208James Dong __asm{ 15829a84457aed4c45bc900998b5e11c03023264208James Dong MVN x10, x10, lsr #SHIFT; 15929a84457aed4c45bc900998b5e11c03023264208James Dong BIC x10, x10, x11, lsl #(32-SHIFT); 16029a84457aed4c45bc900998b5e11c03023264208James Dong MVN x11, x11, lsr #SHIFT; 16129a84457aed4c45bc900998b5e11c03023264208James Dong BIC x11, x11, x12, lsl #(32-SHIFT); 16229a84457aed4c45bc900998b5e11c03023264208James Dong 16329a84457aed4c45bc900998b5e11c03023264208James Dong LDR x12, [blk, #8]; 16429a84457aed4c45bc900998b5e11c03023264208James Dong } 16529a84457aed4c45bc900998b5e11c03023264208James Dong 16629a84457aed4c45bc900998b5e11c03023264208James Dong /* process x11 & x14 */ 16729a84457aed4c45bc900998b5e11c03023264208James Dong x11 = sad_4pixelN(x11, x14, x9); 16829a84457aed4c45bc900998b5e11c03023264208James Dong 16929a84457aed4c45bc900998b5e11c03023264208James Dong /* process x12 & x10 */ 17029a84457aed4c45bc900998b5e11c03023264208James Dong x10 = sad_4pixelN(x10, x12, x9); 17129a84457aed4c45bc900998b5e11c03023264208James Dong 17229a84457aed4c45bc900998b5e11c03023264208James Dong sum_accumulate; 17329a84457aed4c45bc900998b5e11c03023264208James Dong 17429a84457aed4c45bc900998b5e11c03023264208James Dong __asm{ 17529a84457aed4c45bc900998b5e11c03023264208James Dong /****** process 8 pixels ******/ 17629a84457aed4c45bc900998b5e11c03023264208James Dong LDR x11, [ref, #4]; 17729a84457aed4c45bc900998b5e11c03023264208James Dong LDR x12, [ref, #8]; 17829a84457aed4c45bc900998b5e11c03023264208James Dong LDR x10, [ref], lx ; 17929a84457aed4c45bc900998b5e11c03023264208James Dong LDR x14, [blk, #4]; 18029a84457aed4c45bc900998b5e11c03023264208James Dong 18129a84457aed4c45bc900998b5e11c03023264208James Dong MVN x10, x10, lsr #SHIFT; 18229a84457aed4c45bc900998b5e11c03023264208James Dong BIC x10, x10, x11, lsl #(32-SHIFT); 18329a84457aed4c45bc900998b5e11c03023264208James Dong MVN x11, x11, lsr #SHIFT; 18429a84457aed4c45bc900998b5e11c03023264208James Dong BIC x11, x11, x12, lsl #(32-SHIFT); 18529a84457aed4c45bc900998b5e11c03023264208James Dong 18629a84457aed4c45bc900998b5e11c03023264208James Dong LDR x12, [blk], #16; 18729a84457aed4c45bc900998b5e11c03023264208James Dong } 18829a84457aed4c45bc900998b5e11c03023264208James Dong 18929a84457aed4c45bc900998b5e11c03023264208James Dong /* process x11 & x14 */ 19029a84457aed4c45bc900998b5e11c03023264208James Dong x11 = sad_4pixelN(x11, x14, x9); 19129a84457aed4c45bc900998b5e11c03023264208James Dong 19229a84457aed4c45bc900998b5e11c03023264208James Dong /* process x12 & x10 */ 19329a84457aed4c45bc900998b5e11c03023264208James Dong x10 = sad_4pixelN(x10, x12, x9); 19429a84457aed4c45bc900998b5e11c03023264208James Dong 19529a84457aed4c45bc900998b5e11c03023264208James Dong sum_accumulate; 19629a84457aed4c45bc900998b5e11c03023264208James Dong 19729a84457aed4c45bc900998b5e11c03023264208James Dong /****************/ 19829a84457aed4c45bc900998b5e11c03023264208James Dong x10 = x5 - (x4 << 8); /* extract low bytes */ 19929a84457aed4c45bc900998b5e11c03023264208James Dong x10 = x10 + x4; /* add with high bytes */ 20029a84457aed4c45bc900998b5e11c03023264208James Dong x10 = x10 + (x10 << 16); /* add with lower half word */ 20129a84457aed4c45bc900998b5e11c03023264208James Dong 20229a84457aed4c45bc900998b5e11c03023264208James Dong __asm{ 20329a84457aed4c45bc900998b5e11c03023264208James Dong RSBS x11, dmin, x10, lsr #16 20429a84457aed4c45bc900998b5e11c03023264208James Dong ADDLSS x8, x8, #INC_X8 20529a84457aed4c45bc900998b5e11c03023264208James Dong#if (NUMBER==3) 20629a84457aed4c45bc900998b5e11c03023264208James Dong BLS LOOP_SAD3; 20729a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==2) 20829a84457aed4c45bc900998b5e11c03023264208James DongBLS LOOP_SAD2; 20929a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==1) 21029a84457aed4c45bc900998b5e11c03023264208James DongBLS LOOP_SAD1; 21129a84457aed4c45bc900998b5e11c03023264208James Dong#endif 21229a84457aed4c45bc900998b5e11c03023264208James Dong } 21329a84457aed4c45bc900998b5e11c03023264208James Dong 21429a84457aed4c45bc900998b5e11c03023264208James Dong return ((uint32)x10 >> 16); 21529a84457aed4c45bc900998b5e11c03023264208James Dong} 21629a84457aed4c45bc900998b5e11c03023264208James Dong 21729a84457aed4c45bc900998b5e11c03023264208James Dong#elif defined(__GNUC__) && defined(__arm__) /* ARM GNU COMPILER */ 21829a84457aed4c45bc900998b5e11c03023264208James Dong 21929a84457aed4c45bc900998b5e11c03023264208James Dong#if (NUMBER==3) 22029a84457aed4c45bc900998b5e11c03023264208James Dong__inline int32 sad_mb_offset3(uint8 *ref, uint8 *blk, int lx, int dmin) 22129a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==2) 22229a84457aed4c45bc900998b5e11c03023264208James Dong__inline int32 sad_mb_offset2(uint8 *ref, uint8 *blk, int lx, int dmin) 22329a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==1) 22429a84457aed4c45bc900998b5e11c03023264208James Dong__inline int32 sad_mb_offset1(uint8 *ref, uint8 *blk, int lx, int dmin) 22529a84457aed4c45bc900998b5e11c03023264208James Dong#endif 22629a84457aed4c45bc900998b5e11c03023264208James Dong{ 22729a84457aed4c45bc900998b5e11c03023264208James Dong int32 x4, x5, x6, x8, x9, x10, x11, x12, x14; 22829a84457aed4c45bc900998b5e11c03023264208James Dong 22929a84457aed4c45bc900998b5e11c03023264208James Dong x9 = 0x80808080; /* const. */ 23029a84457aed4c45bc900998b5e11c03023264208James Dong x4 = x5 = 0; 23129a84457aed4c45bc900998b5e11c03023264208James Dong x8 = 16; //<<===========******* 23229a84457aed4c45bc900998b5e11c03023264208James Dong 23329a84457aed4c45bc900998b5e11c03023264208James Dong__asm__ volatile("MVN %0, #0xFF0000": "=r"(x6)); 23429a84457aed4c45bc900998b5e11c03023264208James Dong 23529a84457aed4c45bc900998b5e11c03023264208James Dong#if (NUMBER==3) 23629a84457aed4c45bc900998b5e11c03023264208James DongLOOP_SAD3: 23729a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==2) 23829a84457aed4c45bc900998b5e11c03023264208James DongLOOP_SAD2: 23929a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==1) 24029a84457aed4c45bc900998b5e11c03023264208James DongLOOP_SAD1: 24129a84457aed4c45bc900998b5e11c03023264208James Dong#endif 24229a84457aed4c45bc900998b5e11c03023264208James Dong__asm__ volatile("BIC %0, %0, #3": "=r"(ref)); 24329a84457aed4c45bc900998b5e11c03023264208James Dong /****** process 8 pixels ******/ 24429a84457aed4c45bc900998b5e11c03023264208James Dong x11 = *((int32*)(ref + 12)); 24529a84457aed4c45bc900998b5e11c03023264208James Dong x12 = *((int32*)(ref + 16)); 24629a84457aed4c45bc900998b5e11c03023264208James Dong x10 = *((int32*)(ref + 8)); 24729a84457aed4c45bc900998b5e11c03023264208James Dong x14 = *((int32*)(blk + 12)); 24829a84457aed4c45bc900998b5e11c03023264208James Dong 24929a84457aed4c45bc900998b5e11c03023264208James Dong#if (SHIFT==8) 25029a84457aed4c45bc900998b5e11c03023264208James Dong__asm__ volatile("MVN %0, %0, lsr #8\n\tBIC %0, %0, %1,lsl #24\n\tMVN %1, %1,lsr #8\n\tBIC %1, %1, %2,lsl #24": "=&r"(x10), "=&r"(x11): "r"(x12)); 25129a84457aed4c45bc900998b5e11c03023264208James Dong#elif (SHIFT==16) 25229a84457aed4c45bc900998b5e11c03023264208James Dong__asm__ volatile("MVN %0, %0, lsr #16\n\tBIC %0, %0, %1,lsl #16\n\tMVN %1, %1,lsr #16\n\tBIC %1, %1, %2,lsl #16": "=&r"(x10), "=&r"(x11): "r"(x12)); 25329a84457aed4c45bc900998b5e11c03023264208James Dong#elif (SHIFT==24) 25429a84457aed4c45bc900998b5e11c03023264208James Dong__asm__ volatile("MVN %0, %0, lsr #24\n\tBIC %0, %0, %1,lsl #8\n\tMVN %1, %1,lsr #24\n\tBIC %1, %1, %2,lsl #8": "=&r"(x10), "=&r"(x11): "r"(x12)); 25529a84457aed4c45bc900998b5e11c03023264208James Dong#endif 25629a84457aed4c45bc900998b5e11c03023264208James Dong 25729a84457aed4c45bc900998b5e11c03023264208James Dong x12 = *((int32*)(blk + 8)); 25829a84457aed4c45bc900998b5e11c03023264208James Dong 25929a84457aed4c45bc900998b5e11c03023264208James Dong /* process x11 & x14 */ 26029a84457aed4c45bc900998b5e11c03023264208James Dong x11 = sad_4pixelN(x11, x14, x9); 26129a84457aed4c45bc900998b5e11c03023264208James Dong 26229a84457aed4c45bc900998b5e11c03023264208James Dong /* process x12 & x10 */ 26329a84457aed4c45bc900998b5e11c03023264208James Dong x10 = sad_4pixelN(x10, x12, x9); 26429a84457aed4c45bc900998b5e11c03023264208James Dong 26529a84457aed4c45bc900998b5e11c03023264208James Dong sum_accumulate; 26629a84457aed4c45bc900998b5e11c03023264208James Dong 26729a84457aed4c45bc900998b5e11c03023264208James Dong /****** process 8 pixels ******/ 26829a84457aed4c45bc900998b5e11c03023264208James Dong x11 = *((int32*)(ref + 4)); 26929a84457aed4c45bc900998b5e11c03023264208James Dong x12 = *((int32*)(ref + 8)); 27029a84457aed4c45bc900998b5e11c03023264208James Dong x10 = *((int32*)ref); ref += lx; 27129a84457aed4c45bc900998b5e11c03023264208James Dong x14 = *((int32*)(blk + 4)); 27229a84457aed4c45bc900998b5e11c03023264208James Dong 27329a84457aed4c45bc900998b5e11c03023264208James Dong#if (SHIFT==8) 27429a84457aed4c45bc900998b5e11c03023264208James Dong__asm__ volatile("MVN %0, %0, lsr #8\n\tBIC %0, %0, %1,lsl #24\n\tMVN %1, %1,lsr #8\n\tBIC %1, %1, %2,lsl #24": "=&r"(x10), "=&r"(x11): "r"(x12)); 27529a84457aed4c45bc900998b5e11c03023264208James Dong#elif (SHIFT==16) 27629a84457aed4c45bc900998b5e11c03023264208James Dong__asm__ volatile("MVN %0, %0, lsr #16\n\tBIC %0, %0, %1,lsl #16\n\tMVN %1, %1,lsr #16\n\tBIC %1, %1, %2,lsl #16": "=&r"(x10), "=&r"(x11): "r"(x12)); 27729a84457aed4c45bc900998b5e11c03023264208James Dong#elif (SHIFT==24) 27829a84457aed4c45bc900998b5e11c03023264208James Dong__asm__ volatile("MVN %0, %0, lsr #24\n\tBIC %0, %0, %1,lsl #8\n\tMVN %1, %1,lsr #24\n\tBIC %1, %1, %2,lsl #8": "=&r"(x10), "=&r"(x11): "r"(x12)); 27929a84457aed4c45bc900998b5e11c03023264208James Dong#endif 28029a84457aed4c45bc900998b5e11c03023264208James Dong__asm__ volatile("LDR %0, [%1], #16": "=&r"(x12), "=r"(blk)); 28129a84457aed4c45bc900998b5e11c03023264208James Dong 28229a84457aed4c45bc900998b5e11c03023264208James Dong /* process x11 & x14 */ 28329a84457aed4c45bc900998b5e11c03023264208James Dong x11 = sad_4pixelN(x11, x14, x9); 28429a84457aed4c45bc900998b5e11c03023264208James Dong 28529a84457aed4c45bc900998b5e11c03023264208James Dong /* process x12 & x10 */ 28629a84457aed4c45bc900998b5e11c03023264208James Dong x10 = sad_4pixelN(x10, x12, x9); 28729a84457aed4c45bc900998b5e11c03023264208James Dong 28829a84457aed4c45bc900998b5e11c03023264208James Dong sum_accumulate; 28929a84457aed4c45bc900998b5e11c03023264208James Dong 29029a84457aed4c45bc900998b5e11c03023264208James Dong /****************/ 29129a84457aed4c45bc900998b5e11c03023264208James Dong x10 = x5 - (x4 << 8); /* extract low bytes */ 29229a84457aed4c45bc900998b5e11c03023264208James Dong x10 = x10 + x4; /* add with high bytes */ 29329a84457aed4c45bc900998b5e11c03023264208James Dong x10 = x10 + (x10 << 16); /* add with lower half word */ 29429a84457aed4c45bc900998b5e11c03023264208James Dong 29529a84457aed4c45bc900998b5e11c03023264208James Dong if (((uint32)x10 >> 16) <= (uint32)dmin) /* compare with dmin */ 29629a84457aed4c45bc900998b5e11c03023264208James Dong { 29729a84457aed4c45bc900998b5e11c03023264208James Dong if (--x8) 29829a84457aed4c45bc900998b5e11c03023264208James Dong { 29929a84457aed4c45bc900998b5e11c03023264208James Dong#if (NUMBER==3) 30029a84457aed4c45bc900998b5e11c03023264208James Dong goto LOOP_SAD3; 30129a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==2) 30229a84457aed4c45bc900998b5e11c03023264208James Donggoto LOOP_SAD2; 30329a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==1) 30429a84457aed4c45bc900998b5e11c03023264208James Donggoto LOOP_SAD1; 30529a84457aed4c45bc900998b5e11c03023264208James Dong#endif 30629a84457aed4c45bc900998b5e11c03023264208James Dong } 30729a84457aed4c45bc900998b5e11c03023264208James Dong 30829a84457aed4c45bc900998b5e11c03023264208James Dong } 30929a84457aed4c45bc900998b5e11c03023264208James Dong 31029a84457aed4c45bc900998b5e11c03023264208James Dong return ((uint32)x10 >> 16); 31129a84457aed4c45bc900998b5e11c03023264208James Dong} 31229a84457aed4c45bc900998b5e11c03023264208James Dong 31329a84457aed4c45bc900998b5e11c03023264208James Dong#endif 31429a84457aed4c45bc900998b5e11c03023264208James Dong 315