129a84457aed4c45bc900998b5e11c03023264208James Dong/* ------------------------------------------------------------------ 229a84457aed4c45bc900998b5e11c03023264208James Dong * Copyright (C) 1998-2009 PacketVideo 329a84457aed4c45bc900998b5e11c03023264208James Dong * 429a84457aed4c45bc900998b5e11c03023264208James Dong * Licensed under the Apache License, Version 2.0 (the "License"); 529a84457aed4c45bc900998b5e11c03023264208James Dong * you may not use this file except in compliance with the License. 629a84457aed4c45bc900998b5e11c03023264208James Dong * You may obtain a copy of the License at 729a84457aed4c45bc900998b5e11c03023264208James Dong * 829a84457aed4c45bc900998b5e11c03023264208James Dong * http://www.apache.org/licenses/LICENSE-2.0 929a84457aed4c45bc900998b5e11c03023264208James Dong * 1029a84457aed4c45bc900998b5e11c03023264208James Dong * Unless required by applicable law or agreed to in writing, software 1129a84457aed4c45bc900998b5e11c03023264208James Dong * distributed under the License is distributed on an "AS IS" BASIS, 1229a84457aed4c45bc900998b5e11c03023264208James Dong * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 1329a84457aed4c45bc900998b5e11c03023264208James Dong * express or implied. 1429a84457aed4c45bc900998b5e11c03023264208James Dong * See the License for the specific language governing permissions 1529a84457aed4c45bc900998b5e11c03023264208James Dong * and limitations under the License. 1629a84457aed4c45bc900998b5e11c03023264208James Dong * ------------------------------------------------------------------- 1729a84457aed4c45bc900998b5e11c03023264208James Dong */ 1829a84457aed4c45bc900998b5e11c03023264208James Dong 1942d515121f11389df082dd02319904c99dd50cd6Martin Storsjo/* Intentionally not using the gcc asm version, since it is 20f5af6314db25ff3bef9bd2eeba201bc6cc60805dMartin Storsjo * slightly slower than the plain C version on modern GCC versions. */ 21f5af6314db25ff3bef9bd2eeba201bc6cc60805dMartin Storsjo#if !defined(__CC_ARM) /* Generic C version */ 2229a84457aed4c45bc900998b5e11c03023264208James Dong 2329a84457aed4c45bc900998b5e11c03023264208James Dong#if (NUMBER==3) 2429a84457aed4c45bc900998b5e11c03023264208James Dong__inline int32 sad_mb_offset3(uint8 *ref, uint8 *blk, int lx, int dmin) 2529a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==2) 2629a84457aed4c45bc900998b5e11c03023264208James Dong__inline int32 sad_mb_offset2(uint8 *ref, uint8 *blk, int lx, int dmin) 2729a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==1) 2829a84457aed4c45bc900998b5e11c03023264208James Dong__inline int32 sad_mb_offset1(uint8 *ref, uint8 *blk, int lx, int dmin) 2929a84457aed4c45bc900998b5e11c03023264208James Dong#endif 3029a84457aed4c45bc900998b5e11c03023264208James Dong{ 3129a84457aed4c45bc900998b5e11c03023264208James Dong int32 x4, x5, x6, x8, x9, x10, x11, x12, x14; 3229a84457aed4c45bc900998b5e11c03023264208James Dong 3329a84457aed4c45bc900998b5e11c03023264208James Dong // x5 = (x4<<8) - x4; 3429a84457aed4c45bc900998b5e11c03023264208James Dong x4 = x5 = 0; 3529a84457aed4c45bc900998b5e11c03023264208James Dong x6 = 0xFFFF00FF; 3629a84457aed4c45bc900998b5e11c03023264208James Dong x9 = 0x80808080; /* const. */ 3729a84457aed4c45bc900998b5e11c03023264208James Dong ref -= NUMBER; /* bic ref, ref, #3 */ 3829a84457aed4c45bc900998b5e11c03023264208James Dong ref -= lx; 3929a84457aed4c45bc900998b5e11c03023264208James Dong blk -= 16; 4029a84457aed4c45bc900998b5e11c03023264208James Dong x8 = 16; 4129a84457aed4c45bc900998b5e11c03023264208James Dong 4229a84457aed4c45bc900998b5e11c03023264208James Dong#if (NUMBER==3) 4329a84457aed4c45bc900998b5e11c03023264208James DongLOOP_SAD3: 4429a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==2) 4529a84457aed4c45bc900998b5e11c03023264208James DongLOOP_SAD2: 4629a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==1) 4729a84457aed4c45bc900998b5e11c03023264208James DongLOOP_SAD1: 4829a84457aed4c45bc900998b5e11c03023264208James Dong#endif 4929a84457aed4c45bc900998b5e11c03023264208James Dong /****** process 8 pixels ******/ 5029a84457aed4c45bc900998b5e11c03023264208James Dong x10 = *((uint32*)(ref += lx)); /* D C B A */ 5129a84457aed4c45bc900998b5e11c03023264208James Dong x11 = *((uint32*)(ref + 4)); /* H G F E */ 5229a84457aed4c45bc900998b5e11c03023264208James Dong x12 = *((uint32*)(ref + 8)); /* L K J I */ 5329a84457aed4c45bc900998b5e11c03023264208James Dong 5429a84457aed4c45bc900998b5e11c03023264208James Dong x10 = ((uint32)x10 >> SHIFT); /* 0 0 0 D */ 5529a84457aed4c45bc900998b5e11c03023264208James Dong x10 = x10 | (x11 << (32 - SHIFT)); /* G F E D */ 5629a84457aed4c45bc900998b5e11c03023264208James Dong x11 = ((uint32)x11 >> SHIFT); /* 0 0 0 H */ 5729a84457aed4c45bc900998b5e11c03023264208James Dong x11 = x11 | (x12 << (32 - SHIFT)); /* K J I H */ 5829a84457aed4c45bc900998b5e11c03023264208James Dong 5929a84457aed4c45bc900998b5e11c03023264208James Dong x12 = *((uint32*)(blk += 16)); 6029a84457aed4c45bc900998b5e11c03023264208James Dong x14 = *((uint32*)(blk + 4)); 6129a84457aed4c45bc900998b5e11c03023264208James Dong 6229a84457aed4c45bc900998b5e11c03023264208James Dong /* process x11 & x14 */ 6329a84457aed4c45bc900998b5e11c03023264208James Dong x11 = sad_4pixel(x11, x14, x9); 6429a84457aed4c45bc900998b5e11c03023264208James Dong 6529a84457aed4c45bc900998b5e11c03023264208James Dong /* process x12 & x10 */ 6629a84457aed4c45bc900998b5e11c03023264208James Dong x10 = sad_4pixel(x10, x12, x9); 6729a84457aed4c45bc900998b5e11c03023264208James Dong 6829a84457aed4c45bc900998b5e11c03023264208James Dong x5 = x5 + x10; /* accumulate low bytes */ 6929a84457aed4c45bc900998b5e11c03023264208James Dong x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ 7029a84457aed4c45bc900998b5e11c03023264208James Dong x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ 7129a84457aed4c45bc900998b5e11c03023264208James Dong x5 = x5 + x11; /* accumulate low bytes */ 7229a84457aed4c45bc900998b5e11c03023264208James Dong x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ 7329a84457aed4c45bc900998b5e11c03023264208James Dong x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ 7429a84457aed4c45bc900998b5e11c03023264208James Dong 7529a84457aed4c45bc900998b5e11c03023264208James Dong /****** process 8 pixels ******/ 7629a84457aed4c45bc900998b5e11c03023264208James Dong x10 = *((uint32*)(ref + 8)); /* D C B A */ 7729a84457aed4c45bc900998b5e11c03023264208James Dong x11 = *((uint32*)(ref + 12)); /* H G F E */ 7829a84457aed4c45bc900998b5e11c03023264208James Dong x12 = *((uint32*)(ref + 16)); /* L K J I */ 7929a84457aed4c45bc900998b5e11c03023264208James Dong 8029a84457aed4c45bc900998b5e11c03023264208James Dong x10 = ((uint32)x10 >> SHIFT); /* mvn x10, x10, lsr #24 = 0xFF 0xFF 0xFF ~D */ 8129a84457aed4c45bc900998b5e11c03023264208James Dong x10 = x10 | (x11 << (32 - SHIFT)); /* bic x10, x10, x11, lsl #8 = ~G ~F ~E ~D */ 8229a84457aed4c45bc900998b5e11c03023264208James Dong x11 = ((uint32)x11 >> SHIFT); /* 0xFF 0xFF 0xFF ~H */ 8329a84457aed4c45bc900998b5e11c03023264208James Dong x11 = x11 | (x12 << (32 - SHIFT)); /* ~K ~J ~I ~H */ 8429a84457aed4c45bc900998b5e11c03023264208James Dong 8529a84457aed4c45bc900998b5e11c03023264208James Dong x12 = *((uint32*)(blk + 8)); 8629a84457aed4c45bc900998b5e11c03023264208James Dong x14 = *((uint32*)(blk + 12)); 8729a84457aed4c45bc900998b5e11c03023264208James Dong 8829a84457aed4c45bc900998b5e11c03023264208James Dong /* process x11 & x14 */ 8929a84457aed4c45bc900998b5e11c03023264208James Dong x11 = sad_4pixel(x11, x14, x9); 9029a84457aed4c45bc900998b5e11c03023264208James Dong 9129a84457aed4c45bc900998b5e11c03023264208James Dong /* process x12 & x10 */ 9229a84457aed4c45bc900998b5e11c03023264208James Dong x10 = sad_4pixel(x10, x12, x9); 9329a84457aed4c45bc900998b5e11c03023264208James Dong 9429a84457aed4c45bc900998b5e11c03023264208James Dong x5 = x5 + x10; /* accumulate low bytes */ 9529a84457aed4c45bc900998b5e11c03023264208James Dong x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ 9629a84457aed4c45bc900998b5e11c03023264208James Dong x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ 9729a84457aed4c45bc900998b5e11c03023264208James Dong x5 = x5 + x11; /* accumulate low bytes */ 9829a84457aed4c45bc900998b5e11c03023264208James Dong x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ 9929a84457aed4c45bc900998b5e11c03023264208James Dong x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ 10029a84457aed4c45bc900998b5e11c03023264208James Dong 10129a84457aed4c45bc900998b5e11c03023264208James Dong /****************/ 10229a84457aed4c45bc900998b5e11c03023264208James Dong x10 = x5 - (x4 << 8); /* extract low bytes */ 10329a84457aed4c45bc900998b5e11c03023264208James Dong x10 = x10 + x4; /* add with high bytes */ 10429a84457aed4c45bc900998b5e11c03023264208James Dong x10 = x10 + (x10 << 16); /* add with lower half word */ 10529a84457aed4c45bc900998b5e11c03023264208James Dong 10629a84457aed4c45bc900998b5e11c03023264208James Dong if ((int)((uint32)x10 >> 16) <= dmin) /* compare with dmin */ 10729a84457aed4c45bc900998b5e11c03023264208James Dong { 10829a84457aed4c45bc900998b5e11c03023264208James Dong if (--x8) 10929a84457aed4c45bc900998b5e11c03023264208James Dong { 11029a84457aed4c45bc900998b5e11c03023264208James Dong#if (NUMBER==3) 11129a84457aed4c45bc900998b5e11c03023264208James Dong goto LOOP_SAD3; 11229a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==2) 11329a84457aed4c45bc900998b5e11c03023264208James Dong goto LOOP_SAD2; 11429a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==1) 11529a84457aed4c45bc900998b5e11c03023264208James Dong goto LOOP_SAD1; 11629a84457aed4c45bc900998b5e11c03023264208James Dong#endif 11729a84457aed4c45bc900998b5e11c03023264208James Dong } 11829a84457aed4c45bc900998b5e11c03023264208James Dong 11929a84457aed4c45bc900998b5e11c03023264208James Dong } 12029a84457aed4c45bc900998b5e11c03023264208James Dong 12129a84457aed4c45bc900998b5e11c03023264208James Dong return ((uint32)x10 >> 16); 12229a84457aed4c45bc900998b5e11c03023264208James Dong} 12329a84457aed4c45bc900998b5e11c03023264208James Dong 12429a84457aed4c45bc900998b5e11c03023264208James Dong#elif defined(__CC_ARM) /* only work with arm v5 */ 12529a84457aed4c45bc900998b5e11c03023264208James Dong 12629a84457aed4c45bc900998b5e11c03023264208James Dong#if (NUMBER==3) 12729a84457aed4c45bc900998b5e11c03023264208James Dong__inline int32 sad_mb_offset3(uint8 *ref, uint8 *blk, int lx, int dmin, int32 x8) 12829a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==2) 12929a84457aed4c45bc900998b5e11c03023264208James Dong__inline int32 sad_mb_offset2(uint8 *ref, uint8 *blk, int lx, int dmin, int32 x8) 13029a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==1) 13129a84457aed4c45bc900998b5e11c03023264208James Dong__inline int32 sad_mb_offset1(uint8 *ref, uint8 *blk, int lx, int dmin, int32 x8) 13229a84457aed4c45bc900998b5e11c03023264208James Dong#endif 13329a84457aed4c45bc900998b5e11c03023264208James Dong{ 13429a84457aed4c45bc900998b5e11c03023264208James Dong int32 x4, x5, x6, x9, x10, x11, x12, x14; 13529a84457aed4c45bc900998b5e11c03023264208James Dong 13629a84457aed4c45bc900998b5e11c03023264208James Dong x9 = 0x80808080; /* const. */ 13729a84457aed4c45bc900998b5e11c03023264208James Dong x4 = x5 = 0; 13829a84457aed4c45bc900998b5e11c03023264208James Dong 13929a84457aed4c45bc900998b5e11c03023264208James Dong __asm{ 14029a84457aed4c45bc900998b5e11c03023264208James Dong MVN x6, #0xff0000; 14129a84457aed4c45bc900998b5e11c03023264208James Dong#if (NUMBER==3) 14229a84457aed4c45bc900998b5e11c03023264208James DongLOOP_SAD3: 14329a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==2) 14429a84457aed4c45bc900998b5e11c03023264208James DongLOOP_SAD2: 14529a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==1) 14629a84457aed4c45bc900998b5e11c03023264208James DongLOOP_SAD1: 14729a84457aed4c45bc900998b5e11c03023264208James Dong#endif 14829a84457aed4c45bc900998b5e11c03023264208James Dong BIC ref, ref, #3; 14929a84457aed4c45bc900998b5e11c03023264208James Dong } 15029a84457aed4c45bc900998b5e11c03023264208James Dong /****** process 8 pixels ******/ 15129a84457aed4c45bc900998b5e11c03023264208James Dong x11 = *((int32*)(ref + 12)); 15229a84457aed4c45bc900998b5e11c03023264208James Dong x12 = *((int32*)(ref + 16)); 15329a84457aed4c45bc900998b5e11c03023264208James Dong x10 = *((int32*)(ref + 8)); 15429a84457aed4c45bc900998b5e11c03023264208James Dong x14 = *((int32*)(blk + 12)); 15529a84457aed4c45bc900998b5e11c03023264208James Dong 15629a84457aed4c45bc900998b5e11c03023264208James Dong __asm{ 15729a84457aed4c45bc900998b5e11c03023264208James Dong MVN x10, x10, lsr #SHIFT; 15829a84457aed4c45bc900998b5e11c03023264208James Dong BIC x10, x10, x11, lsl #(32-SHIFT); 15929a84457aed4c45bc900998b5e11c03023264208James Dong MVN x11, x11, lsr #SHIFT; 16029a84457aed4c45bc900998b5e11c03023264208James Dong BIC x11, x11, x12, lsl #(32-SHIFT); 16129a84457aed4c45bc900998b5e11c03023264208James Dong 16229a84457aed4c45bc900998b5e11c03023264208James Dong LDR x12, [blk, #8]; 16329a84457aed4c45bc900998b5e11c03023264208James Dong } 16429a84457aed4c45bc900998b5e11c03023264208James Dong 16529a84457aed4c45bc900998b5e11c03023264208James Dong /* process x11 & x14 */ 16629a84457aed4c45bc900998b5e11c03023264208James Dong x11 = sad_4pixelN(x11, x14, x9); 16729a84457aed4c45bc900998b5e11c03023264208James Dong 16829a84457aed4c45bc900998b5e11c03023264208James Dong /* process x12 & x10 */ 16929a84457aed4c45bc900998b5e11c03023264208James Dong x10 = sad_4pixelN(x10, x12, x9); 17029a84457aed4c45bc900998b5e11c03023264208James Dong 17129a84457aed4c45bc900998b5e11c03023264208James Dong sum_accumulate; 17229a84457aed4c45bc900998b5e11c03023264208James Dong 17329a84457aed4c45bc900998b5e11c03023264208James Dong __asm{ 17429a84457aed4c45bc900998b5e11c03023264208James Dong /****** process 8 pixels ******/ 17529a84457aed4c45bc900998b5e11c03023264208James Dong LDR x11, [ref, #4]; 17629a84457aed4c45bc900998b5e11c03023264208James Dong LDR x12, [ref, #8]; 17729a84457aed4c45bc900998b5e11c03023264208James Dong LDR x10, [ref], lx ; 17829a84457aed4c45bc900998b5e11c03023264208James Dong LDR x14, [blk, #4]; 17929a84457aed4c45bc900998b5e11c03023264208James Dong 18029a84457aed4c45bc900998b5e11c03023264208James Dong MVN x10, x10, lsr #SHIFT; 18129a84457aed4c45bc900998b5e11c03023264208James Dong BIC x10, x10, x11, lsl #(32-SHIFT); 18229a84457aed4c45bc900998b5e11c03023264208James Dong MVN x11, x11, lsr #SHIFT; 18329a84457aed4c45bc900998b5e11c03023264208James Dong BIC x11, x11, x12, lsl #(32-SHIFT); 18429a84457aed4c45bc900998b5e11c03023264208James Dong 18529a84457aed4c45bc900998b5e11c03023264208James Dong LDR x12, [blk], #16; 18629a84457aed4c45bc900998b5e11c03023264208James Dong } 18729a84457aed4c45bc900998b5e11c03023264208James Dong 18829a84457aed4c45bc900998b5e11c03023264208James Dong /* process x11 & x14 */ 18929a84457aed4c45bc900998b5e11c03023264208James Dong x11 = sad_4pixelN(x11, x14, x9); 19029a84457aed4c45bc900998b5e11c03023264208James Dong 19129a84457aed4c45bc900998b5e11c03023264208James Dong /* process x12 & x10 */ 19229a84457aed4c45bc900998b5e11c03023264208James Dong x10 = sad_4pixelN(x10, x12, x9); 19329a84457aed4c45bc900998b5e11c03023264208James Dong 19429a84457aed4c45bc900998b5e11c03023264208James Dong sum_accumulate; 19529a84457aed4c45bc900998b5e11c03023264208James Dong 19629a84457aed4c45bc900998b5e11c03023264208James Dong /****************/ 19729a84457aed4c45bc900998b5e11c03023264208James Dong x10 = x5 - (x4 << 8); /* extract low bytes */ 19829a84457aed4c45bc900998b5e11c03023264208James Dong x10 = x10 + x4; /* add with high bytes */ 19929a84457aed4c45bc900998b5e11c03023264208James Dong x10 = x10 + (x10 << 16); /* add with lower half word */ 20029a84457aed4c45bc900998b5e11c03023264208James Dong 20129a84457aed4c45bc900998b5e11c03023264208James Dong __asm{ 20229a84457aed4c45bc900998b5e11c03023264208James Dong RSBS x11, dmin, x10, lsr #16 20329a84457aed4c45bc900998b5e11c03023264208James Dong ADDLSS x8, x8, #INC_X8 20429a84457aed4c45bc900998b5e11c03023264208James Dong#if (NUMBER==3) 20529a84457aed4c45bc900998b5e11c03023264208James Dong BLS LOOP_SAD3; 20629a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==2) 20729a84457aed4c45bc900998b5e11c03023264208James DongBLS LOOP_SAD2; 20829a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==1) 20929a84457aed4c45bc900998b5e11c03023264208James DongBLS LOOP_SAD1; 21029a84457aed4c45bc900998b5e11c03023264208James Dong#endif 21129a84457aed4c45bc900998b5e11c03023264208James Dong } 21229a84457aed4c45bc900998b5e11c03023264208James Dong 21329a84457aed4c45bc900998b5e11c03023264208James Dong return ((uint32)x10 >> 16); 21429a84457aed4c45bc900998b5e11c03023264208James Dong} 21529a84457aed4c45bc900998b5e11c03023264208James Dong 21629a84457aed4c45bc900998b5e11c03023264208James Dong#elif defined(__GNUC__) && defined(__arm__) /* ARM GNU COMPILER */ 21729a84457aed4c45bc900998b5e11c03023264208James Dong 21829a84457aed4c45bc900998b5e11c03023264208James Dong#if (NUMBER==3) 21929a84457aed4c45bc900998b5e11c03023264208James Dong__inline int32 sad_mb_offset3(uint8 *ref, uint8 *blk, int lx, int dmin) 22029a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==2) 22129a84457aed4c45bc900998b5e11c03023264208James Dong__inline int32 sad_mb_offset2(uint8 *ref, uint8 *blk, int lx, int dmin) 22229a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==1) 22329a84457aed4c45bc900998b5e11c03023264208James Dong__inline int32 sad_mb_offset1(uint8 *ref, uint8 *blk, int lx, int dmin) 22429a84457aed4c45bc900998b5e11c03023264208James Dong#endif 22529a84457aed4c45bc900998b5e11c03023264208James Dong{ 22629a84457aed4c45bc900998b5e11c03023264208James Dong int32 x4, x5, x6, x8, x9, x10, x11, x12, x14; 22729a84457aed4c45bc900998b5e11c03023264208James Dong 22829a84457aed4c45bc900998b5e11c03023264208James Dong x9 = 0x80808080; /* const. */ 22929a84457aed4c45bc900998b5e11c03023264208James Dong x4 = x5 = 0; 23029a84457aed4c45bc900998b5e11c03023264208James Dong x8 = 16; //<<===========******* 23129a84457aed4c45bc900998b5e11c03023264208James Dong 232ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo __asm__ volatile("MVN %0, #0xFF0000": "=r"(x6)); 23329a84457aed4c45bc900998b5e11c03023264208James Dong 23429a84457aed4c45bc900998b5e11c03023264208James Dong#if (NUMBER==3) 23529a84457aed4c45bc900998b5e11c03023264208James DongLOOP_SAD3: 23629a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==2) 23729a84457aed4c45bc900998b5e11c03023264208James DongLOOP_SAD2: 23829a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==1) 23929a84457aed4c45bc900998b5e11c03023264208James DongLOOP_SAD1: 24029a84457aed4c45bc900998b5e11c03023264208James Dong#endif 2413fdb405597f0e062a9bb8af20199c5e67f0f764cMartin Storsjo __asm__ volatile("BIC %0, %0, #3": "+r"(ref)); 24229a84457aed4c45bc900998b5e11c03023264208James Dong /****** process 8 pixels ******/ 24329a84457aed4c45bc900998b5e11c03023264208James Dong x11 = *((int32*)(ref + 12)); 24429a84457aed4c45bc900998b5e11c03023264208James Dong x12 = *((int32*)(ref + 16)); 24529a84457aed4c45bc900998b5e11c03023264208James Dong x10 = *((int32*)(ref + 8)); 24629a84457aed4c45bc900998b5e11c03023264208James Dong x14 = *((int32*)(blk + 12)); 24729a84457aed4c45bc900998b5e11c03023264208James Dong 24829a84457aed4c45bc900998b5e11c03023264208James Dong#if (SHIFT==8) 249ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo __asm__ volatile( 250ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo "MVN %0, %0, lsr #8\n\t" 251ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo "BIC %0, %0, %1, lsl #24\n\t" 252ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo "MVN %1, %1, lsr #8\n\t" 253ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo "BIC %1, %1, %2, lsl #24" 2543fdb405597f0e062a9bb8af20199c5e67f0f764cMartin Storsjo : "+r"(x10), "+r"(x11) 255ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo : "r"(x12) 256ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo ); 25729a84457aed4c45bc900998b5e11c03023264208James Dong#elif (SHIFT==16) 258ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo __asm__ volatile( 259ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo "MVN %0, %0, lsr #16\n\t" 260ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo "BIC %0, %0, %1, lsl #16\n\t" 261ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo "MVN %1, %1, lsr #16\n\t" 262ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo "BIC %1, %1, %2, lsl #16" 2633fdb405597f0e062a9bb8af20199c5e67f0f764cMartin Storsjo : "+r"(x10), "+r"(x11) 264ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo : "r"(x12) 265ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo ); 26629a84457aed4c45bc900998b5e11c03023264208James Dong#elif (SHIFT==24) 267ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo __asm__ volatile( 268ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo "MVN %0, %0, lsr #24\n\t" 269ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo "BIC %0, %0, %1, lsl #8\n\t" 270ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo "MVN %1, %1, lsr #24\n\t" 271ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo "BIC %1, %1, %2, lsl #8" 2723fdb405597f0e062a9bb8af20199c5e67f0f764cMartin Storsjo : "+r"(x10), "+r"(x11) 273ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo : "r"(x12) 274ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo ); 27529a84457aed4c45bc900998b5e11c03023264208James Dong#endif 27629a84457aed4c45bc900998b5e11c03023264208James Dong 27729a84457aed4c45bc900998b5e11c03023264208James Dong x12 = *((int32*)(blk + 8)); 27829a84457aed4c45bc900998b5e11c03023264208James Dong 27929a84457aed4c45bc900998b5e11c03023264208James Dong /* process x11 & x14 */ 28029a84457aed4c45bc900998b5e11c03023264208James Dong x11 = sad_4pixelN(x11, x14, x9); 28129a84457aed4c45bc900998b5e11c03023264208James Dong 28229a84457aed4c45bc900998b5e11c03023264208James Dong /* process x12 & x10 */ 28329a84457aed4c45bc900998b5e11c03023264208James Dong x10 = sad_4pixelN(x10, x12, x9); 28429a84457aed4c45bc900998b5e11c03023264208James Dong 28529a84457aed4c45bc900998b5e11c03023264208James Dong sum_accumulate; 28629a84457aed4c45bc900998b5e11c03023264208James Dong 28729a84457aed4c45bc900998b5e11c03023264208James Dong /****** process 8 pixels ******/ 28829a84457aed4c45bc900998b5e11c03023264208James Dong x11 = *((int32*)(ref + 4)); 28929a84457aed4c45bc900998b5e11c03023264208James Dong x12 = *((int32*)(ref + 8)); 29029a84457aed4c45bc900998b5e11c03023264208James Dong x10 = *((int32*)ref); ref += lx; 29129a84457aed4c45bc900998b5e11c03023264208James Dong x14 = *((int32*)(blk + 4)); 29229a84457aed4c45bc900998b5e11c03023264208James Dong 29329a84457aed4c45bc900998b5e11c03023264208James Dong#if (SHIFT==8) 294ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo __asm__ volatile( 295ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo "MVN %0, %0, lsr #8\n\t" 296ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo "BIC %0, %0, %1, lsl #24\n\t" 297ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo "MVN %1, %1, lsr #8\n\t" 298ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo "BIC %1, %1, %2, lsl #24" 2993fdb405597f0e062a9bb8af20199c5e67f0f764cMartin Storsjo : "+r"(x10), "+r"(x11) 300ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo : "r"(x12) 301ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo ); 30229a84457aed4c45bc900998b5e11c03023264208James Dong#elif (SHIFT==16) 303ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo __asm__ volatile( 304ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo "MVN %0, %0, lsr #16\n\t" 305ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo "BIC %0, %0, %1, lsl #16\n\t" 306ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo "MVN %1, %1, lsr #16\n\t" 307ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo "BIC %1, %1, %2, lsl #16" 3083fdb405597f0e062a9bb8af20199c5e67f0f764cMartin Storsjo : "+r"(x10), "+r"(x11) 309ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo : "r"(x12) 310ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo ); 31129a84457aed4c45bc900998b5e11c03023264208James Dong#elif (SHIFT==24) 312ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo __asm__ volatile( 313ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo "MVN %0, %0, lsr #24\n\t" 314ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo "BIC %0, %0, %1, lsl #8\n\t" 315ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo "MVN %1, %1, lsr #24\n\t" 316ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo "BIC %1, %1, %2, lsl #8" 3173fdb405597f0e062a9bb8af20199c5e67f0f764cMartin Storsjo : "+r"(x10), "+r"(x11) 318ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo : "r"(x12) 319ccde1257952d2c073e51ecba6180060570ffa41fMartin Storsjo ); 32029a84457aed4c45bc900998b5e11c03023264208James Dong#endif 3213fdb405597f0e062a9bb8af20199c5e67f0f764cMartin Storsjo __asm__ volatile("LDR %0, [%1], #16": "=&r"(x12), "+r"(blk)); 32229a84457aed4c45bc900998b5e11c03023264208James Dong 32329a84457aed4c45bc900998b5e11c03023264208James Dong /* process x11 & x14 */ 32429a84457aed4c45bc900998b5e11c03023264208James Dong x11 = sad_4pixelN(x11, x14, x9); 32529a84457aed4c45bc900998b5e11c03023264208James Dong 32629a84457aed4c45bc900998b5e11c03023264208James Dong /* process x12 & x10 */ 32729a84457aed4c45bc900998b5e11c03023264208James Dong x10 = sad_4pixelN(x10, x12, x9); 32829a84457aed4c45bc900998b5e11c03023264208James Dong 32929a84457aed4c45bc900998b5e11c03023264208James Dong sum_accumulate; 33029a84457aed4c45bc900998b5e11c03023264208James Dong 33129a84457aed4c45bc900998b5e11c03023264208James Dong /****************/ 33229a84457aed4c45bc900998b5e11c03023264208James Dong x10 = x5 - (x4 << 8); /* extract low bytes */ 33329a84457aed4c45bc900998b5e11c03023264208James Dong x10 = x10 + x4; /* add with high bytes */ 33429a84457aed4c45bc900998b5e11c03023264208James Dong x10 = x10 + (x10 << 16); /* add with lower half word */ 33529a84457aed4c45bc900998b5e11c03023264208James Dong 33629a84457aed4c45bc900998b5e11c03023264208James Dong if (((uint32)x10 >> 16) <= (uint32)dmin) /* compare with dmin */ 33729a84457aed4c45bc900998b5e11c03023264208James Dong { 33829a84457aed4c45bc900998b5e11c03023264208James Dong if (--x8) 33929a84457aed4c45bc900998b5e11c03023264208James Dong { 34029a84457aed4c45bc900998b5e11c03023264208James Dong#if (NUMBER==3) 34129a84457aed4c45bc900998b5e11c03023264208James Dong goto LOOP_SAD3; 34229a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==2) 343955585cca11173b07e2e7db3d636ee97b69b053bMartin Storsjo goto LOOP_SAD2; 34429a84457aed4c45bc900998b5e11c03023264208James Dong#elif (NUMBER==1) 345955585cca11173b07e2e7db3d636ee97b69b053bMartin Storsjo goto LOOP_SAD1; 34629a84457aed4c45bc900998b5e11c03023264208James Dong#endif 34729a84457aed4c45bc900998b5e11c03023264208James Dong } 34829a84457aed4c45bc900998b5e11c03023264208James Dong 34929a84457aed4c45bc900998b5e11c03023264208James Dong } 35029a84457aed4c45bc900998b5e11c03023264208James Dong 35129a84457aed4c45bc900998b5e11c03023264208James Dong return ((uint32)x10 >> 16); 35229a84457aed4c45bc900998b5e11c03023264208James Dong} 35329a84457aed4c45bc900998b5e11c03023264208James Dong 35429a84457aed4c45bc900998b5e11c03023264208James Dong#endif 35529a84457aed4c45bc900998b5e11c03023264208James Dong 356