18bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling/* 28bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling * Copyright (C) 2011 The Android Open Source Project 38bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling * 48bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling * Licensed under the Apache License, Version 2.0 (the "License"); 58bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling * you may not use this file except in compliance with the License. 68bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling * You may obtain a copy of the License at 78bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling * 88bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling * http://www.apache.org/licenses/LICENSE-2.0 98bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling * 108bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling * Unless required by applicable law or agreed to in writing, software 118bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling * distributed under the License is distributed on an "AS IS" BASIS, 128bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 138bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling * See the License for the specific language governing permissions and 148bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling * limitations under the License. 
158bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling */ 168bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 178bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling/*$Id: db_feature_detection.cpp,v 1.4 2011/06/17 14:03:30 mbansal Exp $*/ 188bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 198bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling/***************************************************************** 208bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling* Lean and mean begins here * 218bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling*****************************************************************/ 228bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 238bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#include "db_utilities.h" 248bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#include "db_feature_detection.h" 258bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#ifdef _VERBOSE_ 268bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#include <iostream> 278bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#endif 288bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#include <float.h> 298bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 308bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#define DB_SUB_PIXEL 318bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 328bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#define BORDER 10 // 5 338bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 348bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingfloat** db_AllocStrengthImage_f(float **im,int w,int h) 358bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 368bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int i,n,aw; 378bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling long c,size; 388bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float **img,*aim,*p; 398bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 
408bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Determine number of 124 element chunks needed*/ 418bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling n=(db_maxi(1,w-6)+123)/124; 428bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Determine the total allocation width aw*/ 438bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling aw=n*124+8; 448bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Allocate*/ 458bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling size=aw*h+16; 468bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling *im=new float [size]; 478bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Clean up*/ 488bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling p=(*im); 498bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(c=0;c<size;c++) p[c]=0.0; 508bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Get a 16 byte aligned pointer*/ 518bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling aim=db_AlignPointer_f(*im,16); 528bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Allocate pointer table*/ 538bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling img=new float* [h]; 548bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Initialize the pointer table*/ 558bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(i=0;i<h;i++) 568bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 578bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling img[i]=aim+aw*i+1; 588bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 598bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 608bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling return(img); 618bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 628bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 638bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingvoid db_FreeStrengthImage_f(float *im,float **img,int h) 648bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 
658bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling delete [] im; 668bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling delete [] img; 678bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 688bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 698bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling/*Compute derivatives Ix,Iy for a subrow of img with upper left (i,j) and width chunk_width 708bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha HaeberlingMemory references occur one pixel outside the subrow*/ 718bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlinginline void db_IxIyRow_f(float *Ix,float *Iy,const float * const *img,int i,int j,int chunk_width) 728bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 738bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int c; 748bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 758bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(c=0;c<chunk_width;c++) 768bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 778bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Ix[c]=img[i][j+c-1]-img[i][j+c+1]; 788bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Iy[c]=img[i-1][j+c]-img[i+1][j+c]; 798bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 808bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 818bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 828bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling/*Compute derivatives Ix,Iy for a subrow of img with upper left (i,j) and width 128 838bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha HaeberlingMemory references occur one pixel outside the subrow*/ 848bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlinginline void db_IxIyRow_u(int *dxx,const unsigned char * const *img,int i,int j,int nc) 858bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 868bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#ifdef DB_USE_MMX 878bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha 
Haeberling const unsigned char *r1,*r2,*r3; 888bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 898bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling r1=img[i-1]+j; r2=img[i]+j; r3=img[i+1]+j; 908bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 918bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling _asm 928bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 938bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mov esi,16 948bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mov eax,r1 958bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mov ebx,r2 968bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mov ecx,r3 978bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mov edx,dxx 988bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 998bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Get bitmask into mm7*/ 1008bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mov edi,7F7F7F7Fh 1018bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movd mm7,edi 1028bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling punpckldq mm7,mm7 1038bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 1048bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingloopstart: 1058bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /***************dx part 1-12*********************************/ 1068bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq mm0,[eax] /*1 Get upper*/ 1078bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pxor mm6,mm6 /*2 Set to zero*/ 1088bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq mm1,[ecx] /*3 Get lower*/ 1098bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling psrlq mm0,1 /*4 Shift*/ 1108bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling psrlq mm1,1 /*5 Shift*/ 1118bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pand mm0,mm7 /*6 And*/ 1128bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq mm2,[ebx-1] /*13 Get 
left*/ 1138bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pand mm1,mm7 /*7 And*/ 1148bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling psubb mm0,mm1 /*8 Subtract*/ 1158bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pxor mm5,mm5 /*14 Set to zero*/ 1168bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq mm1,mm0 /*9 Copy*/ 1178bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pcmpgtb mm6,mm0 /*10 Create unpack mask*/ 1188bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq mm3,[ebx+1] /*15 Get right*/ 1198bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling punpcklbw mm0,mm6 /*11 Unpack low*/ 1208bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling punpckhbw mm1,mm6 /*12 Unpack high*/ 1218bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /***************dy part 13-24*********************************/ 1228bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq mm4,mm0 /*25 Copy dx*/ 1238bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling psrlq mm2,1 /*16 Shift*/ 1248bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pmullw mm0,mm0 /*26 Multiply dx*dx*/ 1258bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling psrlq mm3,1 /*17 Shift*/ 1268bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pand mm2,mm7 /*18 And*/ 1278bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pand mm3,mm7 /*19 And*/ 1288bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Stall*/ 1298bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling psubb mm2,mm3 /*20 Subtract*/ 1308bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Stall*/ 1318bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq mm3,mm2 /*21 Copy*/ 1328bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pcmpgtb mm5,mm2 /*22 Create unpack mask*/ 1338bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling punpcklbw mm2,mm5 /*23 Unpack low*/ 1348bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Stall*/ 
1358bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling punpckhbw mm3,mm5 /*24 Unpack high*/ 1368bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /***************dxx dxy dyy low part 25-49*********************************/ 1378bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pmullw mm4,mm2 /*27 Multiply dx*dy*/ 1388bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pmullw mm2,mm2 /*28 Multiply dy*dy*/ 1398bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pxor mm6,mm6 /*29 Set to zero*/ 1408bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq mm5,mm0 /*30 Copy dx*dx*/ 1418bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pcmpgtw mm6,mm0 /*31 Create unpack mask for dx*dx*/ 1428bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling punpcklwd mm0,mm6 /*32 Unpack dx*dx lows*/ 1438bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Stall*/ 1448bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling punpckhwd mm5,mm6 /*33 Unpack dx*dx highs*/ 1458bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pxor mm6,mm6 /*36 Set to zero*/ 1468bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq [edx],mm0 /*34 Store dx*dx lows*/ 1478bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq mm0,mm4 /*37 Copy dx*dy*/ 1488bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq [edx+8],mm5 /*35 Store dx*dx highs*/ 1498bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pcmpgtw mm6,mm4 /*38 Create unpack mask for dx*dy*/ 1508bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling punpcklwd mm4,mm6 /*39 Unpack dx*dy lows*/ 1518bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Stall*/ 1528bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling punpckhwd mm0,mm6 /*40 Unpack dx*dy highs*/ 1538bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pxor mm6,mm6 /*43 Set to zero*/ 1548bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq [edx+512],mm4 /*41 Store dx*dy lows*/ 
1558bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq mm5,mm2 /*44 Copy dy*dy*/ 1568bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq [edx+520],mm0 /*42 Store dx*dy highs*/ 1578bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pcmpgtw mm6,mm2 /*45 Create unpack mask for dy*dy*/ 1588bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling punpcklwd mm2,mm6 /*46 Unpack dy*dy lows*/ 1598bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq mm4,mm1 /*50 Copy dx*/ 1608bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling punpckhwd mm5,mm6 /*47 Unpack dy*dy highs*/ 1618bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pmullw mm1,mm1 /*51 Multiply dx*dx*/ 1628bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq [edx+1024],mm2 /*48 Store dy*dy lows*/ 1638bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pmullw mm4,mm3 /*52 Multiply dx*dy*/ 1648bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq [edx+1032],mm5 /*49 Store dy*dy highs*/ 1658bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /***************dxx dxy dyy high part 50-79*********************************/ 1668bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pmullw mm3,mm3 /*53 Multiply dy*dy*/ 1678bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pxor mm6,mm6 /*54 Set to zero*/ 1688bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq mm5,mm1 /*55 Copy dx*dx*/ 1698bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pcmpgtw mm6,mm1 /*56 Create unpack mask for dx*dx*/ 1708bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pxor mm2,mm2 /*61 Set to zero*/ 1718bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling punpcklwd mm1,mm6 /*57 Unpack dx*dx lows*/ 1728bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq mm0,mm4 /*62 Copy dx*dy*/ 1738bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling punpckhwd mm5,mm6 /*58 Unpack dx*dx highs*/ 1748bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha 
Haeberling pcmpgtw mm2,mm4 /*63 Create unpack mask for dx*dy*/ 1758bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq [edx+16],mm1 /*59 Store dx*dx lows*/ 1768bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling punpcklwd mm4,mm2 /*64 Unpack dx*dy lows*/ 1778bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq [edx+24],mm5 /*60 Store dx*dx highs*/ 1788bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling punpckhwd mm0,mm2 /*65 Unpack dx*dy highs*/ 1798bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq [edx+528],mm4 /*66 Store dx*dy lows*/ 1808bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pxor mm6,mm6 /*68 Set to zero*/ 1818bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq [edx+536],mm0 /*67 Store dx*dy highs*/ 1828bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq mm5,mm3 /*69 Copy dy*dy*/ 1838bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pcmpgtw mm6,mm3 /*70 Create unpack mask for dy*dy*/ 1848bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling add eax,8 /*75*/ 1858bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling punpcklwd mm3,mm6 /*71 Unpack dy*dy lows*/ 1868bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling add ebx,8 /*76*/ 1878bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling punpckhwd mm5,mm6 /*72 Unpack dy*dy highs*/ 1888bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling add ecx,8 /*77*/ 1898bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq [edx+1040],mm3 /*73 Store dy*dy lows*/ 1908bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Stall*/ 1918bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq [edx+1048],mm5 /*74 Store dy*dy highs*/ 1928bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Stall*/ 1938bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling add edx,32 /*78*/ 1948bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling dec esi /*79*/ 1958bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 
jnz loopstart 1968bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 1978bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling emms 1988bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 1998bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 2008bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#else 2018bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int c; 2028bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int Ix,Iy; 2038bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 2048bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(c=0;c<nc;c++) 2058bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 2068bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Ix=(img[i][j+c-1]-img[i][j+c+1])>>1; 2078bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Iy=(img[i-1][j+c]-img[i+1][j+c])>>1; 2088bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling dxx[c]=Ix*Ix; 2098bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling dxx[c+128]=Ix*Iy; 2108bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling dxx[c+256]=Iy*Iy; 2118bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 2128bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#endif /*DB_USE_MMX*/ 2138bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 2148bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 2158bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling/*Filter vertically five rows of derivatives of length chunk_width into gxx,gxy,gyy*/ 2168bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlinginline void db_gxx_gxy_gyy_row_f(float *gxx,float *gxy,float *gyy,int chunk_width, 2178bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float *Ix0,float *Ix1,float *Ix2,float *Ix3,float *Ix4, 2188bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float *Iy0,float *Iy1,float *Iy2,float *Iy3,float *Iy4) 2198bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 
2208bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int c; 2218bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float dx,dy; 2228bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float Ixx0,Ixy0,Iyy0,Ixx1,Ixy1,Iyy1,Ixx2,Ixy2,Iyy2,Ixx3,Ixy3,Iyy3,Ixx4,Ixy4,Iyy4; 2238bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 2248bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(c=0;c<chunk_width;c++) 2258bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 2268bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling dx=Ix0[c]; 2278bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling dy=Iy0[c]; 2288bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Ixx0=dx*dx; 2298bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Ixy0=dx*dy; 2308bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Iyy0=dy*dy; 2318bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 2328bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling dx=Ix1[c]; 2338bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling dy=Iy1[c]; 2348bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Ixx1=dx*dx; 2358bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Ixy1=dx*dy; 2368bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Iyy1=dy*dy; 2378bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 2388bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling dx=Ix2[c]; 2398bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling dy=Iy2[c]; 2408bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Ixx2=dx*dx; 2418bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Ixy2=dx*dy; 2428bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Iyy2=dy*dy; 2438bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 2448bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling dx=Ix3[c]; 2458bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling dy=Iy3[c]; 2468bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Ixx3=dx*dx; 
2478bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Ixy3=dx*dy; 2488bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Iyy3=dy*dy; 2498bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 2508bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling dx=Ix4[c]; 2518bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling dy=Iy4[c]; 2528bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Ixx4=dx*dx; 2538bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Ixy4=dx*dy; 2548bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Iyy4=dy*dy; 2558bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 2568bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Filter vertically*/ 2578bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling gxx[c]=Ixx0+Ixx1*4.0f+Ixx2*6.0f+Ixx3*4.0f+Ixx4; 2588bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling gxy[c]=Ixy0+Ixy1*4.0f+Ixy2*6.0f+Ixy3*4.0f+Ixy4; 2598bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling gyy[c]=Iyy0+Iyy1*4.0f+Iyy2*6.0f+Iyy3*4.0f+Iyy4; 2608bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 2618bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 2628bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 2638bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling/*Filter vertically five rows of derivatives of length 128 into gxx,gxy,gyy*/ 2648bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlinginline void db_gxx_gxy_gyy_row_s(int *g,int *d0,int *d1,int *d2,int *d3,int *d4,int nc) 2658bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 2668bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#ifdef DB_USE_MMX 2678bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int c; 2688bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 2698bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling _asm 2708bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 2718bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mov c,64 
2728bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mov eax,d0 2738bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mov ebx,d1 2748bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mov ecx,d2 2758bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mov edx,d3 2768bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mov edi,d4 2778bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mov esi,g 2788bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 2798bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingloopstart: 2808bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /***************dxx part 1-14*********************************/ 2818bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq mm0,[eax] /*1 Get dxx0*/ 2828bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Stall*/ 2838bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq mm1,[ebx] /*2 Get dxx1*/ 2848bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Stall*/ 2858bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq mm2,[ecx] /*5 Get dxx2*/ 2868bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pslld mm1,2 /*3 Shift dxx1*/ 2878bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq mm3,[edx] /*10 Get dxx3*/ 2888bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling paddd mm0,mm1 /*4 Accumulate dxx1*/ 2898bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq mm4,[eax+512] /*15 Get dxy0*/ 2908bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pslld mm2,1 /*6 Shift dxx2 1*/ 2918bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling paddd mm0,mm2 /*7 Accumulate dxx2 1*/ 2928bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pslld mm2,1 /*8 Shift dxx2 2*/ 2938bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq mm5,[ebx+512] /*16 Get dxy1*/ 2948bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling paddd mm0,mm2 /*9 Accumulate dxx2 2*/ 
2958bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pslld mm3,2 /*11 Shift dxx3*/ 2968bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Stall*/ 2978bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling paddd mm0,mm3 /*12 Accumulate dxx3*/ 2988bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pslld mm5,2 /*17 Shift dxy1*/ 2998bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling paddd mm0,[edi] /*13 Accumulate dxx4*/ 3008bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling paddd mm4,mm5 /*18 Accumulate dxy1*/ 3018bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq mm6,[ecx+512] /*19 Get dxy2*/ 3028bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Stall*/ 3038bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq [esi],mm0 /*14 Store dxx sums*/ 3048bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /***************dxy part 15-28*********************************/ 3058bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pslld mm6,1 /*20 Shift dxy2 1*/ 3068bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling paddd mm4,mm6 /*21 Accumulate dxy2 1*/ 3078bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pslld mm6,1 /*22 Shift dxy2 2*/ 3088bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq mm0,[eax+1024] /*29 Get dyy0*/ 3098bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling paddd mm4,mm6 /*23 Accumulate dxy2 2*/ 3108bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq mm7,[edx+512] /*24 Get dxy3*/ 3118bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pslld mm7,2 /*25 Shift dxy3*/ 3128bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq mm1,[ebx+1024] /*30 Get dyy1*/ 3138bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling paddd mm4,mm7 /*26 Accumulate dxy3*/ 3148bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling paddd mm4,[edi+512] /*27 Accumulate dxy4*/ 3158bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pslld mm1,2 /*31 Shift dyy1*/ 
3168bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq mm2,[ecx+1024] /*33 Get dyy2*/ 3178bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling paddd mm0,mm1 /*32 Accumulate dyy1*/ 3188bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq [esi+512],mm4 /*28 Store dxy sums*/ 3198bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pslld mm2,1 /*34 Shift dyy2 1*/ 3208bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /***************dyy part 29-49*********************************/ 3218bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 3228bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 3238bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq mm3,[edx+1024] /*38 Get dyy3*/ 3248bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling paddd mm0,mm2 /*35 Accumulate dyy2 1*/ 3258bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling paddd mm0,[edi+1024] /*41 Accumulate dyy4*/ 3268bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pslld mm2,1 /*36 Shift dyy2 2*/ 3278bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling paddd mm0,mm2 /*37 Accumulate dyy2 2*/ 3288bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling pslld mm3,2 /*39 Shift dyy3*/ 3298bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling paddd mm0,mm3 /*40 Accumulate dyy3*/ 3308bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling add eax,8 /*43*/ 3318bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling add ebx,8 /*44*/ 3328bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling add ecx,8 /*45*/ 3338bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movq [esi+1024],mm0 /*42 Store dyy sums*/ 3348bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Stall*/ 3358bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling add edx,8 /*46*/ 3368bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling add edi,8 /*47*/ 3378bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling add esi,8 /*48*/ 
3388bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling dec c /*49*/ 3398bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling jnz loopstart 3408bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 3418bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling emms 3428bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 3438bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 3448bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#else 3458bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int c,dd; 3468bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 3478bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(c=0;c<nc;c++) 3488bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 3498bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Filter vertically*/ 3508bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling dd=d2[c]; 3518bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling g[c]=d0[c]+(d1[c]<<2)+(dd<<2)+(dd<<1)+(d3[c]<<2)+d4[c]; 3528bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 3538bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling dd=d2[c+128]; 3548bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling g[c+128]=d0[c+128]+(d1[c+128]<<2)+(dd<<2)+(dd<<1)+(d3[c+128]<<2)+d4[c+128]; 3558bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 3568bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling dd=d2[c+256]; 3578bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling g[c+256]=d0[c+256]+(d1[c+256]<<2)+(dd<<2)+(dd<<1)+(d3[c+256]<<2)+d4[c+256]; 3588bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 3598bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#endif /*DB_USE_MMX*/ 3608bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 3618bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 3628bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling/*Filter horizontally the three rows gxx,gxy,gyy into the strength subrow starting at i,j 
3638bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingand with width chunk_width. gxx,gxy and gyy are assumed to be four pixels wider than chunk_width 3648bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingand starting at (i,j-2)*/ 3658bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlinginline void db_HarrisStrength_row_f(float **s,float *gxx,float *gxy,float *gyy,int i,int j,int chunk_width) 3668bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 3678bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float Gxx,Gxy,Gyy,det,trc; 3688bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int c; 3698bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 3708bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(c=0;c<chunk_width;c++) 3718bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 3728bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Gxx=gxx[c]+gxx[c+1]*4.0f+gxx[c+2]*6.0f+gxx[c+3]*4.0f+gxx[c+4]; 3738bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Gxy=gxy[c]+gxy[c+1]*4.0f+gxy[c+2]*6.0f+gxy[c+3]*4.0f+gxy[c+4]; 3748bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Gyy=gyy[c]+gyy[c+1]*4.0f+gyy[c+2]*6.0f+gyy[c+3]*4.0f+gyy[c+4]; 3758bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 3768bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling det=Gxx*Gyy-Gxy*Gxy; 3778bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling trc=Gxx+Gyy; 3788bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling s[i][j+c]=det-0.06f*trc*trc; 3798bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 3808bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 3818bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 3828bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling/*Filter g of length 128 in place with 14641. 
// Output is shifted two steps
// and of length 124*/

/*Smooth g in place with the binomial kernel 1-4-6-4-1:
    g[c] <- g[c]+4*g[c+1]+6*g[c+2]+4*g[c+3]+g[c+4]
Output c is therefore centred on input c+2, and the last four entries of the
processed range keep their input values.
g:  buffer that is both read and overwritten.
nc: number of valid ints in g.
Portable path: writes outputs 0..nc-5 and does nothing when nc is below 5.
DB_USE_MMX path: nc is not consulted; the loop counter is fixed at 31 passes
of four results each (124 outputs), so g must hold 128 ints.
The portable path multiplies instead of shifting: left-shifting a negative
int is undefined, and db_HarrisStrength_row_s also runs this filter on gxy
(the IxIy row according to the comments in db_HarrisStrengthChunk_u), whose
values can be negative.*/
inline void db_Filter14641_128_i(int *g,int nc)
{
#ifdef DB_USE_MMX
    int mask;

    mask=0xFFFFFFFF;
    _asm
    {
        mov esi,31
        mov eax,g

        /*Get bitmask 00000000FFFFFFFF into mm7*/
        movd mm7,mask

        /*Warming iteration one 1-16********************/
        movq mm6,[eax] /*1 Load new data*/
        paddd mm0,mm6 /*2 Add 1* behind two steps*/
        movq mm2,mm6 /*3 Start with 1* in front two steps*/
        pslld mm6,1 /*4*/
        paddd mm1,mm6 /*5 Add 2* same place*/
        pslld mm6,1 /*6*/
        paddd mm1,mm6 /*7 Add 4* same place*/
        pshufw mm6,mm6,4Eh /*8 Swap the two double-words using bitmask 01001110=4Eh*/
        paddd mm1,mm6 /*9 Add 4* swapped*/
        movq mm5,mm6 /*10 Copy*/
        pand mm6,mm7 /*11 Get low double-word only*/
        paddd mm2,mm6 /*12 Add 4* in front one step*/
        pxor mm6,mm5 /*13 Get high double-word only*/
        paddd mm0,mm6 /*14 Add 4* behind one step*/
        movq mm0,mm1 /*15 Shift along*/
        movq mm1,mm2 /*16 Shift along*/
        /*Warming iteration two 17-32********************/
        movq mm4,[eax+8] /*17 Load new data*/
        paddd mm0,mm4 /*18 Add 1* behind two steps*/
        movq mm2,mm4 /*19 Start with 1* in front two steps*/
        pslld mm4,1 /*20*/
        paddd mm1,mm4 /*21 Add 2* same place*/
        pslld mm4,1 /*22*/
        paddd mm1,mm4 /*23 Add 4* same place*/
        pshufw mm4,mm4,4Eh /*24 Swap the two double-words using bitmask 01001110=4Eh*/
        paddd mm1,mm4 /*25 Add 4* swapped*/
        movq mm3,mm4 /*26 Copy*/
        pand mm4,mm7 /*27 Get low double-word only*/
        paddd mm2,mm4 /*28 Add 4* in front one step*/
        pxor mm4,mm3 /*29 Get high double-word only*/
        paddd mm0,mm4 /*30 Add 4* behind one step*/
        movq mm0,mm1 /*31 Shift along*/
        movq mm1,mm2 /*32 Shift along*/

        /*Loop********************/
loopstart:
        /*First part of loop 33-47********/
        movq mm6,[eax+16] /*33 Load new data*/
        /*Stall*/
        paddd mm0,mm6 /*34 Add 1* behind two steps*/
        movq mm2,mm6 /*35 Start with 1* in front two steps*/
        movq mm4,[eax+24] /*48 Load new data*/
        pslld mm6,1 /*36*/
        paddd mm1,mm6 /*37 Add 2* same place*/
        pslld mm6,1 /*38*/
        paddd mm1,mm6 /*39 Add 4* same place*/
        pshufw mm6,mm6,4Eh /*40 Swap the two double-words using bitmask 01001110=4Eh*/
        paddd mm1,mm4 /*49 Add 1* behind two steps*/
        movq mm5,mm6 /*41 Copy*/
        paddd mm1,mm6 /*42 Add 4* swapped*/
        pand mm6,mm7 /*43 Get low double-word only*/
        paddd mm2,mm6 /*44 Add 4* in front one step*/
        pxor mm6,mm5 /*45 Get high double-word only*/
        paddd mm0,mm6 /*46 Add 4* behind one step*/
        movq mm6,mm4 /*50a Copy*/
        pslld mm4,1 /*51*/
        /*Stall*/
        movq [eax],mm0 /*47 Store result two steps behind*/
        /*Second part of loop 48-66********/
        movq mm0,mm6 /*50b Start with 1* in front two steps*/
        paddd mm2,mm4 /*52 Add 2* same place*/
        pslld mm4,1 /*53*/
        paddd mm2,mm4 /*54 Add 4* same place*/
        pshufw mm4,mm4,4Eh /*55 Swap the two double-words using bitmask 01001110=4Eh*/
        paddd mm2,mm4 /*56 Add 4* swapped*/
        movq mm3,mm4 /*57 Copy*/
        pand mm4,mm7 /*58 Get low double-word only*/
        /*Stall*/
        paddd mm0,mm4 /*59 Add 4* in front one step*/
        pxor mm4,mm3 /*60 Get high double-word only*/
        paddd mm1,mm4 /*61 Add 4* behind one step*/
        add eax,16 /*65*/
        dec esi /*66*/
        /*Stall*/
        movq [eax-8],mm1 /*62 Store result two steps behind*/
        movq mm1,mm0 /*63 Shift along*/
        movq mm0,mm2 /*64 Shift along*/
        jnz loopstart

        emms
    }

#else
    int c;

    /*Front-to-back is safe in place: output c only reads g[c..c+4], none of
    which has been overwritten yet*/
    for(c=0;c<nc-4;c++)
    {
        g[c]=g[c]+4*g[c+1]+6*g[c+2]+4*g[c+3]+g[c+4];
    }
#endif /*DB_USE_MMX*/
}

/*Filter horizontally the three*/
rows gxx,gxy,gyy of length 128 into the strength subrow s 4928bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingof length 124. gxx,gxy and gyy are assumed to be starting at (i,j-2) if s[i][j] is sought. 4938bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlings should be 16 byte aligned*/ 4948bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlinginline void db_HarrisStrength_row_s(float *s,int *gxx,int *gxy,int *gyy,int nc) 4958bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 4968bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float k; 4978bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 4988bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling k=0.06f; 4998bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 5008bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling db_Filter14641_128_i(gxx,nc); 5018bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling db_Filter14641_128_i(gxy,nc); 5028bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling db_Filter14641_128_i(gyy,nc); 5038bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 5048bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#ifdef DB_USE_SIMD 5058bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 5068bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 5078bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling _asm 5088bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 5098bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mov esi,15 5108bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mov eax,gxx 5118bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mov ebx,gxy 5128bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mov ecx,gyy 5138bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mov edx,s 5148bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 5158bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*broadcast k to all positions of xmm7*/ 
5168bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movss xmm7,k 5178bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling shufps xmm7,xmm7,0 5188bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 5198bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*****Warm up 1-10**************************************/ 5208bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling cvtpi2ps xmm0,[eax+8] /*1 Convert two integers into floating point of low double-word*/ 5218bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Stall*/ 5228bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling cvtpi2ps xmm1,[ebx+8] /*4 Convert two integers into floating point of low double-word*/ 5238bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movlhps xmm0,xmm0 /*2 Move them to the high double-word*/ 5248bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling cvtpi2ps xmm2,[ecx+8] /*7 Convert two integers into floating point of low double-word*/ 5258bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movlhps xmm1,xmm1 /*5 Move them to the high double-word*/ 5268bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling cvtpi2ps xmm0,[eax] /*3 Convert two integers into floating point of low double-word*/ 5278bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movlhps xmm2,xmm2 /*8 Move them to the high double-word*/ 5288bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling cvtpi2ps xmm1,[ebx] /*6 Convert two integers into floating point of low double-word*/ 5298bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm3,xmm0 /*10 Copy Cxx*/ 5308bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling cvtpi2ps xmm2,[ecx] /*9 Convert two integers into floating point of low double-word*/ 5318bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Stall*/ 5328bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingloopstart: 5338bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*****First part of loop 
11-18***********************/ 5348bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mulps xmm0,xmm2 /*11 Multiply to get Gxx*Gyy*/ 5358bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling addps xmm2,xmm3 /*12 Add to get Gxx+Gyy*/ 5368bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling cvtpi2ps xmm4,[eax+24] /*19 Convert two integers into floating point of low double-word*/ 5378bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mulps xmm1,xmm1 /*13 Multiply to get Gxy*Gxy*/ 5388bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mulps xmm2,xmm2 /*14 Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/ 5398bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movlhps xmm4,xmm4 /*20 Move them to the high double-word*/ 5408bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling cvtpi2ps xmm4,[eax+16] /*21 Convert two integers into floating point of low double-word*/ 5418bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Stall*/ 5428bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling subps xmm0,xmm1 /*15 Subtract to get Gxx*Gyy-Gxy*Gxy*/ 5438bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mulps xmm2,xmm7 /*16 Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/ 5448bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling cvtpi2ps xmm5,[ebx+24] /*22 Convert two integers into floating point of low double-word*/ 5458bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Stall*/ 5468bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movlhps xmm5,xmm5 /*23 Move them to the high double-word*/ 5478bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Stall*/ 5488bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling cvtpi2ps xmm5,[ebx+16] /*24 Convert two integers into floating point of low double-word*/ 5498bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling subps xmm0,xmm2 /*17 Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/ 5508bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling cvtpi2ps xmm6,[ecx+24] /*25 Convert 
two integers into floating point of low double-word*/ 5518bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Stall*/ 5528bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [edx],xmm0 /*18 Store*/ 5538bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*****Second part of loop 26-40***********************/ 5548bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movlhps xmm6,xmm6 /*26 Move them to the high double-word*/ 5558bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling cvtpi2ps xmm6,[ecx+16] /*27 Convert two integers into floating point of low double-word*/ 5568bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm3,xmm4 /*28 Copy Cxx*/ 5578bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mulps xmm4,xmm6 /*29 Multiply to get Gxx*Gyy*/ 5588bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling addps xmm6,xmm3 /*30 Add to get Gxx+Gyy*/ 5598bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling cvtpi2ps xmm0,[eax+40] /*(1 Next) Convert two integers into floating point of low double-word*/ 5608bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mulps xmm5,xmm5 /*31 Multiply to get Gxy*Gxy*/ 5618bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling cvtpi2ps xmm1,[ebx+40] /*(4 Next) Convert two integers into floating point of low double-word*/ 5628bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mulps xmm6,xmm6 /*32 Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/ 5638bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling cvtpi2ps xmm2,[ecx+40] /*(7 Next) Convert two integers into floating point of low double-word*/ 5648bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movlhps xmm0,xmm0 /*(2 Next) Move them to the high double-word*/ 5658bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling subps xmm4,xmm5 /*33 Subtract to get Gxx*Gyy-Gxy*Gxy*/ 5668bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movlhps xmm1,xmm1 /*(5 Next) Move them to the high double-word*/ 
5678bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling cvtpi2ps xmm0,[eax+32] /*(3 Next)Convert two integers into floating point of low double-word*/ 5688bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mulps xmm6,xmm7 /*34 Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/ 5698bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling cvtpi2ps xmm1,[ebx+32] /*(6 Next) Convert two integers into floating point of low double-word*/ 5708bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movlhps xmm2,xmm2 /*(8 Next) Move them to the high double-word*/ 5718bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm3,xmm0 /*(10 Next) Copy Cxx*/ 5728bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling add eax,32 /*37*/ 5738bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling subps xmm4,xmm6 /*35 Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/ 5748bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling add ebx,32 /*38*/ 5758bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling cvtpi2ps xmm2,[ecx+32] /*(9 Next) Convert two integers into floating point of low double-word*/ 5768bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Stall*/ 5778bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [edx+16],xmm4 /*36 Store*/ 5788bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Stall*/ 5798bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling add ecx,32 /*39*/ 5808bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling add edx,32 /*40*/ 5818bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling dec esi /*41*/ 5828bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling jnz loopstart 5838bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 5848bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /****Cool down***************/ 5858bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mulps xmm0,xmm2 /*Multiply to get Gxx*Gyy*/ 5868bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling addps xmm2,xmm3 
/*Add to get Gxx+Gyy*/ 5878bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mulps xmm1,xmm1 /*Multiply to get Gxy*Gxy*/ 5888bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mulps xmm2,xmm2 /*Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/ 5898bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling subps xmm0,xmm1 /*Subtract to get Gxx*Gyy-Gxy*Gxy*/ 5908bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mulps xmm2,xmm7 /*Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/ 5918bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling subps xmm0,xmm2 /*Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/ 5928bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [edx],xmm0 /*Store*/ 5938bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 5948bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 5958bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#else 5968bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float Gxx,Gxy,Gyy,det,trc; 5978bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int c; 5988bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 5998bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling //for(c=0;c<124;c++) 6008bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(c=0;c<nc-4;c++) 6018bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 6028bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Gxx=(float)gxx[c]; 6038bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Gxy=(float)gxy[c]; 6048bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Gyy=(float)gyy[c]; 6058bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 6068bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling det=Gxx*Gyy-Gxy*Gxy; 6078bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling trc=Gxx+Gyy; 6088bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling s[c]=det-k*trc*trc; 6098bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 6108bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha 
Haeberling#endif /*DB_USE_SIMD*/ 6118bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 6128bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 6138bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling/*Compute the Harris corner strength of the chunk [left,top,right,bottom] of img and 6148bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingstore it into the corresponding region of s. left and top have to be at least 3 and 6158bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingright and bottom have to be at most width-4,height-4*/ 6168bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlinginline void db_HarrisStrengthChunk_f(float **s,const float * const *img,int left,int top,int right,int bottom, 6178bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*temp should point to at least 6188bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 13*(right-left+5) of allocated memory*/ 6198bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float *temp) 6208bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 6218bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float *Ix[5],*Iy[5]; 6228bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float *gxx,*gxy,*gyy; 6238bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int i,chunk_width,chunk_width_p4; 6248bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 6258bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling chunk_width=right-left+1; 6268bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling chunk_width_p4=chunk_width+4; 6278bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling gxx=temp; 6288bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling gxy=gxx+chunk_width_p4; 6298bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling gyy=gxy+chunk_width_p4; 6308bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(i=0;i<5;i++) 6318bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 6328bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha 
Haeberling Ix[i]=gyy+chunk_width_p4+(2*i*chunk_width_p4); 6338bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Iy[i]=Ix[i]+chunk_width_p4; 6348bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 6358bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 6368bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Fill four rows of the wrap-around derivative buffers*/ 6378bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(i=top-2;i<top+2;i++) db_IxIyRow_f(Ix[i%5],Iy[i%5],img,i,left-2,chunk_width_p4); 6388bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 6398bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*For each output row*/ 6408bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(i=top;i<=bottom;i++) 6418bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 6428bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Step the derivative buffers*/ 6438bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling db_IxIyRow_f(Ix[(i+2)%5],Iy[(i+2)%5],img,(i+2),left-2,chunk_width_p4); 6448bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 6458bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Filter Ix2,IxIy,Iy2 vertically into gxx,gxy,gyy*/ 6468bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling db_gxx_gxy_gyy_row_f(gxx,gxy,gyy,chunk_width_p4, 6478bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Ix[(i-2)%5],Ix[(i-1)%5],Ix[i%5],Ix[(i+1)%5],Ix[(i+2)%5], 6488bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Iy[(i-2)%5],Iy[(i-1)%5],Iy[i%5],Iy[(i+1)%5],Iy[(i+2)%5]); 6498bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 6508bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Filter gxx,gxy,gyy horizontally and compute corner response s*/ 6518bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling db_HarrisStrength_row_f(s,gxx,gxy,gyy,i,left,chunk_width); 6528bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 6538bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha 
Haeberling} 6548bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 6558bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling/*Compute the Harris corner strength of the chunk [left,top,left+123,bottom] of img and 6568bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingstore it into the corresponding region of s. left and top have to be at least 3 and 6578bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingright and bottom have to be at most width-4,height-4. The left of the region in s should 6588bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingbe 16 byte aligned*/ 6598bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlinginline void db_HarrisStrengthChunk_u(float **s,const unsigned char * const *img,int left,int top,int bottom, 6608bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*temp should point to at least 6618bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 18*128 of allocated memory*/ 6628bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int *temp, int nc) 6638bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 6648bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int *Ixx[5],*Ixy[5],*Iyy[5]; 6658bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int *gxx,*gxy,*gyy; 6668bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int i; 6678bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 6688bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling gxx=temp; 6698bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling gxy=gxx+128; 6708bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling gyy=gxy+128; 6718bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(i=0;i<5;i++) 6728bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 6738bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Ixx[i]=gyy+(3*i+1)*128; 6748bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Ixy[i]=gyy+(3*i+2)*128; 6758bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 
Iyy[i]=gyy+(3*i+3)*128; 6768bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 6778bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 6788bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Fill four rows of the wrap-around derivative buffers*/ 6798bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(i=top-2;i<top+2;i++) db_IxIyRow_u(Ixx[i%5],img,i,left-2,nc); 6808bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 6818bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*For each output row*/ 6828bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(i=top;i<=bottom;i++) 6838bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 6848bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Step the derivative buffers*/ 6858bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling db_IxIyRow_u(Ixx[(i+2)%5],img,(i+2),left-2,nc); 6868bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 6878bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Filter Ix2,IxIy,Iy2 vertically into gxx,gxy,gyy*/ 6888bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling db_gxx_gxy_gyy_row_s(gxx,Ixx[(i-2)%5],Ixx[(i-1)%5],Ixx[i%5],Ixx[(i+1)%5],Ixx[(i+2)%5],nc); 6898bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 6908bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Filter gxx,gxy,gyy horizontally and compute corner response s*/ 6918bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling db_HarrisStrength_row_s(s[i]+left,gxx,gxy,gyy,nc); 6928bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 6938bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 6948bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 6958bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 6968bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling/*Compute Harris corner strength of img. 
Strength is returned for the region 6978bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingwith (3,3) as upper left and (w-4,h-4) as lower right, positioned in the 6988bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingsame place in s. In other words,image should be at least 7 pixels wide and 7 pixels high 6998bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingfor a meaningful result*/ 7008bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingvoid db_HarrisStrength_f(float **s,const float * const *img,int w,int h, 7018bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*temp should point to at least 7028bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 13*(chunk_width+4) of allocated memory*/ 7038bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float *temp, 7048bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int chunk_width) 7058bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 7068bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int x,next_x,last,right; 7078bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 7088bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling last=w-4; 7098bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(x=3;x<=last;x=next_x) 7108bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 7118bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling next_x=x+chunk_width; 7128bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling right=next_x-1; 7138bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if(right>last) right=last; 7148bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Compute the Harris strength of a chunk*/ 7158bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling db_HarrisStrengthChunk_f(s,img,x,3,right,h-4,temp); 7168bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 7178bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 7188bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 
7198bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling/*Compute Harris corner strength of img. Strength is returned for the region 7208bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingwith (3,3) as upper left and (w-4,h-4) as lower right, positioned in the 7218bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingsame place in s. In other words,image should be at least 7 pixels wide and 7 pixels high 7228bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingfor a meaningful result.Moreover, the image should be overallocated by 256 bytes. 7238bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlings[i][3] should by 16 byte aligned for any i*/ 7248bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingvoid db_HarrisStrength_u(float **s, const unsigned char * const *img,int w,int h, 7258bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*temp should point to at least 7268bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 18*128 of allocated memory*/ 7278bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int *temp) 7288bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 7298bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int x,next_x,last; 7308bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int nc; 7318bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 7328bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling last=w-4; 7338bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(x=3;x<=last;x=next_x) 7348bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 7358bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling next_x=x+124; 7368bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 7378bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling // mayban: to revert to the original full chunks state, change the line below to: nc = 128; 7388bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling nc = db_mini(128,last-x+1); 7398bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 
//nc = 128; 7408bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 7418bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Compute the Harris strength of a chunk*/ 7428bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling db_HarrisStrengthChunk_u(s,img,x,3,h-4,temp,nc); 7438bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 7448bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 7458bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 7468bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlinginline float db_Max_128Aligned16_f(float *v) 7478bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 7488bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#ifdef DB_USE_SIMD 7498bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float back; 7508bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 7518bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling _asm 7528bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 7538bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mov eax,v 7548bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 7558bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Chunk1*/ 7568bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm0,[eax] 7578bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm1,[eax+16] 7588bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm2,[eax+32] 7598bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm3,[eax+48] 7608bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm4,[eax+64] 7618bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm5,[eax+80] 7628bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm6,[eax+96] 7638bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm7,[eax+112] 7648bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 7658bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Chunk2*/ 
7668bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,[eax+128] 7678bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm1,[eax+144] 7688bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm2,[eax+160] 7698bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm3,[eax+176] 7708bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm4,[eax+192] 7718bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm5,[eax+208] 7728bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm6,[eax+224] 7738bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm7,[eax+240] 7748bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 7758bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Chunk3*/ 7768bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,[eax+256] 7778bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm1,[eax+272] 7788bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm2,[eax+288] 7798bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm3,[eax+304] 7808bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm4,[eax+320] 7818bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm5,[eax+336] 7828bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm6,[eax+352] 7838bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm7,[eax+368] 7848bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 7858bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Chunk4*/ 7868bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,[eax+384] 7878bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm1,[eax+400] 7888bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm2,[eax+416] 7898bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm3,[eax+432] 7908bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm4,[eax+448] 
7918bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm5,[eax+464] 7928bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm6,[eax+480] 7938bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm7,[eax+496] 7948bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 7958bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Collect*/ 7968bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,xmm1 7978bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm2,xmm3 7988bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm4,xmm5 7998bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm6,xmm7 8008bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,xmm2 8018bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm4,xmm6 8028bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,xmm4 8038bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movhlps xmm1,xmm0 8048bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,xmm1 8058bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling shufps xmm1,xmm0,1 8068bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,xmm1 8078bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movss back,xmm0 8088bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 8098bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 8108bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling return(back); 8118bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#else 8128bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float val,max_val; 8138bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float *p,*stop_p; 8148bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling max_val=v[0]; 8158bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(p=v+1,stop_p=v+128;p!=stop_p;) 8168bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 
8178bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling val= *p++; 8188bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if(val>max_val) max_val=val; 8198bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 8208bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling return(max_val); 8218bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#endif /*DB_USE_SIMD*/ 8228bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 8238bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 8248bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlinginline float db_Max_64Aligned16_f(float *v) 8258bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 8268bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#ifdef DB_USE_SIMD 8278bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float back; 8288bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 8298bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling _asm 8308bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 8318bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mov eax,v 8328bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 8338bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Chunk1*/ 8348bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm0,[eax] 8358bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm1,[eax+16] 8368bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm2,[eax+32] 8378bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm3,[eax+48] 8388bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm4,[eax+64] 8398bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm5,[eax+80] 8408bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm6,[eax+96] 8418bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm7,[eax+112] 8428bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 8438bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 
/*Chunk2*/ 8448bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,[eax+128] 8458bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm1,[eax+144] 8468bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm2,[eax+160] 8478bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm3,[eax+176] 8488bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm4,[eax+192] 8498bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm5,[eax+208] 8508bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm6,[eax+224] 8518bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm7,[eax+240] 8528bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 8538bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Collect*/ 8548bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,xmm1 8558bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm2,xmm3 8568bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm4,xmm5 8578bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm6,xmm7 8588bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,xmm2 8598bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm4,xmm6 8608bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,xmm4 8618bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movhlps xmm1,xmm0 8628bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,xmm1 8638bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling shufps xmm1,xmm0,1 8648bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,xmm1 8658bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movss back,xmm0 8668bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 8678bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 8688bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling return(back); 8698bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#else 
8708bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float val,max_val; 8718bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float *p,*stop_p; 8728bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling max_val=v[0]; 8738bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(p=v+1,stop_p=v+64;p!=stop_p;) 8748bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 8758bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling val= *p++; 8768bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if(val>max_val) max_val=val; 8778bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 8788bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling return(max_val); 8798bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#endif /*DB_USE_SIMD*/ 8808bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 8818bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 8828bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlinginline float db_Max_32Aligned16_f(float *v) 8838bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 8848bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#ifdef DB_USE_SIMD 8858bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float back; 8868bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 8878bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling _asm 8888bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 8898bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mov eax,v 8908bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 8918bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Chunk1*/ 8928bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm0,[eax] 8938bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm1,[eax+16] 8948bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm2,[eax+32] 8958bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm3,[eax+48] 8968bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 
movaps xmm4,[eax+64] 8978bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm5,[eax+80] 8988bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm6,[eax+96] 8998bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm7,[eax+112] 9008bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 9018bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Collect*/ 9028bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,xmm1 9038bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm2,xmm3 9048bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm4,xmm5 9058bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm6,xmm7 9068bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,xmm2 9078bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm4,xmm6 9088bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,xmm4 9098bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movhlps xmm1,xmm0 9108bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,xmm1 9118bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling shufps xmm1,xmm0,1 9128bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,xmm1 9138bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movss back,xmm0 9148bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 9158bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 9168bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling return(back); 9178bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#else 9188bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float val,max_val; 9198bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float *p,*stop_p; 9208bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling max_val=v[0]; 9218bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(p=v+1,stop_p=v+32;p!=stop_p;) 9228bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 
9238bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling val= *p++; 9248bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if(val>max_val) max_val=val; 9258bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 9268bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling return(max_val); 9278bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#endif /*DB_USE_SIMD*/ 9288bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 9298bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 9308bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlinginline float db_Max_16Aligned16_f(float *v) 9318bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 9328bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#ifdef DB_USE_SIMD 9338bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float back; 9348bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 9358bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling _asm 9368bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 9378bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mov eax,v 9388bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 9398bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Chunk1*/ 9408bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm0,[eax] 9418bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm1,[eax+16] 9428bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm2,[eax+32] 9438bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm3,[eax+48] 9448bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 9458bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Collect*/ 9468bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,xmm1 9478bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm2,xmm3 9488bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,xmm2 9498bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movhlps xmm1,xmm0 
9508bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,xmm1 9518bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling shufps xmm1,xmm0,1 9528bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,xmm1 9538bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movss back,xmm0 9548bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 9558bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 9568bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling return(back); 9578bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#else 9588bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float val,max_val; 9598bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float *p,*stop_p; 9608bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling max_val=v[0]; 9618bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(p=v+1,stop_p=v+16;p!=stop_p;) 9628bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 9638bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling val= *p++; 9648bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if(val>max_val) max_val=val; 9658bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 9668bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling return(max_val); 9678bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#endif /*DB_USE_SIMD*/ 9688bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 9698bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 9708bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlinginline float db_Max_8Aligned16_f(float *v) 9718bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 9728bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#ifdef DB_USE_SIMD 9738bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float back; 9748bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 9758bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling _asm 9768bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 
9778bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mov eax,v 9788bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 9798bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Chunk1*/ 9808bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm0,[eax] 9818bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm1,[eax+16] 9828bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 9838bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Collect*/ 9848bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,xmm1 9858bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movhlps xmm1,xmm0 9868bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,xmm1 9878bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling shufps xmm1,xmm0,1 9888bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,xmm1 9898bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movss back,xmm0 9908bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 9918bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 9928bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling return(back); 9938bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#else 9948bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float val,max_val; 9958bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float *p,*stop_p; 9968bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling max_val=v[0]; 9978bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(p=v+1,stop_p=v+8;p!=stop_p;) 9988bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 9998bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling val= *p++; 10008bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if(val>max_val) max_val=val; 10018bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 10028bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling return(max_val); 10038bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#endif 
/*DB_USE_SIMD*/ 10048bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 10058bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 10068bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlinginline float db_Max_Aligned16_f(float *v,int size) 10078bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 10088bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float val,max_val; 10098bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float *stop_v; 10108bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 10118bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling max_val=v[0]; 10128bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(;size>=128;size-=128) 10138bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 10148bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling val=db_Max_128Aligned16_f(v); 10158bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling v+=128; 10168bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if(val>max_val) max_val=val; 10178bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 10188bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if(size&64) 10198bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 10208bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling val=db_Max_64Aligned16_f(v); 10218bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling v+=64; 10228bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if(val>max_val) max_val=val; 10238bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 10248bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if(size&32) 10258bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 10268bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling val=db_Max_32Aligned16_f(v); 10278bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling v+=32; 10288bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if(val>max_val) max_val=val; 10298bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 
10308bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if(size&16) 10318bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 10328bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling val=db_Max_16Aligned16_f(v); 10338bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling v+=16; 10348bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if(val>max_val) max_val=val; 10358bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 10368bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if(size&8) 10378bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 10388bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling val=db_Max_8Aligned16_f(v); 10398bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling v+=8; 10408bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if(val>max_val) max_val=val; 10418bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 10428bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if(size&7) 10438bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 10448bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(stop_v=v+(size&7);v!=stop_v;) 10458bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 10468bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling val= *v++; 10478bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if(val>max_val) max_val=val; 10488bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 10498bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 10508bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 10518bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling return(max_val); 10528bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 10538bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 10548bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling/*Find maximum value of img in the region starting at (left,top) 10558bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingand with width w and height h. 
img[left] should be 16 byte aligned*/ 10568bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingfloat db_MaxImage_Aligned16_f(float **img,int left,int top,int w,int h) 10578bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 10588bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float val,max_val; 10598bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int i,stop_i; 10608bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 10618bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if(w && h) 10628bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 10638bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling stop_i=top+h; 10648bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling max_val=img[top][left]; 10658bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 10668bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(i=top;i<stop_i;i++) 10678bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 10688bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling val=db_Max_Aligned16_f(img[i]+left,w); 10698bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if(val>max_val) max_val=val; 10708bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 10718bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling return(max_val); 10728bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 10738bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling return(0.0); 10748bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 10758bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 10768bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlinginline void db_MaxVector_128_Aligned16_f(float *m,float *v1,float *v2) 10778bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 10788bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#ifdef DB_USE_SIMD 10798bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling _asm 10808bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 
10818bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mov eax,v1 10828bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mov ebx,v2 10838bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mov ecx,m 10848bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 10858bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Chunk1*/ 10868bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm0,[eax] 10878bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm1,[eax+16] 10888bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm2,[eax+32] 10898bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm3,[eax+48] 10908bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm4,[eax+64] 10918bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm5,[eax+80] 10928bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm6,[eax+96] 10938bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm7,[eax+112] 10948bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,[ebx] 10958bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm1,[ebx+16] 10968bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm2,[ebx+32] 10978bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm3,[ebx+48] 10988bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm4,[ebx+64] 10998bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm5,[ebx+80] 11008bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm6,[ebx+96] 11018bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm7,[ebx+112] 11028bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx],xmm0 11038bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+16],xmm1 11048bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+32],xmm2 11058bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+48],xmm3 
11068bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+64],xmm4 11078bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+80],xmm5 11088bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+96],xmm6 11098bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+112],xmm7 11108bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 11118bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Chunk2*/ 11128bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm0,[eax+128] 11138bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm1,[eax+144] 11148bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm2,[eax+160] 11158bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm3,[eax+176] 11168bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm4,[eax+192] 11178bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm5,[eax+208] 11188bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm6,[eax+224] 11198bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm7,[eax+240] 11208bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,[ebx+128] 11218bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm1,[ebx+144] 11228bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm2,[ebx+160] 11238bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm3,[ebx+176] 11248bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm4,[ebx+192] 11258bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm5,[ebx+208] 11268bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm6,[ebx+224] 11278bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm7,[ebx+240] 11288bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+128],xmm0 11298bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+144],xmm1 
11308bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+160],xmm2 11318bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+176],xmm3 11328bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+192],xmm4 11338bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+208],xmm5 11348bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+224],xmm6 11358bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+240],xmm7 11368bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 11378bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Chunk3*/ 11388bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm0,[eax+256] 11398bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm1,[eax+272] 11408bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm2,[eax+288] 11418bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm3,[eax+304] 11428bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm4,[eax+320] 11438bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm5,[eax+336] 11448bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm6,[eax+352] 11458bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm7,[eax+368] 11468bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,[ebx+256] 11478bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm1,[ebx+272] 11488bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm2,[ebx+288] 11498bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm3,[ebx+304] 11508bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm4,[ebx+320] 11518bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm5,[ebx+336] 11528bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm6,[ebx+352] 11538bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm7,[ebx+368] 
11548bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+256],xmm0 11558bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+272],xmm1 11568bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+288],xmm2 11578bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+304],xmm3 11588bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+320],xmm4 11598bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+336],xmm5 11608bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+352],xmm6 11618bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+368],xmm7 11628bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 11638bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Chunk4*/ 11648bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm0,[eax+384] 11658bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm1,[eax+400] 11668bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm2,[eax+416] 11678bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm3,[eax+432] 11688bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm4,[eax+448] 11698bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm5,[eax+464] 11708bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm6,[eax+480] 11718bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps xmm7,[eax+496] 11728bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,[ebx+384] 11738bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm1,[ebx+400] 11748bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm2,[ebx+416] 11758bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm3,[ebx+432] 11768bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm4,[ebx+448] 11778bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm5,[ebx+464] 
11788bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm6,[ebx+480] 11798bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm7,[ebx+496] 11808bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+384],xmm0 11818bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+400],xmm1 11828bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+416],xmm2 11838bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+432],xmm3 11848bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+448],xmm4 11858bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+464],xmm5 11868bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+480],xmm6 11878bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+496],xmm7 11888bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 11898bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#else 11908bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int i; 11918bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float a,b; 11928bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(i=0;i<128;i++) 11938bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 11948bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling a=v1[i]; 11958bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling b=v2[i]; 11968bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if(a>=b) m[i]=a; 11978bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling else m[i]=b; 11988bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 11998bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#endif /*DB_USE_SIMD*/ 12008bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 12018bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 12028bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlinginline void db_MaxVector_128_SecondSourceDestAligned16_f(float *m,float *v1,float *v2) 
12038bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 12048bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#ifdef DB_USE_SIMD 12058bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling _asm 12068bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 12078bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mov eax,v1 12088bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mov ebx,v2 12098bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling mov ecx,m 12108bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 12118bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Chunk1*/ 12128bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movups xmm0,[eax] 12138bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movups xmm1,[eax+16] 12148bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movups xmm2,[eax+32] 12158bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movups xmm3,[eax+48] 12168bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movups xmm4,[eax+64] 12178bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movups xmm5,[eax+80] 12188bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movups xmm6,[eax+96] 12198bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movups xmm7,[eax+112] 12208bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,[ebx] 12218bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm1,[ebx+16] 12228bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm2,[ebx+32] 12238bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm3,[ebx+48] 12248bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm4,[ebx+64] 12258bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm5,[ebx+80] 12268bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm6,[ebx+96] 12278bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm7,[ebx+112] 12288bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha 
Haeberling movaps [ecx],xmm0 12298bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+16],xmm1 12308bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+32],xmm2 12318bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+48],xmm3 12328bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+64],xmm4 12338bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+80],xmm5 12348bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+96],xmm6 12358bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+112],xmm7 12368bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 12378bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Chunk2*/ 12388bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movups xmm0,[eax+128] 12398bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movups xmm1,[eax+144] 12408bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movups xmm2,[eax+160] 12418bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movups xmm3,[eax+176] 12428bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movups xmm4,[eax+192] 12438bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movups xmm5,[eax+208] 12448bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movups xmm6,[eax+224] 12458bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movups xmm7,[eax+240] 12468bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,[ebx+128] 12478bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm1,[ebx+144] 12488bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm2,[ebx+160] 12498bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm3,[ebx+176] 12508bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm4,[ebx+192] 12518bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm5,[ebx+208] 12528bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm6,[ebx+224] 
12538bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm7,[ebx+240] 12548bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+128],xmm0 12558bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+144],xmm1 12568bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+160],xmm2 12578bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+176],xmm3 12588bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+192],xmm4 12598bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+208],xmm5 12608bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+224],xmm6 12618bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+240],xmm7 12628bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 12638bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Chunk3*/ 12648bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movups xmm0,[eax+256] 12658bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movups xmm1,[eax+272] 12668bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movups xmm2,[eax+288] 12678bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movups xmm3,[eax+304] 12688bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movups xmm4,[eax+320] 12698bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movups xmm5,[eax+336] 12708bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movups xmm6,[eax+352] 12718bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movups xmm7,[eax+368] 12728bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,[ebx+256] 12738bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm1,[ebx+272] 12748bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm2,[ebx+288] 12758bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm3,[ebx+304] 12768bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm4,[ebx+320] 
12778bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm5,[ebx+336] 12788bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm6,[ebx+352] 12798bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm7,[ebx+368] 12808bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+256],xmm0 12818bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+272],xmm1 12828bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+288],xmm2 12838bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+304],xmm3 12848bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+320],xmm4 12858bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+336],xmm5 12868bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+352],xmm6 12878bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+368],xmm7 12888bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 12898bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Chunk4*/ 12908bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movups xmm0,[eax+384] 12918bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movups xmm1,[eax+400] 12928bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movups xmm2,[eax+416] 12938bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movups xmm3,[eax+432] 12948bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movups xmm4,[eax+448] 12958bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movups xmm5,[eax+464] 12968bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movups xmm6,[eax+480] 12978bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movups xmm7,[eax+496] 12988bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm0,[ebx+384] 12998bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm1,[ebx+400] 13008bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm2,[ebx+416] 
13018bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm3,[ebx+432] 13028bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm4,[ebx+448] 13038bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm5,[ebx+464] 13048bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm6,[ebx+480] 13058bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling maxps xmm7,[ebx+496] 13068bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+384],xmm0 13078bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+400],xmm1 13088bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+416],xmm2 13098bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+432],xmm3 13108bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+448],xmm4 13118bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+464],xmm5 13128bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+480],xmm6 13138bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling movaps [ecx+496],xmm7 13148bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 13158bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#else 13168bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int i; 13178bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float a,b; 13188bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(i=0;i<128;i++) 13198bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 13208bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling a=v1[i]; 13218bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling b=v2[i]; 13228bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if(a>=b) m[i]=a; 13238bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling else m[i]=b; 13248bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 13258bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#endif /*DB_USE_SIMD*/ 13268bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha 
Haeberling} 13278bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 13288bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling/*Compute Max-suppression-filtered image for a chunk of sf starting at (left,top), of width 124 and 13298bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingstopping at bottom. The output is shifted two steps left and overwrites 128 elements for each row. 13308bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha HaeberlingThe input s should be of width at least 128, and exist for 2 pixels outside the specified region. 13318bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlings[i][left-2] and sf[i][left-2] should be 16 byte aligned. Top must be at least 3*/ 13328bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlinginline void db_MaxSuppressFilterChunk_5x5_Aligned16_f(float **sf,float **s,int left,int top,int bottom, 13338bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*temp should point to at least 13348bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 6*132 floats of 16-byte-aligned allocated memory*/ 13358bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float *temp) 13368bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 13378bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#ifdef DB_USE_SIMD 13388bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int i,lm2; 13398bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float *two[4]; 13408bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float *four,*five; 13418bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 13428bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling lm2=left-2; 13438bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 13448bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Set pointers to pre-allocated memory*/ 13458bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling four=temp; 13468bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling five=four+132; 
13478bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(i=0;i<4;i++) 13488bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 13498bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling two[i]=five+(i+1)*132; 13508bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 13518bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 13528bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Set rests of four and five to zero to avoid 13538bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling floating point exceptions*/ 13548bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(i=129;i<132;i++) 13558bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 13568bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling four[i]=0.0; 13578bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling five[i]=0.0; 13588bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 13598bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 13608bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Fill three rows of the wrap-around max buffers*/ 13618bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(i=top-3;i<top;i++) db_MaxVector_128_Aligned16_f(two[i&3],s[i+1]+lm2,s[i+2]+lm2); 13628bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 13638bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*For each output row*/ 13648bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(;i<=bottom;i++) 13658bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 13668bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Compute max of the lowest pair of rows in the five row window*/ 13678bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling db_MaxVector_128_Aligned16_f(two[i&3],s[i+1]+lm2,s[i+2]+lm2); 13688bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Compute max of the lowest and highest pair of rows in the five row window*/ 13698bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 
db_MaxVector_128_Aligned16_f(four,two[i&3],two[(i-3)&3]); 13708bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Compute max of all rows*/ 13718bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling db_MaxVector_128_Aligned16_f(five,four,two[(i-1)&3]); 13728bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Compute max of 2x5 chunks*/ 13738bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling db_MaxVector_128_SecondSourceDestAligned16_f(five,five+1,five); 13748bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Compute max of pairs of 2x5 chunks*/ 13758bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling db_MaxVector_128_SecondSourceDestAligned16_f(five,five+3,five); 13768bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling /*Compute max of pairs of 5x5 except middle*/ 13778bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling db_MaxVector_128_SecondSourceDestAligned16_f(sf[i]+lm2,four+2,five); 13788bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 13798bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 13808bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#else 13818bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int i,j,right; 13828bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float sv; 13838bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 13848bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling right=left+128; 13858bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(i=top;i<=bottom;i++) for(j=left;j<right;j++) 13868bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 13878bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling sv=s[i][j]; 13888bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 13898bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if( sv>s[i-2][j-2] && sv>s[i-2][j-1] && sv>s[i-2][j] && sv>s[i-2][j+1] && sv>s[i-2][j+2] && 13908bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling sv>s[i-1][j-2] && sv>s[i-1][j-1] && 
sv>s[i-1][j] && sv>s[i-1][j+1] && sv>s[i-1][j+2] && 13918bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling sv>s[ i][j-2] && sv>s[ i][j-1] && sv>s[ i][j+1] && sv>s[ i][j+2] && 13928bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling sv>s[i+1][j-2] && sv>s[i+1][j-1] && sv>s[i+1][j] && sv>s[i+1][j+1] && sv>s[i+1][j+2] && 13938bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling sv>s[i+2][j-2] && sv>s[i+2][j-1] && sv>s[i+2][j] && sv>s[i+2][j+1] && sv>s[i+2][j+2]) 13948bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 13958bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling sf[i][j-2]=0.0; 13968bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 13978bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling else sf[i][j-2]=sv; 13988bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 13998bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#endif /*DB_USE_SIMD*/ 14008bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 14018bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 14028bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling/*Compute Max-suppression-filtered image for a chunk of sf starting at (left,top) and 14038bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingstopping at bottom. The output is shifted two steps left. The input s should exist for 2 pixels 14048bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingoutside the specified region. s[i][left-2] and sf[i][left-2] should be 16 byte aligned. 14058bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha HaeberlingTop must be at least 3. Reading and writing from and to the input and output images is done 14068bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingas if the region had a width equal to a multiple of 124. 
If this is not the case, the images 14078bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingshould be over-allocated and the input cleared for a sufficient region*/ 
void db_MaxSuppressFilter_5x5_Aligned16_f(float **sf,float **s,int left,int top,int right,int bottom,
                                      /*temp should point to at least
                                      6*132 floats of 16-byte-aligned allocated memory*/
                                      float *temp)
{
    /*Walk the region in vertical strips whose left edges are 124 columns
    apart, starting at left and continuing while the edge is <= right.
    Every strip covers rows top..bottom and reuses temp as scratch space*/
    int strip_left=left;

    while(strip_left<=right)
    {
        db_MaxSuppressFilterChunk_5x5_Aligned16_f(sf,s,strip_left,top,bottom,temp);
        strip_left+=124;
    }
}
14218bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 14228bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling/*Extract corners from the chunk (left,top) to (right,bottom). 
Store in x_temp,y_temp and s_temp 14238bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingwhich should point to space of at least as many positions as there are pixels in the chunk*/ 14248bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlinginline int db_CornersFromChunk(float **strength,int left,int top,int right,int bottom,float threshold,double *x_temp,double *y_temp,double *s_temp) 14258bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 14268bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int i,j,nr; 14278bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float s; 14288bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 14298bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling nr=0; 14308bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(i=top;i<=bottom;i++) for(j=left;j<=right;j++) 14318bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 14328bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling s=strength[i][j]; 14338bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 14348bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if(s>=threshold && 14358bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling s>strength[i-2][j-2] && s>strength[i-2][j-1] && s>strength[i-2][j] && s>strength[i-2][j+1] && s>strength[i-2][j+2] && 14368bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling s>strength[i-1][j-2] && s>strength[i-1][j-1] && s>strength[i-1][j] && s>strength[i-1][j+1] && s>strength[i-1][j+2] && 14378bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling s>strength[ i][j-2] && s>strength[ i][j-1] && s>strength[ i][j+1] && s>strength[ i][j+2] && 14388bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling s>strength[i+1][j-2] && s>strength[i+1][j-1] && s>strength[i+1][j] && s>strength[i+1][j+1] && s>strength[i+1][j+2] && 14398bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling s>strength[i+2][j-2] && s>strength[i+2][j-1] && s>strength[i+2][j] && s>strength[i+2][j+1] && 
s>strength[i+2][j+2]) 14408bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 14418bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling x_temp[nr]=(double) j; 14428bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling y_temp[nr]=(double) i; 14438bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling s_temp[nr]=(double) s; 14448bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling nr++; 14458bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 14468bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 14478bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling return(nr); 14488bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 14498bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 14508bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 14518bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling//Sub-pixel accuracy using 2D quadratic interpolation.(YCJ) 14528bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlinginline void db_SubPixel(float **strength, const double xd, const double yd, double &xs, double &ys) 14538bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 14548bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int x = (int) xd; 14558bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int y = (int) yd; 14568bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 14578bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float fxx = strength[y][x-1] - strength[y][x] - strength[y][x] + strength[y][x+1]; 14588bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float fyy = strength[y-1][x] - strength[y][x] - strength[y][x] + strength[y+1][x]; 14598bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float fxy = (strength[y-1][x-1] - strength[y-1][x+1] - strength[y+1][x-1] + strength[y+1][x+1])/(float)4.0; 14608bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 14618bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float denom = (fxx * fyy - fxy * fxy) * (float) 
2.0; 14628bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 14638bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling xs = xd; 14648bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling ys = yd; 14658bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 14668bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if ( db_absf(denom) <= FLT_EPSILON ) 14678bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 14688bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling return; 14698bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 14708bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling else 14718bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 14728bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float fx = strength[y][x+1] - strength[y][x-1]; 14738bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float fy = strength[y+1][x] - strength[y-1][x]; 14748bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 14758bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float dx = (fyy * fx - fxy * fy) / denom; 14768bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float dy = (fxx * fy - fxy * fx) / denom; 14778bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 14788bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if ( db_absf(dx) > 1.0 || db_absf(dy) > 1.0 ) 14798bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 14808bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling return; 14818bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 14828bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling else 14838bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 14848bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling xs -= dx; 14858bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling ys -= dy; 14868bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 14878bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 
14888bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 14898bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling return; 14908bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 14918bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 14928bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling/*Extract corners from the image part from (left,top) to (right,bottom). 14938bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha HaeberlingStore in x and y, extracting at most satnr corners in each block of size (bw,bh). 14948bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha HaeberlingThe pointer temp_d should point to at least 5*bw*bh positions. 14958bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingarea_factor holds how many corners max to extract per 10000 pixels*/ 14968bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingvoid db_ExtractCornersSaturated(float **strength,int left,int top,int right,int bottom, 14978bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int bw,int bh,unsigned long area_factor, 14988bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float threshold,double *temp_d, 14998bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling double *x_coord,double *y_coord,int *nr_corners) 15008bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 15018bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling double *x_temp,*y_temp,*s_temp,*select_temp; 15028bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling double loc_thresh; 15038bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling unsigned long bwbh,area,saturation; 15048bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int x,next_x,last_x; 15058bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int y,next_y,last_y; 15068bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int nr,nr_points,i,stop; 15078bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 15088bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling bwbh=bw*bh; 
15098bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling x_temp=temp_d; 15108bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling y_temp=x_temp+bwbh; 15118bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling s_temp=y_temp+bwbh; 15128bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling select_temp=s_temp+bwbh; 15138bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 15148bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#ifdef DB_SUB_PIXEL 15158bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling // subpixel processing may sometimes push the corner ourside the real border 15168bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling // increasing border size: 15178bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling left++; 15188bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling top++; 15198bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling bottom--; 15208bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling right--; 15218bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#endif /*DB_SUB_PIXEL*/ 15228bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 15238bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling nr_points=0; 15248bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(y=top;y<=bottom;y=next_y) 15258bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 15268bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling next_y=y+bh; 15278bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling last_y=next_y-1; 15288bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if(last_y>bottom) last_y=bottom; 15298bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(x=left;x<=right;x=next_x) 15308bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 15318bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling next_x=x+bw; 15328bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling last_x=next_x-1; 15338bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if(last_x>right) 
last_x=right; 15348bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 15358bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling area=(last_x-x+1)*(last_y-y+1); 15368bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling saturation=(area*area_factor)/10000; 15378bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling nr=db_CornersFromChunk(strength,x,y,last_x,last_y,threshold,x_temp,y_temp,s_temp); 15388bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if(nr) 15398bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 15408bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if(((unsigned long)nr)>saturation) loc_thresh=db_LeanQuickSelect(s_temp,nr,nr-saturation,select_temp); 15418bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling else loc_thresh=threshold; 15428bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 15438bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling stop=nr_points+saturation; 15448bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for(i=0;(i<nr)&&(nr_points<stop);i++) 15458bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 15468bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if(s_temp[i]>=loc_thresh) 15478bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 15488bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling #ifdef DB_SUB_PIXEL 15498bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling db_SubPixel(strength, x_temp[i], y_temp[i], x_coord[nr_points], y_coord[nr_points]); 15508bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling #else 15518bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling x_coord[nr_points]=x_temp[i]; 15528bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling y_coord[nr_points]=y_temp[i]; 15538bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling #endif 15548bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 15558bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling nr_points++; 
15568bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 15578bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 15588bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 15598bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 15608bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 15618bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling *nr_corners=nr_points; 15628bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 15638bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 15648bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingdb_CornerDetector_f::db_CornerDetector_f() 15658bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 15668bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling m_w=0; m_h=0; 15678bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 15688bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 15698bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingdb_CornerDetector_f::~db_CornerDetector_f() 15708bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 15718bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Clean(); 15728bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 15738bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 15748bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingvoid db_CornerDetector_f::Clean() 15758bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 15768bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if(m_w!=0) 15778bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 15788bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling delete [] m_temp_f; 15798bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling delete [] m_temp_d; 15808bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling db_FreeStrengthImage_f(m_strength_mem,m_strength,m_h); 15818bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 15828bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling m_w=0; m_h=0; 
15838bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 15848bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 15858bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingunsigned long db_CornerDetector_f::Init(int im_width,int im_height,int target_nr_corners, 15868bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int nr_horizontal_blocks,int nr_vertical_blocks, 15878bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling double absolute_threshold,double relative_threshold) 15888bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 15898bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int chunkwidth=208; 15908bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int block_width,block_height; 15918bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling unsigned long area_factor; 15928bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int active_width,active_height; 15938bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 15948bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling active_width=db_maxi(1,im_width-10); 15958bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling active_height=db_maxi(1,im_height-10); 15968bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling block_width=db_maxi(1,active_width/nr_horizontal_blocks); 15978bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling block_height=db_maxi(1,active_height/nr_vertical_blocks); 15988bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 15998bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling area_factor=db_minl(1000,db_maxl(1,(long)(10000.0*((double)target_nr_corners)/ 16008bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling (((double)active_width)*((double)active_height))))); 16018bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 16028bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling return(Start(im_width,im_height,block_width,block_height,area_factor, 16038bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 
absolute_threshold,relative_threshold,chunkwidth)); 16048bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 16058bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 16068bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingunsigned long db_CornerDetector_f::Start(int im_width,int im_height, 16078bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int block_width,int block_height,unsigned long area_factor, 16088bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling double absolute_threshold,double relative_threshold,int chunkwidth) 16098bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 16108bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Clean(); 16118bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 16128bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling m_w=im_width; 16138bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling m_h=im_height; 16148bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling m_cw=chunkwidth; 16158bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling m_bw=block_width; 16168bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling m_bh=block_height; 16178bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling m_area_factor=area_factor; 16188bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling m_r_thresh=relative_threshold; 16198bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling m_a_thresh=absolute_threshold; 16208bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling m_max_nr=db_maxl(1,1+(m_w*m_h*m_area_factor)/10000); 16218bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 16228bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling m_temp_f=new float[13*(m_cw+4)]; 16238bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling m_temp_d=new double[5*m_bw*m_bh]; 16248bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling m_strength=db_AllocStrengthImage_f(&m_strength_mem,m_w,m_h); 16258bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 
16268bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling return(m_max_nr); 16278bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 16288bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 16298bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingvoid db_CornerDetector_f::DetectCorners(const float * const *img,double *x_coord,double *y_coord,int *nr_corners) const 16308bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 16318bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float max_val,threshold; 16328bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 16338bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling db_HarrisStrength_f(m_strength,img,m_w,m_h,m_temp_f,m_cw); 16348bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 16358bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if(m_r_thresh) 16368bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 16378bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling max_val=db_MaxImage_Aligned16_f(m_strength,3,3,m_w-6,m_h-6); 16388bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling threshold= (float) db_maxd(m_a_thresh,max_val*m_r_thresh); 16398bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 16408bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling else threshold= (float) m_a_thresh; 16418bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 16428bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling db_ExtractCornersSaturated(m_strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,threshold, 16438bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling m_temp_d,x_coord,y_coord,nr_corners); 16448bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 16458bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 16468bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingdb_CornerDetector_u::db_CornerDetector_u() 16478bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 
16488bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling m_w=0; m_h=0; 16498bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 16508bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 16518bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingdb_CornerDetector_u::~db_CornerDetector_u() 16528bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 16538bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Clean(); 16548bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 16558bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 16568bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingdb_CornerDetector_u::db_CornerDetector_u(const db_CornerDetector_u& cd) 16578bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 16588bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Start(cd.m_w, cd.m_h, cd.m_bw, cd.m_bh, cd.m_area_factor, 16598bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling cd.m_a_thresh, cd.m_r_thresh); 16608bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 16618bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 16628bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingdb_CornerDetector_u& db_CornerDetector_u::operator=(const db_CornerDetector_u& cd) 16638bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 16648bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if ( this == &cd ) return *this; 16658bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 16668bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Clean(); 16678bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 16688bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Start(cd.m_w, cd.m_h, cd.m_bw, cd.m_bh, cd.m_area_factor, 16698bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling cd.m_a_thresh, cd.m_r_thresh); 16708bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 16718bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling return *this; 16728bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha 
Haeberling} 16738bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 16748bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingvoid db_CornerDetector_u::Clean() 16758bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 16768bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if(m_w!=0) 16778bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 16788bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling delete [] m_temp_i; 16798bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling delete [] m_temp_d; 16808bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling db_FreeStrengthImage_f(m_strength_mem,m_strength,m_h); 16818bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 16828bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling m_w=0; m_h=0; 16838bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 16848bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 16858bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingunsigned long db_CornerDetector_u::Init(int im_width,int im_height,int target_nr_corners, 16868bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int nr_horizontal_blocks,int nr_vertical_blocks, 16878bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling double absolute_threshold,double relative_threshold) 16888bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 16898bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int block_width,block_height; 16908bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling unsigned long area_factor; 16918bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int active_width,active_height; 16928bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 16938bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling active_width=db_maxi(1,im_width-10); 16948bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling active_height=db_maxi(1,im_height-10); 16958bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 
block_width=db_maxi(1,active_width/nr_horizontal_blocks); 16968bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling block_height=db_maxi(1,active_height/nr_vertical_blocks); 16978bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 16988bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling area_factor=db_minl(1000,db_maxl(1,(long)(10000.0*((double)target_nr_corners)/ 16998bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling (((double)active_width)*((double)active_height))))); 17008bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 17018bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling return(Start(im_width,im_height,block_width,block_height,area_factor, 17028bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 16.0*absolute_threshold,relative_threshold)); 17038bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 17048bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 17058bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingunsigned long db_CornerDetector_u::Start(int im_width,int im_height, 17068bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int block_width,int block_height,unsigned long area_factor, 17078bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling double absolute_threshold,double relative_threshold) 17088bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 17098bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling Clean(); 17108bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 17118bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling m_w=im_width; 17128bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling m_h=im_height; 17138bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling m_bw=block_width; 17148bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling m_bh=block_height; 17158bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling m_area_factor=area_factor; 17168bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling m_r_thresh=relative_threshold; 
17178bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling m_a_thresh=absolute_threshold; 17188bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling m_max_nr=db_maxl(1,1+(m_w*m_h*m_area_factor)/10000); 17198bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 17208bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling m_temp_i=new int[18*128]; 17218bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling m_temp_d=new double[5*m_bw*m_bh]; 17228bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling m_strength=db_AllocStrengthImage_f(&m_strength_mem,m_w,m_h); 17238bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 17248bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling return(m_max_nr); 17258bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 17268bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 17278bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingvoid db_CornerDetector_u::DetectCorners(const unsigned char * const *img,double *x_coord,double *y_coord,int *nr_corners, 17288bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling const unsigned char * const *msk, unsigned char fgnd) const 17298bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{ 17308bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling float max_val,threshold; 17318bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 17328bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling db_HarrisStrength_u(m_strength,img,m_w,m_h,m_temp_i); 17338bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 17348bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 17358bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if(m_r_thresh) 17368bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 17378bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling max_val=db_MaxImage_Aligned16_f(m_strength,3,3,m_w-6,m_h-6); 17388bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling threshold= (float) db_maxd(m_a_thresh,max_val*m_r_thresh); 
17398bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 17408bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling else threshold= (float) m_a_thresh; 17418bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 17428bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling db_ExtractCornersSaturated(m_strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,threshold, 17438bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling m_temp_d,x_coord,y_coord,nr_corners); 17448bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 17458bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 17468bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if ( msk ) 17478bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 17488bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int nr_corners_mask=0; 17498bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 17508bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling for ( int i = 0; i < *nr_corners; ++i) 17518bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 17528bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int cor_x = db_roundi(*(x_coord+i)); 17538bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling int cor_y = db_roundi(*(y_coord+i)); 17548bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if ( msk[cor_y][cor_x] == fgnd ) 17558bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling { 17568bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling x_coord[nr_corners_mask] = x_coord[i]; 17578bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling y_coord[nr_corners_mask] = y_coord[i]; 17588bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling nr_corners_mask++; 17598bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 17608bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 17618bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling *nr_corners = nr_corners_mask; 17628bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling } 
17638bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 17648bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 17658bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingvoid db_CornerDetector_u::ExtractCorners(float ** strength, double *x_coord, double *y_coord, int *nr_corners) { 17668bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling if ( m_w!=0 ) 17678bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling db_ExtractCornersSaturated(strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,float(m_a_thresh), 17688bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling m_temp_d,x_coord,y_coord,nr_corners); 17698bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling} 17708bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling 1771