1e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen/*
2e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen * Copyright (C) 2011 The Android Open Source Project
3e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen *
4e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen * Licensed under the Apache License, Version 2.0 (the "License");
5e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen * you may not use this file except in compliance with the License.
6e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen * You may obtain a copy of the License at
7e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen *
8e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen *      http://www.apache.org/licenses/LICENSE-2.0
9e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen *
10e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen * Unless required by applicable law or agreed to in writing, software
11e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen * distributed under the License is distributed on an "AS IS" BASIS,
12e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen * See the License for the specific language governing permissions and
14e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen * limitations under the License.
15e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen */
16e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
17e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen/*$Id: db_feature_detection.cpp,v 1.4 2011/06/17 14:03:30 mbansal Exp $*/
18e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
19e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen/*****************************************************************
20e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen*    Lean and mean begins here                                   *
21e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen*****************************************************************/
22e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
23e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen#include "db_utilities.h"
24e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen#include "db_feature_detection.h"
25e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen#ifdef _VERBOSE_
26e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen#include <iostream>
27e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen#endif
28e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen#include <float.h>
29e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
30e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen#define DB_SUB_PIXEL
31e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
32e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen#define BORDER 10 // 5
33e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
34e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chenfloat** db_AllocStrengthImage_f(float **im,int w,int h)
35e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen{
36e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    int i,n,aw;
37e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    long c,size;
38e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    float **img,*aim,*p;
39e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
40e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    /*Determine number of 124 element chunks needed*/
41e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    n=(db_maxi(1,w-6)+123)/124;
42e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    /*Determine the total allocation width aw*/
43e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    aw=n*124+8;
44e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    /*Allocate*/
45e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    size=aw*h+16;
46e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    *im=new float [size];
47e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    /*Clean up*/
48e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    p=(*im);
49e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    for(c=0;c<size;c++) p[c]=0.0;
50e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    /*Get a 16 byte aligned pointer*/
51e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    aim=db_AlignPointer_f(*im,16);
52e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    /*Allocate pointer table*/
53e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    img=new float* [h];
54e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    /*Initialize the pointer table*/
55e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    for(i=0;i<h;i++)
56e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    {
57e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        img[i]=aim+aw*i+1;
58e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    }
59e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
60e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    return(img);
61e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen}
62e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
/*Release a strength image.
 im  - raw float buffer (array form of delete is used)
 img - table of row pointers (array form of delete is used)
 h   - number of rows; not needed for the release and ignored*/
void db_FreeStrengthImage_f(float *im,float **img,int h)
{
    (void)h;
    /*The two arrays are independent allocations; order does not matter*/
    delete [] img;
    delete [] im;
}
68e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
/*Compute the derivatives Ix,Iy for chunk_width pixels of row i of img,
starting at column j.
 Ix[c] = img[i][j+c-1] - img[i][j+c+1]   (left minus right)
 Iy[c] = img[i-1][j+c] - img[i+1][j+c]   (upper minus lower)
Rows i-1 and i+1 and columns j-1 and j+chunk_width are read, i.e. memory
references occur one pixel outside the subrow*/
inline void db_IxIyRow_f(float *Ix,float *Iy,const float * const *img,int i,int j,int chunk_width)
{
    /*Nothing to do for an empty subrow; do not touch img at all*/
    if(chunk_width<=0) return;

    const float *upper=img[i-1]+j;
    const float *center=img[i]+j;
    const float *lower=img[i+1]+j;

    for(int k=0;k<chunk_width;k++)
    {
        Ix[k]=center[k-1]-center[k+1];
        Iy[k]=upper[k]-lower[k];
    }
}
81e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
/*Compute products of halved image derivatives for a subrow of the 8-bit
image img at row i, starting at column j, and store them in dxx as three
planes of 128 ints each: dxx[0..], dxx[128..] and dxx[256..]
(byte offsets 0, 512 and 1024).
Memory references occur one pixel outside the subrow.
Portable path: processes nc pixels and stores Ix*Ix, Ix*Iy, Iy*Iy where
 Ix=(img[i][j+c-1]-img[i][j+c+1])>>1 and Iy=(img[i-1][j+c]-img[i+1][j+c])>>1.
MMX path: always processes 128 pixels (16 iterations of 8 bytes) and
ignores nc.
NOTE(review): the MMX path halves each operand before subtracting, and it
forms its first difference from rows i-1/i+1 and its second from columns
j-1/j+1, so planes 0 and 2 hold the opposite squares compared with the
portable path - confirm that callers are insensitive to this*/
inline void db_IxIyRow_u(int *dxx,const unsigned char * const *img,int i,int j,int nc)
{
#ifdef DB_USE_MMX
    const unsigned char *r1,*r2,*r3;

    r1=img[i-1]+j; r2=img[i]+j; r3=img[i+1]+j;

    _asm
    {
        mov esi,16
        mov eax,r1
        mov ebx,r2
        mov ecx,r3
        mov edx,dxx

        /*Get bitmask into mm7*/
        mov       edi,7F7F7F7Fh
        movd      mm7,edi
        punpckldq mm7,mm7

loopstart:
        /***************dx part 1-12*********************************/
        movq       mm0,[eax]       /*1 Get upper*/
         pxor      mm6,mm6         /*2 Set to zero*/
        movq       mm1,[ecx]       /*3 Get lower*/
         psrlq     mm0,1           /*4 Shift*/
        psrlq      mm1,1           /*5 Shift*/
         pand      mm0,mm7         /*6 And*/
        movq       mm2,[ebx-1]     /*13 Get left*/
         pand      mm1,mm7         /*7 And*/
        psubb      mm0,mm1         /*8 Subtract*/
         pxor      mm5,mm5         /*14 Set to zero*/
        movq       mm1,mm0         /*9 Copy*/
         pcmpgtb   mm6,mm0         /*10 Create unpack mask*/
        movq       mm3,[ebx+1]     /*15 Get right*/
         punpcklbw mm0,mm6         /*11 Unpack low*/
        punpckhbw  mm1,mm6         /*12 Unpack high*/
        /***************dy part 13-24*********************************/
         movq      mm4,mm0         /*25 Copy dx*/
        psrlq      mm2,1           /*16 Shift*/
         pmullw    mm0,mm0         /*26 Multiply dx*dx*/
        psrlq      mm3,1           /*17 Shift*/
         pand      mm2,mm7         /*18 And*/
        pand       mm3,mm7         /*19 And*/
         /*Stall*/
        psubb      mm2,mm3         /*20 Subtract*/
         /*Stall*/
        movq       mm3,mm2         /*21 Copy*/
         pcmpgtb   mm5,mm2         /*22 Create unpack mask*/
        punpcklbw  mm2,mm5         /*23 Unpack low*/
         /*Stall*/
        punpckhbw  mm3,mm5         /*24 Unpack high*/
        /***************dxx dxy dyy low part 25-49*********************************/
         pmullw    mm4,mm2         /*27 Multiply dx*dy*/
        pmullw     mm2,mm2         /*28 Multiply dy*dy*/
         pxor      mm6,mm6         /*29 Set to zero*/
        movq       mm5,mm0         /*30 Copy dx*dx*/
         pcmpgtw   mm6,mm0         /*31 Create unpack mask for dx*dx*/
        punpcklwd  mm0,mm6         /*32 Unpack dx*dx lows*/
         /*Stall*/
        punpckhwd  mm5,mm6         /*33 Unpack dx*dx highs*/
         pxor      mm6,mm6         /*36 Set to zero*/
        movq       [edx],mm0       /*34 Store dx*dx lows*/
         movq      mm0,mm4         /*37 Copy dx*dy*/
        movq       [edx+8],mm5     /*35 Store dx*dx highs*/
         pcmpgtw   mm6,mm4         /*38 Create unpack mask for dx*dy*/
        punpcklwd  mm4,mm6         /*39 Unpack dx*dy lows*/
         /*Stall*/
        punpckhwd  mm0,mm6         /*40 Unpack dx*dy highs*/
         pxor      mm6,mm6         /*43 Set to zero*/
        movq       [edx+512],mm4   /*41 Store dx*dy lows*/
         movq      mm5,mm2         /*44 Copy dy*dy*/
        movq       [edx+520],mm0   /*42 Store dx*dy highs*/
         pcmpgtw   mm6,mm2         /*45 Create unpack mask for dy*dy*/
        punpcklwd  mm2,mm6         /*46 Unpack dy*dy lows*/
         movq      mm4,mm1         /*50 Copy dx*/
        punpckhwd  mm5,mm6         /*47 Unpack dy*dy highs*/
         pmullw    mm1,mm1         /*51 Multiply dx*dx*/
        movq       [edx+1024],mm2  /*48 Store dy*dy lows*/
         pmullw    mm4,mm3         /*52 Multiply dx*dy*/
        movq       [edx+1032],mm5  /*49 Store dy*dy highs*/
        /***************dxx dxy dyy high part 50-79*********************************/
         pmullw    mm3,mm3         /*53 Multiply dy*dy*/
        pxor       mm6,mm6         /*54 Set to zero*/
         movq      mm5,mm1         /*55 Copy dx*dx*/
        pcmpgtw    mm6,mm1         /*56 Create unpack mask for dx*dx*/
         pxor      mm2,mm2         /*61 Set to zero*/
        punpcklwd  mm1,mm6         /*57 Unpack dx*dx lows*/
         movq      mm0,mm4         /*62 Copy dx*dy*/
        punpckhwd  mm5,mm6         /*58 Unpack dx*dx highs*/
         pcmpgtw   mm2,mm4         /*63 Create unpack mask for dx*dy*/
        movq       [edx+16],mm1    /*59 Store dx*dx lows*/
         punpcklwd mm4,mm2         /*64 Unpack dx*dy lows*/
        movq       [edx+24],mm5    /*60 Store dx*dx highs*/
         punpckhwd mm0,mm2         /*65 Unpack dx*dy highs*/
        movq       [edx+528],mm4   /*66 Store dx*dy lows*/
         pxor      mm6,mm6         /*68 Set to zero*/
        movq       [edx+536],mm0   /*67 Store dx*dy highs*/
         movq      mm5,mm3         /*69 Copy dy*dy*/
        pcmpgtw    mm6,mm3         /*70 Create unpack mask for dy*dy*/
         add       eax,8           /*75*/
        punpcklwd  mm3,mm6         /*71 Unpack dy*dy lows*/
         add       ebx,8           /*76*/
        punpckhwd  mm5,mm6         /*72 Unpack dy*dy highs*/
         add       ecx,8           /*77*/
        movq       [edx+1040],mm3  /*73 Store dy*dy lows*/
         /*Stall*/
        movq       [edx+1048],mm5  /*74 Store dy*dy highs*/
         /*Stall*/
        add        edx,32          /*78*/
         dec esi                   /*79*/
        jnz loopstart

        emms
    }

#else
    /*Nothing to do for an empty subrow; do not touch img at all*/
    if(nc<=0) return;

    const unsigned char *upper=img[i-1]+j;
    const unsigned char *center=img[i]+j;
    const unsigned char *lower=img[i+1]+j;

    /*The three output planes are 128 ints apart*/
    int *plane_xx=dxx;
    int *plane_xy=dxx+128;
    int *plane_yy=dxx+256;

    for(int k=0;k<nc;k++)
    {
        /*Pixels are promoted to int before subtracting, so the
        differences are signed; each is then halved with a right shift*/
        const int gx=(center[k-1]-center[k+1])>>1;
        const int gy=(upper[k]-lower[k])>>1;

        plane_xx[k]=gx*gx;
        plane_xy[k]=gx*gy;
        plane_yy[k]=gy*gy;
    }
#endif /*DB_USE_MMX*/
}
214e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
/*Filter vertically five rows of derivatives of length chunk_width into
gxx,gxy,gyy.
 Ix0..Ix4, Iy0..Iy4 - the x and y derivative rows, in filter order
 gxx,gxy,gyy        - outputs, chunk_width floats each
For every column the products Ix*Ix, Ix*Iy and Iy*Iy of the five rows are
combined with the weights 1,4,6,4,1*/
inline void db_gxx_gxy_gyy_row_f(float *gxx,float *gxy,float *gyy,int chunk_width,
                                 float *Ix0,float *Ix1,float *Ix2,float *Ix3,float *Ix4,
                                 float *Iy0,float *Iy1,float *Iy2,float *Iy3,float *Iy4)
{
    const float * const rows_x[5]={Ix0,Ix1,Ix2,Ix3,Ix4};
    const float * const rows_y[5]={Iy0,Iy1,Iy2,Iy3,Iy4};
    float xx[5],xy[5],yy[5];

    for(int col=0;col<chunk_width;col++)
    {
        /*Form the three products for each of the five rows*/
        for(int r=0;r<5;r++)
        {
            const float dx=rows_x[r][col];
            const float dy=rows_y[r][col];
            xx[r]=dx*dx;
            xy[r]=dx*dy;
            yy[r]=dy*dy;
        }

        /*Filter vertically*/
        gxx[col]=xx[0]+xx[1]*4.0f+xx[2]*6.0f+xx[3]*4.0f+xx[4];
        gxy[col]=xy[0]+xy[1]*4.0f+xy[2]*6.0f+xy[3]*4.0f+xy[4];
        gyy[col]=yy[0]+yy[1]*4.0f+yy[2]*6.0f+yy[3]*4.0f+yy[4];
    }
}
262e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
/*Filter vertically five rows of integer derivative products into g with
the weights 1,4,6,4,1.
 g      - output, three planes at int offsets 0, 128 and 256
 d0..d4 - the five input rows, each with the same three-plane layout
 nc     - number of columns to process in the portable path
MMX path: always processes 128 entries per plane (64 iterations of two
ints) and ignores nc.
Portable path: processes nc entries per plane. The weights are applied
with multiplications rather than left shifts because the inputs can be
negative, and left-shifting a negative value is undefined behaviour
before C++20*/
inline void db_gxx_gxy_gyy_row_s(int *g,int *d0,int *d1,int *d2,int *d3,int *d4,int nc)
{
#ifdef DB_USE_MMX
    int c;

    _asm
    {
        mov c,64
        mov eax,d0
        mov ebx,d1
        mov ecx,d2
        mov edx,d3
        mov edi,d4
        mov esi,g

loopstart:
        /***************dxx part 1-14*********************************/
        movq        mm0,[eax]      /*1 Get dxx0*/
         /*Stall*/
        movq        mm1,[ebx]      /*2 Get dxx1*/
         /*Stall*/
        movq        mm2,[ecx]      /*5 Get dxx2*/
         pslld      mm1,2          /*3 Shift dxx1*/
        movq        mm3,[edx]      /*10 Get dxx3*/
         paddd      mm0,mm1        /*4 Accumulate dxx1*/
        movq        mm4,[eax+512]  /*15 Get dxy0*/
         pslld      mm2,1          /*6 Shift dxx2 1*/
        paddd       mm0,mm2        /*7 Accumulate dxx2 1*/
         pslld      mm2,1          /*8 Shift dxx2 2*/
        movq        mm5,[ebx+512]  /*16 Get dxy1*/
         paddd      mm0,mm2        /*9 Accumulate dxx2 2*/
        pslld       mm3,2          /*11 Shift dxx3*/
         /*Stall*/
        paddd       mm0,mm3        /*12 Accumulate dxx3*/
         pslld      mm5,2          /*17 Shift dxy1*/
        paddd       mm0,[edi]      /*13 Accumulate dxx4*/
         paddd      mm4,mm5        /*18 Accumulate dxy1*/
        movq        mm6,[ecx+512]  /*19 Get dxy2*/
         /*Stall*/
        movq        [esi],mm0      /*14 Store dxx sums*/
        /***************dxy part 15-28*********************************/
         pslld      mm6,1          /*20 Shift dxy2 1*/
        paddd       mm4,mm6        /*21 Accumulate dxy2 1*/
         pslld      mm6,1          /*22 Shift dxy2 2*/
        movq        mm0,[eax+1024] /*29 Get dyy0*/
         paddd      mm4,mm6        /*23 Accumulate dxy2 2*/
        movq        mm7,[edx+512]  /*24 Get dxy3*/
         pslld      mm7,2          /*25 Shift dxy3*/
        movq        mm1,[ebx+1024] /*30 Get dyy1*/
         paddd      mm4,mm7        /*26 Accumulate dxy3*/
        paddd       mm4,[edi+512]  /*27 Accumulate dxy4*/
         pslld      mm1,2          /*31 Shift dyy1*/
        movq        mm2,[ecx+1024] /*33 Get dyy2*/
         paddd      mm0,mm1        /*32 Accumulate dyy1*/
        movq        [esi+512],mm4  /*28 Store dxy sums*/
         pslld      mm2,1          /*34 Shift dyy2 1*/
        /***************dyy part 29-49*********************************/


        movq        mm3,[edx+1024] /*38 Get dyy3*/
         paddd      mm0,mm2        /*35 Accumulate dyy2 1*/
        paddd       mm0,[edi+1024] /*41 Accumulate dyy4*/
         pslld      mm2,1          /*36 Shift dyy2 2*/
        paddd       mm0,mm2        /*37 Accumulate dyy2 2*/
         pslld      mm3,2          /*39 Shift dyy3*/
        paddd       mm0,mm3        /*40 Accumulate dyy3*/
         add        eax,8           /*43*/
        add         ebx,8           /*44*/
         add        ecx,8           /*45*/
        movq        [esi+1024],mm0 /*42 Store dyy sums*/
         /*Stall*/
        add         edx,8           /*46*/
         add        edi,8           /*47*/
        add         esi,8           /*48*/
         dec        c               /*49*/
        jnz         loopstart

        emms
    }

#else
    int c,plane,k;

    for(c=0;c<nc;c++)
    {
        /*Visit the three planes (offsets 0, 128, 256) of this column in
        the same order as before*/
        for(plane=0;plane<3;plane++)
        {
            k=c+plane*128;
            /*Filter vertically; 4 and 6 replace (<<2) and (<<2)+(<<1)*/
            g[k]=d0[k]+d1[k]*4+d2[k]*6+d3[k]*4+d4[k];
        }
    }
#endif /*DB_USE_MMX*/
}
361e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
/*Smooth the three rows gxx,gxy,gyy horizontally with the kernel 1,4,6,4,1 and write
the Harris corner strength det(G)-k*trace(G)^2 (k=0.06) into s[i][j..j+chunk_width-1].
gxx,gxy and gyy are assumed to be four pixels wider than chunk_width (chunk_width+4
values are read from each) and to start at (i,j-2).
Nothing is written when chunk_width is zero or negative*/
inline void db_HarrisStrength_row_f(float **s,float *gxx,float *gxy,float *gyy,int i,int j,int chunk_width)
{
    /*Same Harris constant as used by db_HarrisStrength_row_s*/
    const float k=0.06f;
    float Gxx,Gxy,Gyy,det,trc;
    int c;

    for(c=0;c<chunk_width;c++)
    {
        /*Five-tap 1,4,6,4,1 sums; output column c is centred on input column c+2*/
        Gxx=gxx[c]+gxx[c+1]*4.0f+gxx[c+2]*6.0f+gxx[c+3]*4.0f+gxx[c+4];
        Gxy=gxy[c]+gxy[c+1]*4.0f+gxy[c+2]*6.0f+gxy[c+3]*4.0f+gxy[c+4];
        Gyy=gyy[c]+gyy[c+1]*4.0f+gyy[c+2]*6.0f+gyy[c+3]*4.0f+gyy[c+4];

        /*Determinant and trace of the 2x2 matrix [Gxx Gxy; Gxy Gyy]*/
        det=Gxx*Gyy-Gxy*Gxy;
        trc=Gxx+Gyy;
        s[i][j+c]=det-k*trc*trc;
    }
}
381e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
/*Filter the integer row g in place with the kernel 1,4,6,4,1. Output element c is
computed from input elements c..c+4, so the result is shifted two steps relative to
the kernel centre and the last four elements of g are left unfiltered.

Portable path: filters the first nc-4 elements (124 for nc=128); does nothing for nc<=4.
MMX path (DB_USE_MMX): ignores nc and always runs 31 loop iterations, each loading and
storing two quad-words (four ints), i.e. it reads 128 ints and writes 124 results*/
inline void db_Filter14641_128_i(int *g,int nc)
{
#ifdef DB_USE_MMX
    int mask;

    mask=0xFFFFFFFF;
    _asm
    {
        mov esi,31
        mov eax,g

        /*Get bitmask 00000000FFFFFFFF into mm7*/
        movd mm7,mask

        /*mm0 and mm1 are not initialized before the warm-up; whatever is accumulated
        on top of their initial contents is overwritten by the "Shift along" moves of
        the two warming iterations before the loop stores anything*/

        /*Warming iteration one 1-16********************/
        movq       mm6,[eax]      /*1 Load new data*/
        paddd      mm0,mm6        /*2 Add 1* behind two steps*/
        movq       mm2,mm6        /*3 Start with 1* in front two steps*/
        pslld      mm6,1          /*4*/
        paddd      mm1,mm6        /*5 Add 2* same place*/
        pslld      mm6,1          /*6*/
        paddd      mm1,mm6        /*7 Add 4* same place*/
        pshufw     mm6,mm6,4Eh    /*8 Swap the two double-words using bitmask 01001110=4Eh*/
        paddd      mm1,mm6        /*9 Add 4* swapped*/
        movq       mm5,mm6        /*10 Copy*/
        pand       mm6,mm7        /*11 Get low double-word only*/
        paddd      mm2,mm6        /*12 Add 4* in front one step*/
        pxor       mm6,mm5        /*13 Get high double-word only*/
        paddd      mm0,mm6        /*14 Add 4* behind one step*/
        movq       mm0,mm1        /*15 Shift along*/
        movq       mm1,mm2        /*16 Shift along*/
        /*Warming iteration two 17-32********************/
        movq       mm4,[eax+8]    /*17 Load new data*/
        paddd      mm0,mm4        /*18 Add 1* behind two steps*/
        movq       mm2,mm4        /*19 Start with 1* in front two steps*/
        pslld      mm4,1          /*20*/
        paddd      mm1,mm4        /*21 Add 2* same place*/
        pslld      mm4,1          /*22*/
        paddd      mm1,mm4        /*23 Add 4* same place*/
        pshufw     mm4,mm4,4Eh    /*24 Swap the two double-words using bitmask 01001110=4Eh*/
        paddd      mm1,mm4        /*25 Add 4* swapped*/
        movq       mm3,mm4        /*26 Copy*/
        pand       mm4,mm7        /*27 Get low double-word only*/
        paddd      mm2,mm4        /*28 Add 4* in front one step*/
        pxor       mm4,mm3        /*29 Get high double-word only*/
        paddd      mm0,mm4        /*30 Add 4* behind one step*/
        movq       mm0,mm1        /*31 Shift along*/
        movq       mm1,mm2        /*32 Shift along*/

        /*Loop********************/
loopstart:
        /*First part of loop 33-47********/
        movq        mm6,[eax+16]   /*33 Load new data*/
         /*Stall*/
        paddd       mm0,mm6        /*34 Add 1* behind two steps*/
         movq       mm2,mm6        /*35 Start with 1* in front two steps*/
        movq        mm4,[eax+24]   /*48 Load new data*/
         pslld      mm6,1          /*36*/
        paddd       mm1,mm6        /*37 Add 2* same place*/
         pslld      mm6,1          /*38*/
        paddd       mm1,mm6        /*39 Add 4* same place*/
         pshufw     mm6,mm6,4Eh    /*40 Swap the two double-words using bitmask 01001110=4Eh*/
        paddd       mm1,mm4        /*49 Add 1* behind two steps*/
         movq       mm5,mm6        /*41 Copy*/
        paddd       mm1,mm6        /*42 Add 4* swapped*/
         pand       mm6,mm7        /*43 Get low double-word only*/
        paddd       mm2,mm6        /*44 Add 4* in front one step*/
         pxor       mm6,mm5        /*45 Get high double-word only*/
        paddd       mm0,mm6        /*46 Add 4* behind one step*/
         movq       mm6,mm4        /*50a Copy*/
        pslld       mm4,1          /*51*/
         /*Stall*/
        movq        [eax],mm0      /*47 Store result two steps behind*/
        /*Second part of loop 48-66********/
         movq       mm0,mm6        /*50b Start with 1* in front two steps*/
        paddd       mm2,mm4        /*52 Add 2* same place*/
         pslld      mm4,1          /*53*/
        paddd       mm2,mm4        /*54 Add 4* same place*/
         pshufw     mm4,mm4,4Eh    /*55 Swap the two double-words using bitmask 01001110=4Eh*/
        paddd       mm2,mm4        /*56 Add 4* swapped*/
         movq       mm3,mm4        /*57 Copy*/
        pand        mm4,mm7        /*58 Get low double-word only*/
         /*Stall*/
        paddd       mm0,mm4        /*59 Add 4* in front one step*/
         pxor       mm4,mm3        /*60 Get high double-word only*/
        paddd       mm1,mm4        /*61 Add 4* behind one step*/
         add        eax,16         /*65*/
        dec         esi            /*66*/
         /*Stall*/
        movq        [eax-8],mm1    /*62 Store result two steps behind*/
         movq       mm1,mm0        /*63 Shift along*/
        movq        mm0,mm2        /*64 Shift along*/
        jnz loopstart

        /*Leave MMX state so that x87 floating point can be used again*/
        emms
    }

#else
    int c;

    for(c=0;c<nc-4;c++)
    {
        /*Plain multiplications rather than left shifts: left-shifting a negative
        signed value is undefined behaviour before C++20, and the weights 4 and 6
        are easier to recognise this way. g[c] is overwritten only after g[c..c+4]
        have been read, so filtering in place is safe*/
        g[c]=g[c]+4*g[c+1]+6*g[c+2]+4*g[c+3]+g[c+4];
    }
#endif /*DB_USE_MMX*/
}
490e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
491e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen/*Filter horizontally the three rows gxx,gxy,gyy of length 128 into the strength subrow s
492e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chenof length 124. gxx,gxy and gyy are assumed to be starting at (i,j-2) if s[i][j] is sought.
493e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chens should be 16 byte aligned*/
494e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Cheninline void db_HarrisStrength_row_s(float *s,int *gxx,int *gxy,int *gyy,int nc)
495e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen{
496e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    float k;
497e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
498e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    k=0.06f;
499e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
500e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    db_Filter14641_128_i(gxx,nc);
501e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    db_Filter14641_128_i(gxy,nc);
502e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    db_Filter14641_128_i(gyy,nc);
503e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
504e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen#ifdef DB_USE_SIMD
505e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
506e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
507e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    _asm
508e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    {
509e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        mov esi,15
510e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        mov eax,gxx
511e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        mov ebx,gxy
512e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        mov ecx,gyy
513e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        mov edx,s
514e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
515e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        /*broadcast k to all positions of xmm7*/
516e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movss   xmm7,k
517e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        shufps  xmm7,xmm7,0
518e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
519e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        /*****Warm up 1-10**************************************/
520e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        cvtpi2ps  xmm0,[eax+8] /*1 Convert two integers into floating point of low double-word*/
521e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen         /*Stall*/
522e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        cvtpi2ps  xmm1,[ebx+8] /*4 Convert two integers into floating point of low double-word*/
523e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen         movlhps  xmm0,xmm0    /*2 Move them to the high double-word*/
524e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        cvtpi2ps  xmm2,[ecx+8] /*7 Convert two integers into floating point of low double-word*/
525e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen         movlhps  xmm1,xmm1    /*5 Move them to the high double-word*/
526e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        cvtpi2ps  xmm0,[eax]   /*3 Convert two integers into floating point of low double-word*/
527e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen         movlhps  xmm2,xmm2    /*8 Move them to the high double-word*/
528e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        cvtpi2ps  xmm1,[ebx]   /*6 Convert two integers into floating point of low double-word*/
529e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen         movaps   xmm3,xmm0    /*10 Copy Cxx*/
530e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        cvtpi2ps  xmm2,[ecx]   /*9 Convert two integers into floating point of low double-word*/
531e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen         /*Stall*/
532e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chenloopstart:
533e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        /*****First part of loop 11-18***********************/
534e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        mulps     xmm0,xmm2     /*11 Multiply to get Gxx*Gyy*/
535e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen         addps    xmm2,xmm3     /*12 Add to get Gxx+Gyy*/
536e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        cvtpi2ps  xmm4,[eax+24] /*19 Convert two integers into floating point of low double-word*/
537e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen         mulps    xmm1,xmm1     /*13 Multiply to get Gxy*Gxy*/
538e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        mulps     xmm2,xmm2     /*14 Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/
539e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen         movlhps  xmm4,xmm4     /*20 Move them to the high double-word*/
540e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        cvtpi2ps  xmm4,[eax+16] /*21 Convert two integers into floating point of low double-word*/
541e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen         /*Stall*/
542e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        subps     xmm0,xmm1     /*15 Subtract to get Gxx*Gyy-Gxy*Gxy*/
543e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen         mulps    xmm2,xmm7     /*16 Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/
544e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        cvtpi2ps  xmm5,[ebx+24] /*22 Convert two integers into floating point of low double-word*/
545e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen         /*Stall*/
546e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movlhps   xmm5,xmm5     /*23 Move them to the high double-word*/
547e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen         /*Stall*/
548e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        cvtpi2ps  xmm5,[ebx+16] /*24 Convert two integers into floating point of low double-word*/
549e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen         subps    xmm0,xmm2     /*17 Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/
550e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        cvtpi2ps  xmm6,[ecx+24] /*25 Convert two integers into floating point of low double-word*/
551e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen         /*Stall*/
552e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps    [edx],xmm0    /*18 Store*/
553e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        /*****Second part of loop 26-40***********************/
554e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen         movlhps  xmm6,xmm6     /*26 Move them to the high double-word*/
555e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        cvtpi2ps  xmm6,[ecx+16] /*27 Convert two integers into floating point of low double-word*/
556e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen         movaps   xmm3,xmm4     /*28 Copy Cxx*/
557e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        mulps     xmm4,xmm6     /*29 Multiply to get Gxx*Gyy*/
558e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen         addps    xmm6,xmm3     /*30 Add to get Gxx+Gyy*/
559e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        cvtpi2ps  xmm0,[eax+40] /*(1 Next) Convert two integers into floating point of low double-word*/
560e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen         mulps    xmm5,xmm5     /*31 Multiply to get Gxy*Gxy*/
561e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        cvtpi2ps  xmm1,[ebx+40] /*(4 Next) Convert two integers into floating point of low double-word*/
562e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen         mulps    xmm6,xmm6     /*32 Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/
563e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        cvtpi2ps  xmm2,[ecx+40] /*(7 Next) Convert two integers into floating point of low double-word*/
564e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen         movlhps  xmm0,xmm0     /*(2 Next) Move them to the high double-word*/
565e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        subps     xmm4,xmm5     /*33 Subtract to get Gxx*Gyy-Gxy*Gxy*/
566e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen         movlhps  xmm1,xmm1     /*(5 Next) Move them to the high double-word*/
567e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        cvtpi2ps  xmm0,[eax+32] /*(3 Next)Convert two integers into floating point of low double-word*/
568e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen         mulps    xmm6,xmm7     /*34 Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/
569e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        cvtpi2ps  xmm1,[ebx+32] /*(6 Next) Convert two integers into floating point of low double-word*/
570e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen         movlhps  xmm2,xmm2     /*(8 Next) Move them to the high double-word*/
571e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps    xmm3,xmm0     /*(10 Next) Copy Cxx*/
572e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen         add      eax,32        /*37*/
573e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        subps     xmm4,xmm6     /*35 Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/
574e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen         add      ebx,32        /*38*/
575e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        cvtpi2ps  xmm2,[ecx+32] /*(9 Next) Convert two integers into floating point of low double-word*/
576e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen         /*Stall*/
577e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps    [edx+16],xmm4 /*36 Store*/
578e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen         /*Stall*/
579e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        add       ecx,32        /*39*/
580e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen         add      edx,32        /*40*/
581e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        dec       esi           /*41*/
582e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        jnz loopstart
583e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
584e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        /****Cool down***************/
585e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        mulps    xmm0,xmm2    /*Multiply to get Gxx*Gyy*/
586e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        addps    xmm2,xmm3    /*Add to get Gxx+Gyy*/
587e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        mulps    xmm1,xmm1    /*Multiply to get Gxy*Gxy*/
588e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        mulps    xmm2,xmm2    /*Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/
589e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        subps    xmm0,xmm1    /*Subtract to get Gxx*Gyy-Gxy*Gxy*/
590e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        mulps    xmm2,xmm7    /*Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/
591e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        subps    xmm0,xmm2    /*Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/
592e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps   [edx],xmm0   /*Store*/
593e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    }
594e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
595e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen#else
596e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    float Gxx,Gxy,Gyy,det,trc;
597e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    int c;
598e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
599e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    //for(c=0;c<124;c++)
600e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    for(c=0;c<nc-4;c++)
601e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    {
602e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        Gxx=(float)gxx[c];
603e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        Gxy=(float)gxy[c];
604e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        Gyy=(float)gyy[c];
605e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
606e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        det=Gxx*Gyy-Gxy*Gxy;
607e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        trc=Gxx+Gyy;
608e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        s[c]=det-k*trc*trc;
609e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    }
610e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen#endif /*DB_USE_SIMD*/
611e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen}
612e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
613e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen/*Compute the Harris corner strength of the chunk [left,top,right,bottom] of img and
614e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chenstore it into the corresponding region of s. left and top have to be at least 3 and
615e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chenright and bottom have to be at most width-4,height-4*/
616e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Cheninline void db_HarrisStrengthChunk_f(float **s,const float * const *img,int left,int top,int right,int bottom,
617e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                                      /*temp should point to at least
618e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                                      13*(right-left+5) of allocated memory*/
619e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                                      float *temp)
620e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen{
621e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    float *Ix[5],*Iy[5];
622e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    float *gxx,*gxy,*gyy;
623e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    int i,chunk_width,chunk_width_p4;
624e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
625e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    chunk_width=right-left+1;
626e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    chunk_width_p4=chunk_width+4;
627e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    gxx=temp;
628e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    gxy=gxx+chunk_width_p4;
629e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    gyy=gxy+chunk_width_p4;
630e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    for(i=0;i<5;i++)
631e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    {
632e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        Ix[i]=gyy+chunk_width_p4+(2*i*chunk_width_p4);
633e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        Iy[i]=Ix[i]+chunk_width_p4;
634e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    }
635e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
636e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    /*Fill four rows of the wrap-around derivative buffers*/
637e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    for(i=top-2;i<top+2;i++) db_IxIyRow_f(Ix[i%5],Iy[i%5],img,i,left-2,chunk_width_p4);
638e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
639e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    /*For each output row*/
640e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    for(i=top;i<=bottom;i++)
641e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    {
642e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        /*Step the derivative buffers*/
643e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        db_IxIyRow_f(Ix[(i+2)%5],Iy[(i+2)%5],img,(i+2),left-2,chunk_width_p4);
644e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
645e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        /*Filter Ix2,IxIy,Iy2 vertically into gxx,gxy,gyy*/
646e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        db_gxx_gxy_gyy_row_f(gxx,gxy,gyy,chunk_width_p4,
647e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                                 Ix[(i-2)%5],Ix[(i-1)%5],Ix[i%5],Ix[(i+1)%5],Ix[(i+2)%5],
648e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                                 Iy[(i-2)%5],Iy[(i-1)%5],Iy[i%5],Iy[(i+1)%5],Iy[(i+2)%5]);
649e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
650e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        /*Filter gxx,gxy,gyy horizontally and compute corner response s*/
651e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        db_HarrisStrength_row_f(s,gxx,gxy,gyy,i,left,chunk_width);
652e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    }
653e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen}
654e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
655e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen/*Compute the Harris corner strength of the chunk [left,top,left+123,bottom] of img and
656e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chenstore it into the corresponding region of s. left and top have to be at least 3 and
657e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chenright and bottom have to be at most width-4,height-4. The left of the region in s should
658e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chenbe 16 byte aligned*/
659e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Cheninline void db_HarrisStrengthChunk_u(float **s,const unsigned char * const *img,int left,int top,int bottom,
660e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                                      /*temp should point to at least
661e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                                      18*128 of allocated memory*/
662e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                                      int *temp, int nc)
663e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen{
664e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    int *Ixx[5],*Ixy[5],*Iyy[5];
665e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    int *gxx,*gxy,*gyy;
666e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    int i;
667e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
668e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    gxx=temp;
669e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    gxy=gxx+128;
670e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    gyy=gxy+128;
671e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    for(i=0;i<5;i++)
672e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    {
673e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        Ixx[i]=gyy+(3*i+1)*128;
674e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        Ixy[i]=gyy+(3*i+2)*128;
675e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        Iyy[i]=gyy+(3*i+3)*128;
676e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    }
677e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
678e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    /*Fill four rows of the wrap-around derivative buffers*/
679e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    for(i=top-2;i<top+2;i++) db_IxIyRow_u(Ixx[i%5],img,i,left-2,nc);
680e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
681e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    /*For each output row*/
682e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    for(i=top;i<=bottom;i++)
683e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    {
684e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        /*Step the derivative buffers*/
685e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        db_IxIyRow_u(Ixx[(i+2)%5],img,(i+2),left-2,nc);
686e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
687e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        /*Filter Ix2,IxIy,Iy2 vertically into gxx,gxy,gyy*/
688e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        db_gxx_gxy_gyy_row_s(gxx,Ixx[(i-2)%5],Ixx[(i-1)%5],Ixx[i%5],Ixx[(i+1)%5],Ixx[(i+2)%5],nc);
689e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
690e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        /*Filter gxx,gxy,gyy horizontally and compute corner response s*/
691e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        db_HarrisStrength_row_s(s[i]+left,gxx,gxy,gyy,nc);
692e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    }
693e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
694e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen}
695e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
696e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen/*Compute Harris corner strength of img. Strength is returned for the region
697e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chenwith (3,3) as upper left and (w-4,h-4) as lower right, positioned in the
698e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chensame place in s. In other words,image should be at least 7 pixels wide and 7 pixels high
699e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chenfor a meaningful result*/
700e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chenvoid db_HarrisStrength_f(float **s,const float * const *img,int w,int h,
701e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                                    /*temp should point to at least
702e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                                    13*(chunk_width+4) of allocated memory*/
703e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                                    float *temp,
704e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                                    int chunk_width)
705e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen{
706e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    int x,next_x,last,right;
707e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
708e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    last=w-4;
709e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    for(x=3;x<=last;x=next_x)
710e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    {
711e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        next_x=x+chunk_width;
712e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        right=next_x-1;
713e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        if(right>last) right=last;
714e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        /*Compute the Harris strength of a chunk*/
715e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        db_HarrisStrengthChunk_f(s,img,x,3,right,h-4,temp);
716e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    }
717e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen}
718e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
719e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen/*Compute Harris corner strength of img. Strength is returned for the region
720e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chenwith (3,3) as upper left and (w-4,h-4) as lower right, positioned in the
721e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chensame place in s. In other words,image should be at least 7 pixels wide and 7 pixels high
722e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chenfor a meaningful result.Moreover, the image should be overallocated by 256 bytes.
723e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chens[i][3] should by 16 byte aligned for any i*/
724e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chenvoid db_HarrisStrength_u(float **s, const unsigned char * const *img,int w,int h,
725e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                                    /*temp should point to at least
726e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                                    18*128 of allocated memory*/
727e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                                    int *temp)
728e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen{
729e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    int x,next_x,last;
730e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    int nc;
731e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
732e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    last=w-4;
733e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    for(x=3;x<=last;x=next_x)
734e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    {
735e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        next_x=x+124;
736e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
737e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        // mayban: to revert to the original full chunks state, change the line below to: nc = 128;
738e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        nc = db_mini(128,last-x+1);
739e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        //nc = 128;
740e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
741e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        /*Compute the Harris strength of a chunk*/
742e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        db_HarrisStrengthChunk_u(s,img,x,3,h-4,temp,nc);
743e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    }
744e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen}
745e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
/*Return the largest of the 128 floats starting at v. When DB_USE_SIMD is
defined the values are fetched with movaps/maxps memory operands, so v
has to be 16 byte aligned in that configuration*/
inline float db_Max_128Aligned16_f(float *v)
{
#ifdef DB_USE_SIMD
    float back;

    _asm
    {
        mov eax,v

        /*Chunk1*/
        movaps xmm0,[eax]
        movaps xmm1,[eax+16]
        movaps xmm2,[eax+32]
        movaps xmm3,[eax+48]
        movaps xmm4,[eax+64]
        movaps xmm5,[eax+80]
        movaps xmm6,[eax+96]
        movaps xmm7,[eax+112]

        /*Chunk2*/
        maxps xmm0,[eax+128]
        maxps xmm1,[eax+144]
        maxps xmm2,[eax+160]
        maxps xmm3,[eax+176]
        maxps xmm4,[eax+192]
        maxps xmm5,[eax+208]
        maxps xmm6,[eax+224]
        maxps xmm7,[eax+240]

        /*Chunk3*/
        maxps xmm0,[eax+256]
        maxps xmm1,[eax+272]
        maxps xmm2,[eax+288]
        maxps xmm3,[eax+304]
        maxps xmm4,[eax+320]
        maxps xmm5,[eax+336]
        maxps xmm6,[eax+352]
        maxps xmm7,[eax+368]

        /*Chunk4*/
        maxps xmm0,[eax+384]
        maxps xmm1,[eax+400]
        maxps xmm2,[eax+416]
        maxps xmm3,[eax+432]
        maxps xmm4,[eax+448]
        maxps xmm5,[eax+464]
        maxps xmm6,[eax+480]
        maxps xmm7,[eax+496]

        /*Collect the eight accumulators into xmm0*/
        maxps   xmm0,xmm1
        maxps   xmm2,xmm3
        maxps   xmm4,xmm5
        maxps   xmm6,xmm7
        maxps   xmm0,xmm2
        maxps   xmm4,xmm6
        maxps   xmm0,xmm4
        /*Fold lanes 2,3 onto lanes 0,1*/
        movhlps xmm1,xmm0
        maxps   xmm0,xmm1
        /*Fold lane 1 onto lane 0. shufps takes its low result lanes from*/
        /*the destination register, so xmm0 is copied into xmm1 first.*/
        /*Shuffling the stale xmm1 would leave lane 1 out of the maximum*/
        movaps  xmm1,xmm0
        shufps  xmm1,xmm1,1
        maxps   xmm0,xmm1
        movss   back,xmm0
    }

    return(back);
#else
    /*Plain scan over all 128 elements*/
    float best=v[0];
    for(int k=1;k<128;k++)
    {
        if(v[k]>best) best=v[k];
    }
    return(best);
#endif /*DB_USE_SIMD*/
}
823e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
/*Return the largest of the 64 floats starting at v. When DB_USE_SIMD is
defined the values are fetched with movaps/maxps memory operands, so v
has to be 16 byte aligned in that configuration*/
inline float db_Max_64Aligned16_f(float *v)
{
#ifdef DB_USE_SIMD
    float back;

    _asm
    {
        mov eax,v

        /*Chunk1*/
        movaps xmm0,[eax]
        movaps xmm1,[eax+16]
        movaps xmm2,[eax+32]
        movaps xmm3,[eax+48]
        movaps xmm4,[eax+64]
        movaps xmm5,[eax+80]
        movaps xmm6,[eax+96]
        movaps xmm7,[eax+112]

        /*Chunk2*/
        maxps xmm0,[eax+128]
        maxps xmm1,[eax+144]
        maxps xmm2,[eax+160]
        maxps xmm3,[eax+176]
        maxps xmm4,[eax+192]
        maxps xmm5,[eax+208]
        maxps xmm6,[eax+224]
        maxps xmm7,[eax+240]

        /*Collect the eight accumulators into xmm0*/
        maxps   xmm0,xmm1
        maxps   xmm2,xmm3
        maxps   xmm4,xmm5
        maxps   xmm6,xmm7
        maxps   xmm0,xmm2
        maxps   xmm4,xmm6
        maxps   xmm0,xmm4
        /*Fold lanes 2,3 onto lanes 0,1*/
        movhlps xmm1,xmm0
        maxps   xmm0,xmm1
        /*Fold lane 1 onto lane 0. shufps takes its low result lanes from*/
        /*the destination register, so xmm0 is copied into xmm1 first.*/
        /*Shuffling the stale xmm1 would leave lane 1 out of the maximum*/
        movaps  xmm1,xmm0
        shufps  xmm1,xmm1,1
        maxps   xmm0,xmm1
        movss   back,xmm0
    }

    return(back);
#else
    /*Plain scan over all 64 elements*/
    float best=v[0];
    for(int k=1;k<64;k++)
    {
        if(v[k]>best) best=v[k];
    }
    return(best);
#endif /*DB_USE_SIMD*/
}
881e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
/*Return the largest of the 32 floats starting at v. When DB_USE_SIMD is
defined the values are fetched with movaps, so v has to be 16 byte
aligned in that configuration*/
inline float db_Max_32Aligned16_f(float *v)
{
#ifdef DB_USE_SIMD
    float back;

    _asm
    {
        mov eax,v

        /*Chunk1*/
        movaps xmm0,[eax]
        movaps xmm1,[eax+16]
        movaps xmm2,[eax+32]
        movaps xmm3,[eax+48]
        movaps xmm4,[eax+64]
        movaps xmm5,[eax+80]
        movaps xmm6,[eax+96]
        movaps xmm7,[eax+112]

        /*Collect the eight registers into xmm0*/
        maxps   xmm0,xmm1
        maxps   xmm2,xmm3
        maxps   xmm4,xmm5
        maxps   xmm6,xmm7
        maxps   xmm0,xmm2
        maxps   xmm4,xmm6
        maxps   xmm0,xmm4
        /*Fold lanes 2,3 onto lanes 0,1*/
        movhlps xmm1,xmm0
        maxps   xmm0,xmm1
        /*Fold lane 1 onto lane 0. shufps takes its low result lanes from*/
        /*the destination register, so xmm0 is copied into xmm1 first.*/
        /*Shuffling the stale xmm1 would leave lane 1 out of the maximum*/
        movaps  xmm1,xmm0
        shufps  xmm1,xmm1,1
        maxps   xmm0,xmm1
        movss   back,xmm0
    }

    return(back);
#else
    /*Plain scan over all 32 elements*/
    float best=v[0];
    for(int k=1;k<32;k++)
    {
        if(v[k]>best) best=v[k];
    }
    return(best);
#endif /*DB_USE_SIMD*/
}
929e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
/*Return the largest of the 16 floats starting at v. When DB_USE_SIMD is
defined the values are fetched with movaps, so v has to be 16 byte
aligned in that configuration*/
inline float db_Max_16Aligned16_f(float *v)
{
#ifdef DB_USE_SIMD
    float back;

    _asm
    {
        mov eax,v

        /*Chunk1*/
        movaps xmm0,[eax]
        movaps xmm1,[eax+16]
        movaps xmm2,[eax+32]
        movaps xmm3,[eax+48]

        /*Collect the four registers into xmm0*/
        maxps   xmm0,xmm1
        maxps   xmm2,xmm3
        maxps   xmm0,xmm2
        /*Fold lanes 2,3 onto lanes 0,1*/
        movhlps xmm1,xmm0
        maxps   xmm0,xmm1
        /*Fold lane 1 onto lane 0. shufps takes its low result lanes from*/
        /*the destination register, so xmm0 is copied into xmm1 first.*/
        /*Shuffling the stale xmm1 would leave lane 1 out of the maximum*/
        movaps  xmm1,xmm0
        shufps  xmm1,xmm1,1
        maxps   xmm0,xmm1
        movss   back,xmm0
    }

    return(back);
#else
    /*Plain scan over all 16 elements*/
    float best=v[0];
    for(int k=1;k<16;k++)
    {
        if(v[k]>best) best=v[k];
    }
    return(best);
#endif /*DB_USE_SIMD*/
}
969e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
/*Return the largest of the 8 floats starting at v. When DB_USE_SIMD is
defined the values are fetched with movaps, so v has to be 16 byte
aligned in that configuration*/
inline float db_Max_8Aligned16_f(float *v)
{
#ifdef DB_USE_SIMD
    float back;

    _asm
    {
        mov eax,v

        /*Chunk1*/
        movaps xmm0,[eax]
        movaps xmm1,[eax+16]

        /*Collect the two registers into xmm0*/
        maxps   xmm0,xmm1
        /*Fold lanes 2,3 onto lanes 0,1*/
        movhlps xmm1,xmm0
        maxps   xmm0,xmm1
        /*Fold lane 1 onto lane 0. shufps takes its low result lanes from*/
        /*the destination register, so xmm0 is copied into xmm1 first.*/
        /*Shuffling the stale xmm1 would leave lane 1 out of the maximum*/
        movaps  xmm1,xmm0
        shufps  xmm1,xmm1,1
        maxps   xmm0,xmm1
        movss   back,xmm0
    }

    return(back);
#else
    /*Plain scan over all 8 elements*/
    float best=v[0];
    for(int k=1;k<8;k++)
    {
        if(v[k]>best) best=v[k];
    }
    return(best);
#endif /*DB_USE_SIMD*/
}
1005e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1006e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Cheninline float db_Max_Aligned16_f(float *v,int size)
1007e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen{
1008e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    float val,max_val;
1009e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    float *stop_v;
1010e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1011e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    max_val=v[0];
1012e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    for(;size>=128;size-=128)
1013e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    {
1014e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        val=db_Max_128Aligned16_f(v);
1015e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        v+=128;
1016e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        if(val>max_val) max_val=val;
1017e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    }
1018e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    if(size&64)
1019e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    {
1020e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        val=db_Max_64Aligned16_f(v);
1021e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        v+=64;
1022e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        if(val>max_val) max_val=val;
1023e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    }
1024e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    if(size&32)
1025e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    {
1026e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        val=db_Max_32Aligned16_f(v);
1027e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        v+=32;
1028e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        if(val>max_val) max_val=val;
1029e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    }
1030e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    if(size&16)
1031e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    {
1032e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        val=db_Max_16Aligned16_f(v);
1033e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        v+=16;
1034e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        if(val>max_val) max_val=val;
1035e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    }
1036e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    if(size&8)
1037e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    {
1038e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        val=db_Max_8Aligned16_f(v);
1039e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        v+=8;
1040e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        if(val>max_val) max_val=val;
1041e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    }
1042e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    if(size&7)
1043e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    {
1044e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        for(stop_v=v+(size&7);v!=stop_v;)
1045e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        {
1046e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen            val= *v++;
1047e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen            if(val>max_val) max_val=val;
1048e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        }
1049e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    }
1050e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1051e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    return(max_val);
1052e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen}
1053e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1054e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen/*Find maximum value of img in the region starting at (left,top)
1055e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chenand with width w and height h. img[left] should be 16 byte aligned*/
1056e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chenfloat db_MaxImage_Aligned16_f(float **img,int left,int top,int w,int h)
1057e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen{
1058e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    float val,max_val;
1059e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    int i,stop_i;
1060e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1061e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    if(w && h)
1062e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    {
1063e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        stop_i=top+h;
1064e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        max_val=img[top][left];
1065e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1066e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        for(i=top;i<stop_i;i++)
1067e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        {
1068e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen            val=db_Max_Aligned16_f(img[i]+left,w);
1069e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen            if(val>max_val) max_val=val;
1070e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        }
1071e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        return(max_val);
1072e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    }
1073e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    return(0.0);
1074e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen}
1075e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1076e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Cheninline void db_MaxVector_128_Aligned16_f(float *m,float *v1,float *v2)
1077e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen{
1078e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen#ifdef DB_USE_SIMD
1079e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    _asm
1080e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    {
1081e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        mov eax,v1
1082e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        mov ebx,v2
1083e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        mov ecx,m
1084e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1085e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        /*Chunk1*/
1086e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps xmm0,[eax]
1087e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps xmm1,[eax+16]
1088e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps xmm2,[eax+32]
1089e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps xmm3,[eax+48]
1090e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps xmm4,[eax+64]
1091e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps xmm5,[eax+80]
1092e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps xmm6,[eax+96]
1093e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps xmm7,[eax+112]
1094e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        maxps  xmm0,[ebx]
1095e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        maxps  xmm1,[ebx+16]
1096e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        maxps  xmm2,[ebx+32]
1097e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        maxps  xmm3,[ebx+48]
1098e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        maxps  xmm4,[ebx+64]
1099e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        maxps  xmm5,[ebx+80]
1100e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        maxps  xmm6,[ebx+96]
1101e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        maxps  xmm7,[ebx+112]
1102e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps [ecx],xmm0
1103e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps [ecx+16],xmm1
1104e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps [ecx+32],xmm2
1105e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps [ecx+48],xmm3
1106e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps [ecx+64],xmm4
1107e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps [ecx+80],xmm5
1108e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps [ecx+96],xmm6
1109e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps [ecx+112],xmm7
1110e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1111e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        /*Chunk2*/
1112e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps xmm0,[eax+128]
1113e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps xmm1,[eax+144]
1114e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps xmm2,[eax+160]
1115e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps xmm3,[eax+176]
1116e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps xmm4,[eax+192]
1117e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps xmm5,[eax+208]
1118e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps xmm6,[eax+224]
1119e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps xmm7,[eax+240]
1120e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        maxps  xmm0,[ebx+128]
1121e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        maxps  xmm1,[ebx+144]
1122e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        maxps  xmm2,[ebx+160]
1123e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        maxps  xmm3,[ebx+176]
1124e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        maxps  xmm4,[ebx+192]
1125e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        maxps  xmm5,[ebx+208]
1126e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        maxps  xmm6,[ebx+224]
1127e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        maxps  xmm7,[ebx+240]
1128e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps [ecx+128],xmm0
1129e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps [ecx+144],xmm1
1130e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps [ecx+160],xmm2
1131e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps [ecx+176],xmm3
1132e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps [ecx+192],xmm4
1133e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps [ecx+208],xmm5
1134e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps [ecx+224],xmm6
1135e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps [ecx+240],xmm7
1136e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1137e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        /*Chunk3*/
1138e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps xmm0,[eax+256]
1139e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps xmm1,[eax+272]
1140e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps xmm2,[eax+288]
1141e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps xmm3,[eax+304]
1142e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps xmm4,[eax+320]
1143e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps xmm5,[eax+336]
1144e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps xmm6,[eax+352]
1145e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps xmm7,[eax+368]
1146e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        maxps  xmm0,[ebx+256]
1147e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        maxps  xmm1,[ebx+272]
1148e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        maxps  xmm2,[ebx+288]
1149e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        maxps  xmm3,[ebx+304]
1150e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        maxps  xmm4,[ebx+320]
1151e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        maxps  xmm5,[ebx+336]
1152e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        maxps  xmm6,[ebx+352]
1153e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        maxps  xmm7,[ebx+368]
1154e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps [ecx+256],xmm0
1155e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps [ecx+272],xmm1
1156e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps [ecx+288],xmm2
1157e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps [ecx+304],xmm3
1158e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps [ecx+320],xmm4
1159e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps [ecx+336],xmm5
1160e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps [ecx+352],xmm6
1161e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps [ecx+368],xmm7
1162e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1163e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        /*Chunk4*/
1164e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps xmm0,[eax+384]
1165e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps xmm1,[eax+400]
1166e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps xmm2,[eax+416]
1167e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps xmm3,[eax+432]
1168e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps xmm4,[eax+448]
1169e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps xmm5,[eax+464]
1170e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps xmm6,[eax+480]
1171e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps xmm7,[eax+496]
1172e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        maxps  xmm0,[ebx+384]
1173e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        maxps  xmm1,[ebx+400]
1174e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        maxps  xmm2,[ebx+416]
1175e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        maxps  xmm3,[ebx+432]
1176e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        maxps  xmm4,[ebx+448]
1177e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        maxps  xmm5,[ebx+464]
1178e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        maxps  xmm6,[ebx+480]
1179e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        maxps  xmm7,[ebx+496]
1180e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps [ecx+384],xmm0
1181e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps [ecx+400],xmm1
1182e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps [ecx+416],xmm2
1183e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps [ecx+432],xmm3
1184e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps [ecx+448],xmm4
1185e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps [ecx+464],xmm5
1186e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps [ecx+480],xmm6
1187e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        movaps [ecx+496],xmm7
1188e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    }
1189e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen#else
1190e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    int i;
1191e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    float a,b;
1192e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    for(i=0;i<128;i++)
1193e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    {
1194e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        a=v1[i];
1195e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        b=v2[i];
1196e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        if(a>=b) m[i]=a;
1197e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        else m[i]=b;
1198e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    }
1199e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen#endif /*DB_USE_SIMD*/
1200e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen}
1201e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
/*Element-wise maximum of two vectors of 128 floats: m[i]=max(v1[i],v2[i]) for i=0..127.
As the name says, the second source v2 and the destination m must be 16 byte aligned
(the SIMD path reads v2 through maxps memory operands and writes m with movaps).
The first source v1 may be unaligned, it is loaded with movups.
The data is processed front to back, and every SIMD chunk loads its 32 inputs before it
stores its 32 outputs. m may therefore be the same buffer as v2 while v1 points a few
elements further into that buffer, which is how db_MaxSuppressFilterChunk_5x5_Aligned16_f
calls this function. Do not reorder the loads and stores without re-checking those calls*/
inline void db_MaxVector_128_SecondSourceDestAligned16_f(float *m,float *v1,float *v2)
{
#ifdef DB_USE_SIMD
    _asm
    {
        mov eax,v1
        mov ebx,v2
        mov ecx,m

        /*Chunk1: floats 0..31 (unaligned loads from v1, aligned max with v2, aligned stores to m)*/
        movups xmm0,[eax]
        movups xmm1,[eax+16]
        movups xmm2,[eax+32]
        movups xmm3,[eax+48]
        movups xmm4,[eax+64]
        movups xmm5,[eax+80]
        movups xmm6,[eax+96]
        movups xmm7,[eax+112]
        maxps  xmm0,[ebx]
        maxps  xmm1,[ebx+16]
        maxps  xmm2,[ebx+32]
        maxps  xmm3,[ebx+48]
        maxps  xmm4,[ebx+64]
        maxps  xmm5,[ebx+80]
        maxps  xmm6,[ebx+96]
        maxps  xmm7,[ebx+112]
        movaps [ecx],xmm0
        movaps [ecx+16],xmm1
        movaps [ecx+32],xmm2
        movaps [ecx+48],xmm3
        movaps [ecx+64],xmm4
        movaps [ecx+80],xmm5
        movaps [ecx+96],xmm6
        movaps [ecx+112],xmm7

        /*Chunk2: floats 32..63*/
        movups xmm0,[eax+128]
        movups xmm1,[eax+144]
        movups xmm2,[eax+160]
        movups xmm3,[eax+176]
        movups xmm4,[eax+192]
        movups xmm5,[eax+208]
        movups xmm6,[eax+224]
        movups xmm7,[eax+240]
        maxps  xmm0,[ebx+128]
        maxps  xmm1,[ebx+144]
        maxps  xmm2,[ebx+160]
        maxps  xmm3,[ebx+176]
        maxps  xmm4,[ebx+192]
        maxps  xmm5,[ebx+208]
        maxps  xmm6,[ebx+224]
        maxps  xmm7,[ebx+240]
        movaps [ecx+128],xmm0
        movaps [ecx+144],xmm1
        movaps [ecx+160],xmm2
        movaps [ecx+176],xmm3
        movaps [ecx+192],xmm4
        movaps [ecx+208],xmm5
        movaps [ecx+224],xmm6
        movaps [ecx+240],xmm7

        /*Chunk3: floats 64..95*/
        movups xmm0,[eax+256]
        movups xmm1,[eax+272]
        movups xmm2,[eax+288]
        movups xmm3,[eax+304]
        movups xmm4,[eax+320]
        movups xmm5,[eax+336]
        movups xmm6,[eax+352]
        movups xmm7,[eax+368]
        maxps  xmm0,[ebx+256]
        maxps  xmm1,[ebx+272]
        maxps  xmm2,[ebx+288]
        maxps  xmm3,[ebx+304]
        maxps  xmm4,[ebx+320]
        maxps  xmm5,[ebx+336]
        maxps  xmm6,[ebx+352]
        maxps  xmm7,[ebx+368]
        movaps [ecx+256],xmm0
        movaps [ecx+272],xmm1
        movaps [ecx+288],xmm2
        movaps [ecx+304],xmm3
        movaps [ecx+320],xmm4
        movaps [ecx+336],xmm5
        movaps [ecx+352],xmm6
        movaps [ecx+368],xmm7

        /*Chunk4: floats 96..127*/
        movups xmm0,[eax+384]
        movups xmm1,[eax+400]
        movups xmm2,[eax+416]
        movups xmm3,[eax+432]
        movups xmm4,[eax+448]
        movups xmm5,[eax+464]
        movups xmm6,[eax+480]
        movups xmm7,[eax+496]
        maxps  xmm0,[ebx+384]
        maxps  xmm1,[ebx+400]
        maxps  xmm2,[ebx+416]
        maxps  xmm3,[ebx+432]
        maxps  xmm4,[ebx+448]
        maxps  xmm5,[ebx+464]
        maxps  xmm6,[ebx+480]
        maxps  xmm7,[ebx+496]
        movaps [ecx+384],xmm0
        movaps [ecx+400],xmm1
        movaps [ecx+416],xmm2
        movaps [ecx+432],xmm3
        movaps [ecx+448],xmm4
        movaps [ecx+464],xmm5
        movaps [ecx+480],xmm6
        movaps [ecx+496],xmm7
    }
#else
    /*Portable fallback. Both inputs are read before m[i] is written and the loop runs
    front to back, so the overlapping calls described above behave as in the SIMD path*/
    for(int i=0;i<128;i++)
    {
        const float a=v1[i];
        const float b=v2[i];
        m[i]=(a>=b)?a:b;
    }
#endif /*DB_USE_SIMD*/
}
1327e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
/*Compute the max-suppression-filtered image for a chunk of sf starting at (left,top), of width 124
and stopping at row bottom (inclusive). The output is shifted two steps left, i.e. the value that
belongs to input pixel s[i][j] is stored in sf[i][j-2], and 128 elements are overwritten in each row.
The input s should be of width at least 128, and exist for 2 pixels outside the specified region.
s[i][left-2] and sf[i][left-2] should be 16 byte aligned. Top must be at least 3.

NOTE: the two compile-time paths do not store the same values.
 - With DB_USE_SIMD, sf[i][j-2] receives the maximum of the 24 neighbours of s[i][j] inside its
   5x5 window (the centre pixel itself is excluded).
 - Without DB_USE_SIMD, sf[i][j-2] receives 0.0 if s[i][j] is strictly greater than all 24
   neighbours, and s[i][j] otherwise. temp is not used on this path.
Presumably callers only test s[i][j]>sf[i][j-2], for which both encodings agree when the
strength is positive - TODO confirm against the callers*/
inline void db_MaxSuppressFilterChunk_5x5_Aligned16_f(float **sf,float **s,int left,int top,int bottom,
                                      /*temp should point to at least
                                      6*132 floats of 16-byte-aligned allocated memory*/
                                      float *temp)
{
#ifdef DB_USE_SIMD
    int i,lm2;
    float *two[4];
    float *four,*five;

    lm2=left-2;

    /*Set pointers to pre-allocated memory: six consecutive rows of 132 floats.
    132 floats are 528 bytes, a multiple of 16, so every row stays 16 byte aligned*/
    four=temp;
    five=four+132;
    for(i=0;i<4;i++)
    {
        two[i]=five+(i+1)*132;
    }

    /*Set rests of four and five to zero to avoid floating point exceptions.
    The vector max only writes elements 0..127, while the shifted reads below
    (five+1, five+3 and four+2) reach elements 128..130, so the padding has to
    start at element 128*/
    for(i=128;i<132;i++)
    {
        four[i]=0.0;
        five[i]=0.0;
    }

    /*Fill three rows of the wrap-around max buffers.
    two[r&3] holds the element-wise max of input rows r+1 and r+2*/
    for(i=top-3;i<top;i++) db_MaxVector_128_Aligned16_f(two[i&3],s[i+1]+lm2,s[i+2]+lm2);

    /*For each output row*/
    for(;i<=bottom;i++)
    {
        /*Compute max of the lowest pair of rows in the five row window (rows i+1,i+2)*/
        db_MaxVector_128_Aligned16_f(two[i&3],s[i+1]+lm2,s[i+2]+lm2);
        /*Compute max of the lowest and highest pair of rows in the five row window
        (rows i-2,i-1,i+1,i+2, i.e. everything but the centre row)*/
        db_MaxVector_128_Aligned16_f(four,two[i&3],two[(i-3)&3]);
        /*Compute max of all rows (two[(i-1)&3] contributes rows i and i+1)*/
        db_MaxVector_128_Aligned16_f(five,four,two[(i-1)&3]);
        /*Compute max of 2x5 chunks (columns c and c+1)*/
        db_MaxVector_128_SecondSourceDestAligned16_f(five,five+1,five);
        /*Compute max of pairs of 2x5 chunks (columns c,c+1,c+3,c+4)*/
        db_MaxVector_128_SecondSourceDestAligned16_f(five,five+3,five);
        /*Compute max of pairs of 5x5 except middle: add column c+2 without its centre row*/
        db_MaxVector_128_SecondSourceDestAligned16_f(sf[i]+lm2,four+2,five);
    }

#else
    int i,j,right;
    float sv;

    /*All 128 output columns of the chunk are written, like in the SIMD path*/
    right=left+128;
    for(i=top;i<=bottom;i++) for(j=left;j<right;j++)
    {
        sv=s[i][j];

        if( sv>s[i-2][j-2] && sv>s[i-2][j-1] && sv>s[i-2][j] && sv>s[i-2][j+1] && sv>s[i-2][j+2] &&
            sv>s[i-1][j-2] && sv>s[i-1][j-1] && sv>s[i-1][j] && sv>s[i-1][j+1] && sv>s[i-1][j+2] &&
            sv>s[  i][j-2] && sv>s[  i][j-1] &&                 sv>s[  i][j+1] && sv>s[  i][j+2] &&
            sv>s[i+1][j-2] && sv>s[i+1][j-1] && sv>s[i+1][j] && sv>s[i+1][j+1] && sv>s[i+1][j+2] &&
            sv>s[i+2][j-2] && sv>s[i+2][j-1] && sv>s[i+2][j] && sv>s[i+2][j+1] && sv>s[i+2][j+2])
        {
            /*Strict 5x5 local maximum*/
            sf[i][j-2]=0.0;
        }
        else sf[i][j-2]=sv;
    }
#endif /*DB_USE_SIMD*/
}
1401e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1402e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen/*Compute Max-suppression-filtered image for a chunk of sf starting at (left,top) and
1403e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chenstopping at bottom. The output is shifted two steps left. The input s should exist for 2 pixels
1404e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chenoutside the specified region. s[i][left-2] and sf[i][left-2] should be 16 byte aligned.
1405e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta ChenTop must be at least 3. Reading and writing from and to the input and output images is done
1406e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chenas if the region had a width equal to a multiple of 124. If this is not the case, the images
1407e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chenshould be over-allocated and the input cleared for a sufficient region*/
1408e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chenvoid db_MaxSuppressFilter_5x5_Aligned16_f(float **sf,float **s,int left,int top,int right,int bottom,
1409e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                                          /*temp should point to at least
1410e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                                          6*132 floats of 16-byte-aligned allocated memory*/
1411e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                                          float *temp)
1412e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen{
1413e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    int x,next_x;
1414e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1415e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    for(x=left;x<=right;x=next_x)
1416e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    {
1417e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        next_x=x+124;
1418e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        db_MaxSuppressFilterChunk_5x5_Aligned16_f(sf,s,x,top,bottom,temp);
1419e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    }
1420e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen}
1421e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
/*Extract corners from the chunk (left,top) to (right,bottom), both corners inclusive.
A pixel is reported as a corner when its strength is at least threshold and strictly greater
than all 24 other pixels in the 5x5 window centred on it, so equal neighbouring values
suppress each other. strength must therefore exist for 2 pixels outside the chunk.
Corners are stored in scan order (row by row, left to right) in x_temp (column), y_temp (row)
and s_temp (strength), which should point to space of at least as many positions as there
are pixels in the chunk. Returns the number of corners stored*/
inline int db_CornersFromChunk(float **strength,int left,int top,int right,int bottom,float threshold,double *x_temp,double *y_temp,double *s_temp)
{
    int nr=0;

    for(int i=top;i<=bottom;i++) for(int j=left;j<=right;j++)
    {
        const float s=strength[i][j];

        /*The threshold is tested first so that weak pixels skip the window comparisons*/
        if(s>=threshold &&
            s>strength[i-2][j-2] && s>strength[i-2][j-1] && s>strength[i-2][j] && s>strength[i-2][j+1] && s>strength[i-2][j+2] &&
            s>strength[i-1][j-2] && s>strength[i-1][j-1] && s>strength[i-1][j] && s>strength[i-1][j+1] && s>strength[i-1][j+2] &&
            s>strength[  i][j-2] && s>strength[  i][j-1] &&                       s>strength[  i][j+1] && s>strength[  i][j+2] &&
            s>strength[i+1][j-2] && s>strength[i+1][j-1] && s>strength[i+1][j] && s>strength[i+1][j+1] && s>strength[i+1][j+2] &&
            s>strength[i+2][j-2] && s>strength[i+2][j-1] && s>strength[i+2][j] && s>strength[i+2][j+1] && s>strength[i+2][j+2])
        {
            x_temp[nr]=(double) j;
            y_temp[nr]=(double) i;
            s_temp[nr]=(double) s;
            nr++;
        }
    }
    return(nr);
}
1449e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1450e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1451e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen//Sub-pixel accuracy using 2D quadratic interpolation.(YCJ)
1452e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Cheninline void db_SubPixel(float **strength, const double xd, const double yd, double &xs, double &ys)
1453e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen{
1454e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    int x = (int) xd;
1455e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    int y = (int) yd;
1456e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1457e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    float fxx = strength[y][x-1] - strength[y][x] - strength[y][x] + strength[y][x+1];
1458e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    float fyy = strength[y-1][x] - strength[y][x] - strength[y][x] + strength[y+1][x];
1459e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    float fxy = (strength[y-1][x-1] - strength[y-1][x+1] - strength[y+1][x-1] + strength[y+1][x+1])/(float)4.0;
1460e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1461e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    float denom = (fxx * fyy - fxy * fxy) * (float) 2.0;
1462e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1463e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    xs = xd;
1464e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    ys = yd;
1465e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1466e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    if ( db_absf(denom) <= FLT_EPSILON )
1467e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    {
1468e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        return;
1469e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    }
1470e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    else
1471e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    {
1472e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        float fx = strength[y][x+1] - strength[y][x-1];
1473e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        float fy = strength[y+1][x] - strength[y-1][x];
1474e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1475e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        float dx = (fyy * fx - fxy * fy) / denom;
1476e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        float dy = (fxx * fy - fxy * fx) / denom;
1477e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1478e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        if ( db_absf(dx) > 1.0 || db_absf(dy) > 1.0 )
1479e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        {
1480e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen            return;
1481e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        }
1482e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        else
1483e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        {
1484e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen            xs -= dx;
1485e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen            ys -= dy;
1486e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        }
1487e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    }
1488e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1489e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    return;
1490e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen}
1491e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1492e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen/*Extract corners from the image part from (left,top) to (right,bottom).
1493e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta ChenStore in x and y, extracting at most satnr corners in each block of size (bw,bh).
1494e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta ChenThe pointer temp_d should point to at least 5*bw*bh positions.
1495e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chenarea_factor holds how many corners max to extract per 10000 pixels*/
1496e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chenvoid db_ExtractCornersSaturated(float **strength,int left,int top,int right,int bottom,
1497e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                                int bw,int bh,unsigned long area_factor,
1498e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                                float threshold,double *temp_d,
1499e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                                double *x_coord,double *y_coord,int *nr_corners)
1500e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen{
1501e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    double *x_temp,*y_temp,*s_temp,*select_temp;
1502e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    double loc_thresh;
1503e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    unsigned long bwbh,area,saturation;
1504e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    int x,next_x,last_x;
1505e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    int y,next_y,last_y;
1506e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    int nr,nr_points,i,stop;
1507e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1508e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    bwbh=bw*bh;
1509e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    x_temp=temp_d;
1510e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    y_temp=x_temp+bwbh;
1511e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    s_temp=y_temp+bwbh;
1512e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    select_temp=s_temp+bwbh;
1513e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1514e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen#ifdef DB_SUB_PIXEL
1515e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    // subpixel processing may sometimes push the corner ourside the real border
1516e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    // increasing border size:
1517e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    left++;
1518e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    top++;
1519e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    bottom--;
1520e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    right--;
1521e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen#endif /*DB_SUB_PIXEL*/
1522e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1523e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    nr_points=0;
1524e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    for(y=top;y<=bottom;y=next_y)
1525e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    {
1526e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        next_y=y+bh;
1527e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        last_y=next_y-1;
1528e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        if(last_y>bottom) last_y=bottom;
1529e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        for(x=left;x<=right;x=next_x)
1530e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        {
1531e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen            next_x=x+bw;
1532e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen            last_x=next_x-1;
1533e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen            if(last_x>right) last_x=right;
1534e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1535e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen            area=(last_x-x+1)*(last_y-y+1);
1536e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen            saturation=(area*area_factor)/10000;
1537e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen            nr=db_CornersFromChunk(strength,x,y,last_x,last_y,threshold,x_temp,y_temp,s_temp);
1538e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen            if(nr)
1539e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen            {
1540e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                if(((unsigned long)nr)>saturation) loc_thresh=db_LeanQuickSelect(s_temp,nr,nr-saturation,select_temp);
1541e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                else loc_thresh=threshold;
1542e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1543e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                stop=nr_points+saturation;
1544e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                for(i=0;(i<nr)&&(nr_points<stop);i++)
1545e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                {
1546e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                    if(s_temp[i]>=loc_thresh)
1547e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                    {
1548e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                        #ifdef DB_SUB_PIXEL
1549e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                               db_SubPixel(strength, x_temp[i], y_temp[i], x_coord[nr_points], y_coord[nr_points]);
1550e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                        #else
1551e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                               x_coord[nr_points]=x_temp[i];
1552e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                               y_coord[nr_points]=y_temp[i];
1553e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                        #endif
1554e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1555e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                        nr_points++;
1556e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                    }
1557e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                }
1558e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen            }
1559e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        }
1560e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    }
1561e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    *nr_corners=nr_points;
1562e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen}
1563e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1564e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chendb_CornerDetector_f::db_CornerDetector_f()
1565e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen{
1566e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    m_w=0; m_h=0;
1567e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen}
1568e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1569e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chendb_CornerDetector_f::~db_CornerDetector_f()
1570e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen{
1571e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    Clean();
1572e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen}
1573e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1574e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chenvoid db_CornerDetector_f::Clean()
1575e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen{
1576e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    if(m_w!=0)
1577e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    {
1578e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        delete [] m_temp_f;
1579e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        delete [] m_temp_d;
1580e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        db_FreeStrengthImage_f(m_strength_mem,m_strength,m_h);
1581e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    }
1582e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    m_w=0; m_h=0;
1583e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen}
1584e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1585e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chenunsigned long db_CornerDetector_f::Init(int im_width,int im_height,int target_nr_corners,
1586e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                            int nr_horizontal_blocks,int nr_vertical_blocks,
1587e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                            double absolute_threshold,double relative_threshold)
1588e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen{
1589e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    int chunkwidth=208;
1590e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    int block_width,block_height;
1591e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    unsigned long area_factor;
1592e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    int active_width,active_height;
1593e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1594e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    active_width=db_maxi(1,im_width-10);
1595e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    active_height=db_maxi(1,im_height-10);
1596e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    block_width=db_maxi(1,active_width/nr_horizontal_blocks);
1597e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    block_height=db_maxi(1,active_height/nr_vertical_blocks);
1598e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1599e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    area_factor=db_minl(1000,db_maxl(1,(long)(10000.0*((double)target_nr_corners)/
1600e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        (((double)active_width)*((double)active_height)))));
1601e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1602e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    return(Start(im_width,im_height,block_width,block_height,area_factor,
1603e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        absolute_threshold,relative_threshold,chunkwidth));
1604e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen}
1605e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1606e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chenunsigned long db_CornerDetector_f::Start(int im_width,int im_height,
1607e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                             int block_width,int block_height,unsigned long area_factor,
1608e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                             double absolute_threshold,double relative_threshold,int chunkwidth)
1609e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen{
1610e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    Clean();
1611e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1612e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    m_w=im_width;
1613e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    m_h=im_height;
1614e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    m_cw=chunkwidth;
1615e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    m_bw=block_width;
1616e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    m_bh=block_height;
1617e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    m_area_factor=area_factor;
1618e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    m_r_thresh=relative_threshold;
1619e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    m_a_thresh=absolute_threshold;
1620e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    m_max_nr=db_maxl(1,1+(m_w*m_h*m_area_factor)/10000);
1621e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1622e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    m_temp_f=new float[13*(m_cw+4)];
1623e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    m_temp_d=new double[5*m_bw*m_bh];
1624e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    m_strength=db_AllocStrengthImage_f(&m_strength_mem,m_w,m_h);
1625e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1626e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    return(m_max_nr);
1627e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen}
1628e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1629e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chenvoid db_CornerDetector_f::DetectCorners(const float * const *img,double *x_coord,double *y_coord,int *nr_corners) const
1630e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen{
1631e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    float max_val,threshold;
1632e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1633e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    db_HarrisStrength_f(m_strength,img,m_w,m_h,m_temp_f,m_cw);
1634e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1635e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    if(m_r_thresh)
1636e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    {
1637e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        max_val=db_MaxImage_Aligned16_f(m_strength,3,3,m_w-6,m_h-6);
1638e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        threshold= (float) db_maxd(m_a_thresh,max_val*m_r_thresh);
1639e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    }
1640e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    else threshold= (float) m_a_thresh;
1641e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1642e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    db_ExtractCornersSaturated(m_strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,threshold,
1643e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        m_temp_d,x_coord,y_coord,nr_corners);
1644e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen}
1645e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1646e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chendb_CornerDetector_u::db_CornerDetector_u()
1647e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen{
1648e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    m_w=0; m_h=0;
1649e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen}
1650e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1651e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chendb_CornerDetector_u::~db_CornerDetector_u()
1652e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen{
1653e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    Clean();
1654e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen}
1655e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1656e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chendb_CornerDetector_u::db_CornerDetector_u(const db_CornerDetector_u& cd)
1657e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen{
1658e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    Start(cd.m_w, cd.m_h, cd.m_bw, cd.m_bh, cd.m_area_factor,
1659e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        cd.m_a_thresh, cd.m_r_thresh);
1660e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen}
1661e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1662e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chendb_CornerDetector_u& db_CornerDetector_u::operator=(const db_CornerDetector_u& cd)
1663e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen{
1664e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    if ( this == &cd ) return *this;
1665e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1666e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    Clean();
1667e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1668e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    Start(cd.m_w, cd.m_h, cd.m_bw, cd.m_bh, cd.m_area_factor,
1669e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        cd.m_a_thresh, cd.m_r_thresh);
1670e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1671e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    return *this;
1672e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen}
1673e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1674e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chenvoid db_CornerDetector_u::Clean()
1675e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen{
1676e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    if(m_w!=0)
1677e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    {
1678e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        delete [] m_temp_i;
1679e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        delete [] m_temp_d;
1680e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        db_FreeStrengthImage_f(m_strength_mem,m_strength,m_h);
1681e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    }
1682e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    m_w=0; m_h=0;
1683e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen}
1684e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1685e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chenunsigned long db_CornerDetector_u::Init(int im_width,int im_height,int target_nr_corners,
1686e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                            int nr_horizontal_blocks,int nr_vertical_blocks,
1687e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                            double absolute_threshold,double relative_threshold)
1688e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen{
1689e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    int block_width,block_height;
1690e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    unsigned long area_factor;
1691e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    int active_width,active_height;
1692e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1693e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    active_width=db_maxi(1,im_width-10);
1694e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    active_height=db_maxi(1,im_height-10);
1695e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    block_width=db_maxi(1,active_width/nr_horizontal_blocks);
1696e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    block_height=db_maxi(1,active_height/nr_vertical_blocks);
1697e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1698e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    area_factor=db_minl(1000,db_maxl(1,(long)(10000.0*((double)target_nr_corners)/
1699e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        (((double)active_width)*((double)active_height)))));
1700e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1701e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    return(Start(im_width,im_height,block_width,block_height,area_factor,
1702e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        16.0*absolute_threshold,relative_threshold));
1703e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen}
1704e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1705e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chenunsigned long db_CornerDetector_u::Start(int im_width,int im_height,
1706e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                             int block_width,int block_height,unsigned long area_factor,
1707e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                             double absolute_threshold,double relative_threshold)
1708e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen{
1709e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    Clean();
1710e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1711e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    m_w=im_width;
1712e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    m_h=im_height;
1713e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    m_bw=block_width;
1714e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    m_bh=block_height;
1715e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    m_area_factor=area_factor;
1716e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    m_r_thresh=relative_threshold;
1717e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    m_a_thresh=absolute_threshold;
1718e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    m_max_nr=db_maxl(1,1+(m_w*m_h*m_area_factor)/10000);
1719e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1720e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    m_temp_i=new int[18*128];
1721e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    m_temp_d=new double[5*m_bw*m_bh];
1722e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    m_strength=db_AllocStrengthImage_f(&m_strength_mem,m_w,m_h);
1723e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1724e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    return(m_max_nr);
1725e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen}
1726e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1727e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chenvoid db_CornerDetector_u::DetectCorners(const unsigned char * const *img,double *x_coord,double *y_coord,int *nr_corners,
1728e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                                        const unsigned char * const *msk, unsigned char fgnd) const
1729e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen{
1730e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    float max_val,threshold;
1731e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1732e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    db_HarrisStrength_u(m_strength,img,m_w,m_h,m_temp_i);
1733e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1734e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1735e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    if(m_r_thresh)
1736e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    {
1737e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        max_val=db_MaxImage_Aligned16_f(m_strength,3,3,m_w-6,m_h-6);
1738e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        threshold= (float) db_maxd(m_a_thresh,max_val*m_r_thresh);
1739e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    }
1740e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    else threshold= (float) m_a_thresh;
1741e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1742e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    db_ExtractCornersSaturated(m_strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,threshold,
1743e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        m_temp_d,x_coord,y_coord,nr_corners);
1744e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1745e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1746e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    if ( msk )
1747e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    {
1748e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        int nr_corners_mask=0;
1749e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1750e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        for ( int i = 0; i < *nr_corners; ++i)
1751e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        {
1752e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen            int cor_x = db_roundi(*(x_coord+i));
1753e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen            int cor_y = db_roundi(*(y_coord+i));
1754e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen            if ( msk[cor_y][cor_x] == fgnd )
1755e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen            {
1756e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                x_coord[nr_corners_mask] = x_coord[i];
1757e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                y_coord[nr_corners_mask] = y_coord[i];
1758e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen                nr_corners_mask++;
1759e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen            }
1760e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        }
1761e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        *nr_corners = nr_corners_mask;
1762e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    }
1763e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen}
1764e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1765e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chenvoid db_CornerDetector_u::ExtractCorners(float ** strength, double *x_coord, double *y_coord, int *nr_corners) {
1766e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen    if ( m_w!=0 )
1767e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen        db_ExtractCornersSaturated(strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,float(m_a_thresh),
1768e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen            m_temp_d,x_coord,y_coord,nr_corners);
1769e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen}
1770e295e32b68cf04f0d99138bf4a6d25555f3aef99Wei-Ta Chen
1771