1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*$Id: db_feature_detection.cpp,v 1.4 2011/06/17 14:03:30 mbansal Exp $*/
18
19/*****************************************************************
20*    Lean and mean begins here                                   *
21*****************************************************************/
22
23#include "db_utilities.h"
24#include "db_feature_detection.h"
25#ifdef _VERBOSE_
26#include <iostream>
27#endif
28#include <float.h>
29
30#define DB_SUB_PIXEL
31
32#define BORDER 10 // 5
33
34float** db_AllocStrengthImage_f(float **im,int w,int h)
35{
36    int i,n,aw;
37    long c,size;
38    float **img,*aim,*p;
39
40    /*Determine number of 124 element chunks needed*/
41    n=(db_maxi(1,w-6)+123)/124;
42    /*Determine the total allocation width aw*/
43    aw=n*124+8;
44    /*Allocate*/
45    size=aw*h+16;
46    *im=new float [size];
47    /*Clean up*/
48    p=(*im);
49    for(c=0;c<size;c++) p[c]=0.0;
50    /*Get a 16 byte aligned pointer*/
51    aim=db_AlignPointer_f(*im,16);
52    /*Allocate pointer table*/
53    img=new float* [h];
54    /*Initialize the pointer table*/
55    for(i=0;i<h;i++)
56    {
57        img[i]=aim+aw*i+1;
58    }
59
60    return(img);
61}
62
63void db_FreeStrengthImage_f(float *im,float **img,int h)
64{
65    delete [] im;
66    delete [] img;
67}
68
69/*Compute derivatives Ix,Iy for a subrow of img with upper left (i,j) and width chunk_width
70Memory references occur one pixel outside the subrow*/
71inline void db_IxIyRow_f(float *Ix,float *Iy,const float * const *img,int i,int j,int chunk_width)
72{
73    int c;
74
75    for(c=0;c<chunk_width;c++)
76    {
77        Ix[c]=img[i][j+c-1]-img[i][j+c+1];
78        Iy[c]=img[i-1][j+c]-img[i+1][j+c];
79    }
80}
81
82/*Compute derivatives Ix,Iy for a subrow of img with upper left (i,j) and width 128
83Memory references occur one pixel outside the subrow*/
84inline void db_IxIyRow_u(int *dxx,const unsigned char * const *img,int i,int j,int nc)
85{
86#ifdef DB_USE_MMX
87    const unsigned char *r1,*r2,*r3;
88
89    r1=img[i-1]+j; r2=img[i]+j; r3=img[i+1]+j;
90
91    _asm
92    {
93        mov esi,16
94        mov eax,r1
95        mov ebx,r2
96        mov ecx,r3
97        mov edx,dxx
98
99        /*Get bitmask into mm7*/
100        mov       edi,7F7F7F7Fh
101        movd      mm7,edi
102        punpckldq mm7,mm7
103
104loopstart:
105        /***************dx part 1-12*********************************/
106        movq       mm0,[eax]       /*1 Get upper*/
107         pxor      mm6,mm6         /*2 Set to zero*/
108        movq       mm1,[ecx]       /*3 Get lower*/
109         psrlq     mm0,1           /*4 Shift*/
110        psrlq      mm1,1           /*5 Shift*/
111         pand      mm0,mm7         /*6 And*/
112        movq       mm2,[ebx-1]     /*13 Get left*/
113         pand      mm1,mm7         /*7 And*/
114        psubb      mm0,mm1         /*8 Subtract*/
115         pxor      mm5,mm5         /*14 Set to zero*/
116        movq       mm1,mm0         /*9 Copy*/
117         pcmpgtb   mm6,mm0         /*10 Create unpack mask*/
118        movq       mm3,[ebx+1]     /*15 Get right*/
119         punpcklbw mm0,mm6         /*11 Unpack low*/
120        punpckhbw  mm1,mm6         /*12 Unpack high*/
121        /***************dy part 13-24*********************************/
122         movq      mm4,mm0         /*25 Copy dx*/
123        psrlq      mm2,1           /*16 Shift*/
124         pmullw    mm0,mm0         /*26 Multiply dx*dx*/
125        psrlq      mm3,1           /*17 Shift*/
126         pand      mm2,mm7         /*18 And*/
127        pand       mm3,mm7         /*19 And*/
128         /*Stall*/
129        psubb      mm2,mm3         /*20 Subtract*/
130         /*Stall*/
131        movq       mm3,mm2         /*21 Copy*/
132         pcmpgtb   mm5,mm2         /*22 Create unpack mask*/
133        punpcklbw  mm2,mm5         /*23 Unpack low*/
134         /*Stall*/
135        punpckhbw  mm3,mm5         /*24 Unpack high*/
136        /***************dxx dxy dyy low part 25-49*********************************/
137         pmullw    mm4,mm2         /*27 Multiply dx*dy*/
138        pmullw     mm2,mm2         /*28 Multiply dy*dy*/
139         pxor      mm6,mm6         /*29 Set to zero*/
140        movq       mm5,mm0         /*30 Copy dx*dx*/
141         pcmpgtw   mm6,mm0         /*31 Create unpack mask for dx*dx*/
142        punpcklwd  mm0,mm6         /*32 Unpack dx*dx lows*/
143         /*Stall*/
144        punpckhwd  mm5,mm6         /*33 Unpack dx*dx highs*/
145         pxor      mm6,mm6         /*36 Set to zero*/
146        movq       [edx],mm0       /*34 Store dx*dx lows*/
147         movq      mm0,mm4         /*37 Copy dx*dy*/
148        movq       [edx+8],mm5     /*35 Store dx*dx highs*/
149         pcmpgtw   mm6,mm4         /*38 Create unpack mask for dx*dy*/
150        punpcklwd  mm4,mm6         /*39 Unpack dx*dy lows*/
151         /*Stall*/
152        punpckhwd  mm0,mm6         /*40 Unpack dx*dy highs*/
153         pxor      mm6,mm6         /*43 Set to zero*/
154        movq       [edx+512],mm4   /*41 Store dx*dy lows*/
155         movq      mm5,mm2         /*44 Copy dy*dy*/
156        movq       [edx+520],mm0   /*42 Store dx*dy highs*/
157         pcmpgtw   mm6,mm2         /*45 Create unpack mask for dy*dy*/
158        punpcklwd  mm2,mm6         /*46 Unpack dy*dy lows*/
159         movq      mm4,mm1         /*50 Copy dx*/
160        punpckhwd  mm5,mm6         /*47 Unpack dy*dy highs*/
161         pmullw    mm1,mm1         /*51 Multiply dx*dx*/
162        movq       [edx+1024],mm2  /*48 Store dy*dy lows*/
163         pmullw    mm4,mm3         /*52 Multiply dx*dy*/
164        movq       [edx+1032],mm5  /*49 Store dy*dy highs*/
165        /***************dxx dxy dyy high part 50-79*********************************/
166         pmullw    mm3,mm3         /*53 Multiply dy*dy*/
167        pxor       mm6,mm6         /*54 Set to zero*/
168         movq      mm5,mm1         /*55 Copy dx*dx*/
169        pcmpgtw    mm6,mm1         /*56 Create unpack mask for dx*dx*/
170         pxor      mm2,mm2         /*61 Set to zero*/
171        punpcklwd  mm1,mm6         /*57 Unpack dx*dx lows*/
172         movq      mm0,mm4         /*62 Copy dx*dy*/
173        punpckhwd  mm5,mm6         /*58 Unpack dx*dx highs*/
174         pcmpgtw   mm2,mm4         /*63 Create unpack mask for dx*dy*/
175        movq       [edx+16],mm1    /*59 Store dx*dx lows*/
176         punpcklwd mm4,mm2         /*64 Unpack dx*dy lows*/
177        movq       [edx+24],mm5    /*60 Store dx*dx highs*/
178         punpckhwd mm0,mm2         /*65 Unpack dx*dy highs*/
179        movq       [edx+528],mm4   /*66 Store dx*dy lows*/
180         pxor      mm6,mm6         /*68 Set to zero*/
181        movq       [edx+536],mm0   /*67 Store dx*dy highs*/
182         movq      mm5,mm3         /*69 Copy dy*dy*/
183        pcmpgtw    mm6,mm3         /*70 Create unpack mask for dy*dy*/
184         add       eax,8           /*75*/
185        punpcklwd  mm3,mm6         /*71 Unpack dy*dy lows*/
186         add       ebx,8           /*76*/
187        punpckhwd  mm5,mm6         /*72 Unpack dy*dy highs*/
188         add       ecx,8           /*77*/
189        movq       [edx+1040],mm3  /*73 Store dy*dy lows*/
190         /*Stall*/
191        movq       [edx+1048],mm5  /*74 Store dy*dy highs*/
192         /*Stall*/
193        add        edx,32          /*78*/
194         dec esi                   /*79*/
195        jnz loopstart
196
197        emms
198    }
199
200#else
201    int c;
202    int Ix,Iy;
203
204    for(c=0;c<nc;c++)
205    {
206        Ix=(img[i][j+c-1]-img[i][j+c+1])>>1;
207        Iy=(img[i-1][j+c]-img[i+1][j+c])>>1;
208        dxx[c]=Ix*Ix;
209        dxx[c+128]=Ix*Iy;
210        dxx[c+256]=Iy*Iy;
211    }
212#endif /*DB_USE_MMX*/
213}
214
215/*Filter vertically five rows of derivatives of length chunk_width into gxx,gxy,gyy*/
216inline void db_gxx_gxy_gyy_row_f(float *gxx,float *gxy,float *gyy,int chunk_width,
217                                 float *Ix0,float *Ix1,float *Ix2,float *Ix3,float *Ix4,
218                                 float *Iy0,float *Iy1,float *Iy2,float *Iy3,float *Iy4)
219{
220    int c;
221    float dx,dy;
222    float Ixx0,Ixy0,Iyy0,Ixx1,Ixy1,Iyy1,Ixx2,Ixy2,Iyy2,Ixx3,Ixy3,Iyy3,Ixx4,Ixy4,Iyy4;
223
224    for(c=0;c<chunk_width;c++)
225    {
226        dx=Ix0[c];
227        dy=Iy0[c];
228        Ixx0=dx*dx;
229        Ixy0=dx*dy;
230        Iyy0=dy*dy;
231
232        dx=Ix1[c];
233        dy=Iy1[c];
234        Ixx1=dx*dx;
235        Ixy1=dx*dy;
236        Iyy1=dy*dy;
237
238        dx=Ix2[c];
239        dy=Iy2[c];
240        Ixx2=dx*dx;
241        Ixy2=dx*dy;
242        Iyy2=dy*dy;
243
244        dx=Ix3[c];
245        dy=Iy3[c];
246        Ixx3=dx*dx;
247        Ixy3=dx*dy;
248        Iyy3=dy*dy;
249
250        dx=Ix4[c];
251        dy=Iy4[c];
252        Ixx4=dx*dx;
253        Ixy4=dx*dy;
254        Iyy4=dy*dy;
255
256        /*Filter vertically*/
257        gxx[c]=Ixx0+Ixx1*4.0f+Ixx2*6.0f+Ixx3*4.0f+Ixx4;
258        gxy[c]=Ixy0+Ixy1*4.0f+Ixy2*6.0f+Ixy3*4.0f+Ixy4;
259        gyy[c]=Iyy0+Iyy1*4.0f+Iyy2*6.0f+Iyy3*4.0f+Iyy4;
260    }
261}
262
263/*Filter vertically five rows of derivatives of length 128 into gxx,gxy,gyy*/
264inline void db_gxx_gxy_gyy_row_s(int *g,int *d0,int *d1,int *d2,int *d3,int *d4,int nc)
265{
266#ifdef DB_USE_MMX
267    int c;
268
269    _asm
270    {
271        mov c,64
272        mov eax,d0
273        mov ebx,d1
274        mov ecx,d2
275        mov edx,d3
276        mov edi,d4
277        mov esi,g
278
279loopstart:
280        /***************dxx part 1-14*********************************/
281        movq        mm0,[eax]      /*1 Get dxx0*/
282         /*Stall*/
283        movq        mm1,[ebx]      /*2 Get dxx1*/
284         /*Stall*/
285        movq        mm2,[ecx]      /*5 Get dxx2*/
286         pslld      mm1,2          /*3 Shift dxx1*/
287        movq        mm3,[edx]      /*10 Get dxx3*/
288         paddd      mm0,mm1        /*4 Accumulate dxx1*/
289        movq        mm4,[eax+512]  /*15 Get dxy0*/
290         pslld      mm2,1          /*6 Shift dxx2 1*/
291        paddd       mm0,mm2        /*7 Accumulate dxx2 1*/
292         pslld      mm2,1          /*8 Shift dxx2 2*/
293        movq        mm5,[ebx+512]  /*16 Get dxy1*/
294         paddd      mm0,mm2        /*9 Accumulate dxx2 2*/
295        pslld       mm3,2          /*11 Shift dxx3*/
296         /*Stall*/
297        paddd       mm0,mm3        /*12 Accumulate dxx3*/
298         pslld      mm5,2          /*17 Shift dxy1*/
299        paddd       mm0,[edi]      /*13 Accumulate dxx4*/
300         paddd      mm4,mm5        /*18 Accumulate dxy1*/
301        movq        mm6,[ecx+512]  /*19 Get dxy2*/
302         /*Stall*/
303        movq        [esi],mm0      /*14 Store dxx sums*/
304        /***************dxy part 15-28*********************************/
305         pslld      mm6,1          /*20 Shift dxy2 1*/
306        paddd       mm4,mm6        /*21 Accumulate dxy2 1*/
307         pslld      mm6,1          /*22 Shift dxy2 2*/
308        movq        mm0,[eax+1024] /*29 Get dyy0*/
309         paddd      mm4,mm6        /*23 Accumulate dxy2 2*/
310        movq        mm7,[edx+512]  /*24 Get dxy3*/
311         pslld      mm7,2          /*25 Shift dxy3*/
312        movq        mm1,[ebx+1024] /*30 Get dyy1*/
313         paddd      mm4,mm7        /*26 Accumulate dxy3*/
314        paddd       mm4,[edi+512]  /*27 Accumulate dxy4*/
315         pslld      mm1,2          /*31 Shift dyy1*/
316        movq        mm2,[ecx+1024] /*33 Get dyy2*/
317         paddd      mm0,mm1        /*32 Accumulate dyy1*/
318        movq        [esi+512],mm4  /*28 Store dxy sums*/
319         pslld      mm2,1          /*34 Shift dyy2 1*/
320        /***************dyy part 29-49*********************************/
321
322
323        movq        mm3,[edx+1024] /*38 Get dyy3*/
324         paddd      mm0,mm2        /*35 Accumulate dyy2 1*/
325        paddd       mm0,[edi+1024] /*41 Accumulate dyy4*/
326         pslld      mm2,1          /*36 Shift dyy2 2*/
327        paddd       mm0,mm2        /*37 Accumulate dyy2 2*/
328         pslld      mm3,2          /*39 Shift dyy3*/
329        paddd       mm0,mm3        /*40 Accumulate dyy3*/
330         add        eax,8           /*43*/
331        add         ebx,8           /*44*/
332         add        ecx,8           /*45*/
333        movq        [esi+1024],mm0 /*42 Store dyy sums*/
334         /*Stall*/
335        add         edx,8           /*46*/
336         add        edi,8           /*47*/
337        add         esi,8           /*48*/
338         dec        c               /*49*/
339        jnz         loopstart
340
341        emms
342    }
343
344#else
345    int c,dd;
346
347    for(c=0;c<nc;c++)
348    {
349        /*Filter vertically*/
350        dd=d2[c];
351        g[c]=d0[c]+(d1[c]<<2)+(dd<<2)+(dd<<1)+(d3[c]<<2)+d4[c];
352
353        dd=d2[c+128];
354        g[c+128]=d0[c+128]+(d1[c+128]<<2)+(dd<<2)+(dd<<1)+(d3[c+128]<<2)+d4[c+128];
355
356        dd=d2[c+256];
357        g[c+256]=d0[c+256]+(d1[c+256]<<2)+(dd<<2)+(dd<<1)+(d3[c+256]<<2)+d4[c+256];
358    }
359#endif /*DB_USE_MMX*/
360}
361
362/*Filter horizontally the three rows gxx,gxy,gyy into the strength subrow starting at i,j
363and with width chunk_width. gxx,gxy and gyy are assumed to be four pixels wider than chunk_width
364and starting at (i,j-2)*/
365inline void db_HarrisStrength_row_f(float **s,float *gxx,float *gxy,float *gyy,int i,int j,int chunk_width)
366{
367    float Gxx,Gxy,Gyy,det,trc;
368    int c;
369
370    for(c=0;c<chunk_width;c++)
371    {
372        Gxx=gxx[c]+gxx[c+1]*4.0f+gxx[c+2]*6.0f+gxx[c+3]*4.0f+gxx[c+4];
373        Gxy=gxy[c]+gxy[c+1]*4.0f+gxy[c+2]*6.0f+gxy[c+3]*4.0f+gxy[c+4];
374        Gyy=gyy[c]+gyy[c+1]*4.0f+gyy[c+2]*6.0f+gyy[c+3]*4.0f+gyy[c+4];
375
376        det=Gxx*Gyy-Gxy*Gxy;
377        trc=Gxx+Gyy;
378        s[i][j+c]=det-0.06f*trc*trc;
379    }
380}
381
382/*Filter g of length 128 in place with 14641. Output is shifted two steps
383and of length 124*/
384inline void db_Filter14641_128_i(int *g,int nc)
385{
386#ifdef DB_USE_MMX
387    int mask;
388
389    mask=0xFFFFFFFF;
390    _asm
391    {
392        mov esi,31
393        mov eax,g
394
395        /*Get bitmask 00000000FFFFFFFF into mm7*/
396        movd mm7,mask
397
398        /*Warming iteration one 1-16********************/
399        movq       mm6,[eax]      /*1 Load new data*/
400        paddd      mm0,mm6        /*2 Add 1* behind two steps*/
401        movq       mm2,mm6        /*3 Start with 1* in front two steps*/
402        pslld      mm6,1          /*4*/
403        paddd      mm1,mm6        /*5 Add 2* same place*/
404        pslld      mm6,1          /*6*/
405        paddd      mm1,mm6        /*7 Add 4* same place*/
406        pshufw     mm6,mm6,4Eh    /*8 Swap the two double-words using bitmask 01001110=4Eh*/
407        paddd      mm1,mm6        /*9 Add 4* swapped*/
408        movq       mm5,mm6        /*10 Copy*/
409        pand       mm6,mm7        /*11 Get low double-word only*/
410        paddd      mm2,mm6        /*12 Add 4* in front one step*/
411        pxor       mm6,mm5        /*13 Get high double-word only*/
412        paddd      mm0,mm6        /*14 Add 4* behind one step*/
413        movq       mm0,mm1        /*15 Shift along*/
414        movq       mm1,mm2        /*16 Shift along*/
415        /*Warming iteration two 17-32********************/
416        movq       mm4,[eax+8]    /*17 Load new data*/
417        paddd      mm0,mm4        /*18 Add 1* behind two steps*/
418        movq       mm2,mm4        /*19 Start with 1* in front two steps*/
419        pslld      mm4,1          /*20*/
420        paddd      mm1,mm4        /*21 Add 2* same place*/
421        pslld      mm4,1          /*22*/
422        paddd      mm1,mm4        /*23 Add 4* same place*/
423        pshufw     mm4,mm4,4Eh    /*24 Swap the two double-words using bitmask 01001110=4Eh*/
424        paddd      mm1,mm4        /*25 Add 4* swapped*/
425        movq       mm3,mm4        /*26 Copy*/
426        pand       mm4,mm7        /*27 Get low double-word only*/
427        paddd      mm2,mm4        /*28 Add 4* in front one step*/
428        pxor       mm4,mm3        /*29 Get high double-word only*/
429        paddd      mm0,mm4        /*30 Add 4* behind one step*/
430        movq       mm0,mm1        /*31 Shift along*/
431        movq       mm1,mm2        /*32 Shift along*/
432
433        /*Loop********************/
434loopstart:
435        /*First part of loop 33-47********/
436        movq        mm6,[eax+16]   /*33 Load new data*/
437         /*Stall*/
438        paddd       mm0,mm6        /*34 Add 1* behind two steps*/
439         movq       mm2,mm6        /*35 Start with 1* in front two steps*/
440        movq        mm4,[eax+24]   /*48 Load new data*/
441         pslld      mm6,1          /*36*/
442        paddd       mm1,mm6        /*37 Add 2* same place*/
443         pslld      mm6,1          /*38*/
444        paddd       mm1,mm6        /*39 Add 4* same place*/
445         pshufw     mm6,mm6,4Eh    /*40 Swap the two double-words using bitmask 01001110=4Eh*/
446        paddd       mm1,mm4        /*49 Add 1* behind two steps*/
447         movq       mm5,mm6        /*41 Copy*/
448        paddd       mm1,mm6        /*42 Add 4* swapped*/
449         pand       mm6,mm7        /*43 Get low double-word only*/
450        paddd       mm2,mm6        /*44 Add 4* in front one step*/
451         pxor       mm6,mm5        /*45 Get high double-word only*/
452        paddd       mm0,mm6        /*46 Add 4* behind one step*/
453         movq       mm6,mm4        /*50a Copy*/
454        pslld       mm4,1          /*51*/
455         /*Stall*/
456        movq        [eax],mm0      /*47 Store result two steps behind*/
457        /*Second part of loop 48-66********/
458         movq       mm0,mm6        /*50b Start with 1* in front two steps*/
459        paddd       mm2,mm4        /*52 Add 2* same place*/
460         pslld      mm4,1          /*53*/
461        paddd       mm2,mm4        /*54 Add 4* same place*/
462         pshufw     mm4,mm4,4Eh    /*55 Swap the two double-words using bitmask 01001110=4Eh*/
463        paddd       mm2,mm4        /*56 Add 4* swapped*/
464         movq       mm3,mm4        /*57 Copy*/
465        pand        mm4,mm7        /*58 Get low double-word only*/
466         /*Stall*/
467        paddd       mm0,mm4        /*59 Add 4* in front one step*/
468         pxor       mm4,mm3        /*60 Get high double-word only*/
469        paddd       mm1,mm4        /*61 Add 4* behind one step*/
470         add        eax,16         /*65*/
471        dec         esi            /*66*/
472         /*Stall*/
473        movq        [eax-8],mm1    /*62 Store result two steps behind*/
474         movq       mm1,mm0        /*63 Shift along*/
475        movq        mm0,mm2        /*64 Shift along*/
476        jnz loopstart
477
478        emms
479    }
480
481#else
482    int c;
483
484    for(c=0;c<nc-4;c++)
485    {
486        g[c]=g[c]+(g[c+1]<<2)+(g[c+2]<<2)+(g[c+2]<<1)+(g[c+3]<<2)+g[c+4];
487    }
488#endif /*DB_USE_MMX*/
489}
490
491/*Filter horizontally the three rows gxx,gxy,gyy of length 128 into the strength subrow s
492of length 124. gxx,gxy and gyy are assumed to be starting at (i,j-2) if s[i][j] is sought.
493s should be 16 byte aligned*/
494inline void db_HarrisStrength_row_s(float *s,int *gxx,int *gxy,int *gyy,int nc)
495{
496    float k;
497
498    k=0.06f;
499
500    db_Filter14641_128_i(gxx,nc);
501    db_Filter14641_128_i(gxy,nc);
502    db_Filter14641_128_i(gyy,nc);
503
504#ifdef DB_USE_SIMD
505
506
507    _asm
508    {
509        mov esi,15
510        mov eax,gxx
511        mov ebx,gxy
512        mov ecx,gyy
513        mov edx,s
514
515        /*broadcast k to all positions of xmm7*/
516        movss   xmm7,k
517        shufps  xmm7,xmm7,0
518
519        /*****Warm up 1-10**************************************/
520        cvtpi2ps  xmm0,[eax+8] /*1 Convert two integers into floating point of low double-word*/
521         /*Stall*/
522        cvtpi2ps  xmm1,[ebx+8] /*4 Convert two integers into floating point of low double-word*/
523         movlhps  xmm0,xmm0    /*2 Move them to the high double-word*/
524        cvtpi2ps  xmm2,[ecx+8] /*7 Convert two integers into floating point of low double-word*/
525         movlhps  xmm1,xmm1    /*5 Move them to the high double-word*/
526        cvtpi2ps  xmm0,[eax]   /*3 Convert two integers into floating point of low double-word*/
527         movlhps  xmm2,xmm2    /*8 Move them to the high double-word*/
528        cvtpi2ps  xmm1,[ebx]   /*6 Convert two integers into floating point of low double-word*/
529         movaps   xmm3,xmm0    /*10 Copy Cxx*/
530        cvtpi2ps  xmm2,[ecx]   /*9 Convert two integers into floating point of low double-word*/
531         /*Stall*/
532loopstart:
533        /*****First part of loop 11-18***********************/
534        mulps     xmm0,xmm2     /*11 Multiply to get Gxx*Gyy*/
535         addps    xmm2,xmm3     /*12 Add to get Gxx+Gyy*/
536        cvtpi2ps  xmm4,[eax+24] /*19 Convert two integers into floating point of low double-word*/
537         mulps    xmm1,xmm1     /*13 Multiply to get Gxy*Gxy*/
538        mulps     xmm2,xmm2     /*14 Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/
539         movlhps  xmm4,xmm4     /*20 Move them to the high double-word*/
540        cvtpi2ps  xmm4,[eax+16] /*21 Convert two integers into floating point of low double-word*/
541         /*Stall*/
542        subps     xmm0,xmm1     /*15 Subtract to get Gxx*Gyy-Gxy*Gxy*/
543         mulps    xmm2,xmm7     /*16 Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/
544        cvtpi2ps  xmm5,[ebx+24] /*22 Convert two integers into floating point of low double-word*/
545         /*Stall*/
546        movlhps   xmm5,xmm5     /*23 Move them to the high double-word*/
547         /*Stall*/
548        cvtpi2ps  xmm5,[ebx+16] /*24 Convert two integers into floating point of low double-word*/
549         subps    xmm0,xmm2     /*17 Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/
550        cvtpi2ps  xmm6,[ecx+24] /*25 Convert two integers into floating point of low double-word*/
551         /*Stall*/
552        movaps    [edx],xmm0    /*18 Store*/
553        /*****Second part of loop 26-40***********************/
554         movlhps  xmm6,xmm6     /*26 Move them to the high double-word*/
555        cvtpi2ps  xmm6,[ecx+16] /*27 Convert two integers into floating point of low double-word*/
556         movaps   xmm3,xmm4     /*28 Copy Cxx*/
557        mulps     xmm4,xmm6     /*29 Multiply to get Gxx*Gyy*/
558         addps    xmm6,xmm3     /*30 Add to get Gxx+Gyy*/
559        cvtpi2ps  xmm0,[eax+40] /*(1 Next) Convert two integers into floating point of low double-word*/
560         mulps    xmm5,xmm5     /*31 Multiply to get Gxy*Gxy*/
561        cvtpi2ps  xmm1,[ebx+40] /*(4 Next) Convert two integers into floating point of low double-word*/
562         mulps    xmm6,xmm6     /*32 Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/
563        cvtpi2ps  xmm2,[ecx+40] /*(7 Next) Convert two integers into floating point of low double-word*/
564         movlhps  xmm0,xmm0     /*(2 Next) Move them to the high double-word*/
565        subps     xmm4,xmm5     /*33 Subtract to get Gxx*Gyy-Gxy*Gxy*/
566         movlhps  xmm1,xmm1     /*(5 Next) Move them to the high double-word*/
567        cvtpi2ps  xmm0,[eax+32] /*(3 Next)Convert two integers into floating point of low double-word*/
568         mulps    xmm6,xmm7     /*34 Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/
569        cvtpi2ps  xmm1,[ebx+32] /*(6 Next) Convert two integers into floating point of low double-word*/
570         movlhps  xmm2,xmm2     /*(8 Next) Move them to the high double-word*/
571        movaps    xmm3,xmm0     /*(10 Next) Copy Cxx*/
572         add      eax,32        /*37*/
573        subps     xmm4,xmm6     /*35 Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/
574         add      ebx,32        /*38*/
575        cvtpi2ps  xmm2,[ecx+32] /*(9 Next) Convert two integers into floating point of low double-word*/
576         /*Stall*/
577        movaps    [edx+16],xmm4 /*36 Store*/
578         /*Stall*/
579        add       ecx,32        /*39*/
580         add      edx,32        /*40*/
581        dec       esi           /*41*/
582        jnz loopstart
583
584        /****Cool down***************/
585        mulps    xmm0,xmm2    /*Multiply to get Gxx*Gyy*/
586        addps    xmm2,xmm3    /*Add to get Gxx+Gyy*/
587        mulps    xmm1,xmm1    /*Multiply to get Gxy*Gxy*/
588        mulps    xmm2,xmm2    /*Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/
589        subps    xmm0,xmm1    /*Subtract to get Gxx*Gyy-Gxy*Gxy*/
590        mulps    xmm2,xmm7    /*Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/
591        subps    xmm0,xmm2    /*Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/
592        movaps   [edx],xmm0   /*Store*/
593    }
594
595#else
596    float Gxx,Gxy,Gyy,det,trc;
597    int c;
598
599    //for(c=0;c<124;c++)
600    for(c=0;c<nc-4;c++)
601    {
602        Gxx=(float)gxx[c];
603        Gxy=(float)gxy[c];
604        Gyy=(float)gyy[c];
605
606        det=Gxx*Gyy-Gxy*Gxy;
607        trc=Gxx+Gyy;
608        s[c]=det-k*trc*trc;
609    }
610#endif /*DB_USE_SIMD*/
611}
612
613/*Compute the Harris corner strength of the chunk [left,top,right,bottom] of img and
614store it into the corresponding region of s. left and top have to be at least 3 and
615right and bottom have to be at most width-4,height-4*/
616inline void db_HarrisStrengthChunk_f(float **s,const float * const *img,int left,int top,int right,int bottom,
617                                      /*temp should point to at least
618                                      13*(right-left+5) of allocated memory*/
619                                      float *temp)
620{
621    float *Ix[5],*Iy[5];
622    float *gxx,*gxy,*gyy;
623    int i,chunk_width,chunk_width_p4;
624
625    chunk_width=right-left+1;
626    chunk_width_p4=chunk_width+4;
627    gxx=temp;
628    gxy=gxx+chunk_width_p4;
629    gyy=gxy+chunk_width_p4;
630    for(i=0;i<5;i++)
631    {
632        Ix[i]=gyy+chunk_width_p4+(2*i*chunk_width_p4);
633        Iy[i]=Ix[i]+chunk_width_p4;
634    }
635
636    /*Fill four rows of the wrap-around derivative buffers*/
637    for(i=top-2;i<top+2;i++) db_IxIyRow_f(Ix[i%5],Iy[i%5],img,i,left-2,chunk_width_p4);
638
639    /*For each output row*/
640    for(i=top;i<=bottom;i++)
641    {
642        /*Step the derivative buffers*/
643        db_IxIyRow_f(Ix[(i+2)%5],Iy[(i+2)%5],img,(i+2),left-2,chunk_width_p4);
644
645        /*Filter Ix2,IxIy,Iy2 vertically into gxx,gxy,gyy*/
646        db_gxx_gxy_gyy_row_f(gxx,gxy,gyy,chunk_width_p4,
647                                 Ix[(i-2)%5],Ix[(i-1)%5],Ix[i%5],Ix[(i+1)%5],Ix[(i+2)%5],
648                                 Iy[(i-2)%5],Iy[(i-1)%5],Iy[i%5],Iy[(i+1)%5],Iy[(i+2)%5]);
649
650        /*Filter gxx,gxy,gyy horizontally and compute corner response s*/
651        db_HarrisStrength_row_f(s,gxx,gxy,gyy,i,left,chunk_width);
652    }
653}
654
655/*Compute the Harris corner strength of the chunk [left,top,left+123,bottom] of img and
656store it into the corresponding region of s. left and top have to be at least 3 and
657right and bottom have to be at most width-4,height-4. The left of the region in s should
658be 16 byte aligned*/
659inline void db_HarrisStrengthChunk_u(float **s,const unsigned char * const *img,int left,int top,int bottom,
660                                      /*temp should point to at least
661                                      18*128 of allocated memory*/
662                                      int *temp, int nc)
663{
664    int *Ixx[5],*Ixy[5],*Iyy[5];
665    int *gxx,*gxy,*gyy;
666    int i;
667
668    gxx=temp;
669    gxy=gxx+128;
670    gyy=gxy+128;
671    for(i=0;i<5;i++)
672    {
673        Ixx[i]=gyy+(3*i+1)*128;
674        Ixy[i]=gyy+(3*i+2)*128;
675        Iyy[i]=gyy+(3*i+3)*128;
676    }
677
678    /*Fill four rows of the wrap-around derivative buffers*/
679    for(i=top-2;i<top+2;i++) db_IxIyRow_u(Ixx[i%5],img,i,left-2,nc);
680
681    /*For each output row*/
682    for(i=top;i<=bottom;i++)
683    {
684        /*Step the derivative buffers*/
685        db_IxIyRow_u(Ixx[(i+2)%5],img,(i+2),left-2,nc);
686
687        /*Filter Ix2,IxIy,Iy2 vertically into gxx,gxy,gyy*/
688        db_gxx_gxy_gyy_row_s(gxx,Ixx[(i-2)%5],Ixx[(i-1)%5],Ixx[i%5],Ixx[(i+1)%5],Ixx[(i+2)%5],nc);
689
690        /*Filter gxx,gxy,gyy horizontally and compute corner response s*/
691        db_HarrisStrength_row_s(s[i]+left,gxx,gxy,gyy,nc);
692    }
693
694}
695
696/*Compute Harris corner strength of img. Strength is returned for the region
697with (3,3) as upper left and (w-4,h-4) as lower right, positioned in the
698same place in s. In other words,image should be at least 7 pixels wide and 7 pixels high
699for a meaningful result*/
700void db_HarrisStrength_f(float **s,const float * const *img,int w,int h,
701                                    /*temp should point to at least
702                                    13*(chunk_width+4) of allocated memory*/
703                                    float *temp,
704                                    int chunk_width)
705{
706    int x,next_x,last,right;
707
708    last=w-4;
709    for(x=3;x<=last;x=next_x)
710    {
711        next_x=x+chunk_width;
712        right=next_x-1;
713        if(right>last) right=last;
714        /*Compute the Harris strength of a chunk*/
715        db_HarrisStrengthChunk_f(s,img,x,3,right,h-4,temp);
716    }
717}
718
719/*Compute Harris corner strength of img. Strength is returned for the region
720with (3,3) as upper left and (w-4,h-4) as lower right, positioned in the
721same place in s. In other words,image should be at least 7 pixels wide and 7 pixels high
722for a meaningful result.Moreover, the image should be overallocated by 256 bytes.
723s[i][3] should by 16 byte aligned for any i*/
724void db_HarrisStrength_u(float **s, const unsigned char * const *img,int w,int h,
725                                    /*temp should point to at least
726                                    18*128 of allocated memory*/
727                                    int *temp)
728{
729    int x,next_x,last;
730    int nc;
731
732    last=w-4;
733    for(x=3;x<=last;x=next_x)
734    {
735        next_x=x+124;
736
737        // mayban: to revert to the original full chunks state, change the line below to: nc = 128;
738        nc = db_mini(128,last-x+1);
739        //nc = 128;
740
741        /*Compute the Harris strength of a chunk*/
742        db_HarrisStrengthChunk_u(s,img,x,3,h-4,temp,nc);
743    }
744}
745
746inline float db_Max_128Aligned16_f(float *v)
747{
748#ifdef DB_USE_SIMD
749    float back;
750
751    _asm
752    {
753        mov eax,v
754
755        /*Chunk1*/
756        movaps xmm0,[eax]
757        movaps xmm1,[eax+16]
758        movaps xmm2,[eax+32]
759        movaps xmm3,[eax+48]
760        movaps xmm4,[eax+64]
761        movaps xmm5,[eax+80]
762        movaps xmm6,[eax+96]
763        movaps xmm7,[eax+112]
764
765        /*Chunk2*/
766        maxps xmm0,[eax+128]
767        maxps xmm1,[eax+144]
768        maxps xmm2,[eax+160]
769        maxps xmm3,[eax+176]
770        maxps xmm4,[eax+192]
771        maxps xmm5,[eax+208]
772        maxps xmm6,[eax+224]
773        maxps xmm7,[eax+240]
774
775        /*Chunk3*/
776        maxps xmm0,[eax+256]
777        maxps xmm1,[eax+272]
778        maxps xmm2,[eax+288]
779        maxps xmm3,[eax+304]
780        maxps xmm4,[eax+320]
781        maxps xmm5,[eax+336]
782        maxps xmm6,[eax+352]
783        maxps xmm7,[eax+368]
784
785        /*Chunk4*/
786        maxps xmm0,[eax+384]
787        maxps xmm1,[eax+400]
788        maxps xmm2,[eax+416]
789        maxps xmm3,[eax+432]
790        maxps xmm4,[eax+448]
791        maxps xmm5,[eax+464]
792        maxps xmm6,[eax+480]
793        maxps xmm7,[eax+496]
794
795        /*Collect*/
796        maxps   xmm0,xmm1
797        maxps   xmm2,xmm3
798        maxps   xmm4,xmm5
799        maxps   xmm6,xmm7
800        maxps   xmm0,xmm2
801        maxps   xmm4,xmm6
802        maxps   xmm0,xmm4
803        movhlps xmm1,xmm0
804        maxps   xmm0,xmm1
805        shufps  xmm1,xmm0,1
806        maxps   xmm0,xmm1
807        movss   back,xmm0
808    }
809
810    return(back);
811#else
812    float val,max_val;
813    float *p,*stop_p;
814    max_val=v[0];
815    for(p=v+1,stop_p=v+128;p!=stop_p;)
816    {
817        val= *p++;
818        if(val>max_val) max_val=val;
819    }
820    return(max_val);
821#endif /*DB_USE_SIMD*/
822}
823
824inline float db_Max_64Aligned16_f(float *v)
825{
826#ifdef DB_USE_SIMD
827    float back;
828
829    _asm
830    {
831        mov eax,v
832
833        /*Chunk1*/
834        movaps xmm0,[eax]
835        movaps xmm1,[eax+16]
836        movaps xmm2,[eax+32]
837        movaps xmm3,[eax+48]
838        movaps xmm4,[eax+64]
839        movaps xmm5,[eax+80]
840        movaps xmm6,[eax+96]
841        movaps xmm7,[eax+112]
842
843        /*Chunk2*/
844        maxps xmm0,[eax+128]
845        maxps xmm1,[eax+144]
846        maxps xmm2,[eax+160]
847        maxps xmm3,[eax+176]
848        maxps xmm4,[eax+192]
849        maxps xmm5,[eax+208]
850        maxps xmm6,[eax+224]
851        maxps xmm7,[eax+240]
852
853        /*Collect*/
854        maxps   xmm0,xmm1
855        maxps   xmm2,xmm3
856        maxps   xmm4,xmm5
857        maxps   xmm6,xmm7
858        maxps   xmm0,xmm2
859        maxps   xmm4,xmm6
860        maxps   xmm0,xmm4
861        movhlps xmm1,xmm0
862        maxps   xmm0,xmm1
863        shufps  xmm1,xmm0,1
864        maxps   xmm0,xmm1
865        movss   back,xmm0
866    }
867
868    return(back);
869#else
870    float val,max_val;
871    float *p,*stop_p;
872    max_val=v[0];
873    for(p=v+1,stop_p=v+64;p!=stop_p;)
874    {
875        val= *p++;
876        if(val>max_val) max_val=val;
877    }
878    return(max_val);
879#endif /*DB_USE_SIMD*/
880}
881
882inline float db_Max_32Aligned16_f(float *v)
883{
884#ifdef DB_USE_SIMD
885    float back;
886
887    _asm
888    {
889        mov eax,v
890
891        /*Chunk1*/
892        movaps xmm0,[eax]
893        movaps xmm1,[eax+16]
894        movaps xmm2,[eax+32]
895        movaps xmm3,[eax+48]
896        movaps xmm4,[eax+64]
897        movaps xmm5,[eax+80]
898        movaps xmm6,[eax+96]
899        movaps xmm7,[eax+112]
900
901        /*Collect*/
902        maxps   xmm0,xmm1
903        maxps   xmm2,xmm3
904        maxps   xmm4,xmm5
905        maxps   xmm6,xmm7
906        maxps   xmm0,xmm2
907        maxps   xmm4,xmm6
908        maxps   xmm0,xmm4
909        movhlps xmm1,xmm0
910        maxps   xmm0,xmm1
911        shufps  xmm1,xmm0,1
912        maxps   xmm0,xmm1
913        movss   back,xmm0
914    }
915
916    return(back);
917#else
918    float val,max_val;
919    float *p,*stop_p;
920    max_val=v[0];
921    for(p=v+1,stop_p=v+32;p!=stop_p;)
922    {
923        val= *p++;
924        if(val>max_val) max_val=val;
925    }
926    return(max_val);
927#endif /*DB_USE_SIMD*/
928}
929
930inline float db_Max_16Aligned16_f(float *v)
931{
932#ifdef DB_USE_SIMD
933    float back;
934
935    _asm
936    {
937        mov eax,v
938
939        /*Chunk1*/
940        movaps xmm0,[eax]
941        movaps xmm1,[eax+16]
942        movaps xmm2,[eax+32]
943        movaps xmm3,[eax+48]
944
945        /*Collect*/
946        maxps   xmm0,xmm1
947        maxps   xmm2,xmm3
948        maxps   xmm0,xmm2
949        movhlps xmm1,xmm0
950        maxps   xmm0,xmm1
951        shufps  xmm1,xmm0,1
952        maxps   xmm0,xmm1
953        movss   back,xmm0
954    }
955
956    return(back);
957#else
958    float val,max_val;
959    float *p,*stop_p;
960    max_val=v[0];
961    for(p=v+1,stop_p=v+16;p!=stop_p;)
962    {
963        val= *p++;
964        if(val>max_val) max_val=val;
965    }
966    return(max_val);
967#endif /*DB_USE_SIMD*/
968}
969
970inline float db_Max_8Aligned16_f(float *v)
971{
972#ifdef DB_USE_SIMD
973    float back;
974
975    _asm
976    {
977        mov eax,v
978
979        /*Chunk1*/
980        movaps xmm0,[eax]
981        movaps xmm1,[eax+16]
982
983        /*Collect*/
984        maxps   xmm0,xmm1
985        movhlps xmm1,xmm0
986        maxps   xmm0,xmm1
987        shufps  xmm1,xmm0,1
988        maxps   xmm0,xmm1
989        movss   back,xmm0
990    }
991
992    return(back);
993#else
994    float val,max_val;
995    float *p,*stop_p;
996    max_val=v[0];
997    for(p=v+1,stop_p=v+8;p!=stop_p;)
998    {
999        val= *p++;
1000        if(val>max_val) max_val=val;
1001    }
1002    return(max_val);
1003#endif /*DB_USE_SIMD*/
1004}
1005
1006inline float db_Max_Aligned16_f(float *v,int size)
1007{
1008    float val,max_val;
1009    float *stop_v;
1010
1011    max_val=v[0];
1012    for(;size>=128;size-=128)
1013    {
1014        val=db_Max_128Aligned16_f(v);
1015        v+=128;
1016        if(val>max_val) max_val=val;
1017    }
1018    if(size&64)
1019    {
1020        val=db_Max_64Aligned16_f(v);
1021        v+=64;
1022        if(val>max_val) max_val=val;
1023    }
1024    if(size&32)
1025    {
1026        val=db_Max_32Aligned16_f(v);
1027        v+=32;
1028        if(val>max_val) max_val=val;
1029    }
1030    if(size&16)
1031    {
1032        val=db_Max_16Aligned16_f(v);
1033        v+=16;
1034        if(val>max_val) max_val=val;
1035    }
1036    if(size&8)
1037    {
1038        val=db_Max_8Aligned16_f(v);
1039        v+=8;
1040        if(val>max_val) max_val=val;
1041    }
1042    if(size&7)
1043    {
1044        for(stop_v=v+(size&7);v!=stop_v;)
1045        {
1046            val= *v++;
1047            if(val>max_val) max_val=val;
1048        }
1049    }
1050
1051    return(max_val);
1052}
1053
1054/*Find maximum value of img in the region starting at (left,top)
1055and with width w and height h. img[left] should be 16 byte aligned*/
1056float db_MaxImage_Aligned16_f(float **img,int left,int top,int w,int h)
1057{
1058    float val,max_val;
1059    int i,stop_i;
1060
1061    if(w && h)
1062    {
1063        stop_i=top+h;
1064        max_val=img[top][left];
1065
1066        for(i=top;i<stop_i;i++)
1067        {
1068            val=db_Max_Aligned16_f(img[i]+left,w);
1069            if(val>max_val) max_val=val;
1070        }
1071        return(max_val);
1072    }
1073    return(0.0);
1074}
1075
1076inline void db_MaxVector_128_Aligned16_f(float *m,float *v1,float *v2)
1077{
1078#ifdef DB_USE_SIMD
1079    _asm
1080    {
1081        mov eax,v1
1082        mov ebx,v2
1083        mov ecx,m
1084
1085        /*Chunk1*/
1086        movaps xmm0,[eax]
1087        movaps xmm1,[eax+16]
1088        movaps xmm2,[eax+32]
1089        movaps xmm3,[eax+48]
1090        movaps xmm4,[eax+64]
1091        movaps xmm5,[eax+80]
1092        movaps xmm6,[eax+96]
1093        movaps xmm7,[eax+112]
1094        maxps  xmm0,[ebx]
1095        maxps  xmm1,[ebx+16]
1096        maxps  xmm2,[ebx+32]
1097        maxps  xmm3,[ebx+48]
1098        maxps  xmm4,[ebx+64]
1099        maxps  xmm5,[ebx+80]
1100        maxps  xmm6,[ebx+96]
1101        maxps  xmm7,[ebx+112]
1102        movaps [ecx],xmm0
1103        movaps [ecx+16],xmm1
1104        movaps [ecx+32],xmm2
1105        movaps [ecx+48],xmm3
1106        movaps [ecx+64],xmm4
1107        movaps [ecx+80],xmm5
1108        movaps [ecx+96],xmm6
1109        movaps [ecx+112],xmm7
1110
1111        /*Chunk2*/
1112        movaps xmm0,[eax+128]
1113        movaps xmm1,[eax+144]
1114        movaps xmm2,[eax+160]
1115        movaps xmm3,[eax+176]
1116        movaps xmm4,[eax+192]
1117        movaps xmm5,[eax+208]
1118        movaps xmm6,[eax+224]
1119        movaps xmm7,[eax+240]
1120        maxps  xmm0,[ebx+128]
1121        maxps  xmm1,[ebx+144]
1122        maxps  xmm2,[ebx+160]
1123        maxps  xmm3,[ebx+176]
1124        maxps  xmm4,[ebx+192]
1125        maxps  xmm5,[ebx+208]
1126        maxps  xmm6,[ebx+224]
1127        maxps  xmm7,[ebx+240]
1128        movaps [ecx+128],xmm0
1129        movaps [ecx+144],xmm1
1130        movaps [ecx+160],xmm2
1131        movaps [ecx+176],xmm3
1132        movaps [ecx+192],xmm4
1133        movaps [ecx+208],xmm5
1134        movaps [ecx+224],xmm6
1135        movaps [ecx+240],xmm7
1136
1137        /*Chunk3*/
1138        movaps xmm0,[eax+256]
1139        movaps xmm1,[eax+272]
1140        movaps xmm2,[eax+288]
1141        movaps xmm3,[eax+304]
1142        movaps xmm4,[eax+320]
1143        movaps xmm5,[eax+336]
1144        movaps xmm6,[eax+352]
1145        movaps xmm7,[eax+368]
1146        maxps  xmm0,[ebx+256]
1147        maxps  xmm1,[ebx+272]
1148        maxps  xmm2,[ebx+288]
1149        maxps  xmm3,[ebx+304]
1150        maxps  xmm4,[ebx+320]
1151        maxps  xmm5,[ebx+336]
1152        maxps  xmm6,[ebx+352]
1153        maxps  xmm7,[ebx+368]
1154        movaps [ecx+256],xmm0
1155        movaps [ecx+272],xmm1
1156        movaps [ecx+288],xmm2
1157        movaps [ecx+304],xmm3
1158        movaps [ecx+320],xmm4
1159        movaps [ecx+336],xmm5
1160        movaps [ecx+352],xmm6
1161        movaps [ecx+368],xmm7
1162
1163        /*Chunk4*/
1164        movaps xmm0,[eax+384]
1165        movaps xmm1,[eax+400]
1166        movaps xmm2,[eax+416]
1167        movaps xmm3,[eax+432]
1168        movaps xmm4,[eax+448]
1169        movaps xmm5,[eax+464]
1170        movaps xmm6,[eax+480]
1171        movaps xmm7,[eax+496]
1172        maxps  xmm0,[ebx+384]
1173        maxps  xmm1,[ebx+400]
1174        maxps  xmm2,[ebx+416]
1175        maxps  xmm3,[ebx+432]
1176        maxps  xmm4,[ebx+448]
1177        maxps  xmm5,[ebx+464]
1178        maxps  xmm6,[ebx+480]
1179        maxps  xmm7,[ebx+496]
1180        movaps [ecx+384],xmm0
1181        movaps [ecx+400],xmm1
1182        movaps [ecx+416],xmm2
1183        movaps [ecx+432],xmm3
1184        movaps [ecx+448],xmm4
1185        movaps [ecx+464],xmm5
1186        movaps [ecx+480],xmm6
1187        movaps [ecx+496],xmm7
1188    }
1189#else
1190    int i;
1191    float a,b;
1192    for(i=0;i<128;i++)
1193    {
1194        a=v1[i];
1195        b=v2[i];
1196        if(a>=b) m[i]=a;
1197        else m[i]=b;
1198    }
1199#endif /*DB_USE_SIMD*/
1200}
1201
1202inline void db_MaxVector_128_SecondSourceDestAligned16_f(float *m,float *v1,float *v2)
1203{
1204#ifdef DB_USE_SIMD
1205    _asm
1206    {
1207        mov eax,v1
1208        mov ebx,v2
1209        mov ecx,m
1210
1211        /*Chunk1*/
1212        movups xmm0,[eax]
1213        movups xmm1,[eax+16]
1214        movups xmm2,[eax+32]
1215        movups xmm3,[eax+48]
1216        movups xmm4,[eax+64]
1217        movups xmm5,[eax+80]
1218        movups xmm6,[eax+96]
1219        movups xmm7,[eax+112]
1220        maxps  xmm0,[ebx]
1221        maxps  xmm1,[ebx+16]
1222        maxps  xmm2,[ebx+32]
1223        maxps  xmm3,[ebx+48]
1224        maxps  xmm4,[ebx+64]
1225        maxps  xmm5,[ebx+80]
1226        maxps  xmm6,[ebx+96]
1227        maxps  xmm7,[ebx+112]
1228        movaps [ecx],xmm0
1229        movaps [ecx+16],xmm1
1230        movaps [ecx+32],xmm2
1231        movaps [ecx+48],xmm3
1232        movaps [ecx+64],xmm4
1233        movaps [ecx+80],xmm5
1234        movaps [ecx+96],xmm6
1235        movaps [ecx+112],xmm7
1236
1237        /*Chunk2*/
1238        movups xmm0,[eax+128]
1239        movups xmm1,[eax+144]
1240        movups xmm2,[eax+160]
1241        movups xmm3,[eax+176]
1242        movups xmm4,[eax+192]
1243        movups xmm5,[eax+208]
1244        movups xmm6,[eax+224]
1245        movups xmm7,[eax+240]
1246        maxps  xmm0,[ebx+128]
1247        maxps  xmm1,[ebx+144]
1248        maxps  xmm2,[ebx+160]
1249        maxps  xmm3,[ebx+176]
1250        maxps  xmm4,[ebx+192]
1251        maxps  xmm5,[ebx+208]
1252        maxps  xmm6,[ebx+224]
1253        maxps  xmm7,[ebx+240]
1254        movaps [ecx+128],xmm0
1255        movaps [ecx+144],xmm1
1256        movaps [ecx+160],xmm2
1257        movaps [ecx+176],xmm3
1258        movaps [ecx+192],xmm4
1259        movaps [ecx+208],xmm5
1260        movaps [ecx+224],xmm6
1261        movaps [ecx+240],xmm7
1262
1263        /*Chunk3*/
1264        movups xmm0,[eax+256]
1265        movups xmm1,[eax+272]
1266        movups xmm2,[eax+288]
1267        movups xmm3,[eax+304]
1268        movups xmm4,[eax+320]
1269        movups xmm5,[eax+336]
1270        movups xmm6,[eax+352]
1271        movups xmm7,[eax+368]
1272        maxps  xmm0,[ebx+256]
1273        maxps  xmm1,[ebx+272]
1274        maxps  xmm2,[ebx+288]
1275        maxps  xmm3,[ebx+304]
1276        maxps  xmm4,[ebx+320]
1277        maxps  xmm5,[ebx+336]
1278        maxps  xmm6,[ebx+352]
1279        maxps  xmm7,[ebx+368]
1280        movaps [ecx+256],xmm0
1281        movaps [ecx+272],xmm1
1282        movaps [ecx+288],xmm2
1283        movaps [ecx+304],xmm3
1284        movaps [ecx+320],xmm4
1285        movaps [ecx+336],xmm5
1286        movaps [ecx+352],xmm6
1287        movaps [ecx+368],xmm7
1288
1289        /*Chunk4*/
1290        movups xmm0,[eax+384]
1291        movups xmm1,[eax+400]
1292        movups xmm2,[eax+416]
1293        movups xmm3,[eax+432]
1294        movups xmm4,[eax+448]
1295        movups xmm5,[eax+464]
1296        movups xmm6,[eax+480]
1297        movups xmm7,[eax+496]
1298        maxps  xmm0,[ebx+384]
1299        maxps  xmm1,[ebx+400]
1300        maxps  xmm2,[ebx+416]
1301        maxps  xmm3,[ebx+432]
1302        maxps  xmm4,[ebx+448]
1303        maxps  xmm5,[ebx+464]
1304        maxps  xmm6,[ebx+480]
1305        maxps  xmm7,[ebx+496]
1306        movaps [ecx+384],xmm0
1307        movaps [ecx+400],xmm1
1308        movaps [ecx+416],xmm2
1309        movaps [ecx+432],xmm3
1310        movaps [ecx+448],xmm4
1311        movaps [ecx+464],xmm5
1312        movaps [ecx+480],xmm6
1313        movaps [ecx+496],xmm7
1314    }
1315#else
1316    int i;
1317    float a,b;
1318    for(i=0;i<128;i++)
1319    {
1320        a=v1[i];
1321        b=v2[i];
1322        if(a>=b) m[i]=a;
1323        else m[i]=b;
1324    }
1325#endif /*DB_USE_SIMD*/
1326}
1327
1328/*Compute Max-suppression-filtered image for a chunk of sf starting at (left,top), of width 124 and
1329stopping at bottom. The output is shifted two steps left and overwrites 128 elements for each row.
1330The input s should be of width at least 128, and exist for 2 pixels outside the specified region.
1331s[i][left-2] and sf[i][left-2] should be 16 byte aligned. Top must be at least 3*/
1332inline void db_MaxSuppressFilterChunk_5x5_Aligned16_f(float **sf,float **s,int left,int top,int bottom,
1333                                      /*temp should point to at least
1334                                      6*132 floats of 16-byte-aligned allocated memory*/
1335                                      float *temp)
1336{
1337#ifdef DB_USE_SIMD
1338    int i,lm2;
1339    float *two[4];
1340    float *four,*five;
1341
1342    lm2=left-2;
1343
1344    /*Set pointers to pre-allocated memory*/
1345    four=temp;
1346    five=four+132;
1347    for(i=0;i<4;i++)
1348    {
1349        two[i]=five+(i+1)*132;
1350    }
1351
1352    /*Set rests of four and five to zero to avoid
1353    floating point exceptions*/
1354    for(i=129;i<132;i++)
1355    {
1356        four[i]=0.0;
1357        five[i]=0.0;
1358    }
1359
1360    /*Fill three rows of the wrap-around max buffers*/
1361    for(i=top-3;i<top;i++) db_MaxVector_128_Aligned16_f(two[i&3],s[i+1]+lm2,s[i+2]+lm2);
1362
1363    /*For each output row*/
1364    for(;i<=bottom;i++)
1365    {
1366        /*Compute max of the lowest pair of rows in the five row window*/
1367        db_MaxVector_128_Aligned16_f(two[i&3],s[i+1]+lm2,s[i+2]+lm2);
1368        /*Compute max of the lowest and highest pair of rows in the five row window*/
1369        db_MaxVector_128_Aligned16_f(four,two[i&3],two[(i-3)&3]);
1370        /*Compute max of all rows*/
1371        db_MaxVector_128_Aligned16_f(five,four,two[(i-1)&3]);
1372        /*Compute max of 2x5 chunks*/
1373        db_MaxVector_128_SecondSourceDestAligned16_f(five,five+1,five);
1374        /*Compute max of pairs of 2x5 chunks*/
1375        db_MaxVector_128_SecondSourceDestAligned16_f(five,five+3,five);
1376        /*Compute max of pairs of 5x5 except middle*/
1377        db_MaxVector_128_SecondSourceDestAligned16_f(sf[i]+lm2,four+2,five);
1378    }
1379
1380#else
1381    int i,j,right;
1382    float sv;
1383
1384    right=left+128;
1385    for(i=top;i<=bottom;i++) for(j=left;j<right;j++)
1386    {
1387        sv=s[i][j];
1388
1389        if( sv>s[i-2][j-2] && sv>s[i-2][j-1] && sv>s[i-2][j] && sv>s[i-2][j+1] && sv>s[i-2][j+2] &&
1390            sv>s[i-1][j-2] && sv>s[i-1][j-1] && sv>s[i-1][j] && sv>s[i-1][j+1] && sv>s[i-1][j+2] &&
1391            sv>s[  i][j-2] && sv>s[  i][j-1] &&                 sv>s[  i][j+1] && sv>s[  i][j+2] &&
1392            sv>s[i+1][j-2] && sv>s[i+1][j-1] && sv>s[i+1][j] && sv>s[i+1][j+1] && sv>s[i+1][j+2] &&
1393            sv>s[i+2][j-2] && sv>s[i+2][j-1] && sv>s[i+2][j] && sv>s[i+2][j+1] && sv>s[i+2][j+2])
1394        {
1395            sf[i][j-2]=0.0;
1396        }
1397        else sf[i][j-2]=sv;
1398    }
1399#endif /*DB_USE_SIMD*/
1400}
1401
1402/*Compute Max-suppression-filtered image for a chunk of sf starting at (left,top) and
1403stopping at bottom. The output is shifted two steps left. The input s should exist for 2 pixels
1404outside the specified region. s[i][left-2] and sf[i][left-2] should be 16 byte aligned.
1405Top must be at least 3. Reading and writing from and to the input and output images is done
1406as if the region had a width equal to a multiple of 124. If this is not the case, the images
1407should be over-allocated and the input cleared for a sufficient region*/
1408void db_MaxSuppressFilter_5x5_Aligned16_f(float **sf,float **s,int left,int top,int right,int bottom,
1409                                          /*temp should point to at least
1410                                          6*132 floats of 16-byte-aligned allocated memory*/
1411                                          float *temp)
1412{
1413    int x,next_x;
1414
1415    for(x=left;x<=right;x=next_x)
1416    {
1417        next_x=x+124;
1418        db_MaxSuppressFilterChunk_5x5_Aligned16_f(sf,s,x,top,bottom,temp);
1419    }
1420}
1421
1422/*Extract corners from the chunk (left,top) to (right,bottom). Store in x_temp,y_temp and s_temp
1423which should point to space of at least as many positions as there are pixels in the chunk*/
1424inline int db_CornersFromChunk(float **strength,int left,int top,int right,int bottom,float threshold,double *x_temp,double *y_temp,double *s_temp)
1425{
1426    int i,j,nr;
1427    float s;
1428
1429    nr=0;
1430    for(i=top;i<=bottom;i++) for(j=left;j<=right;j++)
1431    {
1432        s=strength[i][j];
1433
1434        if(s>=threshold &&
1435            s>strength[i-2][j-2] && s>strength[i-2][j-1] && s>strength[i-2][j] && s>strength[i-2][j+1] && s>strength[i-2][j+2] &&
1436            s>strength[i-1][j-2] && s>strength[i-1][j-1] && s>strength[i-1][j] && s>strength[i-1][j+1] && s>strength[i-1][j+2] &&
1437            s>strength[  i][j-2] && s>strength[  i][j-1] &&                       s>strength[  i][j+1] && s>strength[  i][j+2] &&
1438            s>strength[i+1][j-2] && s>strength[i+1][j-1] && s>strength[i+1][j] && s>strength[i+1][j+1] && s>strength[i+1][j+2] &&
1439            s>strength[i+2][j-2] && s>strength[i+2][j-1] && s>strength[i+2][j] && s>strength[i+2][j+1] && s>strength[i+2][j+2])
1440        {
1441            x_temp[nr]=(double) j;
1442            y_temp[nr]=(double) i;
1443            s_temp[nr]=(double) s;
1444            nr++;
1445        }
1446    }
1447    return(nr);
1448}
1449
1450
1451//Sub-pixel accuracy using 2D quadratic interpolation.(YCJ)
1452inline void db_SubPixel(float **strength, const double xd, const double yd, double &xs, double &ys)
1453{
1454    int x = (int) xd;
1455    int y = (int) yd;
1456
1457    float fxx = strength[y][x-1] - strength[y][x] - strength[y][x] + strength[y][x+1];
1458    float fyy = strength[y-1][x] - strength[y][x] - strength[y][x] + strength[y+1][x];
1459    float fxy = (strength[y-1][x-1] - strength[y-1][x+1] - strength[y+1][x-1] + strength[y+1][x+1])/(float)4.0;
1460
1461    float denom = (fxx * fyy - fxy * fxy) * (float) 2.0;
1462
1463    xs = xd;
1464    ys = yd;
1465
1466    if ( db_absf(denom) <= FLT_EPSILON )
1467    {
1468        return;
1469    }
1470    else
1471    {
1472        float fx = strength[y][x+1] - strength[y][x-1];
1473        float fy = strength[y+1][x] - strength[y-1][x];
1474
1475        float dx = (fyy * fx - fxy * fy) / denom;
1476        float dy = (fxx * fy - fxy * fx) / denom;
1477
1478        if ( db_absf(dx) > 1.0 || db_absf(dy) > 1.0 )
1479        {
1480            return;
1481        }
1482        else
1483        {
1484            xs -= dx;
1485            ys -= dy;
1486        }
1487    }
1488
1489    return;
1490}
1491
1492/*Extract corners from the image part from (left,top) to (right,bottom).
1493Store in x and y, extracting at most satnr corners in each block of size (bw,bh).
1494The pointer temp_d should point to at least 5*bw*bh positions.
1495area_factor holds how many corners max to extract per 10000 pixels*/
1496void db_ExtractCornersSaturated(float **strength,int left,int top,int right,int bottom,
1497                                int bw,int bh,unsigned long area_factor,
1498                                float threshold,double *temp_d,
1499                                double *x_coord,double *y_coord,int *nr_corners)
1500{
1501    double *x_temp,*y_temp,*s_temp,*select_temp;
1502    double loc_thresh;
1503    unsigned long bwbh,area,saturation;
1504    int x,next_x,last_x;
1505    int y,next_y,last_y;
1506    int nr,nr_points,i,stop;
1507
1508    bwbh=bw*bh;
1509    x_temp=temp_d;
1510    y_temp=x_temp+bwbh;
1511    s_temp=y_temp+bwbh;
1512    select_temp=s_temp+bwbh;
1513
1514#ifdef DB_SUB_PIXEL
1515    // subpixel processing may sometimes push the corner ourside the real border
1516    // increasing border size:
1517    left++;
1518    top++;
1519    bottom--;
1520    right--;
1521#endif /*DB_SUB_PIXEL*/
1522
1523    nr_points=0;
1524    for(y=top;y<=bottom;y=next_y)
1525    {
1526        next_y=y+bh;
1527        last_y=next_y-1;
1528        if(last_y>bottom) last_y=bottom;
1529        for(x=left;x<=right;x=next_x)
1530        {
1531            next_x=x+bw;
1532            last_x=next_x-1;
1533            if(last_x>right) last_x=right;
1534
1535            area=(last_x-x+1)*(last_y-y+1);
1536            saturation=(area*area_factor)/10000;
1537            nr=db_CornersFromChunk(strength,x,y,last_x,last_y,threshold,x_temp,y_temp,s_temp);
1538            if(nr)
1539            {
1540                if(((unsigned long)nr)>saturation) loc_thresh=db_LeanQuickSelect(s_temp,nr,nr-saturation,select_temp);
1541                else loc_thresh=threshold;
1542
1543                stop=nr_points+saturation;
1544                for(i=0;(i<nr)&&(nr_points<stop);i++)
1545                {
1546                    if(s_temp[i]>=loc_thresh)
1547                    {
1548                        #ifdef DB_SUB_PIXEL
1549                               db_SubPixel(strength, x_temp[i], y_temp[i], x_coord[nr_points], y_coord[nr_points]);
1550                        #else
1551                               x_coord[nr_points]=x_temp[i];
1552                               y_coord[nr_points]=y_temp[i];
1553                        #endif
1554
1555                        nr_points++;
1556                    }
1557                }
1558            }
1559        }
1560    }
1561    *nr_corners=nr_points;
1562}
1563
1564db_CornerDetector_f::db_CornerDetector_f()
1565{
1566    m_w=0; m_h=0;
1567}
1568
1569db_CornerDetector_f::~db_CornerDetector_f()
1570{
1571    Clean();
1572}
1573
1574void db_CornerDetector_f::Clean()
1575{
1576    if(m_w!=0)
1577    {
1578        delete [] m_temp_f;
1579        delete [] m_temp_d;
1580        db_FreeStrengthImage_f(m_strength_mem,m_strength,m_h);
1581    }
1582    m_w=0; m_h=0;
1583}
1584
1585unsigned long db_CornerDetector_f::Init(int im_width,int im_height,int target_nr_corners,
1586                            int nr_horizontal_blocks,int nr_vertical_blocks,
1587                            double absolute_threshold,double relative_threshold)
1588{
1589    int chunkwidth=208;
1590    int block_width,block_height;
1591    unsigned long area_factor;
1592    int active_width,active_height;
1593
1594    active_width=db_maxi(1,im_width-10);
1595    active_height=db_maxi(1,im_height-10);
1596    block_width=db_maxi(1,active_width/nr_horizontal_blocks);
1597    block_height=db_maxi(1,active_height/nr_vertical_blocks);
1598
1599    area_factor=db_minl(1000,db_maxl(1,(long)(10000.0*((double)target_nr_corners)/
1600        (((double)active_width)*((double)active_height)))));
1601
1602    return(Start(im_width,im_height,block_width,block_height,area_factor,
1603        absolute_threshold,relative_threshold,chunkwidth));
1604}
1605
1606unsigned long db_CornerDetector_f::Start(int im_width,int im_height,
1607                             int block_width,int block_height,unsigned long area_factor,
1608                             double absolute_threshold,double relative_threshold,int chunkwidth)
1609{
1610    Clean();
1611
1612    m_w=im_width;
1613    m_h=im_height;
1614    m_cw=chunkwidth;
1615    m_bw=block_width;
1616    m_bh=block_height;
1617    m_area_factor=area_factor;
1618    m_r_thresh=relative_threshold;
1619    m_a_thresh=absolute_threshold;
1620    m_max_nr=db_maxl(1,1+(m_w*m_h*m_area_factor)/10000);
1621
1622    m_temp_f=new float[13*(m_cw+4)];
1623    m_temp_d=new double[5*m_bw*m_bh];
1624    m_strength=db_AllocStrengthImage_f(&m_strength_mem,m_w,m_h);
1625
1626    return(m_max_nr);
1627}
1628
1629void db_CornerDetector_f::DetectCorners(const float * const *img,double *x_coord,double *y_coord,int *nr_corners) const
1630{
1631    float max_val,threshold;
1632
1633    db_HarrisStrength_f(m_strength,img,m_w,m_h,m_temp_f,m_cw);
1634
1635    if(m_r_thresh)
1636    {
1637        max_val=db_MaxImage_Aligned16_f(m_strength,3,3,m_w-6,m_h-6);
1638        threshold= (float) db_maxd(m_a_thresh,max_val*m_r_thresh);
1639    }
1640    else threshold= (float) m_a_thresh;
1641
1642    db_ExtractCornersSaturated(m_strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,threshold,
1643        m_temp_d,x_coord,y_coord,nr_corners);
1644}
1645
1646db_CornerDetector_u::db_CornerDetector_u()
1647{
1648    m_w=0; m_h=0;
1649}
1650
1651db_CornerDetector_u::~db_CornerDetector_u()
1652{
1653    Clean();
1654}
1655
1656db_CornerDetector_u::db_CornerDetector_u(const db_CornerDetector_u& cd)
1657{
1658    Start(cd.m_w, cd.m_h, cd.m_bw, cd.m_bh, cd.m_area_factor,
1659        cd.m_a_thresh, cd.m_r_thresh);
1660}
1661
1662db_CornerDetector_u& db_CornerDetector_u::operator=(const db_CornerDetector_u& cd)
1663{
1664    if ( this == &cd ) return *this;
1665
1666    Clean();
1667
1668    Start(cd.m_w, cd.m_h, cd.m_bw, cd.m_bh, cd.m_area_factor,
1669        cd.m_a_thresh, cd.m_r_thresh);
1670
1671    return *this;
1672}
1673
1674void db_CornerDetector_u::Clean()
1675{
1676    if(m_w!=0)
1677    {
1678        delete [] m_temp_i;
1679        delete [] m_temp_d;
1680        db_FreeStrengthImage_f(m_strength_mem,m_strength,m_h);
1681    }
1682    m_w=0; m_h=0;
1683}
1684
1685unsigned long db_CornerDetector_u::Init(int im_width,int im_height,int target_nr_corners,
1686                            int nr_horizontal_blocks,int nr_vertical_blocks,
1687                            double absolute_threshold,double relative_threshold)
1688{
1689    int block_width,block_height;
1690    unsigned long area_factor;
1691    int active_width,active_height;
1692
1693    active_width=db_maxi(1,im_width-10);
1694    active_height=db_maxi(1,im_height-10);
1695    block_width=db_maxi(1,active_width/nr_horizontal_blocks);
1696    block_height=db_maxi(1,active_height/nr_vertical_blocks);
1697
1698    area_factor=db_minl(1000,db_maxl(1,(long)(10000.0*((double)target_nr_corners)/
1699        (((double)active_width)*((double)active_height)))));
1700
1701    return(Start(im_width,im_height,block_width,block_height,area_factor,
1702        16.0*absolute_threshold,relative_threshold));
1703}
1704
1705unsigned long db_CornerDetector_u::Start(int im_width,int im_height,
1706                             int block_width,int block_height,unsigned long area_factor,
1707                             double absolute_threshold,double relative_threshold)
1708{
1709    Clean();
1710
1711    m_w=im_width;
1712    m_h=im_height;
1713    m_bw=block_width;
1714    m_bh=block_height;
1715    m_area_factor=area_factor;
1716    m_r_thresh=relative_threshold;
1717    m_a_thresh=absolute_threshold;
1718    m_max_nr=db_maxl(1,1+(m_w*m_h*m_area_factor)/10000);
1719
1720    m_temp_i=new int[18*128];
1721    m_temp_d=new double[5*m_bw*m_bh];
1722    m_strength=db_AllocStrengthImage_f(&m_strength_mem,m_w,m_h);
1723
1724    return(m_max_nr);
1725}
1726
1727void db_CornerDetector_u::DetectCorners(const unsigned char * const *img,double *x_coord,double *y_coord,int *nr_corners,
1728                                        const unsigned char * const *msk, unsigned char fgnd) const
1729{
1730    float max_val,threshold;
1731
1732    db_HarrisStrength_u(m_strength,img,m_w,m_h,m_temp_i);
1733
1734
1735    if(m_r_thresh)
1736    {
1737        max_val=db_MaxImage_Aligned16_f(m_strength,3,3,m_w-6,m_h-6);
1738        threshold= (float) db_maxd(m_a_thresh,max_val*m_r_thresh);
1739    }
1740    else threshold= (float) m_a_thresh;
1741
1742    db_ExtractCornersSaturated(m_strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,threshold,
1743        m_temp_d,x_coord,y_coord,nr_corners);
1744
1745
1746    if ( msk )
1747    {
1748        int nr_corners_mask=0;
1749
1750        for ( int i = 0; i < *nr_corners; ++i)
1751        {
1752            int cor_x = db_roundi(*(x_coord+i));
1753            int cor_y = db_roundi(*(y_coord+i));
1754            if ( msk[cor_y][cor_x] == fgnd )
1755            {
1756                x_coord[nr_corners_mask] = x_coord[i];
1757                y_coord[nr_corners_mask] = y_coord[i];
1758                nr_corners_mask++;
1759            }
1760        }
1761        *nr_corners = nr_corners_mask;
1762    }
1763}
1764
1765void db_CornerDetector_u::ExtractCorners(float ** strength, double *x_coord, double *y_coord, int *nr_corners) {
1766    if ( m_w!=0 )
1767        db_ExtractCornersSaturated(strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,float(m_a_thresh),
1768            m_temp_d,x_coord,y_coord,nr_corners);
1769}
1770
1771