18bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling/*
28bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling * Copyright (C) 2011 The Android Open Source Project
38bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling *
48bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling * Licensed under the Apache License, Version 2.0 (the "License");
58bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling * you may not use this file except in compliance with the License.
68bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling * You may obtain a copy of the License at
78bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling *
88bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling *      http://www.apache.org/licenses/LICENSE-2.0
98bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling *
108bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling * Unless required by applicable law or agreed to in writing, software
118bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling * distributed under the License is distributed on an "AS IS" BASIS,
128bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
138bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling * See the License for the specific language governing permissions and
148bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling * limitations under the License.
158bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling */
168bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
178bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling/*$Id: db_feature_detection.cpp,v 1.4 2011/06/17 14:03:30 mbansal Exp $*/
188bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
198bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling/*****************************************************************
208bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling*    Lean and mean begins here                                   *
218bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling*****************************************************************/
228bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
238bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#include "db_utilities.h"
248bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#include "db_feature_detection.h"
258bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#ifdef _VERBOSE_
268bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#include <iostream>
278bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#endif
288bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#include <float.h>
298bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
308bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#define DB_SUB_PIXEL
318bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
328bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#define BORDER 10 // 5
338bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
348bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingfloat** db_AllocStrengthImage_f(float **im,int w,int h)
358bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{
368bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    int i,n,aw;
378bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    long c,size;
388bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    float **img,*aim,*p;
398bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
408bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    /*Determine number of 124 element chunks needed*/
418bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    n=(db_maxi(1,w-6)+123)/124;
428bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    /*Determine the total allocation width aw*/
438bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    aw=n*124+8;
448bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    /*Allocate*/
458bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    size=aw*h+16;
468bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    *im=new float [size];
478bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    /*Clean up*/
488bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    p=(*im);
498bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    for(c=0;c<size;c++) p[c]=0.0;
508bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    /*Get a 16 byte aligned pointer*/
518bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    aim=db_AlignPointer_f(*im,16);
528bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    /*Allocate pointer table*/
538bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    img=new float* [h];
548bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    /*Initialize the pointer table*/
558bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    for(i=0;i<h;i++)
568bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    {
578bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        img[i]=aim+aw*i+1;
588bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    }
598bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
608bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    return(img);
618bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling}
628bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
/*Release a strength image: img is the row pointer table and im is the raw
pixel buffer. Both are freed with delete[].
h is part of the signature but is not used by this function*/
void db_FreeStrengthImage_f(float *im,float **img,int h)
{
    (void)h;

    /*The two arrays are independent allocations, so the order is irrelevant*/
    delete [] img;
    delete [] im;
}
688bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
/*Central differences for a subrow of img starting at row i, column j and
spanning chunk_width pixels:
  Ix[c] = img[i][j+c-1] - img[i][j+c+1]   (left minus right)
  Iy[c] = img[i-1][j+c] - img[i+1][j+c]   (above minus below)
Reads reach one pixel beyond the subrow in every direction*/
inline void db_IxIyRow_f(float *Ix,float *Iy,const float * const *img,int i,int j,int chunk_width)
{
    const float *above,*centre,*below;
    int k;

    /*Nothing to do, and no memory is touched, for an empty subrow*/
    if(chunk_width<=0) return;

    above=img[i-1]+j;
    centre=img[i]+j;
    below=img[i+1]+j;

    for(k=0;k<chunk_width;++k)
    {
        Ix[k]=centre[k-1]-centre[k+1];
        Iy[k]=above[k]-below[k];
    }
}
818bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
/*Compute products of halved central differences for a subrow of the 8-bit
image img starting at row i, column j, and store them as three planes in dxx.
Portable path, for c in [0,nc):
  Ix = (img[i][j+c-1] - img[i][j+c+1]) >> 1
  Iy = (img[i-1][j+c] - img[i+1][j+c]) >> 1
  dxx[c]     = Ix*Ix
  dxx[c+128] = Ix*Iy
  dxx[c+256] = Iy*Iy
MMX path: always processes 16 groups of 8 pixels (128 pixels) and ignores nc.
It halves each byte before subtracting rather than after, and writes the
planes at byte offsets 0, 512 and 1024 from dxx.
NOTE(review): in the MMX path the value labelled "dx" is the upper-minus-lower
row difference, so its first plane holds the square of the vertical difference
while the portable path stores the horizontal one there - confirm that callers
treat the first and third planes symmetrically.
Memory references occur one pixel outside the subrow*/
inline void db_IxIyRow_u(int *dxx,const unsigned char * const *img,int i,int j,int nc)
{
#ifdef DB_USE_MMX
    /*Hand-scheduled assembly, kept exactly as it was*/
    const unsigned char *r1,*r2,*r3;

    r1=img[i-1]+j; r2=img[i]+j; r3=img[i+1]+j;

    _asm
    {
        mov esi,16
        mov eax,r1
        mov ebx,r2
        mov ecx,r3
        mov edx,dxx

        /*Get bitmask into mm7*/
        mov       edi,7F7F7F7Fh
        movd      mm7,edi
        punpckldq mm7,mm7

loopstart:
        /***************dx part 1-12*********************************/
        movq       mm0,[eax]       /*1 Get upper*/
         pxor      mm6,mm6         /*2 Set to zero*/
        movq       mm1,[ecx]       /*3 Get lower*/
         psrlq     mm0,1           /*4 Shift*/
        psrlq      mm1,1           /*5 Shift*/
         pand      mm0,mm7         /*6 And*/
        movq       mm2,[ebx-1]     /*13 Get left*/
         pand      mm1,mm7         /*7 And*/
        psubb      mm0,mm1         /*8 Subtract*/
         pxor      mm5,mm5         /*14 Set to zero*/
        movq       mm1,mm0         /*9 Copy*/
         pcmpgtb   mm6,mm0         /*10 Create unpack mask*/
        movq       mm3,[ebx+1]     /*15 Get right*/
         punpcklbw mm0,mm6         /*11 Unpack low*/
        punpckhbw  mm1,mm6         /*12 Unpack high*/
        /***************dy part 13-24*********************************/
         movq      mm4,mm0         /*25 Copy dx*/
        psrlq      mm2,1           /*16 Shift*/
         pmullw    mm0,mm0         /*26 Multiply dx*dx*/
        psrlq      mm3,1           /*17 Shift*/
         pand      mm2,mm7         /*18 And*/
        pand       mm3,mm7         /*19 And*/
         /*Stall*/
        psubb      mm2,mm3         /*20 Subtract*/
         /*Stall*/
        movq       mm3,mm2         /*21 Copy*/
         pcmpgtb   mm5,mm2         /*22 Create unpack mask*/
        punpcklbw  mm2,mm5         /*23 Unpack low*/
         /*Stall*/
        punpckhbw  mm3,mm5         /*24 Unpack high*/
        /***************dxx dxy dyy low part 25-49*********************************/
         pmullw    mm4,mm2         /*27 Multiply dx*dy*/
        pmullw     mm2,mm2         /*28 Multiply dy*dy*/
         pxor      mm6,mm6         /*29 Set to zero*/
        movq       mm5,mm0         /*30 Copy dx*dx*/
         pcmpgtw   mm6,mm0         /*31 Create unpack mask for dx*dx*/
        punpcklwd  mm0,mm6         /*32 Unpack dx*dx lows*/
         /*Stall*/
        punpckhwd  mm5,mm6         /*33 Unpack dx*dx highs*/
         pxor      mm6,mm6         /*36 Set to zero*/
        movq       [edx],mm0       /*34 Store dx*dx lows*/
         movq      mm0,mm4         /*37 Copy dx*dy*/
        movq       [edx+8],mm5     /*35 Store dx*dx highs*/
         pcmpgtw   mm6,mm4         /*38 Create unpack mask for dx*dy*/
        punpcklwd  mm4,mm6         /*39 Unpack dx*dy lows*/
         /*Stall*/
        punpckhwd  mm0,mm6         /*40 Unpack dx*dy highs*/
         pxor      mm6,mm6         /*43 Set to zero*/
        movq       [edx+512],mm4   /*41 Store dx*dy lows*/
         movq      mm5,mm2         /*44 Copy dy*dy*/
        movq       [edx+520],mm0   /*42 Store dx*dy highs*/
         pcmpgtw   mm6,mm2         /*45 Create unpack mask for dy*dy*/
        punpcklwd  mm2,mm6         /*46 Unpack dy*dy lows*/
         movq      mm4,mm1         /*50 Copy dx*/
        punpckhwd  mm5,mm6         /*47 Unpack dy*dy highs*/
         pmullw    mm1,mm1         /*51 Multiply dx*dx*/
        movq       [edx+1024],mm2  /*48 Store dy*dy lows*/
         pmullw    mm4,mm3         /*52 Multiply dx*dy*/
        movq       [edx+1032],mm5  /*49 Store dy*dy highs*/
        /***************dxx dxy dyy high part 50-79*********************************/
         pmullw    mm3,mm3         /*53 Multiply dy*dy*/
        pxor       mm6,mm6         /*54 Set to zero*/
         movq      mm5,mm1         /*55 Copy dx*dx*/
        pcmpgtw    mm6,mm1         /*56 Create unpack mask for dx*dx*/
         pxor      mm2,mm2         /*61 Set to zero*/
        punpcklwd  mm1,mm6         /*57 Unpack dx*dx lows*/
         movq      mm0,mm4         /*62 Copy dx*dy*/
        punpckhwd  mm5,mm6         /*58 Unpack dx*dx highs*/
         pcmpgtw   mm2,mm4         /*63 Create unpack mask for dx*dy*/
        movq       [edx+16],mm1    /*59 Store dx*dx lows*/
         punpcklwd mm4,mm2         /*64 Unpack dx*dy lows*/
        movq       [edx+24],mm5    /*60 Store dx*dx highs*/
         punpckhwd mm0,mm2         /*65 Unpack dx*dy highs*/
        movq       [edx+528],mm4   /*66 Store dx*dy lows*/
         pxor      mm6,mm6         /*68 Set to zero*/
        movq       [edx+536],mm0   /*67 Store dx*dy highs*/
         movq      mm5,mm3         /*69 Copy dy*dy*/
        pcmpgtw    mm6,mm3         /*70 Create unpack mask for dy*dy*/
         add       eax,8           /*75*/
        punpcklwd  mm3,mm6         /*71 Unpack dy*dy lows*/
         add       ebx,8           /*76*/
        punpckhwd  mm5,mm6         /*72 Unpack dy*dy highs*/
         add       ecx,8           /*77*/
        movq       [edx+1040],mm3  /*73 Store dy*dy lows*/
         /*Stall*/
        movq       [edx+1048],mm5  /*74 Store dy*dy highs*/
         /*Stall*/
        add        edx,32          /*78*/
         dec esi                   /*79*/
        jnz loopstart

        emms
    }

#else
    int k;

    for(k=0;k<nc;++k)
    {
        const int col=j+k;
        /*Halved horizontal and vertical differences; both are read
        before any of the three outputs is written*/
        const int gx=(img[i][col-1]-img[i][col+1])>>1;
        const int gy=(img[i-1][col]-img[i+1][col])>>1;
        int *out=dxx+k;

        out[0]=gx*gx;     /*xx plane*/
        out[128]=gx*gy;   /*xy plane*/
        out[256]=gy*gy;   /*yy plane*/
    }
#endif /*DB_USE_MMX*/
}
2148bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
/*Vertically filter five rows of derivatives of length chunk_width.
For every column the products dx*dx, dx*dy and dy*dy are formed for each of
the five rows (Ix0/Iy0 .. Ix4/Iy4) and combined with the weights 1,4,6,4,1
into gxx, gxy and gyy respectively*/
inline void db_gxx_gxy_gyy_row_f(float *gxx,float *gxy,float *gyy,int chunk_width,
                                 float *Ix0,float *Ix1,float *Ix2,float *Ix3,float *Ix4,
                                 float *Iy0,float *Iy1,float *Iy2,float *Iy3,float *Iy4)
{
    const float * const dxRow[5]={Ix0,Ix1,Ix2,Ix3,Ix4};
    const float * const dyRow[5]={Iy0,Iy1,Iy2,Iy3,Iy4};
    float xx[5],xy[5],yy[5];
    float gx,gy;
    int col,r;

    for(col=0;col<chunk_width;col++)
    {
        /*Form all products for this column before any output is written*/
        for(r=0;r<5;r++)
        {
            gx=dxRow[r][col];
            gy=dyRow[r][col];
            xx[r]=gx*gx;
            xy[r]=gx*gy;
            yy[r]=gy*gy;
        }

        /*Filter vertically*/
        gxx[col]=xx[0]+xx[1]*4.0f+xx[2]*6.0f+xx[3]*4.0f+xx[4];
        gxy[col]=xy[0]+xy[1]*4.0f+xy[2]*6.0f+xy[3]*4.0f+xy[4];
        gyy[col]=yy[0]+yy[1]*4.0f+yy[2]*6.0f+yy[3]*4.0f+yy[4];
    }
}
2628bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
2638bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling/*Filter vertically five rows of derivatives of length 128 into gxx,gxy,gyy*/
2648bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlinginline void db_gxx_gxy_gyy_row_s(int *g,int *d0,int *d1,int *d2,int *d3,int *d4,int nc)
2658bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{
2668bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#ifdef DB_USE_MMX
2678bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    int c;
2688bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
2698bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    _asm
2708bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    {
2718bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        mov c,64
2728bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        mov eax,d0
2738bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        mov ebx,d1
2748bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        mov ecx,d2
2758bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        mov edx,d3
2768bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        mov edi,d4
2778bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        mov esi,g
2788bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
2798bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingloopstart:
2808bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        /***************dxx part 1-14*********************************/
2818bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        movq        mm0,[eax]      /*1 Get dxx0*/
2828bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         /*Stall*/
2838bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        movq        mm1,[ebx]      /*2 Get dxx1*/
2848bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         /*Stall*/
2858bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        movq        mm2,[ecx]      /*5 Get dxx2*/
2868bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         pslld      mm1,2          /*3 Shift dxx1*/
2878bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        movq        mm3,[edx]      /*10 Get dxx3*/
2888bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         paddd      mm0,mm1        /*4 Accumulate dxx1*/
2898bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        movq        mm4,[eax+512]  /*15 Get dxy0*/
2908bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         pslld      mm2,1          /*6 Shift dxx2 1*/
2918bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        paddd       mm0,mm2        /*7 Accumulate dxx2 1*/
2928bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         pslld      mm2,1          /*8 Shift dxx2 2*/
2938bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        movq        mm5,[ebx+512]  /*16 Get dxy1*/
2948bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         paddd      mm0,mm2        /*9 Accumulate dxx2 2*/
2958bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        pslld       mm3,2          /*11 Shift dxx3*/
2968bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         /*Stall*/
2978bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        paddd       mm0,mm3        /*12 Accumulate dxx3*/
2988bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         pslld      mm5,2          /*17 Shift dxy1*/
2998bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        paddd       mm0,[edi]      /*13 Accumulate dxx4*/
3008bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         paddd      mm4,mm5        /*18 Accumulate dxy1*/
3018bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        movq        mm6,[ecx+512]  /*19 Get dxy2*/
3028bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         /*Stall*/
3038bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        movq        [esi],mm0      /*14 Store dxx sums*/
3048bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        /***************dxy part 15-28*********************************/
3058bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         pslld      mm6,1          /*20 Shift dxy2 1*/
3068bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        paddd       mm4,mm6        /*21 Accumulate dxy2 1*/
3078bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         pslld      mm6,1          /*22 Shift dxy2 2*/
3088bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        movq        mm0,[eax+1024] /*29 Get dyy0*/
3098bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         paddd      mm4,mm6        /*23 Accumulate dxy2 2*/
3108bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        movq        mm7,[edx+512]  /*24 Get dxy3*/
3118bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         pslld      mm7,2          /*25 Shift dxy3*/
3128bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        movq        mm1,[ebx+1024] /*30 Get dyy1*/
3138bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         paddd      mm4,mm7        /*26 Accumulate dxy3*/
3148bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        paddd       mm4,[edi+512]  /*27 Accumulate dxy4*/
3158bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         pslld      mm1,2          /*31 Shift dyy1*/
3168bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        movq        mm2,[ecx+1024] /*33 Get dyy2*/
3178bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         paddd      mm0,mm1        /*32 Accumulate dyy1*/
3188bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        movq        [esi+512],mm4  /*28 Store dxy sums*/
3198bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         pslld      mm2,1          /*34 Shift dyy2 1*/
3208bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        /***************dyy part 29-49*********************************/
3218bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
3228bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
3238bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        movq        mm3,[edx+1024] /*38 Get dyy3*/
3248bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         paddd      mm0,mm2        /*35 Accumulate dyy2 1*/
3258bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        paddd       mm0,[edi+1024] /*41 Accumulate dyy4*/
3268bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         pslld      mm2,1          /*36 Shift dyy2 2*/
3278bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        paddd       mm0,mm2        /*37 Accumulate dyy2 2*/
3288bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         pslld      mm3,2          /*39 Shift dyy3*/
3298bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        paddd       mm0,mm3        /*40 Accumulate dyy3*/
3308bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         add        eax,8           /*43*/
3318bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        add         ebx,8           /*44*/
3328bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         add        ecx,8           /*45*/
3338bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        movq        [esi+1024],mm0 /*42 Store dyy sums*/
3348bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         /*Stall*/
3358bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        add         edx,8           /*46*/
3368bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         add        edi,8           /*47*/
3378bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        add         esi,8           /*48*/
3388bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         dec        c               /*49*/
3398bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        jnz         loopstart
3408bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
3418bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        emms
3428bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    }
3438bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
3448bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#else
3458bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    int c,dd;
3468bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
3478bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    for(c=0;c<nc;c++)
3488bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    {
3498bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        /*Filter vertically*/
3508bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        dd=d2[c];
3518bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        g[c]=d0[c]+(d1[c]<<2)+(dd<<2)+(dd<<1)+(d3[c]<<2)+d4[c];
3528bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
3538bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        dd=d2[c+128];
3548bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        g[c+128]=d0[c+128]+(d1[c+128]<<2)+(dd<<2)+(dd<<1)+(d3[c+128]<<2)+d4[c+128];
3558bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
3568bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        dd=d2[c+256];
3578bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        g[c+256]=d0[c+256]+(d1[c+256]<<2)+(dd<<2)+(dd<<1)+(d3[c+256]<<2)+d4[c+256];
3588bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    }
3598bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#endif /*DB_USE_MMX*/
3608bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling}
3618bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
/*Smooth the three rows gxx,gxy,gyy horizontally with the 1-4-6-4-1 kernel and
write the Harris corner response into s[i][j] ... s[i][j+chunk_width-1].
Each input row must hold chunk_width+4 samples, the first of which
corresponds to image position (i,j-2)*/
inline void db_HarrisStrength_row_f(float **s,float *gxx,float *gxy,float *gyy,int i,int j,int chunk_width)
{
    int col;

    for(col=0;col<chunk_width;col++)
    {
        /*Five-tap windows starting at the current column*/
        const float *xx=gxx+col;
        const float *xy=gxy+col;
        const float *yy=gyy+col;

        const float Sxx=xx[0]+xx[1]*4.0f+xx[2]*6.0f+xx[3]*4.0f+xx[4];
        const float Sxy=xy[0]+xy[1]*4.0f+xy[2]*6.0f+xy[3]*4.0f+xy[4];
        const float Syy=yy[0]+yy[1]*4.0f+yy[2]*6.0f+yy[3]*4.0f+yy[4];

        /*Response = determinant - 0.06 * trace^2 of the smoothed 2x2 matrix*/
        const float det=Sxx*Syy-Sxy*Sxy;
        const float trc=Sxx+Syy;

        s[i][j+col]=det-0.06f*trc*trc;
    }
}
3818bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
/*Filter g in place with the kernel 1-4-6-4-1. The output is shifted two steps:
after the call g[c] holds the filtered value centred on the old g[c+2].

Portable path: nc is the number of valid samples in g; nc-4 outputs are
written to g[0..nc-5] and the last four samples are left untouched. Nothing
is written when nc<5.

MMX path (DB_USE_MMX): nc is ignored. The loop count is fixed at 31
iterations of four outputs each, so g must hold 128 ints and 124 outputs
are produced*/
inline void db_Filter14641_128_i(int *g,int nc)
{
#ifdef DB_USE_MMX
    int mask=0xFFFFFFFF;

    _asm
    {
        mov esi,31
        mov eax,g

        /*Get bitmask 00000000FFFFFFFF into mm7*/
        movd mm7,mask

        /*Warming iteration one 1-16********************/
        movq       mm6,[eax]      /*1 Load new data*/
        paddd      mm0,mm6        /*2 Add 1* behind two steps*/
        movq       mm2,mm6        /*3 Start with 1* in front two steps*/
        pslld      mm6,1          /*4*/
        paddd      mm1,mm6        /*5 Add 2* same place*/
        pslld      mm6,1          /*6*/
        paddd      mm1,mm6        /*7 Add 4* same place*/
        pshufw     mm6,mm6,4Eh    /*8 Swap the two double-words using bitmask 01001110=4Eh*/
        paddd      mm1,mm6        /*9 Add 4* swapped*/
        movq       mm5,mm6        /*10 Copy*/
        pand       mm6,mm7        /*11 Get low double-word only*/
        paddd      mm2,mm6        /*12 Add 4* in front one step*/
        pxor       mm6,mm5        /*13 Get high double-word only*/
        paddd      mm0,mm6        /*14 Add 4* behind one step*/
        movq       mm0,mm1        /*15 Shift along*/
        movq       mm1,mm2        /*16 Shift along*/
        /*Warming iteration two 17-32********************/
        movq       mm4,[eax+8]    /*17 Load new data*/
        paddd      mm0,mm4        /*18 Add 1* behind two steps*/
        movq       mm2,mm4        /*19 Start with 1* in front two steps*/
        pslld      mm4,1          /*20*/
        paddd      mm1,mm4        /*21 Add 2* same place*/
        pslld      mm4,1          /*22*/
        paddd      mm1,mm4        /*23 Add 4* same place*/
        pshufw     mm4,mm4,4Eh    /*24 Swap the two double-words using bitmask 01001110=4Eh*/
        paddd      mm1,mm4        /*25 Add 4* swapped*/
        movq       mm3,mm4        /*26 Copy*/
        pand       mm4,mm7        /*27 Get low double-word only*/
        paddd      mm2,mm4        /*28 Add 4* in front one step*/
        pxor       mm4,mm3        /*29 Get high double-word only*/
        paddd      mm0,mm4        /*30 Add 4* behind one step*/
        movq       mm0,mm1        /*31 Shift along*/
        movq       mm1,mm2        /*32 Shift along*/

        /*Loop********************/
loopstart:
        /*First part of loop 33-47********/
        movq        mm6,[eax+16]   /*33 Load new data*/
         /*Stall*/
        paddd       mm0,mm6        /*34 Add 1* behind two steps*/
         movq       mm2,mm6        /*35 Start with 1* in front two steps*/
        movq        mm4,[eax+24]   /*48 Load new data*/
         pslld      mm6,1          /*36*/
        paddd       mm1,mm6        /*37 Add 2* same place*/
         pslld      mm6,1          /*38*/
        paddd       mm1,mm6        /*39 Add 4* same place*/
         pshufw     mm6,mm6,4Eh    /*40 Swap the two double-words using bitmask 01001110=4Eh*/
        paddd       mm1,mm4        /*49 Add 1* behind two steps*/
         movq       mm5,mm6        /*41 Copy*/
        paddd       mm1,mm6        /*42 Add 4* swapped*/
         pand       mm6,mm7        /*43 Get low double-word only*/
        paddd       mm2,mm6        /*44 Add 4* in front one step*/
         pxor       mm6,mm5        /*45 Get high double-word only*/
        paddd       mm0,mm6        /*46 Add 4* behind one step*/
         movq       mm6,mm4        /*50a Copy*/
        pslld       mm4,1          /*51*/
         /*Stall*/
        movq        [eax],mm0      /*47 Store result two steps behind*/
        /*Second part of loop 48-66********/
         movq       mm0,mm6        /*50b Start with 1* in front two steps*/
        paddd       mm2,mm4        /*52 Add 2* same place*/
         pslld      mm4,1          /*53*/
        paddd       mm2,mm4        /*54 Add 4* same place*/
         pshufw     mm4,mm4,4Eh    /*55 Swap the two double-words using bitmask 01001110=4Eh*/
        paddd       mm2,mm4        /*56 Add 4* swapped*/
         movq       mm3,mm4        /*57 Copy*/
        pand        mm4,mm7        /*58 Get low double-word only*/
         /*Stall*/
        paddd       mm0,mm4        /*59 Add 4* in front one step*/
         pxor       mm4,mm3        /*60 Get high double-word only*/
        paddd       mm1,mm4        /*61 Add 4* behind one step*/
         add        eax,16         /*65*/
        dec         esi            /*66*/
         /*Stall*/
        movq        [eax-8],mm1    /*62 Store result two steps behind*/
         movq       mm1,mm0        /*63 Shift along*/
        movq        mm0,mm2        /*64 Shift along*/
        jnz loopstart

        emms
    }

#else
    int c;

    /*The taps are applied with multiplications rather than left shifts:
    left-shifting a negative int is undefined behaviour and the samples are
    signed. Writing g[c] is safe in place because only g[c..c+4] is read and
    those later entries have not been overwritten yet*/
    for(c=0;c<nc-4;c++)
    {
        g[c]=g[c]+4*g[c+1]+6*g[c+2]+4*g[c+3]+g[c+4];
    }
#endif /*DB_USE_MMX*/
}
4908bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
4918bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling/*Filter horizontally the three rows gxx,gxy,gyy of length 128 into the strength subrow s
4928bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingof length 124. gxx,gxy and gyy are assumed to be starting at (i,j-2) if s[i][j] is sought.
4938bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlings should be 16 byte aligned*/
4948bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlinginline void db_HarrisStrength_row_s(float *s,int *gxx,int *gxy,int *gyy,int nc)
4958bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{
4968bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    float k;
4978bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
4988bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    k=0.06f;
4998bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
5008bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    db_Filter14641_128_i(gxx,nc);
5018bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    db_Filter14641_128_i(gxy,nc);
5028bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    db_Filter14641_128_i(gyy,nc);
5038bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
5048bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#ifdef DB_USE_SIMD
5058bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
5068bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
5078bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    _asm
5088bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    {
5098bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        mov esi,15
5108bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        mov eax,gxx
5118bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        mov ebx,gxy
5128bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        mov ecx,gyy
5138bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        mov edx,s
5148bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
5158bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        /*broadcast k to all positions of xmm7*/
5168bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        movss   xmm7,k
5178bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        shufps  xmm7,xmm7,0
5188bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
5198bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        /*****Warm up 1-10**************************************/
5208bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        cvtpi2ps  xmm0,[eax+8] /*1 Convert two integers into floating point of low double-word*/
5218bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         /*Stall*/
5228bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        cvtpi2ps  xmm1,[ebx+8] /*4 Convert two integers into floating point of low double-word*/
5238bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         movlhps  xmm0,xmm0    /*2 Move them to the high double-word*/
5248bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        cvtpi2ps  xmm2,[ecx+8] /*7 Convert two integers into floating point of low double-word*/
5258bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         movlhps  xmm1,xmm1    /*5 Move them to the high double-word*/
5268bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        cvtpi2ps  xmm0,[eax]   /*3 Convert two integers into floating point of low double-word*/
5278bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         movlhps  xmm2,xmm2    /*8 Move them to the high double-word*/
5288bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        cvtpi2ps  xmm1,[ebx]   /*6 Convert two integers into floating point of low double-word*/
5298bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         movaps   xmm3,xmm0    /*10 Copy Cxx*/
5308bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        cvtpi2ps  xmm2,[ecx]   /*9 Convert two integers into floating point of low double-word*/
5318bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         /*Stall*/
5328bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingloopstart:
5338bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        /*****First part of loop 11-18***********************/
5348bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        mulps     xmm0,xmm2     /*11 Multiply to get Gxx*Gyy*/
5358bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         addps    xmm2,xmm3     /*12 Add to get Gxx+Gyy*/
5368bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        cvtpi2ps  xmm4,[eax+24] /*19 Convert two integers into floating point of low double-word*/
5378bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         mulps    xmm1,xmm1     /*13 Multiply to get Gxy*Gxy*/
5388bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        mulps     xmm2,xmm2     /*14 Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/
5398bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         movlhps  xmm4,xmm4     /*20 Move them to the high double-word*/
5408bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        cvtpi2ps  xmm4,[eax+16] /*21 Convert two integers into floating point of low double-word*/
5418bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         /*Stall*/
5428bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        subps     xmm0,xmm1     /*15 Subtract to get Gxx*Gyy-Gxy*Gxy*/
5438bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         mulps    xmm2,xmm7     /*16 Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/
5448bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        cvtpi2ps  xmm5,[ebx+24] /*22 Convert two integers into floating point of low double-word*/
5458bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         /*Stall*/
5468bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        movlhps   xmm5,xmm5     /*23 Move them to the high double-word*/
5478bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         /*Stall*/
5488bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        cvtpi2ps  xmm5,[ebx+16] /*24 Convert two integers into floating point of low double-word*/
5498bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         subps    xmm0,xmm2     /*17 Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/
5508bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        cvtpi2ps  xmm6,[ecx+24] /*25 Convert two integers into floating point of low double-word*/
5518bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         /*Stall*/
5528bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        movaps    [edx],xmm0    /*18 Store*/
5538bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        /*****Second part of loop 26-40***********************/
5548bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         movlhps  xmm6,xmm6     /*26 Move them to the high double-word*/
5558bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        cvtpi2ps  xmm6,[ecx+16] /*27 Convert two integers into floating point of low double-word*/
5568bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         movaps   xmm3,xmm4     /*28 Copy Cxx*/
5578bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        mulps     xmm4,xmm6     /*29 Multiply to get Gxx*Gyy*/
5588bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         addps    xmm6,xmm3     /*30 Add to get Gxx+Gyy*/
5598bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        cvtpi2ps  xmm0,[eax+40] /*(1 Next) Convert two integers into floating point of low double-word*/
5608bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         mulps    xmm5,xmm5     /*31 Multiply to get Gxy*Gxy*/
5618bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        cvtpi2ps  xmm1,[ebx+40] /*(4 Next) Convert two integers into floating point of low double-word*/
5628bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         mulps    xmm6,xmm6     /*32 Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/
5638bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        cvtpi2ps  xmm2,[ecx+40] /*(7 Next) Convert two integers into floating point of low double-word*/
5648bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         movlhps  xmm0,xmm0     /*(2 Next) Move them to the high double-word*/
5658bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        subps     xmm4,xmm5     /*33 Subtract to get Gxx*Gyy-Gxy*Gxy*/
5668bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         movlhps  xmm1,xmm1     /*(5 Next) Move them to the high double-word*/
5678bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        cvtpi2ps  xmm0,[eax+32] /*(3 Next)Convert two integers into floating point of low double-word*/
5688bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         mulps    xmm6,xmm7     /*34 Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/
5698bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        cvtpi2ps  xmm1,[ebx+32] /*(6 Next) Convert two integers into floating point of low double-word*/
5708bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         movlhps  xmm2,xmm2     /*(8 Next) Move them to the high double-word*/
5718bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        movaps    xmm3,xmm0     /*(10 Next) Copy Cxx*/
5728bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         add      eax,32        /*37*/
5738bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        subps     xmm4,xmm6     /*35 Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/
5748bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         add      ebx,32        /*38*/
5758bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        cvtpi2ps  xmm2,[ecx+32] /*(9 Next) Convert two integers into floating point of low double-word*/
5768bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         /*Stall*/
5778bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        movaps    [edx+16],xmm4 /*36 Store*/
5788bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         /*Stall*/
5798bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        add       ecx,32        /*39*/
5808bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling         add      edx,32        /*40*/
5818bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        dec       esi           /*41*/
5828bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        jnz loopstart
5838bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
5848bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        /****Cool down***************/
5858bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        mulps    xmm0,xmm2    /*Multiply to get Gxx*Gyy*/
5868bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        addps    xmm2,xmm3    /*Add to get Gxx+Gyy*/
5878bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        mulps    xmm1,xmm1    /*Multiply to get Gxy*Gxy*/
5888bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        mulps    xmm2,xmm2    /*Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/
5898bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        subps    xmm0,xmm1    /*Subtract to get Gxx*Gyy-Gxy*Gxy*/
5908bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        mulps    xmm2,xmm7    /*Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/
5918bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        subps    xmm0,xmm2    /*Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/
5928bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        movaps   [edx],xmm0   /*Store*/
5938bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    }
5948bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
5958bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#else
5968bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    float Gxx,Gxy,Gyy,det,trc;
5978bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    int c;
5988bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
5998bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    //for(c=0;c<124;c++)
6008bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    for(c=0;c<nc-4;c++)
6018bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    {
6028bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        Gxx=(float)gxx[c];
6038bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        Gxy=(float)gxy[c];
6048bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        Gyy=(float)gyy[c];
6058bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
6068bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        det=Gxx*Gyy-Gxy*Gxy;
6078bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        trc=Gxx+Gyy;
6088bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        s[c]=det-k*trc*trc;
6098bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    }
6108bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#endif /*DB_USE_SIMD*/
6118bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling}
6128bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
6138bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling/*Compute the Harris corner strength of the chunk [left,top,right,bottom] of img and
6148bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingstore it into the corresponding region of s. left and top have to be at least 3 and
6158bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingright and bottom have to be at most width-4,height-4*/
6168bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlinginline void db_HarrisStrengthChunk_f(float **s,const float * const *img,int left,int top,int right,int bottom,
6178bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                                      /*temp should point to at least
6188bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                                      13*(right-left+5) of allocated memory*/
6198bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                                      float *temp)
6208bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{
6218bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    float *Ix[5],*Iy[5];
6228bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    float *gxx,*gxy,*gyy;
6238bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    int i,chunk_width,chunk_width_p4;
6248bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
6258bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    chunk_width=right-left+1;
6268bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    chunk_width_p4=chunk_width+4;
6278bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    gxx=temp;
6288bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    gxy=gxx+chunk_width_p4;
6298bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    gyy=gxy+chunk_width_p4;
6308bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    for(i=0;i<5;i++)
6318bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    {
6328bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        Ix[i]=gyy+chunk_width_p4+(2*i*chunk_width_p4);
6338bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        Iy[i]=Ix[i]+chunk_width_p4;
6348bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    }
6358bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
6368bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    /*Fill four rows of the wrap-around derivative buffers*/
6378bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    for(i=top-2;i<top+2;i++) db_IxIyRow_f(Ix[i%5],Iy[i%5],img,i,left-2,chunk_width_p4);
6388bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
6398bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    /*For each output row*/
6408bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    for(i=top;i<=bottom;i++)
6418bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    {
6428bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        /*Step the derivative buffers*/
6438bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        db_IxIyRow_f(Ix[(i+2)%5],Iy[(i+2)%5],img,(i+2),left-2,chunk_width_p4);
6448bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
6458bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        /*Filter Ix2,IxIy,Iy2 vertically into gxx,gxy,gyy*/
6468bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        db_gxx_gxy_gyy_row_f(gxx,gxy,gyy,chunk_width_p4,
6478bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                                 Ix[(i-2)%5],Ix[(i-1)%5],Ix[i%5],Ix[(i+1)%5],Ix[(i+2)%5],
6488bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                                 Iy[(i-2)%5],Iy[(i-1)%5],Iy[i%5],Iy[(i+1)%5],Iy[(i+2)%5]);
6498bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
6508bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        /*Filter gxx,gxy,gyy horizontally and compute corner response s*/
6518bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        db_HarrisStrength_row_f(s,gxx,gxy,gyy,i,left,chunk_width);
6528bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    }
6538bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling}
6548bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
6558bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling/*Compute the Harris corner strength of the chunk [left,top,left+123,bottom] of img and
6568bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingstore it into the corresponding region of s. left and top have to be at least 3 and
6578bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingright and bottom have to be at most width-4,height-4. The left of the region in s should
6588bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingbe 16 byte aligned*/
6598bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlinginline void db_HarrisStrengthChunk_u(float **s,const unsigned char * const *img,int left,int top,int bottom,
6608bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                                      /*temp should point to at least
6618bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                                      18*128 of allocated memory*/
6628bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                                      int *temp, int nc)
6638bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{
6648bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    int *Ixx[5],*Ixy[5],*Iyy[5];
6658bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    int *gxx,*gxy,*gyy;
6668bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    int i;
6678bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
6688bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    gxx=temp;
6698bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    gxy=gxx+128;
6708bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    gyy=gxy+128;
6718bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    for(i=0;i<5;i++)
6728bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    {
6738bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        Ixx[i]=gyy+(3*i+1)*128;
6748bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        Ixy[i]=gyy+(3*i+2)*128;
6758bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        Iyy[i]=gyy+(3*i+3)*128;
6768bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    }
6778bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
6788bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    /*Fill four rows of the wrap-around derivative buffers*/
6798bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    for(i=top-2;i<top+2;i++) db_IxIyRow_u(Ixx[i%5],img,i,left-2,nc);
6808bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
6818bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    /*For each output row*/
6828bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    for(i=top;i<=bottom;i++)
6838bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    {
6848bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        /*Step the derivative buffers*/
6858bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        db_IxIyRow_u(Ixx[(i+2)%5],img,(i+2),left-2,nc);
6868bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
6878bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        /*Filter Ix2,IxIy,Iy2 vertically into gxx,gxy,gyy*/
6888bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        db_gxx_gxy_gyy_row_s(gxx,Ixx[(i-2)%5],Ixx[(i-1)%5],Ixx[i%5],Ixx[(i+1)%5],Ixx[(i+2)%5],nc);
6898bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
6908bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        /*Filter gxx,gxy,gyy horizontally and compute corner response s*/
6918bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        db_HarrisStrength_row_s(s[i]+left,gxx,gxy,gyy,nc);
6928bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    }
6938bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
6948bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling}
6958bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
6968bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling/*Compute Harris corner strength of img. Strength is returned for the region
6978bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingwith (3,3) as upper left and (w-4,h-4) as lower right, positioned in the
6988bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingsame place in s. In other words,image should be at least 7 pixels wide and 7 pixels high
6998bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingfor a meaningful result*/
7008bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingvoid db_HarrisStrength_f(float **s,const float * const *img,int w,int h,
7018bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                                    /*temp should point to at least
7028bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                                    13*(chunk_width+4) of allocated memory*/
7038bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                                    float *temp,
7048bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                                    int chunk_width)
7058bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{
7068bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    int x,next_x,last,right;
7078bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
7088bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    last=w-4;
7098bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    for(x=3;x<=last;x=next_x)
7108bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    {
7118bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        next_x=x+chunk_width;
7128bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        right=next_x-1;
7138bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        if(right>last) right=last;
7148bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        /*Compute the Harris strength of a chunk*/
7158bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        db_HarrisStrengthChunk_f(s,img,x,3,right,h-4,temp);
7168bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    }
7178bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling}
7188bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
7198bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling/*Compute Harris corner strength of img. Strength is returned for the region
7208bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingwith (3,3) as upper left and (w-4,h-4) as lower right, positioned in the
7218bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingsame place in s. In other words,image should be at least 7 pixels wide and 7 pixels high
7228bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingfor a meaningful result.Moreover, the image should be overallocated by 256 bytes.
7238bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlings[i][3] should by 16 byte aligned for any i*/
7248bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingvoid db_HarrisStrength_u(float **s, const unsigned char * const *img,int w,int h,
7258bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                                    /*temp should point to at least
7268bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                                    18*128 of allocated memory*/
7278bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                                    int *temp)
7288bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{
7298bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    int x,next_x,last;
7308bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    int nc;
7318bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
7328bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    last=w-4;
7338bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    for(x=3;x<=last;x=next_x)
7348bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    {
7358bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        next_x=x+124;
7368bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
7378bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        // mayban: to revert to the original full chunks state, change the line below to: nc = 128;
7388bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        nc = db_mini(128,last-x+1);
7398bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        //nc = 128;
7408bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
7418bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        /*Compute the Harris strength of a chunk*/
7428bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        db_HarrisStrengthChunk_u(s,img,x,3,h-4,temp,nc);
7438bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    }
7448bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling}
7458bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
/*Return the largest of the 128 floats starting at v.
When DB_USE_SIMD is defined the values are loaded with aligned SSE
accesses (movaps/maxps with memory operands), so v has to be 16 byte
aligned; the plain C path has no alignment requirement*/
inline float db_Max_128Aligned16_f(float *v)
{
#ifdef DB_USE_SIMD
    float back;

    _asm
    {
        mov eax,v

        /*Chunk1*/
        movaps xmm0,[eax]
        movaps xmm1,[eax+16]
        movaps xmm2,[eax+32]
        movaps xmm3,[eax+48]
        movaps xmm4,[eax+64]
        movaps xmm5,[eax+80]
        movaps xmm6,[eax+96]
        movaps xmm7,[eax+112]

        /*Chunk2*/
        maxps xmm0,[eax+128]
        maxps xmm1,[eax+144]
        maxps xmm2,[eax+160]
        maxps xmm3,[eax+176]
        maxps xmm4,[eax+192]
        maxps xmm5,[eax+208]
        maxps xmm6,[eax+224]
        maxps xmm7,[eax+240]

        /*Chunk3*/
        maxps xmm0,[eax+256]
        maxps xmm1,[eax+272]
        maxps xmm2,[eax+288]
        maxps xmm3,[eax+304]
        maxps xmm4,[eax+320]
        maxps xmm5,[eax+336]
        maxps xmm6,[eax+352]
        maxps xmm7,[eax+368]

        /*Chunk4*/
        maxps xmm0,[eax+384]
        maxps xmm1,[eax+400]
        maxps xmm2,[eax+416]
        maxps xmm3,[eax+432]
        maxps xmm4,[eax+448]
        maxps xmm5,[eax+464]
        maxps xmm6,[eax+480]
        maxps xmm7,[eax+496]

        /*Collect the eight accumulators into xmm0*/
        maxps   xmm0,xmm1
        maxps   xmm2,xmm3
        maxps   xmm4,xmm5
        maxps   xmm6,xmm7
        maxps   xmm0,xmm2
        maxps   xmm4,xmm6
        maxps   xmm0,xmm4
        /*Fold lanes 2,3 onto lanes 0,1*/
        movhlps xmm1,xmm0
        maxps   xmm0,xmm1
        /*Fold lane 1 onto lane 0. shufps takes its two low result lanes
        from the destination register, so the source has to be copied
        into xmm1 first for lane 1 of xmm0 to be selected*/
        movaps  xmm1,xmm0
        shufps  xmm1,xmm1,1
        maxps   xmm0,xmm1
        movss   back,xmm0
    }

    return(back);
#else
    float max_val=v[0];
    int i;

    for(i=1;i<128;i++)
    {
        if(v[i]>max_val) max_val=v[i];
    }
    return(max_val);
#endif /*DB_USE_SIMD*/
}
8238bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
/*Return the largest of the 64 floats starting at v.
When DB_USE_SIMD is defined the values are loaded with aligned SSE
accesses (movaps/maxps with memory operands), so v has to be 16 byte
aligned; the plain C path has no alignment requirement*/
inline float db_Max_64Aligned16_f(float *v)
{
#ifdef DB_USE_SIMD
    float back;

    _asm
    {
        mov eax,v

        /*Chunk1*/
        movaps xmm0,[eax]
        movaps xmm1,[eax+16]
        movaps xmm2,[eax+32]
        movaps xmm3,[eax+48]
        movaps xmm4,[eax+64]
        movaps xmm5,[eax+80]
        movaps xmm6,[eax+96]
        movaps xmm7,[eax+112]

        /*Chunk2*/
        maxps xmm0,[eax+128]
        maxps xmm1,[eax+144]
        maxps xmm2,[eax+160]
        maxps xmm3,[eax+176]
        maxps xmm4,[eax+192]
        maxps xmm5,[eax+208]
        maxps xmm6,[eax+224]
        maxps xmm7,[eax+240]

        /*Collect the eight accumulators into xmm0*/
        maxps   xmm0,xmm1
        maxps   xmm2,xmm3
        maxps   xmm4,xmm5
        maxps   xmm6,xmm7
        maxps   xmm0,xmm2
        maxps   xmm4,xmm6
        maxps   xmm0,xmm4
        /*Fold lanes 2,3 onto lanes 0,1*/
        movhlps xmm1,xmm0
        maxps   xmm0,xmm1
        /*Fold lane 1 onto lane 0. shufps takes its two low result lanes
        from the destination register, so the source has to be copied
        into xmm1 first for lane 1 of xmm0 to be selected*/
        movaps  xmm1,xmm0
        shufps  xmm1,xmm1,1
        maxps   xmm0,xmm1
        movss   back,xmm0
    }

    return(back);
#else
    float max_val=v[0];
    int i;

    for(i=1;i<64;i++)
    {
        if(v[i]>max_val) max_val=v[i];
    }
    return(max_val);
#endif /*DB_USE_SIMD*/
}
8818bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
/*Return the largest of the 32 floats starting at v.
When DB_USE_SIMD is defined the values are loaded with aligned SSE
accesses (movaps), so v has to be 16 byte aligned; the plain C path has
no alignment requirement*/
inline float db_Max_32Aligned16_f(float *v)
{
#ifdef DB_USE_SIMD
    float back;

    _asm
    {
        mov eax,v

        /*Chunk1*/
        movaps xmm0,[eax]
        movaps xmm1,[eax+16]
        movaps xmm2,[eax+32]
        movaps xmm3,[eax+48]
        movaps xmm4,[eax+64]
        movaps xmm5,[eax+80]
        movaps xmm6,[eax+96]
        movaps xmm7,[eax+112]

        /*Collect the eight registers into xmm0*/
        maxps   xmm0,xmm1
        maxps   xmm2,xmm3
        maxps   xmm4,xmm5
        maxps   xmm6,xmm7
        maxps   xmm0,xmm2
        maxps   xmm4,xmm6
        maxps   xmm0,xmm4
        /*Fold lanes 2,3 onto lanes 0,1*/
        movhlps xmm1,xmm0
        maxps   xmm0,xmm1
        /*Fold lane 1 onto lane 0. shufps takes its two low result lanes
        from the destination register, so the source has to be copied
        into xmm1 first for lane 1 of xmm0 to be selected*/
        movaps  xmm1,xmm0
        shufps  xmm1,xmm1,1
        maxps   xmm0,xmm1
        movss   back,xmm0
    }

    return(back);
#else
    float max_val=v[0];
    int i;

    for(i=1;i<32;i++)
    {
        if(v[i]>max_val) max_val=v[i];
    }
    return(max_val);
#endif /*DB_USE_SIMD*/
}
9298bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
/*Return the largest of the 16 floats starting at v.
When DB_USE_SIMD is defined the values are loaded with aligned SSE
accesses (movaps), so v has to be 16 byte aligned; the plain C path has
no alignment requirement*/
inline float db_Max_16Aligned16_f(float *v)
{
#ifdef DB_USE_SIMD
    float back;

    _asm
    {
        mov eax,v

        /*Chunk1*/
        movaps xmm0,[eax]
        movaps xmm1,[eax+16]
        movaps xmm2,[eax+32]
        movaps xmm3,[eax+48]

        /*Collect the four registers into xmm0*/
        maxps   xmm0,xmm1
        maxps   xmm2,xmm3
        maxps   xmm0,xmm2
        /*Fold lanes 2,3 onto lanes 0,1*/
        movhlps xmm1,xmm0
        maxps   xmm0,xmm1
        /*Fold lane 1 onto lane 0. shufps takes its two low result lanes
        from the destination register, so the source has to be copied
        into xmm1 first for lane 1 of xmm0 to be selected*/
        movaps  xmm1,xmm0
        shufps  xmm1,xmm1,1
        maxps   xmm0,xmm1
        movss   back,xmm0
    }

    return(back);
#else
    float max_val=v[0];
    int i;

    for(i=1;i<16;i++)
    {
        if(v[i]>max_val) max_val=v[i];
    }
    return(max_val);
#endif /*DB_USE_SIMD*/
}
9698bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
/*Return the largest of the 8 floats starting at v.
When DB_USE_SIMD is defined the values are loaded with aligned SSE
accesses (movaps), so v has to be 16 byte aligned; the plain C path has
no alignment requirement*/
inline float db_Max_8Aligned16_f(float *v)
{
#ifdef DB_USE_SIMD
    float back;

    _asm
    {
        mov eax,v

        /*Chunk1*/
        movaps xmm0,[eax]
        movaps xmm1,[eax+16]

        /*Collect the two registers into xmm0*/
        maxps   xmm0,xmm1
        /*Fold lanes 2,3 onto lanes 0,1*/
        movhlps xmm1,xmm0
        maxps   xmm0,xmm1
        /*Fold lane 1 onto lane 0. shufps takes its two low result lanes
        from the destination register, so the source has to be copied
        into xmm1 first for lane 1 of xmm0 to be selected*/
        movaps  xmm1,xmm0
        shufps  xmm1,xmm1,1
        maxps   xmm0,xmm1
        movss   back,xmm0
    }

    return(back);
#else
    float max_val=v[0];
    int i;

    for(i=1;i<8;i++)
    {
        if(v[i]>max_val) max_val=v[i];
    }
    return(max_val);
#endif /*DB_USE_SIMD*/
}
10058bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
10068bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlinginline float db_Max_Aligned16_f(float *v,int size)
10078bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{
10088bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    float val,max_val;
10098bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    float *stop_v;
10108bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
10118bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    max_val=v[0];
10128bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    for(;size>=128;size-=128)
10138bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    {
10148bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        val=db_Max_128Aligned16_f(v);
10158bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        v+=128;
10168bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        if(val>max_val) max_val=val;
10178bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    }
10188bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    if(size&64)
10198bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    {
10208bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        val=db_Max_64Aligned16_f(v);
10218bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        v+=64;
10228bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        if(val>max_val) max_val=val;
10238bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    }
10248bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    if(size&32)
10258bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    {
10268bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        val=db_Max_32Aligned16_f(v);
10278bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        v+=32;
10288bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        if(val>max_val) max_val=val;
10298bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    }
10308bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    if(size&16)
10318bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    {
10328bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        val=db_Max_16Aligned16_f(v);
10338bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        v+=16;
10348bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        if(val>max_val) max_val=val;
10358bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    }
10368bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    if(size&8)
10378bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    {
10388bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        val=db_Max_8Aligned16_f(v);
10398bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        v+=8;
10408bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        if(val>max_val) max_val=val;
10418bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    }
10428bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    if(size&7)
10438bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    {
10448bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        for(stop_v=v+(size&7);v!=stop_v;)
10458bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        {
10468bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling            val= *v++;
10478bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling            if(val>max_val) max_val=val;
10488bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        }
10498bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    }
10508bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
10518bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    return(max_val);
10528bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling}
10538bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
10548bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling/*Find maximum value of img in the region starting at (left,top)
10558bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingand with width w and height h. img[left] should be 16 byte aligned*/
10568bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingfloat db_MaxImage_Aligned16_f(float **img,int left,int top,int w,int h)
10578bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{
10588bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    float val,max_val;
10598bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    int i,stop_i;
10608bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
10618bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    if(w && h)
10628bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    {
10638bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        stop_i=top+h;
10648bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        max_val=img[top][left];
10658bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
10668bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        for(i=top;i<stop_i;i++)
10678bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        {
10688bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling            val=db_Max_Aligned16_f(img[i]+left,w);
10698bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling            if(val>max_val) max_val=val;
10708bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        }
10718bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        return(max_val);
10728bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    }
10738bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    return(0.0);
10748bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling}
10758bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
/*Element-wise maximum of two vectors of 128 floats: m[i]=max(v1[i],v2[i])
for i=0..127.
With DB_USE_SIMD the work is done with SSE in four chunks of 32 floats
(eight xmm registers each). Loads from v1 and stores to m use movaps and v2
is a maxps memory operand, so m, v1 and v2 should all be 16 byte aligned.
Without DB_USE_SIMD a scalar loop is used that has no alignment requirement
and keeps the v1 element when the two values compare equal*/
inline void db_MaxVector_128_Aligned16_f(float *m,float *v1,float *v2)
{
#ifdef DB_USE_SIMD
    _asm
    {
        mov eax,v1
        mov ebx,v2
        mov ecx,m

        /*Chunk1: floats 0-31*/
        movaps xmm0,[eax]
        movaps xmm1,[eax+16]
        movaps xmm2,[eax+32]
        movaps xmm3,[eax+48]
        movaps xmm4,[eax+64]
        movaps xmm5,[eax+80]
        movaps xmm6,[eax+96]
        movaps xmm7,[eax+112]
        maxps  xmm0,[ebx]
        maxps  xmm1,[ebx+16]
        maxps  xmm2,[ebx+32]
        maxps  xmm3,[ebx+48]
        maxps  xmm4,[ebx+64]
        maxps  xmm5,[ebx+80]
        maxps  xmm6,[ebx+96]
        maxps  xmm7,[ebx+112]
        movaps [ecx],xmm0
        movaps [ecx+16],xmm1
        movaps [ecx+32],xmm2
        movaps [ecx+48],xmm3
        movaps [ecx+64],xmm4
        movaps [ecx+80],xmm5
        movaps [ecx+96],xmm6
        movaps [ecx+112],xmm7

        /*Chunk2: floats 32-63*/
        movaps xmm0,[eax+128]
        movaps xmm1,[eax+144]
        movaps xmm2,[eax+160]
        movaps xmm3,[eax+176]
        movaps xmm4,[eax+192]
        movaps xmm5,[eax+208]
        movaps xmm6,[eax+224]
        movaps xmm7,[eax+240]
        maxps  xmm0,[ebx+128]
        maxps  xmm1,[ebx+144]
        maxps  xmm2,[ebx+160]
        maxps  xmm3,[ebx+176]
        maxps  xmm4,[ebx+192]
        maxps  xmm5,[ebx+208]
        maxps  xmm6,[ebx+224]
        maxps  xmm7,[ebx+240]
        movaps [ecx+128],xmm0
        movaps [ecx+144],xmm1
        movaps [ecx+160],xmm2
        movaps [ecx+176],xmm3
        movaps [ecx+192],xmm4
        movaps [ecx+208],xmm5
        movaps [ecx+224],xmm6
        movaps [ecx+240],xmm7

        /*Chunk3: floats 64-95*/
        movaps xmm0,[eax+256]
        movaps xmm1,[eax+272]
        movaps xmm2,[eax+288]
        movaps xmm3,[eax+304]
        movaps xmm4,[eax+320]
        movaps xmm5,[eax+336]
        movaps xmm6,[eax+352]
        movaps xmm7,[eax+368]
        maxps  xmm0,[ebx+256]
        maxps  xmm1,[ebx+272]
        maxps  xmm2,[ebx+288]
        maxps  xmm3,[ebx+304]
        maxps  xmm4,[ebx+320]
        maxps  xmm5,[ebx+336]
        maxps  xmm6,[ebx+352]
        maxps  xmm7,[ebx+368]
        movaps [ecx+256],xmm0
        movaps [ecx+272],xmm1
        movaps [ecx+288],xmm2
        movaps [ecx+304],xmm3
        movaps [ecx+320],xmm4
        movaps [ecx+336],xmm5
        movaps [ecx+352],xmm6
        movaps [ecx+368],xmm7

        /*Chunk4: floats 96-127*/
        movaps xmm0,[eax+384]
        movaps xmm1,[eax+400]
        movaps xmm2,[eax+416]
        movaps xmm3,[eax+432]
        movaps xmm4,[eax+448]
        movaps xmm5,[eax+464]
        movaps xmm6,[eax+480]
        movaps xmm7,[eax+496]
        maxps  xmm0,[ebx+384]
        maxps  xmm1,[ebx+400]
        maxps  xmm2,[ebx+416]
        maxps  xmm3,[ebx+432]
        maxps  xmm4,[ebx+448]
        maxps  xmm5,[ebx+464]
        maxps  xmm6,[ebx+480]
        maxps  xmm7,[ebx+496]
        movaps [ecx+384],xmm0
        movaps [ecx+400],xmm1
        movaps [ecx+416],xmm2
        movaps [ecx+432],xmm3
        movaps [ecx+448],xmm4
        movaps [ecx+464],xmm5
        movaps [ecx+480],xmm6
        movaps [ecx+496],xmm7
    }
#else
    /*Scalar fallback: walk the three vectors front to back. Both inputs are
    read before the output element is written*/
    float *dst=m;
    float *const dst_end=m+128;
    float *src1=v1;
    float *src2=v2;
    float first,second;

    while(dst!=dst_end)
    {
        first= *src1++;
        second= *src2++;
        *dst++ = (first>=second)?first:second;
    }
#endif /*DB_USE_SIMD*/
}
12018bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
/*Element-wise maximum of two vectors of 128 floats: m[i]=max(v1[i],v2[i])
for i=0..127, where only the second source and the destination need to be
aligned.
With DB_USE_SIMD the first source v1 is loaded with movups and may have any
alignment, while v2 (maxps memory operand) and m (movaps stores) should be
16 byte aligned. The work is done in four chunks of 32 floats.
Without DB_USE_SIMD a scalar loop is used that processes the elements in
increasing index order and keeps the v1 element when the two values compare
equal*/
inline void db_MaxVector_128_SecondSourceDestAligned16_f(float *m,float *v1,float *v2)
{
#ifdef DB_USE_SIMD
    _asm
    {
        mov eax,v1
        mov ebx,v2
        mov ecx,m

        /*Chunk1: floats 0-31*/
        movups xmm0,[eax]
        movups xmm1,[eax+16]
        movups xmm2,[eax+32]
        movups xmm3,[eax+48]
        movups xmm4,[eax+64]
        movups xmm5,[eax+80]
        movups xmm6,[eax+96]
        movups xmm7,[eax+112]
        maxps  xmm0,[ebx]
        maxps  xmm1,[ebx+16]
        maxps  xmm2,[ebx+32]
        maxps  xmm3,[ebx+48]
        maxps  xmm4,[ebx+64]
        maxps  xmm5,[ebx+80]
        maxps  xmm6,[ebx+96]
        maxps  xmm7,[ebx+112]
        movaps [ecx],xmm0
        movaps [ecx+16],xmm1
        movaps [ecx+32],xmm2
        movaps [ecx+48],xmm3
        movaps [ecx+64],xmm4
        movaps [ecx+80],xmm5
        movaps [ecx+96],xmm6
        movaps [ecx+112],xmm7

        /*Chunk2: floats 32-63*/
        movups xmm0,[eax+128]
        movups xmm1,[eax+144]
        movups xmm2,[eax+160]
        movups xmm3,[eax+176]
        movups xmm4,[eax+192]
        movups xmm5,[eax+208]
        movups xmm6,[eax+224]
        movups xmm7,[eax+240]
        maxps  xmm0,[ebx+128]
        maxps  xmm1,[ebx+144]
        maxps  xmm2,[ebx+160]
        maxps  xmm3,[ebx+176]
        maxps  xmm4,[ebx+192]
        maxps  xmm5,[ebx+208]
        maxps  xmm6,[ebx+224]
        maxps  xmm7,[ebx+240]
        movaps [ecx+128],xmm0
        movaps [ecx+144],xmm1
        movaps [ecx+160],xmm2
        movaps [ecx+176],xmm3
        movaps [ecx+192],xmm4
        movaps [ecx+208],xmm5
        movaps [ecx+224],xmm6
        movaps [ecx+240],xmm7

        /*Chunk3: floats 64-95*/
        movups xmm0,[eax+256]
        movups xmm1,[eax+272]
        movups xmm2,[eax+288]
        movups xmm3,[eax+304]
        movups xmm4,[eax+320]
        movups xmm5,[eax+336]
        movups xmm6,[eax+352]
        movups xmm7,[eax+368]
        maxps  xmm0,[ebx+256]
        maxps  xmm1,[ebx+272]
        maxps  xmm2,[ebx+288]
        maxps  xmm3,[ebx+304]
        maxps  xmm4,[ebx+320]
        maxps  xmm5,[ebx+336]
        maxps  xmm6,[ebx+352]
        maxps  xmm7,[ebx+368]
        movaps [ecx+256],xmm0
        movaps [ecx+272],xmm1
        movaps [ecx+288],xmm2
        movaps [ecx+304],xmm3
        movaps [ecx+320],xmm4
        movaps [ecx+336],xmm5
        movaps [ecx+352],xmm6
        movaps [ecx+368],xmm7

        /*Chunk4: floats 96-127*/
        movups xmm0,[eax+384]
        movups xmm1,[eax+400]
        movups xmm2,[eax+416]
        movups xmm3,[eax+432]
        movups xmm4,[eax+448]
        movups xmm5,[eax+464]
        movups xmm6,[eax+480]
        movups xmm7,[eax+496]
        maxps  xmm0,[ebx+384]
        maxps  xmm1,[ebx+400]
        maxps  xmm2,[ebx+416]
        maxps  xmm3,[ebx+432]
        maxps  xmm4,[ebx+448]
        maxps  xmm5,[ebx+464]
        maxps  xmm6,[ebx+480]
        maxps  xmm7,[ebx+496]
        movaps [ecx+384],xmm0
        movaps [ecx+400],xmm1
        movaps [ecx+416],xmm2
        movaps [ecx+432],xmm3
        movaps [ecx+448],xmm4
        movaps [ecx+464],xmm5
        movaps [ecx+480],xmm6
        movaps [ecx+496],xmm7
    }
#else
    /*Scalar fallback. The walk is strictly front to back and both inputs are
    read before the output element is written, so a destination that overlaps
    a source at the same or a lower address sees only unmodified input*/
    float *dst=m;
    float *const dst_end=m+128;
    float *src1=v1;
    float *src2=v2;
    float first,second;

    while(dst!=dst_end)
    {
        first= *src1++;
        second= *src2++;
        *dst++ = (first>=second)?first:second;
    }
#endif /*DB_USE_SIMD*/
}
13278bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
/*Compute the 5x5 max-suppression-filtered image for a chunk of sf starting at
(left,top), of width 124 and stopping at row bottom (inclusive). The output is
shifted two steps left and overwrites 128 elements for each row, i.e.
sf[i][left-2..left+125].
The input s should be of width at least 128, and exist for 2 pixels outside the
specified region. s[i][left-2] and sf[i][left-2] should be 16 byte aligned.
Top must be at least 3.

The two build variants store different values in sf:
 - with DB_USE_SIMD, sf[i][j-2] is the maximum of the 24 neighbours of s[i][j]
   in its 5x5 window (centre excluded);
 - without DB_USE_SIMD, sf[i][j-2] is 0.0 when s[i][j] is strictly greater than
   all 24 neighbours and s[i][j] otherwise. This variant does not use temp and
   reads s[i][left-2..left+129].*/
inline void db_MaxSuppressFilterChunk_5x5_Aligned16_f(float **sf,float **s,int left,int top,int bottom,
                                      /*temp should point to at least
                                      6*132 floats of 16-byte-aligned allocated memory*/
                                      float *temp)
{
#ifdef DB_USE_SIMD
    int i,lm2;
    float *two[4];
    float *four,*five;

    lm2=left-2;

    /*Carve temp into six rows of 132 floats: four, five and the four
    wrap-around buffers two[0..3]*/
    four=temp;
    five=four+132;
    for(i=0;i<4;i++)
    {
        two[i]=five+(i+1)*132;
    }

    /*The 128-wide max kernels only ever write elements 0..127 of four and
    five, but the shifted reads below (five+1, five+3, four+2) reach up to
    element 130. Clear the whole tail 128..131 so that those reads never
    see uninitialized data and cannot raise floating point exceptions*/
    for(i=128;i<132;i++)
    {
        four[i]=0.0;
        five[i]=0.0;
    }

    /*Fill three rows of the wrap-around max buffers.
    two[k&3] holds the element-wise max of input rows k+1 and k+2*/
    for(i=top-3;i<top;i++) db_MaxVector_128_Aligned16_f(two[i&3],s[i+1]+lm2,s[i+2]+lm2);

    /*For each output row*/
    for(;i<=bottom;i++)
    {
        /*Compute max of the lowest pair of rows in the five row window*/
        db_MaxVector_128_Aligned16_f(two[i&3],s[i+1]+lm2,s[i+2]+lm2);
        /*Compute max of the lowest and highest pair of rows in the five row window
        (rows i-2,i-1,i+1,i+2, i.e. everything but the middle row)*/
        db_MaxVector_128_Aligned16_f(four,two[i&3],two[(i-3)&3]);
        /*Compute max of all rows (two[(i-1)&3] contributes the middle row i)*/
        db_MaxVector_128_Aligned16_f(five,four,two[(i-1)&3]);
        /*Compute max of 2x5 chunks: columns k and k+1*/
        db_MaxVector_128_SecondSourceDestAligned16_f(five,five+1,five);
        /*Compute max of pairs of 2x5 chunks: columns k,k+1,k+3,k+4*/
        db_MaxVector_128_SecondSourceDestAligned16_f(five,five+3,five);
        /*Compute max of pairs of 5x5 except middle: add column k+2 without
        its middle-row element*/
        db_MaxVector_128_SecondSourceDestAligned16_f(sf[i]+lm2,four+2,five);
    }

#else
    int i,j,right;
    float sv;

    /*The scratch buffer is only needed by the SIMD variant*/
    (void)temp;

    right=left+128;
    for(i=top;i<=bottom;i++) for(j=left;j<right;j++)
    {
        sv=s[i][j];

        /*Strict comparison against all 24 neighbours of the 5x5 window*/
        if( sv>s[i-2][j-2] && sv>s[i-2][j-1] && sv>s[i-2][j] && sv>s[i-2][j+1] && sv>s[i-2][j+2] &&
            sv>s[i-1][j-2] && sv>s[i-1][j-1] && sv>s[i-1][j] && sv>s[i-1][j+1] && sv>s[i-1][j+2] &&
            sv>s[  i][j-2] && sv>s[  i][j-1] &&                 sv>s[  i][j+1] && sv>s[  i][j+2] &&
            sv>s[i+1][j-2] && sv>s[i+1][j-1] && sv>s[i+1][j] && sv>s[i+1][j+1] && sv>s[i+1][j+2] &&
            sv>s[i+2][j-2] && sv>s[i+2][j-1] && sv>s[i+2][j] && sv>s[i+2][j+1] && sv>s[i+2][j+2])
        {
            /*Strict local maximum: store 0.0*/
            sf[i][j-2]=0.0;
        }
        /*Not a strict local maximum: store the value itself*/
        else sf[i][j-2]=sv;
    }
#endif /*DB_USE_SIMD*/
}
14018bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
14028bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling/*Compute Max-suppression-filtered image for a chunk of sf starting at (left,top) and
14038bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingstopping at bottom. The output is shifted two steps left. The input s should exist for 2 pixels
14048bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingoutside the specified region. s[i][left-2] and sf[i][left-2] should be 16 byte aligned.
14058bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha HaeberlingTop must be at least 3. Reading and writing from and to the input and output images is done
14068bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingas if the region had a width equal to a multiple of 124. If this is not the case, the images
14078bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingshould be over-allocated and the input cleared for a sufficient region*/
14088bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingvoid db_MaxSuppressFilter_5x5_Aligned16_f(float **sf,float **s,int left,int top,int right,int bottom,
14098bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                                          /*temp should point to at least
14108bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                                          6*132 floats of 16-byte-aligned allocated memory*/
14118bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                                          float *temp)
14128bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{
14138bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    int x,next_x;
14148bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
14158bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    for(x=left;x<=right;x=next_x)
14168bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    {
14178bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        next_x=x+124;
14188bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        db_MaxSuppressFilterChunk_5x5_Aligned16_f(sf,s,x,top,bottom,temp);
14198bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    }
14208bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling}
14218bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
/*Scan the chunk (left,top) to (right,bottom) of the strength image for corners.
A pixel is reported when its strength is at least threshold and strictly
greater than each of the other 24 values of the 5x5 window centred on it, so
strength must be readable two pixels beyond the chunk on every side.
Column, row and strength of every hit are appended, in row-by-row scan order,
to x_temp, y_temp and s_temp, which should each point to space of at least as
many positions as there are pixels in the chunk.
Returns the number of corners stored*/
inline int db_CornersFromChunk(float **strength,int left,int top,int right,int bottom,float threshold,double *x_temp,double *y_temp,double *s_temp)
{
    int count=0;

    for(int row=top;row<=bottom;row++)
    {
        for(int col=left;col<=right;col++)
        {
            const float center=strength[row][col];

            /*Written as a negated >= so that a NaN strength is rejected here*/
            if(!(center>=threshold)) continue;

            /*Compare against the window row by row, left to right, and give
            up at the first neighbour that the centre does not strictly beat*/
            bool is_peak=true;
            for(int dr=-2;is_peak && dr<=2;dr++)
            {
                const float *line=strength[row+dr];
                for(int dc=-2;dc<=2;dc++)
                {
                    if(dr==0 && dc==0) continue;
                    if(!(center>line[col+dc]))
                    {
                        is_peak=false;
                        break;
                    }
                }
            }
            if(!is_peak) continue;

            x_temp[count]=(double) col;
            y_temp[count]=(double) row;
            s_temp[count]=(double) center;
            count++;
        }
    }
    return(count);
}
14498bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
14508bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
14518bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling//Sub-pixel accuracy using 2D quadratic interpolation.(YCJ)
14528bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlinginline void db_SubPixel(float **strength, const double xd, const double yd, double &xs, double &ys)
14538bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{
14548bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    int x = (int) xd;
14558bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    int y = (int) yd;
14568bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
14578bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    float fxx = strength[y][x-1] - strength[y][x] - strength[y][x] + strength[y][x+1];
14588bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    float fyy = strength[y-1][x] - strength[y][x] - strength[y][x] + strength[y+1][x];
14598bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    float fxy = (strength[y-1][x-1] - strength[y-1][x+1] - strength[y+1][x-1] + strength[y+1][x+1])/(float)4.0;
14608bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
14618bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    float denom = (fxx * fyy - fxy * fxy) * (float) 2.0;
14628bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
14638bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    xs = xd;
14648bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    ys = yd;
14658bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
14668bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    if ( db_absf(denom) <= FLT_EPSILON )
14678bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    {
14688bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        return;
14698bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    }
14708bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    else
14718bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    {
14728bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        float fx = strength[y][x+1] - strength[y][x-1];
14738bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        float fy = strength[y+1][x] - strength[y-1][x];
14748bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
14758bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        float dx = (fyy * fx - fxy * fy) / denom;
14768bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        float dy = (fxx * fy - fxy * fx) / denom;
14778bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
14788bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        if ( db_absf(dx) > 1.0 || db_absf(dy) > 1.0 )
14798bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        {
14808bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling            return;
14818bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        }
14828bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        else
14838bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        {
14848bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling            xs -= dx;
14858bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling            ys -= dy;
14868bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        }
14878bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    }
14888bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
14898bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    return;
14908bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling}
14918bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
14928bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling/*Extract corners from the image part from (left,top) to (right,bottom).
14938bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha HaeberlingStore in x and y, extracting at most satnr corners in each block of size (bw,bh).
14948bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha HaeberlingThe pointer temp_d should point to at least 5*bw*bh positions.
14958bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingarea_factor holds how many corners max to extract per 10000 pixels*/
14968bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingvoid db_ExtractCornersSaturated(float **strength,int left,int top,int right,int bottom,
14978bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                                int bw,int bh,unsigned long area_factor,
14988bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                                float threshold,double *temp_d,
14998bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                                double *x_coord,double *y_coord,int *nr_corners)
15008bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{
15018bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    double *x_temp,*y_temp,*s_temp,*select_temp;
15028bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    double loc_thresh;
15038bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    unsigned long bwbh,area,saturation;
15048bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    int x,next_x,last_x;
15058bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    int y,next_y,last_y;
15068bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    int nr,nr_points,i,stop;
15078bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
15088bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    bwbh=bw*bh;
15098bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    x_temp=temp_d;
15108bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    y_temp=x_temp+bwbh;
15118bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    s_temp=y_temp+bwbh;
15128bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    select_temp=s_temp+bwbh;
15138bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
15148bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#ifdef DB_SUB_PIXEL
15158bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    // subpixel processing may sometimes push the corner ourside the real border
15168bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    // increasing border size:
15178bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    left++;
15188bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    top++;
15198bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    bottom--;
15208bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    right--;
15218bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling#endif /*DB_SUB_PIXEL*/
15228bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
15238bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    nr_points=0;
15248bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    for(y=top;y<=bottom;y=next_y)
15258bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    {
15268bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        next_y=y+bh;
15278bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        last_y=next_y-1;
15288bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        if(last_y>bottom) last_y=bottom;
15298bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        for(x=left;x<=right;x=next_x)
15308bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        {
15318bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling            next_x=x+bw;
15328bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling            last_x=next_x-1;
15338bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling            if(last_x>right) last_x=right;
15348bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
15358bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling            area=(last_x-x+1)*(last_y-y+1);
15368bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling            saturation=(area*area_factor)/10000;
15378bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling            nr=db_CornersFromChunk(strength,x,y,last_x,last_y,threshold,x_temp,y_temp,s_temp);
15388bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling            if(nr)
15398bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling            {
15408bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                if(((unsigned long)nr)>saturation) loc_thresh=db_LeanQuickSelect(s_temp,nr,nr-saturation,select_temp);
15418bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                else loc_thresh=threshold;
15428bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
15438bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                stop=nr_points+saturation;
15448bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                for(i=0;(i<nr)&&(nr_points<stop);i++)
15458bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                {
15468bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                    if(s_temp[i]>=loc_thresh)
15478bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                    {
15488bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                        #ifdef DB_SUB_PIXEL
15498bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                               db_SubPixel(strength, x_temp[i], y_temp[i], x_coord[nr_points], y_coord[nr_points]);
15508bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                        #else
15518bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                               x_coord[nr_points]=x_temp[i];
15528bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                               y_coord[nr_points]=y_temp[i];
15538bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                        #endif
15548bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
15558bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                        nr_points++;
15568bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                    }
15578bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                }
15588bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling            }
15598bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        }
15608bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    }
15618bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    *nr_corners=nr_points;
15628bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling}
15638bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
15648bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingdb_CornerDetector_f::db_CornerDetector_f()
15658bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{
15668bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    m_w=0; m_h=0;
15678bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling}
15688bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
15698bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingdb_CornerDetector_f::~db_CornerDetector_f()
15708bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{
15718bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    Clean();
15728bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling}
15738bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
15748bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingvoid db_CornerDetector_f::Clean()
15758bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{
15768bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    if(m_w!=0)
15778bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    {
15788bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        delete [] m_temp_f;
15798bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        delete [] m_temp_d;
15808bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        db_FreeStrengthImage_f(m_strength_mem,m_strength,m_h);
15818bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    }
15828bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    m_w=0; m_h=0;
15838bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling}
15848bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
15858bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingunsigned long db_CornerDetector_f::Init(int im_width,int im_height,int target_nr_corners,
15868bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                            int nr_horizontal_blocks,int nr_vertical_blocks,
15878bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                            double absolute_threshold,double relative_threshold)
15888bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{
15898bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    int chunkwidth=208;
15908bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    int block_width,block_height;
15918bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    unsigned long area_factor;
15928bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    int active_width,active_height;
15938bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
15948bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    active_width=db_maxi(1,im_width-10);
15958bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    active_height=db_maxi(1,im_height-10);
15968bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    block_width=db_maxi(1,active_width/nr_horizontal_blocks);
15978bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    block_height=db_maxi(1,active_height/nr_vertical_blocks);
15988bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
15998bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    area_factor=db_minl(1000,db_maxl(1,(long)(10000.0*((double)target_nr_corners)/
16008bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        (((double)active_width)*((double)active_height)))));
16018bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
16028bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    return(Start(im_width,im_height,block_width,block_height,area_factor,
16038bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        absolute_threshold,relative_threshold,chunkwidth));
16048bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling}
16058bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
16068bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingunsigned long db_CornerDetector_f::Start(int im_width,int im_height,
16078bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                             int block_width,int block_height,unsigned long area_factor,
16088bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                             double absolute_threshold,double relative_threshold,int chunkwidth)
16098bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{
16108bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    Clean();
16118bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
16128bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    m_w=im_width;
16138bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    m_h=im_height;
16148bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    m_cw=chunkwidth;
16158bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    m_bw=block_width;
16168bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    m_bh=block_height;
16178bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    m_area_factor=area_factor;
16188bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    m_r_thresh=relative_threshold;
16198bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    m_a_thresh=absolute_threshold;
16208bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    m_max_nr=db_maxl(1,1+(m_w*m_h*m_area_factor)/10000);
16218bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
16228bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    m_temp_f=new float[13*(m_cw+4)];
16238bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    m_temp_d=new double[5*m_bw*m_bh];
16248bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    m_strength=db_AllocStrengthImage_f(&m_strength_mem,m_w,m_h);
16258bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
16268bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    return(m_max_nr);
16278bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling}
16288bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
16298bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingvoid db_CornerDetector_f::DetectCorners(const float * const *img,double *x_coord,double *y_coord,int *nr_corners) const
16308bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{
16318bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    float max_val,threshold;
16328bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
16338bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    db_HarrisStrength_f(m_strength,img,m_w,m_h,m_temp_f,m_cw);
16348bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
16358bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    if(m_r_thresh)
16368bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    {
16378bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        max_val=db_MaxImage_Aligned16_f(m_strength,3,3,m_w-6,m_h-6);
16388bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        threshold= (float) db_maxd(m_a_thresh,max_val*m_r_thresh);
16398bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    }
16408bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    else threshold= (float) m_a_thresh;
16418bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
16428bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    db_ExtractCornersSaturated(m_strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,threshold,
16438bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        m_temp_d,x_coord,y_coord,nr_corners);
16448bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling}
16458bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
16468bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingdb_CornerDetector_u::db_CornerDetector_u()
16478bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{
16488bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    m_w=0; m_h=0;
16498bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling}
16508bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
16518bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingdb_CornerDetector_u::~db_CornerDetector_u()
16528bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{
16538bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    Clean();
16548bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling}
16558bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
16568bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingdb_CornerDetector_u::db_CornerDetector_u(const db_CornerDetector_u& cd)
16578bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{
16588bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    Start(cd.m_w, cd.m_h, cd.m_bw, cd.m_bh, cd.m_area_factor,
16598bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        cd.m_a_thresh, cd.m_r_thresh);
16608bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling}
16618bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
16628bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingdb_CornerDetector_u& db_CornerDetector_u::operator=(const db_CornerDetector_u& cd)
16638bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{
16648bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    if ( this == &cd ) return *this;
16658bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
16668bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    Clean();
16678bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
16688bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    Start(cd.m_w, cd.m_h, cd.m_bw, cd.m_bh, cd.m_area_factor,
16698bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        cd.m_a_thresh, cd.m_r_thresh);
16708bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
16718bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    return *this;
16728bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling}
16738bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
16748bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingvoid db_CornerDetector_u::Clean()
16758bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{
16768bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    if(m_w!=0)
16778bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    {
16788bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        delete [] m_temp_i;
16798bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        delete [] m_temp_d;
16808bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        db_FreeStrengthImage_f(m_strength_mem,m_strength,m_h);
16818bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    }
16828bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    m_w=0; m_h=0;
16838bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling}
16848bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
16858bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingunsigned long db_CornerDetector_u::Init(int im_width,int im_height,int target_nr_corners,
16868bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                            int nr_horizontal_blocks,int nr_vertical_blocks,
16878bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                            double absolute_threshold,double relative_threshold)
16888bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{
16898bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    int block_width,block_height;
16908bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    unsigned long area_factor;
16918bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    int active_width,active_height;
16928bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
16938bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    active_width=db_maxi(1,im_width-10);
16948bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    active_height=db_maxi(1,im_height-10);
16958bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    block_width=db_maxi(1,active_width/nr_horizontal_blocks);
16968bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    block_height=db_maxi(1,active_height/nr_vertical_blocks);
16978bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
16988bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    area_factor=db_minl(1000,db_maxl(1,(long)(10000.0*((double)target_nr_corners)/
16998bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        (((double)active_width)*((double)active_height)))));
17008bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
17018bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    return(Start(im_width,im_height,block_width,block_height,area_factor,
17028bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        16.0*absolute_threshold,relative_threshold));
17038bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling}
17048bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
17058bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingunsigned long db_CornerDetector_u::Start(int im_width,int im_height,
17068bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                             int block_width,int block_height,unsigned long area_factor,
17078bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                             double absolute_threshold,double relative_threshold)
17088bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{
17098bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    Clean();
17108bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
17118bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    m_w=im_width;
17128bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    m_h=im_height;
17138bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    m_bw=block_width;
17148bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    m_bh=block_height;
17158bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    m_area_factor=area_factor;
17168bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    m_r_thresh=relative_threshold;
17178bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    m_a_thresh=absolute_threshold;
17188bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    m_max_nr=db_maxl(1,1+(m_w*m_h*m_area_factor)/10000);
17198bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
17208bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    m_temp_i=new int[18*128];
17218bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    m_temp_d=new double[5*m_bw*m_bh];
17228bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    m_strength=db_AllocStrengthImage_f(&m_strength_mem,m_w,m_h);
17238bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
17248bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    return(m_max_nr);
17258bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling}
17268bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
17278bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingvoid db_CornerDetector_u::DetectCorners(const unsigned char * const *img,double *x_coord,double *y_coord,int *nr_corners,
17288bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                                        const unsigned char * const *msk, unsigned char fgnd) const
17298bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling{
17308bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    float max_val,threshold;
17318bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
17328bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    db_HarrisStrength_u(m_strength,img,m_w,m_h,m_temp_i);
17338bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
17348bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
17358bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    if(m_r_thresh)
17368bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    {
17378bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        max_val=db_MaxImage_Aligned16_f(m_strength,3,3,m_w-6,m_h-6);
17388bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        threshold= (float) db_maxd(m_a_thresh,max_val*m_r_thresh);
17398bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    }
17408bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    else threshold= (float) m_a_thresh;
17418bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
17428bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    db_ExtractCornersSaturated(m_strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,threshold,
17438bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        m_temp_d,x_coord,y_coord,nr_corners);
17448bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
17458bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
17468bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    if ( msk )
17478bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    {
17488bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        int nr_corners_mask=0;
17498bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
17508bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        for ( int i = 0; i < *nr_corners; ++i)
17518bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        {
17528bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling            int cor_x = db_roundi(*(x_coord+i));
17538bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling            int cor_y = db_roundi(*(y_coord+i));
17548bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling            if ( msk[cor_y][cor_x] == fgnd )
17558bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling            {
17568bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                x_coord[nr_corners_mask] = x_coord[i];
17578bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                y_coord[nr_corners_mask] = y_coord[i];
17588bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling                nr_corners_mask++;
17598bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling            }
17608bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        }
17618bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        *nr_corners = nr_corners_mask;
17628bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    }
17638bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling}
17648bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
17658bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberlingvoid db_CornerDetector_u::ExtractCorners(float ** strength, double *x_coord, double *y_coord, int *nr_corners) {
17668bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling    if ( m_w!=0 )
17678bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling        db_ExtractCornersSaturated(strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,float(m_a_thresh),
17688bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling            m_temp_d,x_coord,y_coord,nr_corners);
17698bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling}
17708bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96Sascha Haeberling
1771