1/* Original code has been submitted by Liu Liu. Here is the copyright.
2----------------------------------------------------------------------------------
3 * An OpenCV Implementation of SURF
4 * Further Information Refer to "SURF: Speed-Up Robust Feature"
5 * Author: Liu Liu
6 * liuliu.1987+opencv@gmail.com
7 *
8 * There are still serveral lacks for this experimental implementation:
9 * 1.The interpolation of sub-pixel mentioned in article was not implemented yet;
10 * 2.A comparision with original libSurf.so shows that the hessian detector is not a 100% match to their implementation;
11 * 3.Due to above reasons, I recommanded the original one for study and reuse;
12 *
13 * However, the speed of this implementation is something comparable to original one.
14 *
15 * Copyright© 2008, Liu Liu All rights reserved.
16 *
17 * Redistribution and use in source and binary forms, with or
18 * without modification, are permitted provided that the following
19 * conditions are met:
20 * 	Redistributions of source code must retain the above
21 * 	copyright notice, this list of conditions and the following
22 * 	disclaimer.
23 * 	Redistributions in binary form must reproduce the above
24 * 	copyright notice, this list of conditions and the following
25 * 	disclaimer in the documentation and/or other materials
26 * 	provided with the distribution.
27 * 	The name of Contributor may not be used to endorse or
28 * 	promote products derived from this software without
29 * 	specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
32 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
33 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
34 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
35 * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
36 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
37 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
38 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
40 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
41 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
42 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
43 * OF SUCH DAMAGE.
44 */
45
46/*
47   The following changes have been made, comparing to the original contribution:
48   1. A lot of small optimizations, less memory allocations, got rid of global buffers
49   2. Reversed order of cvGetQuadrangleSubPix and cvResize calls; probably less accurate, but much faster
50   3. The descriptor computing part (which is most expensive) is threaded using OpenMP
51   (subpixel-accurate keypoint localization and scale estimation are still TBD)
52*/
53
54#include "_cv.h"
55
56CvSURFParams cvSURFParams(double threshold, int extended)
57{
58    CvSURFParams params;
59    params.hessianThreshold = threshold;
60    params.extended = extended;
61    params.nOctaves = 3;
62    params.nOctaveLayers = 4;
63    return params;
64}
65
66struct CvSurfHF
67{
68    int p0, p1, p2, p3;
69    float w;
70};
71
72CV_INLINE float
73icvCalcHaarPattern( const int* origin, const CvSurfHF* f, int n )
74{
75    double d = 0;
76    for( int k = 0; k < n; k++ )
77        d += (origin[f[k].p0] + origin[f[k].p3] - origin[f[k].p1] - origin[f[k].p2])*f[k].w;
78    return (float)d;
79}
80
81static void
82icvResizeHaarPattern( const int src[][5], CvSurfHF* dst, int n, int oldSize, int newSize, int widthStep )
83{
84    for( int k = 0; k < n; k++ )
85    {
86        int dx1 = src[k][0]*newSize/oldSize;
87        int dy1 = src[k][1]*newSize/oldSize;
88        int dx2 = src[k][2]*newSize/oldSize;
89        int dy2 = src[k][3]*newSize/oldSize;
90        dst[k].p0 = dy1*widthStep + dx1;
91        dst[k].p1 = dy2*widthStep + dx1;
92        dst[k].p2 = dy1*widthStep + dx2;
93        dst[k].p3 = dy2*widthStep + dx2;
94        dst[k].w = src[k][4]/((float)(dx2-dx1)*(dy2-dy1));
95    }
96}
97
98static CvSeq* icvFastHessianDetector( const CvMat* sum, const CvMat* mask_sum,
99    CvMemStorage* storage, const CvSURFParams* params )
100{
101    CvSeq* points = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvSURFPoint), storage );
102
103    int totalLayers = params->nOctaves*(params->nOctaveLayers+2);
104    CvMat** hessians = (CvMat**)cvStackAlloc(totalLayers*sizeof(hessians[0]));
105    CvMat** traces = (CvMat**)cvStackAlloc(totalLayers*sizeof(traces[0]));
106    int size, *sizeCache = (int*)cvStackAlloc(totalLayers*sizeof(sizeCache[0]));
107    int scale, *scaleCache = (int*)cvStackAlloc(totalLayers*sizeof(scaleCache[0]));
108
109    const int NX=3, NY=3, NXY=4, SIZE0=9;
110    int dx_s[NX][5] = { {0, 2, 3, 7, 1}, {3, 2, 6, 7, -2}, {6, 2, 9, 7, 1} };
111    int dy_s[NY][5] = { {2, 0, 7, 3, 1}, {2, 3, 7, 6, -2}, {2, 6, 7, 9, 1} };
112    int dxy_s[NXY][5] = { {1, 1, 4, 4, 1}, {5, 1, 8, 4, -1}, {1, 5, 4, 8, -1}, {5, 5, 8, 8, 1} };
113    int dm[1][5] = { {0, 0, 9, 9, 1} };
114    CvSurfHF Dx[NX], Dy[NY], Dxy[NXY], Dm;
115    double dx = 0, dy = 0, dxy = 0;
116    int hessian_rows, hessian_cols;
117
118    int octave, sc;
119    int i, j, k, z;
120    int* xofs = (int*)cvStackAlloc(sum->cols*sizeof(xofs[0]));
121
122    /* hessian detector */
123    for( octave = k = 0; octave < params->nOctaves; octave++ )
124    {
125        for( sc = -1; sc <= params->nOctaveLayers; sc++, k++ )
126        {
127            if ( sc < 0 )
128                sizeCache[k] = size = 7 << octave; // gaussian scale 1.0;
129            else
130                sizeCache[k] = size = (sc*6 + 9) << octave; // gaussian scale size*1.2/9.;
131            scaleCache[k] = scale = MAX(size, SIZE0);
132
133            hessian_rows = (sum->rows)*SIZE0/scale;
134            hessian_cols = (sum->cols)*SIZE0/scale;
135            hessians[k] = cvCreateMat( hessian_rows, hessian_cols, CV_32FC1 );
136            traces[k] = cvCreateMat( hessian_rows, hessian_cols, CV_32FC1 );
137
138            icvResizeHaarPattern( dx_s, Dx, NX, SIZE0, size, sum->cols );
139            icvResizeHaarPattern( dy_s, Dy, NY, SIZE0, size, sum->cols );
140            icvResizeHaarPattern( dxy_s, Dxy, NXY, SIZE0, size, sum->cols );
141            for( i = 0; i < NXY; i++ )
142                Dxy[i].w *= 0.9f;
143
144            float* hessian = hessians[k]->data.fl;
145            float* trace = traces[k]->data.fl;
146
147            for( i = 0; i < hessian_cols*(SIZE0/2); i++ )
148                hessian[i] = hessian[hessian_cols*hessian_rows-1-i] =
149                trace[i] = trace[hessian_cols*hessian_rows-1-i] = 0.f;
150
151            hessian += (SIZE0/2)*(hessian_cols + 1);
152            trace += (SIZE0/2)*(hessian_cols + 1);
153
154            for( j = 0; j <= hessian_cols - SIZE0; j++ )
155                xofs[j] = j*scale/SIZE0;
156
157            for( i = 0; i < hessian_rows - SIZE0; i++,
158                trace += hessian_cols, hessian += hessian_cols )
159            {
160                const int* sum_ptr = sum->data.i + sum->cols*(i*scale/SIZE0);
161                for( j = 0; j < SIZE0/2; j++ )
162                    hessian[-j-1] = hessian[hessian_cols - SIZE0 + j] =
163                    trace[-j-1] = trace[hessian_cols - SIZE0 + j] = 0.f;
164                for( j = 0; j <= hessian_cols - SIZE0; j++ )
165                {
166                    const int* s = sum_ptr + xofs[j];
167                    dx = (s[Dx[0].p0] + s[Dx[0].p3] - s[Dx[0].p1] - s[Dx[0].p2])*Dx[0].w +
168                        (s[Dx[1].p0] + s[Dx[1].p3] - s[Dx[1].p1] - s[Dx[1].p2])*Dx[1].w +
169                        (s[Dx[2].p0] + s[Dx[2].p3] - s[Dx[2].p1] - s[Dx[2].p2])*Dx[2].w;
170                    dy = (s[Dy[0].p0] + s[Dy[0].p3] - s[Dy[0].p1] - s[Dy[0].p2])*Dy[0].w +
171                        (s[Dy[1].p0] + s[Dy[1].p3] - s[Dy[1].p1] - s[Dy[1].p2])*Dy[1].w +
172                        (s[Dy[2].p0] + s[Dy[2].p3] - s[Dy[2].p1] - s[Dy[2].p2])*Dy[2].w;
173                    dxy = (s[Dxy[0].p0] + s[Dxy[0].p3] - s[Dxy[0].p1] - s[Dxy[0].p2])*Dxy[0].w +
174                        (s[Dxy[1].p0] + s[Dxy[1].p3] - s[Dxy[1].p1] - s[Dxy[1].p2])*Dxy[1].w +
175                        (s[Dxy[2].p0] + s[Dxy[2].p3] - s[Dxy[2].p1] - s[Dxy[2].p2])*Dxy[2].w +
176                        (s[Dxy[3].p0] + s[Dxy[3].p3] - s[Dxy[3].p1] - s[Dxy[3].p2])*Dxy[3].w;
177                    hessian[j] = (float)(dx*dy - dxy*dxy);
178                    trace[j] = (float)(dx + dy);
179                }
180            }
181        }
182    }
183
184    for( octave = 0, k = 1; octave < params->nOctaves; octave++, k+=2 )
185    {
186        for( sc = 0; sc < params->nOctaveLayers; sc++, k++ )
187        {
188            size = sizeCache[k];
189            scale = scaleCache[k];
190            hessian_rows = hessians[k]->rows;
191            hessian_cols = hessians[k]->cols;
192            icvResizeHaarPattern( dm, &Dm, 1, SIZE0, size, mask_sum ? mask_sum->cols : sum->cols );
193            int margin = 5*scaleCache[k+1]/scale;
194            for( i = margin; i < hessian_rows-margin; i++ )
195            {
196                const float* hessian = hessians[k]->data.fl + i*hessian_cols;
197                const float* trace = traces[k]->data.fl + i*hessian_cols;
198                for( j = margin; j < hessian_cols-margin; j++ )
199                {
200                    float val0 = hessian[j];
201                    if( val0 > params->hessianThreshold )
202                    {
203                        bool suppressed = false;
204                        if( mask_sum )
205                        {
206                            const int* mask_ptr = mask_sum->data.i +
207                                mask_sum->cols*((i-SIZE0/2)*scale/SIZE0) +
208                                (j - SIZE0/2)*scale/SIZE0;
209                            float mval = icvCalcHaarPattern( mask_ptr, &Dm, 1 );
210                            if( mval < 0.5 )
211                                continue;
212                        }
213
214                        /* non-maxima suppression */
215                        for( z = k-1; z < k+2; z++ )
216                        {
217                            int hcols_z = hessians[z]->cols;
218                            const float* hessian = hessians[z]->data.fl + (j*scale+scaleCache[z]/2)/scaleCache[z]-1 +
219                                ((i*scale + scaleCache[z]/2)/scaleCache[z]-1)*hcols_z;
220                            if( val0 < hessian[0] || val0 < hessian[1] || val0 < hessian[2] ||
221                                val0 < hessian[hcols_z] || val0 < hessian[hcols_z+1] ||
222                                val0 < hessian[hcols_z+2] || val0 < hessian[hcols_z*2] ||
223                                val0 < hessian[hcols_z*2+1] || val0 < hessian[hcols_z*2+2] )
224                            {
225                                suppressed = true;
226                                break;
227                            }
228                        }
229                        if( !suppressed )
230                        {
231                            double trace_val = trace[j];
232                            CvSURFPoint point = cvSURFPoint( cvPoint2D32f(j*scale/9.f, i*scale/9.f),
233                                CV_SIGN(trace_val), sizeCache[k], 0, val0 );
234                            cvSeqPush( points, &point );
235                        }
236                    }
237                }
238            }
239        }
240    }
241
242    for( octave = k = 0; octave < params->nOctaves; octave++ )
243        for( sc = -1; sc <= params->nOctaveLayers; sc++, k++ )
244        {
245            cvReleaseMat( &hessians[k] );
246            cvReleaseMat( &traces[k] );
247        }
248    return points;
249}
250
251
252CV_IMPL void
253cvExtractSURF( const CvArr* _img, const CvArr* _mask,
254               CvSeq** _keypoints, CvSeq** _descriptors,
255               CvMemStorage* storage, CvSURFParams params )
256{
257    CvMat *sum = 0, *mask1 = 0, *mask_sum = 0;
258
259    if( _keypoints )
260        *_keypoints = 0;
261    if( _descriptors )
262        *_descriptors = 0;
263
264    CV_FUNCNAME( "cvExtractSURF" );
265
266    __BEGIN__;
267
268    CvSeq *keypoints, *descriptors = 0;
269    CvMat imghdr, *img = cvGetMat(_img, &imghdr);
270    CvMat maskhdr, *mask = _mask ? cvGetMat(_mask, &maskhdr) : 0;
271
272    int descriptor_size = params.extended ? 128 : 64;
273    const int descriptor_data_type = CV_32F;
274    const int NX=2, NY=2;
275    const float sqrt_2 = 1.4142135623730950488016887242097f;
276    const int PATCH_SZ = 20;
277    const int RS_PATCH_SZ = 30; // ceil((PATCH_SZ+1)*sqrt_2);
278    int dx_s[NX][5] = {{0, 0, 2, 4, -1}, {2, 0, 4, 4, 1}};
279    int dy_s[NY][5] = {{0, 0, 4, 2, 1}, {0, 2, 4, 4, -1}};
280    float G[9] = {0,0,0,0,0,0,0,0,0};
281    CvMat _G = cvMat(1, 9, CV_32F, G);
282    float DW[PATCH_SZ][PATCH_SZ];
283    CvMat _DW = cvMat(PATCH_SZ, PATCH_SZ, CV_32F, DW);
284    CvPoint apt[81];
285    int i, j, k, nangle0 = 0, N;
286
287    CV_ASSERT( img != 0 && CV_MAT_TYPE(img->type) == CV_8UC1 &&
288        (mask == 0 || (CV_ARE_SIZES_EQ(img,mask) &&
289        CV_MAT_TYPE(mask->type) == CV_8UC1)) &&
290        storage != 0 && params.hessianThreshold >= 0 &&
291        params.nOctaves > 0 && params.nOctaveLayers > 0 );
292
293    sum = cvCreateMat( img->height+1, img->width+1, CV_32SC1 );
294    cvIntegral( img, sum );
295    if( mask )
296    {
297        mask1 = cvCreateMat( img->height, img->width, CV_8UC1 );
298        mask_sum = cvCreateMat( img->height+1, img->width+1, CV_32SC1 );
299        cvMinS( mask, 1, mask1 );
300        cvIntegral( mask1, mask_sum );
301    }
302    keypoints = icvFastHessianDetector( sum, mask_sum, storage, &params );
303    N = keypoints->total;
304    if( _descriptors )
305    {
306        descriptors = cvCreateSeq( 0, sizeof(CvSeq),
307            descriptor_size*CV_ELEM_SIZE(descriptor_data_type), storage );
308        cvSeqPushMulti( descriptors, 0, N );
309    }
310
311    CvSepFilter::init_gaussian_kernel( &_G, 2.5 );
312
313    {
314    const double sigma = 3.3;
315    double c2 = 1./(sigma*sigma*2), gs = 0;
316    for( i = 0; i < PATCH_SZ; i++ )
317    {
318        for( j = 0; j < PATCH_SZ; j++ )
319        {
320            double x = j - PATCH_SZ*0.5, y = i - PATCH_SZ*0.5;
321            double val = exp(-(x*x+y*y)*c2);
322            DW[i][j] = (float)val;
323            gs += val;
324        }
325    }
326    cvScale( &_DW, &_DW, 1./gs );
327    }
328
329    for( i = -4; i <= 4; i++ )
330        for( j = -4; j <= 4; j++ )
331        {
332            if( i*i + j*j <= 16 )
333                apt[nangle0++] = cvPoint(j,i);
334        }
335
336    {
337#ifdef _OPENMP
338    int nthreads = cvGetNumThreads();
339#pragma omp parallel for num_threads(nthreads) schedule(dynamic)
340#endif
341    for( k = 0; k < N; k++ )
342    {
343        const int* sum_ptr = sum->data.i;
344        int sum_cols = sum->cols;
345        int i, j, kk, x, y, nangle;
346        CvSurfHF dx_t[NX], dy_t[NY];
347        float X[81], Y[81], angle[81];
348        uchar PATCH[PATCH_SZ+1][PATCH_SZ+1], RS_PATCH[RS_PATCH_SZ][RS_PATCH_SZ];
349        float DX[PATCH_SZ][PATCH_SZ], DY[PATCH_SZ][PATCH_SZ];
350        CvMat _X = cvMat(1, 81, CV_32F, X);
351        CvMat _Y = cvMat(1, 81, CV_32F, Y);
352        CvMat _angle = cvMat(1, 81, CV_32F, angle);
353        CvMat _patch = cvMat(PATCH_SZ+1, PATCH_SZ+1, CV_8U, PATCH);
354        CvMat _rs_patch = cvMat(RS_PATCH_SZ, RS_PATCH_SZ, CV_8U, RS_PATCH);
355        CvMat _src, *src = img;
356
357        CvSURFPoint* kp = (CvSURFPoint*)cvGetSeqElem( keypoints, k );
358        CvPoint2D32f center = kp->pt;
359        int size = kp->size;
360        icvResizeHaarPattern( dx_s, dx_t, NX, 9, size, sum->cols );
361        icvResizeHaarPattern( dy_s, dy_t, NY, 9, size, sum->cols );
362        CvPoint pt = cvPointFrom32f(center);
363        float* vec;
364        float alpha0, beta0, sz0, scale0;
365
366        for( kk = 0, nangle = 0; kk < nangle0; kk++ )
367        {
368            j = apt[kk].x; i = apt[kk].y;
369            int x = pt.x + (j-2)*size/9;
370            int y = pt.y + (i-2)*size/9;
371            const int* ptr;
372            float vx, vy, w;
373            if( (unsigned)y >= (unsigned)sum->rows - size ||
374                (unsigned)x >= (unsigned)sum->cols - size )
375                continue;
376            ptr = sum_ptr + x + y*sum_cols;
377            w = G[i+4]*G[j+4];
378            vx = icvCalcHaarPattern( ptr, dx_t, NX )*w;
379            vy = icvCalcHaarPattern( ptr, dy_t, NX )*w;
380            X[nangle] = vx; Y[nangle] = vy;
381            nangle++;
382        }
383        _X.cols = _Y.cols = _angle.cols = nangle;
384        cvCartToPolar( &_X, &_Y, 0, &_angle, 1 );
385
386        float bestx = 0, besty = 0, descriptor_mod = 0;
387        for( i = 0; i < 360; i += 5 )
388        {
389            float sumx = 0, sumy = 0, temp_mod;
390            for( j = 0; j < nangle; j++ )
391            {
392                int d = abs(cvRound(angle[j]) - i);
393                if( d < 60 || d > 300 )
394                {
395                    sumx += X[j];
396                    sumy += Y[j];
397                }
398            }
399            temp_mod = sumx*sumx + sumy*sumy;
400            if( temp_mod > descriptor_mod )
401            {
402                descriptor_mod = temp_mod;
403                bestx = sumx;
404                besty = sumy;
405            }
406        }
407
408        float descriptor_dir = cvFastArctan( besty, bestx );
409        kp->dir = descriptor_dir;
410
411        if( !_descriptors )
412            continue;
413        descriptor_dir *= (float)(CV_PI/180);
414
415        alpha0 = (float)cos(descriptor_dir);
416        beta0 = (float)sin(descriptor_dir);
417        sz0 = (float)((PATCH_SZ+1)*size*1.2/9.);
418        scale0 = sz0/(PATCH_SZ+1);
419
420        if( sz0 > (PATCH_SZ+1)*1.5f )
421        {
422            float rd = (float)(sz0*sqrt_2*0.5);
423            float alpha1 = (alpha0 - beta0)*sqrt_2*0.5f, beta1 = (alpha0 + beta0)*sqrt_2*0.5f;
424            CvRect patch_rect0 = { INT_MAX, INT_MAX, INT_MIN, INT_MIN }, patch_rect, sr_patch_rect;
425
426            for( i = 0; i < 4; i++ )
427            {
428                float a, b, r = i < 2 ? rd : -rd;
429                if( i % 2 == 0 )
430                    a = alpha1, b = beta1;
431                else
432                    a = -beta1, b = alpha1;
433                float xf = center.x + r*a;
434                float yf = center.y - r*b;
435                x = cvFloor(xf); patch_rect0.x = MIN(patch_rect0.x, x);
436                y = cvFloor(yf); patch_rect0.y = MIN(patch_rect0.y, y);
437                x = cvCeil(xf)+1; patch_rect0.width = MAX(patch_rect0.width, x);
438                y = cvCeil(yf)+1; patch_rect0.height = MAX(patch_rect0.height, y);
439            }
440
441            patch_rect = patch_rect0;
442            patch_rect.x = MAX(patch_rect.x, 0);
443            patch_rect.y = MAX(patch_rect.y, 0);
444            patch_rect.width = MIN(patch_rect.width, img->width) - patch_rect.x;
445            patch_rect.height = MIN(patch_rect.height, img->height) - patch_rect.y;
446            patch_rect0.width -= patch_rect0.x;
447            patch_rect0.height -= patch_rect0.y;
448
449            CvMat _src0;
450            float scale = MIN(1.f,MIN((float)RS_PATCH_SZ/patch_rect0.width,
451                (float)RS_PATCH_SZ/patch_rect0.height));
452            cvGetSubArr( img, &_src0, patch_rect );
453            sr_patch_rect = cvRect(0,0, RS_PATCH_SZ, RS_PATCH_SZ);
454            sr_patch_rect.width = cvRound(patch_rect.width*scale);
455            sr_patch_rect.height = cvRound(patch_rect.height*scale);
456            src = cvGetSubArr( &_rs_patch, &_src, sr_patch_rect );
457            cvResize( &_src0, &_src, CV_INTER_AREA );
458            center.x = RS_PATCH_SZ*0.5f - (patch_rect.x - patch_rect0.x)*scale;
459            center.y = RS_PATCH_SZ*0.5f - (patch_rect.y - patch_rect0.y)*scale;
460            scale0 *= scale;
461        }
462
463        {
464        float w[] =
465        {
466            alpha0*scale0, beta0*scale0, center.x,
467            -beta0*scale0, alpha0*scale0, center.y
468        };
469        CvMat W = cvMat(2, 3, CV_32F, w);
470        cvGetQuadrangleSubPix( src, &_patch, &W );
471        }
472
473        for( i = 0; i < PATCH_SZ; i++ )
474            for( j = 0; j < PATCH_SZ; j++ )
475            {
476                float dw = DW[i][j];
477                float vx = (PATCH[i][j+1] - PATCH[i][j] + PATCH[i+1][j+1] - PATCH[i+1][j])*dw;
478                float vy = (PATCH[i+1][j] - PATCH[i][j] + PATCH[i+1][j+1] - PATCH[i][j+1])*dw;
479                DX[i][j] = vx;
480                DY[i][j] = vy;
481            }
482
483        vec = (float*)cvGetSeqElem( descriptors, k );
484        for( kk = 0; kk < (int)(descriptors->elem_size/sizeof(vec[0])); kk++ )
485            vec[kk] = 0;
486        if( params.extended )
487        {
488            /* 128-bin descriptor */
489            for( i = 0; i < 4; i++ )
490                for( j = 0; j < 4; j++ )
491                {
492                    for( y = i*5; y < i*5+5; y++ )
493                    {
494                        for( x = j*5; x < j*5+5; x++ )
495                        {
496                            float tx = DX[y][x], ty = DY[y][x];
497                            if( ty >= 0 )
498                            {
499                                vec[0] += tx;
500                                vec[1] += (float)fabs(tx);
501                            } else {
502                                vec[2] += tx;
503                                vec[3] += (float)fabs(tx);
504                            }
505                            if ( tx >= 0 )
506                            {
507                                vec[4] += ty;
508                                vec[5] += (float)fabs(ty);
509                            } else {
510                                vec[6] += ty;
511                                vec[7] += (float)fabs(ty);
512                            }
513                        }
514                    }
515                    /* unit vector is essential for contrast invariance */
516                    double normalize = 0;
517                    for( kk = 0; kk < 8; kk++ )
518                        normalize += vec[kk]*vec[kk];
519                    normalize = 1./(sqrt(normalize) + DBL_EPSILON);
520                    for( kk = 0; kk < 8; kk++ )
521                        vec[kk] = (float)(vec[kk]*normalize);
522                    vec += 8;
523                }
524        }
525        else
526        {
527            /* 64-bin descriptor */
528            for( i = 0; i < 4; i++ )
529                for( j = 0; j < 4; j++ )
530                {
531                    for( y = i*5; y < i*5+5; y++ )
532                    {
533                        for( x = j*5; x < j*5+5; x++ )
534                        {
535                            float tx = DX[y][x], ty = DY[y][x];
536                            vec[0] += tx; vec[1] += ty;
537                            vec[2] += (float)fabs(tx); vec[3] += (float)fabs(ty);
538                        }
539                    }
540                    double normalize = 0;
541                    for( kk = 0; kk < 4; kk++ )
542                        normalize += vec[kk]*vec[kk];
543                    normalize = 1./(sqrt(normalize) + DBL_EPSILON);
544                    for( kk = 0; kk < 4; kk++ )
545                        vec[kk] = (float)(vec[kk]*normalize);
546                    vec+=4;
547                }
548        }
549    }
550    }
551
552    if( _keypoints )
553        *_keypoints = keypoints;
554    if( _descriptors )
555        *_descriptors = descriptors;
556
557    __END__;
558
559    cvReleaseMat( &sum );
560    cvReleaseMat( &mask1 );
561    cvReleaseMat( &mask_sum );
562}
563