1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*$Id: db_feature_matching.cpp,v 1.4 2011/06/17 14:03:30 mbansal Exp $*/
18
19/*****************************************************************
20*    Lean and mean begins here                                   *
21*****************************************************************/
22
23#include "db_utilities.h"
24#include "db_feature_matching.h"
25#ifdef _VERBOSE_
26#include <iostream>
27#endif
28
29
/* Integer 11x11 lookup tables (x and y components). Zero-initialized at
   program start; not populated in this part of the file.
   NOTE(review): "NN" presumably stands for nearest-neighbour - confirm
   against the code that fills and reads these tables. */
int AffineWarpPoint_NN_LUT_x[11][11], AffineWarpPoint_NN_LUT_y[11][11];

/* Floating-point 11x11 lookup tables (x and y components). Zero-initialized
   at program start; not populated in this part of the file.
   NOTE(review): "BL" presumably stands for bilinear - confirm against the
   code that fills and reads these tables. */
float AffineWarpPoint_BL_LUT_x[11][11], AffineWarpPoint_BL_LUT_y[11][11];
35
36
/*!
 * Signed squared normalized correlation of two 7x7 windows in 8-bit images.
 *
 * The window in f_img is centered on (x_f,y_f) and the window in g_img on
 * (x_g,y_g); both images are addressed as img[row][column]. With N=49 the
 * result is
 *
 *     sign(c) * c^2 / ((N*sum(f^2)-sum(f)^2) * (N*sum(g^2)-sum(g)^2)),
 *     c = N*sum(f*g) - sum(f)*sum(g)
 *
 * so it keeps the sign of the correlation while avoiding a square root.
 * Returns 0 when the denominator is exactly zero (e.g. a window with
 * constant intensity).
 *
 * No bounds checking is done: rows y-3..y+3 and columns x-3..x+3 are read
 * from both images and must be valid.
 */
inline float db_SignedSquareNormCorr7x7_u(unsigned char **f_img,unsigned char **g_img,int x_f,int y_f,int x_g,int y_g)
{
    const int half=3;
    const int side=2*half+1;
    float sum_fg=0.0f,sum_ff=0.0f,sum_gg=0.0f,sum_f=0.0f,sum_g=0.0f;

    /* Visit the window row by row, left to right */
    for(int dy=-half;dy<=half;dy++)
    {
        const unsigned char *row_f=f_img[y_f+dy]+(x_f-half);
        const unsigned char *row_g=g_img[y_g+dy]+(x_g-half);

        for(int dx=0;dx<side;dx++)
        {
            const float fv=row_f[dx];
            const float gv=row_g[dx];

            sum_fg+=fv*gv;
            sum_ff+=fv*fv;
            sum_gg+=gv*gv;
            sum_f+=fv;
            sum_g+=gv;
        }
    }

    /* Covariance and variance terms, all scaled by the pixel count */
    const float numer=49.0f*sum_fg-sum_f*sum_g;
    const float denom=(49.0f*sum_ff-sum_f*sum_f)*(49.0f*sum_gg-sum_g*sum_g);

    if(denom==0.0) return(0.0);
    return((numer>=0.0)?(numer*numer/denom):(-numer*numer/denom));
}
119
/*!
 * Signed squared normalized correlation of two 9x9 windows in 8-bit images.
 *
 * The window in f_img is centered on (x_f,y_f) and the window in g_img on
 * (x_g,y_g); both images are addressed as img[row][column]. With N=81 the
 * result is
 *
 *     sign(c) * c^2 / ((N*sum(f^2)-sum(f)^2) * (N*sum(g^2)-sum(g)^2)),
 *     c = N*sum(f*g) - sum(f)*sum(g)
 *
 * Returns 0 when the denominator is exactly zero (e.g. a window with
 * constant intensity).
 *
 * No bounds checking is done: rows y-4..y+4 and columns x-4..x+4 are read
 * from both images and must be valid.
 */
inline float db_SignedSquareNormCorr9x9_u(unsigned char **f_img,unsigned char **g_img,int x_f,int y_f,int x_g,int y_g)
{
    enum { kHalf=4, kSide=9 };

    const int left_f=x_f-kHalf;
    const int left_g=x_g-kHalf;
    float acc_fg=0.0f;
    float acc_ff=0.0f;
    float acc_gg=0.0f;
    float acc_f=0.0f;
    float acc_g=0.0f;

    for(int row=-kHalf;row<=kHalf;++row)
    {
        /* Walk both rows in lock-step from the left edge of the window */
        const unsigned char *pf=f_img[y_f+row]+left_f;
        const unsigned char *pg=g_img[y_g+row]+left_g;
        const unsigned char * const pf_end=pf+kSide;

        while(pf!=pf_end)
        {
            const float f= *pf++;
            const float g= *pg++;

            acc_fg+=f*g;
            acc_ff+=f*f;
            acc_gg+=g*g;
            acc_f+=f;
            acc_g+=g;
        }
    }

    /* Covariance and variance terms, all scaled by the pixel count */
    const float fg_corr=81.0f*acc_fg-acc_f*acc_g;
    const float den=(81.0f*acc_ff-acc_f*acc_f)*(81.0f*acc_gg-acc_g*acc_g);

    if(den==0.0)
    {
        return(0.0);
    }
    if(fg_corr>=0.0)
    {
        return(fg_corr*fg_corr/den);
    }
    return(-fg_corr*fg_corr/den);
}
238
/*!
 * Signed squared normalized correlation of two 11x11 windows in 8-bit images.
 *
 * The window in f_img is centered on (x_f,y_f) and the window in g_img on
 * (x_g,y_g); both images are addressed as img[row][column]. With N=121 the
 * result is
 *
 *     sign(c) * c^2 / ((N*sum(f^2)-sum(f)^2) * (N*sum(g^2)-sum(g)^2)),
 *     c = N*sum(f*g) - sum(f)*sum(g)
 *
 * Returns 0 when the denominator is exactly zero (e.g. a window with
 * constant intensity).
 *
 * No bounds checking is done: rows y-5..y+5 and columns x-5..x+5 are read
 * from both images and must be valid.
 */
inline float db_SignedSquareNormCorr11x11_u(unsigned char **f_img,unsigned char **g_img,int x_f,int y_f,int x_g,int y_g)
{
    const int radius=5;
    const int width=11;
    const int col0_f=x_f-radius;
    const int col0_g=x_g-radius;
    float fg_total=0.0f,ff_total=0.0f,gg_total=0.0f,f_total=0.0f,g_total=0.0f;

    /* Rows top to bottom, pixels left to right within each row */
    for(int k=0;k<width;k++)
    {
        const unsigned char *line_f=f_img[y_f-radius+k]+col0_f;
        const unsigned char *line_g=g_img[y_g-radius+k]+col0_g;

        for(int i=0;i<width;i++)
        {
            const float a=line_f[i];
            const float b=line_g[i];

            fg_total+=a*b;
            ff_total+=a*a;
            gg_total+=b*b;
            f_total+=a;
            g_total+=b;
        }
    }

    /* Covariance and variance terms, all scaled by the pixel count */
    const float cross=121.0f*fg_total-f_total*g_total;
    const float scale=(121.0f*ff_total-f_total*f_total)*(121.0f*gg_total-g_total*g_total);

    if(scale!=0.0)
    {
        return((cross>=0.0)?(cross*cross/scale):(-cross*cross/scale));
    }
    return(0.0);
}
401
/*!
 * Precompute the per-window terms of the 11x11 signed squared normalized
 * correlation for a single image.
 *
 * Reads the 11x11 window of f_img (addressed as f_img[row][column]) centered
 * on (x_f,y_f) and writes:
 *   *sum   - the sum of the 121 pixel values
 *   *recip - 1/(121*sum(f^2)-sum(f)^2), or 0 when that denominator is
 *            exactly zero (e.g. a window with constant intensity)
 *
 * The sums are accumulated in integers. No bounds checking is done: rows
 * y_f-5..y_f+5 and columns x_f-5..x_f+5 must be valid.
 */
inline void db_SignedSquareNormCorr11x11_Pre_u(unsigned char **f_img,int x_f,int y_f,float *sum,float *recip)
{
    const int radius=5;
    const int width=11;
    int pix_total=0;
    int sq_total=0;

    for(int dy=-radius;dy<=radius;dy++)
    {
        const unsigned char *line=f_img[y_f+dy]+(x_f-radius);

        for(int i=0;i<width;i++)
        {
            const int v=line[i];

            sq_total+=v*v;
            pix_total+=v;
        }
    }

    *sum=(float)pix_total;

    /* 121 times the sum of squares minus the squared sum */
    const float denom=(121.0f*sq_total-pix_total*pix_total);
    if(denom!=0.0)
    {
        *recip=(float)(1.0/denom);
    }
    else
    {
        *recip=(float)(0.0);
    }
}
558
/*!
 * Copy a 5x5 window of an 8-bit image into a padded patch of shorts and
 * precompute the normalization terms of the signed squared normalized
 * correlation.
 *
 * The window of f_img (addressed as f_img[row][column]) centered on
 * (x_f,y_f) is written to patch in row-major order (25 values), followed by
 * 7 zeros, so exactly 32 shorts are written. Outputs:
 *   *sum   - the sum of the 25 pixel values
 *   *recip - 1/(25*sum(f^2)-sum(f)^2), or 0 when that denominator is exactly
 *            zero (e.g. a window with constant intensity)
 *
 * No bounds checking is done: rows y_f-2..y_f+2 and columns x_f-2..x_f+2
 * must be valid and patch must have room for 32 shorts.
 */
inline void db_SignedSquareNormCorr5x5_PreAlign_u(short *patch,const unsigned char * const *f_img,int x_f,int y_f,float *sum,float *recip)
{
    int f2sum=0;
    int fsum=0;
    const int xm_f=x_f-2;

    /* The scalar path runs regardless of DB_USE_SSE2: this routine has no
       SSE2 variant, and compiling the scalar code out would leave the sums
       uninitialized and the patch unwritten. */
    for(int r=-2;r<=2;r++)
    {
        const unsigned char *pf=f_img[y_f+r]+xm_f;

        for(int c=0;c<5;c++)
        {
            const short f=pf[c];

            f2sum+=f*f;
            fsum+=f;
            (*patch++)=f;
        }
    }

    /* Zero-pad the 25 window values up to 32 shorts */
    for(int i=25;i<32;i++)
    {
        (*patch++)=0;
    }

    *sum= (float) fsum;
    const float den=(25.0f*f2sum-fsum*fsum);
    *recip= (float)((den!=0.0)?1.0/den:0.0);
}
625
/*!
Lay out the 21x21 neighborhood of f_img centered on (x_f,y_f) in patch and
compute the terms needed for signed squared normalized correlation.

\param patch    receives 512 shorts: the 441 pixels in row-major order
                (rows y_f-10..y_f+10, columns x_f-10..x_f+10) followed by
                71 zeros
\param f_img    image as an array of row pointers
\param x_f      column of the patch center
\param y_f      row of the patch center
\param sum      receives the sum of the 441 pixel values
\param recip    receives 1/(441*sum_of_squares - sum*sum), or 0 when that
                denominator is zero
 */
inline void db_SignedSquareNormCorr21x21_PreAlign_u(short *patch,const unsigned char * const *f_img,int x_f,int y_f,float *sum,float *recip)
{
    int f2sum=0;
    int fsum=0;

    for (int r=-10;r<=10;r++){
        const int ywi=y_f+r;
        for (int c=-10;c<=10;c++){
            const short f=f_img[ywi][x_f+c];
            f2sum+=f*f;
            fsum+=f;
            (*patch++)=f;
        }
    }

    /* Pad entries 441..511 with zeros. Starting at 442 would leave the
       last entry of the 512-short patch unwritten. */
    for(int i=441; i<512; i++)
        (*patch++)=0;

    *sum= (float) fsum;

    /* fsum can reach 441*255=112455, so fsum*fsum does not fit in an int.
       Both terms are exactly representable in a double, which makes the
       denominator exact (and exactly zero for a constant patch). */
    const double den=441.0*(double)f2sum-(double)fsum*(double)fsum;
    *recip= (float)((den!=0.0)?1.0/den:0.0);
}
657
/*!
Lay out the 11x11 neighborhood of f_img centered on (x_f,y_f) in patch and
compute the terms needed for signed squared normalized correlation.

\param patch    receives 128 shorts: the 121 pixels in row-major order
                (rows y_f-5..y_f+5, columns x_f-5..x_f+5) followed by 7 zeros
\param f_img    image as an array of row pointers
\param x_f      column of the patch center
\param y_f      row of the patch center
\param sum      receives the sum of the 121 pixel values
\param recip    receives 1/(121*sum_of_squares - sum*sum), or 0 when that
                denominator is zero

Two implementations are selected by DB_USE_SSE2: a fully unrolled scalar
version, and an inline-assembly SSE2 version (written with the `_asm` keyword
and 32-bit registers). The SSE2 version loads 16 bytes from every row, i.e.
it reads 5 bytes beyond the 11 pixels that are used, and stores rows 0 and 8
with movdqa.
NOTE(review): movdqa requires a 16-byte aligned address, so patch is
presumably expected to be 16-byte aligned in SSE2 builds - confirm with callers.
*/
inline void db_SignedSquareNormCorr11x11_PreAlign_u(short *patch,const unsigned char * const *f_img,int x_f,int y_f,float *sum,float *recip)
{
    float den;
    int f2sum,fsum;
    int xm_f=x_f-5; /* leftmost column of the patch */

#ifndef DB_USE_SSE2
    const unsigned char *pf;
    short f;

    /* Each group below copies one image row (11 pixels) into the patch while
       accumulating the sum and the sum of squares. The very first statement
       initializes the accumulators instead of adding to them. */

    /* row y_f-5 */
    pf=f_img[y_f-5]+xm_f;
    f= *pf++; f2sum=f*f;  fsum=f;  (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;

    /* row y_f-4 */
    pf=f_img[y_f-4]+xm_f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;

    /* row y_f-3 */
    pf=f_img[y_f-3]+xm_f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;

    /* row y_f-2 */
    pf=f_img[y_f-2]+xm_f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;

    /* row y_f-1 */
    pf=f_img[y_f-1]+xm_f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;

    /* row y_f (center row) */
    pf=f_img[y_f]+xm_f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;

    /* row y_f+1 */
    pf=f_img[y_f+1]+xm_f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;

    /* row y_f+2 */
    pf=f_img[y_f+2]+xm_f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;

    /* row y_f+3 */
    pf=f_img[y_f+3]+xm_f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;

    /* row y_f+4 */
    pf=f_img[y_f+4]+xm_f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;

    /* row y_f+5 */
    pf=f_img[y_f+5]+xm_f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;

    /* The unrolled code above is equivalent to a double loop over
       r,c in [-5,5] reading f_img[y_f+r][x_f+c] with f2sum and fsum
       starting at zero. */

    /* pad 121 -> 128 entries with zeros */
    (*patch++)=0; (*patch++)=0; (*patch++)=0; (*patch++)=0; (*patch++)=0;
    (*patch++)=0; (*patch++)=0;
#else
    /* One pointer per patch row, each at the leftmost patch column. */
    const unsigned char *pf0 =f_img[y_f-5]+xm_f;
    const unsigned char *pf1 =f_img[y_f-4]+xm_f;
    const unsigned char *pf2 =f_img[y_f-3]+xm_f;
    const unsigned char *pf3 =f_img[y_f-2]+xm_f;
    const unsigned char *pf4 =f_img[y_f-1]+xm_f;
    const unsigned char *pf5 =f_img[y_f  ]+xm_f;
    const unsigned char *pf6 =f_img[y_f+1]+xm_f;
    const unsigned char *pf7 =f_img[y_f+2]+xm_f;
    const unsigned char *pf8 =f_img[y_f+3]+xm_f;
    const unsigned char *pf9 =f_img[y_f+4]+xm_f;
    const unsigned char *pf10=f_img[y_f+5]+xm_f;

    /* pixel mask: applied to the unpacked high half of a 16-pixel load
       (pixels 8..15 as 16-bit lanes). The six 0xFF bytes keep the first
       three lanes (pixels 8,9,10); the remaining five lanes are cleared. */
    const unsigned char pm[16] = {
        0xFF,0xFF,
        0xFF,0xFF,
        0xFF,0xFF,
        0,0,0,0,0,
        0,0,0,0,0};
    const unsigned char * pm_p = pm;

    /* Register roles inside the asm block:
         ecx  = patch base, eax = current row pointer, ebx = mask pointer
         xmm0 = zero (for byte->word unpacking), xmm1 = pixel mask
         xmm4 = per-lane 16-bit running sums, xmm5 = per-lane 32-bit sums of squares
         xmm6/xmm7 = high/low 8 pixels of the current row as 16-bit values
       Rows are stored at a stride of 22 bytes (11 shorts). Each row writes
       16 shorts, so the 5 zeroed lanes written past a row's 11 pixels are
       overwritten by the next row. */
    _asm
    {
        mov         ecx,patch   /* load patch pointer */
        mov         ebx, pm_p   /* load pixel mask pointer */
        movdqu      xmm1,[ebx]  /* load pixel mask */

        pxor        xmm5,xmm5   /* set xmm5 to 0 accumulator for sum squares */
        pxor        xmm4,xmm4   /* set xmm4 to 0 accumulator for sum */
        pxor        xmm0,xmm0   /* set xmm0 to 0 */

        /* row 0 */
        mov         eax,pf0     /* load image pointer */
        movdqu      xmm7,[eax]  /* load 16 pixels */
        movdqa      xmm6,xmm7

        punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
        punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/

        pand        xmm6,xmm1   /* keep pixels 8-10, zero pixels 11-15 */

        movdqa      [ecx+0*22],xmm7 /* move short values to patch (aligned store) */
        movdqa      [ecx+0*22+16],xmm6  /* move short values to patch (aligned store) */

        paddusw     xmm4,xmm7   /* accumulate sums */
        pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
        paddd       xmm5,xmm7   /* accumulate sum squares */

        paddw       xmm4,xmm6   /* accumulate sums */
        pmaddwd     xmm6,xmm6   /* multiply 16 bit ints and add into 32 bit ints */
        paddd       xmm5,xmm6   /* accumulate sum squares */

        /* row 1 */
        mov         eax,pf1     /* load image pointer */
        movdqu      xmm7,[eax]  /* load 16 pixels */
        movdqa      xmm6,xmm7

        punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
        punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/

        pand        xmm6,xmm1   /* keep pixels 8-10, zero pixels 11-15 */

        movdqu      [ecx+1*22],xmm7 /* move short values to patch */
        movdqu      [ecx+1*22+16],xmm6  /* move short values to patch */

        paddusw     xmm4,xmm7   /* accumulate sums */
        pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
        paddd       xmm5,xmm7   /* accumulate sum squares */

        paddw       xmm4,xmm6   /* accumulate sums */
        pmaddwd     xmm6,xmm6   /* multiply 16 bit ints and add into 32 bit ints */
        paddd       xmm5,xmm6   /* accumulate sum squares */

        /* row 2 */
        mov         eax,pf2     /* load image pointer */
        movdqu      xmm7,[eax]  /* load 16 pixels */
        movdqa      xmm6,xmm7

        punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
        punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/

        pand        xmm6,xmm1   /* keep pixels 8-10, zero pixels 11-15 */

        movdqu      [ecx+2*22],xmm7 /* move short values to patch */
        movdqu      [ecx+2*22+16],xmm6  /* move short values to patch */

        paddusw     xmm4,xmm7   /* accumulate sums */
        pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
        paddd       xmm5,xmm7   /* accumulate sum squares */

        paddw       xmm4,xmm6   /* accumulate sums */
        pmaddwd     xmm6,xmm6   /* multiply 16 bit ints and add into 32 bit ints */
        paddd       xmm5,xmm6   /* accumulate sum squares */

        /* row 3 */
        mov         eax,pf3     /* load image pointer */
        movdqu      xmm7,[eax]  /* load 16 pixels */
        movdqa      xmm6,xmm7

        punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
        punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/

        pand        xmm6,xmm1   /* keep pixels 8-10, zero pixels 11-15 */

        movdqu      [ecx+3*22],xmm7 /* move short values to patch */
        movdqu      [ecx+3*22+16],xmm6  /* move short values to patch */

        paddusw     xmm4,xmm7   /* accumulate sums */
        pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
        paddd       xmm5,xmm7   /* accumulate sum squares */

        paddw       xmm4,xmm6   /* accumulate sums */
        pmaddwd     xmm6,xmm6   /* multiply 16 bit ints and add into 32 bit ints */
        paddd       xmm5,xmm6   /* accumulate sum squares */

        /* row 4 */
        mov         eax,pf4     /* load image pointer */
        movdqu      xmm7,[eax]  /* load 16 pixels */
        movdqa      xmm6,xmm7

        punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
        punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/

        pand        xmm6,xmm1   /* keep pixels 8-10, zero pixels 11-15 */

        movdqu      [ecx+4*22],xmm7 /* move short values to patch */
        movdqu      [ecx+4*22+16],xmm6  /* move short values to patch */

        paddusw     xmm4,xmm7   /* accumulate sums */
        pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
        paddd       xmm5,xmm7   /* accumulate sum squares */

        paddw       xmm4,xmm6   /* accumulate sums */
        pmaddwd     xmm6,xmm6   /* multiply 16 bit ints and add into 32 bit ints */
        paddd       xmm5,xmm6   /* accumulate sum squares */

        /* row 5 */
        mov         eax,pf5     /* load image pointer */
        movdqu      xmm7,[eax]  /* load 16 pixels */
        movdqa      xmm6,xmm7

        punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
        punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/

        pand        xmm6,xmm1   /* keep pixels 8-10, zero pixels 11-15 */

        movdqu      [ecx+5*22],xmm7 /* move short values to patch */
        movdqu      [ecx+5*22+16],xmm6  /* move short values to patch */

        paddusw     xmm4,xmm7   /* accumulate sums */
        pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
        paddd       xmm5,xmm7   /* accumulate sum squares */

        paddw       xmm4,xmm6   /* accumulate sums */
        pmaddwd     xmm6,xmm6   /* multiply 16 bit ints and add into 32 bit ints */
        paddd       xmm5,xmm6   /* accumulate sum squares */

        /* row 6 */
        mov         eax,pf6     /* load image pointer */
        movdqu      xmm7,[eax]  /* load 16 pixels */
        movdqa      xmm6,xmm7

        punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
        punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/

        pand        xmm6,xmm1   /* keep pixels 8-10, zero pixels 11-15 */

        movdqu      [ecx+6*22],xmm7 /* move short values to patch */
        movdqu      [ecx+6*22+16],xmm6  /* move short values to patch */

        paddusw     xmm4,xmm7   /* accumulate sums */
        pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
        paddd       xmm5,xmm7   /* accumulate sum squares */

        paddw       xmm4,xmm6   /* accumulate sums */
        pmaddwd     xmm6,xmm6   /* multiply 16 bit ints and add into 32 bit ints */
        paddd       xmm5,xmm6   /* accumulate sum squares */

        /* row 7 */
        mov         eax,pf7     /* load image pointer */
        movdqu      xmm7,[eax]  /* load 16 pixels */
        movdqa      xmm6,xmm7

        punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
        punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/

        pand        xmm6,xmm1   /* keep pixels 8-10, zero pixels 11-15 */

        movdqu      [ecx+7*22],xmm7 /* move short values to patch */
        movdqu      [ecx+7*22+16],xmm6  /* move short values to patch */

        paddusw     xmm4,xmm7   /* accumulate sums */
        pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
        paddd       xmm5,xmm7   /* accumulate sum squares */

        paddw       xmm4,xmm6   /* accumulate sums */
        pmaddwd     xmm6,xmm6   /* multiply 16 bit ints and add into 32 bit ints */
        paddd       xmm5,xmm6   /* accumulate sum squares */

        /* row 8 */
        mov         eax,pf8     /* load image pointer */
        movdqu      xmm7,[eax]  /* load 16 pixels */
        movdqa      xmm6,xmm7

        punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
        punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/

        pand        xmm6,xmm1   /* keep pixels 8-10, zero pixels 11-15 */

        movdqa      [ecx+8*22],xmm7 /* move short values to patch (aligned store: 8*22=176 is a multiple of 16) */
        movdqa      [ecx+8*22+16],xmm6  /* move short values to patch (aligned store) */

        paddusw     xmm4,xmm7   /* accumulate sums */
        pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
        paddd       xmm5,xmm7   /* accumulate sum squares */

        paddw       xmm4,xmm6   /* accumulate sums */
        pmaddwd     xmm6,xmm6   /* multiply 16 bit ints and add into 32 bit ints */
        paddd       xmm5,xmm6   /* accumulate sum squares */

        /* row 9 */
        mov         eax,pf9     /* load image pointer */
        movdqu      xmm7,[eax]  /* load 16 pixels */
        movdqa      xmm6,xmm7

        punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
        punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/

        pand        xmm6,xmm1   /* keep pixels 8-10, zero pixels 11-15 */

        movdqu      [ecx+9*22],xmm7 /* move short values to patch */
        movdqu      [ecx+9*22+16],xmm6  /* move short values to patch */

        paddusw     xmm4,xmm7   /* accumulate sums */
        pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
        paddd       xmm5,xmm7   /* accumulate sum squares */

        paddw       xmm4,xmm6   /* accumulate sums */
        pmaddwd     xmm6,xmm6   /* multiply 16 bit ints and add into 32 bit ints */
        paddd       xmm5,xmm6   /* accumulate sum squares */

        /* row 10 */
        mov         eax,pf10    /* load image pointer */
        movdqu      xmm7,[eax]  /* load 16 pixels */
        movdqa      xmm6,xmm7

        punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
        punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/

        pand        xmm6,xmm1   /* keep pixels 8-10, zero pixels 11-15 */

        movdqu      [ecx+10*22],xmm7    /* move short values to patch */
        movdqu      [ecx+10*22+16],xmm6 /* move short values to patch */

        paddusw     xmm4,xmm7   /* accumulate sums */
        pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
        paddd       xmm5,xmm7   /* accumulate sum squares */

        paddw       xmm4,xmm6   /* accumulate sums */
        pmaddwd     xmm6,xmm6   /* multiply 16 bit ints and add into 32 bit ints */
        paddd       xmm5,xmm6   /* accumulate sum squares */

        /* add up the sum squares (four 32-bit lanes -> lane 0) */
        movhlps     xmm0,xmm5   /* high half to low half */
        paddd       xmm5,xmm0   /* add high to low */
        pshuflw     xmm0,xmm5, 0xE /* reshuffle: move the second 32-bit lane to lane 0 */
        paddd       xmm5,xmm0   /* add remaining */
        movd        f2sum,xmm5

        /* add up the sum (eight 16-bit lanes -> lane 0) */
        movhlps     xmm0,xmm4
        paddw       xmm4,xmm0   /* halves added */
        pshuflw     xmm0,xmm4,0xE
        paddw       xmm4,xmm0   /* quarters added */
        pshuflw     xmm0,xmm4,0x1
        paddw       xmm4,xmm0   /* eighth added */
        movd        fsum, xmm4  /* copies 32 bits; only the low 16 hold the total */

        /* NOTE(review): only XMM registers are used above, so this emms
           looks unnecessary - confirm before removing. */
        emms
    }

    /* Discard the upper 16 bits copied by movd; the total is in the low
       16-bit lane. */
    fsum = fsum & 0xFFFF;

    /* Row 10 wrote shorts 110..125; zero the last two entries of the
       128-short patch. patch was not advanced in this branch. */
    patch[126] = 0;
    patch[127] = 0;
#endif /* DB_USE_SSE2 */

    *sum= (float) fsum;
    /* fsum<=121*255, so fsum*fsum still fits in an int */
    den=(121.0f*f2sum-fsum*fsum);
    *recip= (float)((den!=0.0)?1.0/den:0.0);
}
1122
/*!
Apply the upper-left 2x2 block of Hinv to the offset (c,r).

\param r_w  receives Hinv[3]*c+Hinv[4]*r (warped row offset)
\param c_w  receives Hinv[0]*c+Hinv[1]*r (warped column offset)
\param Hinv 3x3 matrix in row-major order; only entries 0,1,3,4 are read
\param r    row offset
\param c    column offset
 */
void AffineWarpPointOffset(float &r_w,float &c_w,double Hinv[9],int r,int c)
{
    const double *col_coeffs=Hinv;      /* first matrix row:  produces the column offset */
    const double *row_coeffs=Hinv+3;    /* second matrix row: produces the row offset */

    const float warped_row=static_cast<float>(row_coeffs[0]*c+row_coeffs[1]*r);
    const float warped_col=static_cast<float>(col_coeffs[0]*c+col_coeffs[1]*r);

    r_w=warped_row;
    c_w=warped_col;
}
1128
1129
1130
1131/*!
1132Prewarp the patches with given affine transform. For a given homogeneous point "x", "H*x" is
1133the warped point and for any displacement "d" in the warped image resulting in point "y", the
1134corresponding point in the original image is given by "Hinv*y", which can be simplified for affine H.
1135If "affine" is 1, then nearest neighbor method is used, else if it is 2, then
1136bilinear method is used.
1137 */
1138inline void db_SignedSquareNormCorr11x11_PreAlign_AffinePatchWarp_u(short *patch,const unsigned char * const *f_img,
1139                                                                    int xi,int yi,float *sum,float *recip,
1140                                                                    const double Hinv[9],int affine)
1141{
1142    float den;
1143    short f;
1144    int f2sum,fsum;
1145
1146    f2sum=0;
1147    fsum=0;
1148
1149    if (affine==1)
1150    {
1151        for (int r=0;r<11;r++){
1152            for (int c=0;c<11;c++){
1153                f=f_img[yi+AffineWarpPoint_NN_LUT_y[r][c]][xi+AffineWarpPoint_NN_LUT_x[r][c]];
1154                f2sum+=f*f;
1155                fsum+=f;
1156                (*patch++)=f;
1157            }
1158        }
1159    }
1160    else if (affine==2)
1161    {
1162        for (int r=0;r<11;r++){
1163            for (int c=0;c<11;c++){
1164                f=db_BilinearInterpolation(yi+AffineWarpPoint_BL_LUT_y[r][c]
1165                ,xi+AffineWarpPoint_BL_LUT_x[r][c],f_img);
1166                f2sum+=f*f;
1167                fsum+=f;
1168                (*patch++)=f;
1169            }
1170        }
1171    }
1172
1173
1174
1175    (*patch++)=0; (*patch++)=0; (*patch++)=0; (*patch++)=0; (*patch++)=0;
1176    (*patch++)=0; (*patch++)=0;
1177
1178    *sum= (float) fsum;
1179    den=(121.0f*f2sum-fsum*fsum);
1180    *recip= (float)((den!=0.0)?1.0/den:0.0);
1181}
1182
1183
/*!
Signed squared normalized correlation of two 11x11 image windows, given
precomputed normalization terms.

The windows are centered on (x_f,y_f) in f_img and (x_g,y_g) in g_img. With
fgsum the sum of the 121 pixel products, the value
fg_corr=121*fgsum-fsum_gsum is squared, given the sign of fg_corr, and
multiplied by f_recip_g_recip.

\param f_img,g_img      images as arrays of row pointers
\param x_f,y_f          window center in f_img
\param x_g,y_g          window center in g_img
\param fsum_gsum        value subtracted from 121*fgsum
\param f_recip_g_recip  final scale factor
\return sign(fg_corr)*fg_corr^2*f_recip_g_recip
 */
inline float db_SignedSquareNormCorr11x11_Post_u(unsigned char **f_img,unsigned char **g_img,int x_f,int y_f,int x_g,int y_g,
                                                float fsum_gsum,float f_recip_g_recip)
{
    const int left_f=x_f-5;
    const int left_g=x_g-5;

    /* integer accumulation: 121*255*255 fits comfortably in an int */
    int cross=0;
    for(int dy=-5;dy<=5;++dy)
    {
        const unsigned char *row_f=f_img[y_f+dy]+left_f;
        const unsigned char *row_g=g_img[y_g+dy]+left_g;
        for(int dx=0;dx<11;++dx)
        {
            cross+=row_f[dx]*row_g[dx];
        }
    }

    const float fg_corr=121.0f*cross-fsum_gsum;

    /* first factor carries the sign flip for negative correlation */
    const float lead=(fg_corr>=0.0)?fg_corr:-fg_corr;
    return(lead*fg_corr*f_recip_g_recip);
}
1265
1266float db_SignedSquareNormCorr21x21Aligned_Post_s(const short *f_patch,const short *g_patch,float fsum_gsum,float f_recip_g_recip)
1267{
1268    float fgsum,fg_corr;
1269
1270    fgsum= (float) db_ScalarProduct512_s(f_patch,g_patch);
1271
1272    fg_corr=441.0f*fgsum-fsum_gsum;
1273    if(fg_corr>=0.0) return(fg_corr*fg_corr*f_recip_g_recip);
1274    return(-fg_corr*fg_corr*f_recip_g_recip);
1275}
1276
1277
1278float db_SignedSquareNormCorr11x11Aligned_Post_s(const short *f_patch,const short *g_patch,float fsum_gsum,float f_recip_g_recip)
1279{
1280    float fgsum,fg_corr;
1281
1282    fgsum= (float) db_ScalarProduct128_s(f_patch,g_patch);
1283
1284    fg_corr=121.0f*fgsum-fsum_gsum;
1285    if(fg_corr>=0.0) return(fg_corr*fg_corr*f_recip_g_recip);
1286    return(-fg_corr*fg_corr*f_recip_g_recip);
1287}
1288
1289float db_SignedSquareNormCorr5x5Aligned_Post_s(const short *f_patch,const short *g_patch,float fsum_gsum,float f_recip_g_recip)
1290{
1291    float fgsum,fg_corr;
1292
1293    fgsum= (float) db_ScalarProduct32_s(f_patch,g_patch);
1294
1295    fg_corr=25.0f*fgsum-fsum_gsum;
1296    if(fg_corr>=0.0) return(fg_corr*fg_corr*f_recip_g_recip);
1297    return(-fg_corr*fg_corr*f_recip_g_recip);
1298}
1299
1300
1301inline float db_SignedSquareNormCorr15x15_u(unsigned char **f_img,unsigned char **g_img,int x_f,int y_f,int x_g,int y_g)
1302{
1303    unsigned char *pf,*pg;
1304    float f,g,fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den;
1305    int xm_f,xm_g;
1306
1307    xm_f=x_f-7;
1308    xm_g=x_g-7;
1309    fgsum=0.0; f2sum=0.0; g2sum=0.0; fsum=0.0; gsum=0.0;
1310
1311    pf=f_img[y_f-7]+xm_f; pg=g_img[y_g-7]+xm_g;
1312    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1313    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1314    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1315    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1316    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1317    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1318    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1319    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1320    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1321    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1322    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1323    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1324    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1325    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1326    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1327
1328    pf=f_img[y_f-6]+xm_f; pg=g_img[y_g-6]+xm_g;
1329    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1330    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1331    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1332    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1333    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1334    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1335    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1336    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1337    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1338    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1339    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1340    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1341    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1342    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1343    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1344
1345    pf=f_img[y_f-5]+xm_f; pg=g_img[y_g-5]+xm_g;
1346    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1347    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1348    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1349    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1350    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1351    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1352    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1353    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1354    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1355    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1356    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1357    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1358    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1359    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1360    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1361
1362    pf=f_img[y_f-4]+xm_f; pg=g_img[y_g-4]+xm_g;
1363    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1364    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1365    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1366    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1367    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1368    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1369    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1370    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1371    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1372    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1373    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1374    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1375    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1376    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1377    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1378
1379    pf=f_img[y_f-3]+xm_f; pg=g_img[y_g-3]+xm_g;
1380    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1381    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1382    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1383    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1384    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1385    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1386    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1387    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1388    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1389    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1390    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1391    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1392    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1393    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1394    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1395
1396    pf=f_img[y_f-2]+xm_f; pg=g_img[y_g-2]+xm_g;
1397    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1398    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1399    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1400    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1401    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1402    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1403    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1404    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1405    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1406    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1407    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1408    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1409    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1410    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1411    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1412
1413    pf=f_img[y_f-1]+xm_f; pg=g_img[y_g-1]+xm_g;
1414    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1415    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1416    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1417    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1418    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1419    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1420    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1421    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1422    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1423    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1424    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1425    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1426    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1427    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1428    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1429
1430    pf=f_img[y_f]+xm_f; pg=g_img[y_g]+xm_g;
1431    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1432    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1433    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1434    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1435    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1436    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1437    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1438    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1439    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1440    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1441    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1442    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1443    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1444    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1445    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1446
1447    pf=f_img[y_f+1]+xm_f; pg=g_img[y_g+1]+xm_g;
1448    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1449    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1450    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1451    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1452    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1453    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1454    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1455    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1456    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1457    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1458    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1459    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1460    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1461    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1462    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1463
1464    pf=f_img[y_f+2]+xm_f; pg=g_img[y_g+2]+xm_g;
1465    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1466    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1467    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1468    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1469    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1470    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1471    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1472    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1473    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1474    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1475    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1476    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1477    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1478    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1479    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1480
1481    pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g;
1482    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1483    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1484    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1485    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1486    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1487    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1488    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1489    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1490    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1491    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1492    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1493    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1494    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1495    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1496    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1497
1498    pf=f_img[y_f+4]+xm_f; pg=g_img[y_g+4]+xm_g;
1499    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1500    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1501    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1502    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1503    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1504    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1505    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1506    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1507    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1508    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1509    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1510    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1511    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1512    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1513    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1514
1515    pf=f_img[y_f+5]+xm_f; pg=g_img[y_g+5]+xm_g;
1516    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1517    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1518    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1519    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1520    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1521    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1522    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1523    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1524    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1525    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1526    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1527    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1528    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1529    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1530    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1531
1532    pf=f_img[y_f+6]+xm_f; pg=g_img[y_g+6]+xm_g;
1533    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1534    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1535    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1536    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1537    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1538    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1539    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1540    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1541    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1542    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1543    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1544    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1545    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1546    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1547    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1548
1549    pf=f_img[y_f+7]+xm_f; pg=g_img[y_g+7]+xm_g;
1550    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1551    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1552    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1553    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1554    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1555    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1556    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1557    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1558    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1559    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1560    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1561    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1562    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1563    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1564    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1565
1566    fg_corr=225.0f*fgsum-fsum*gsum;
1567    den=(225.0f*f2sum-fsum*fsum)*(225.0f*g2sum-gsum*gsum);
1568    if(den!=0.0)
1569    {
1570        if(fg_corr>=0.0) return(fg_corr*fg_corr/den);
1571        return(-fg_corr*fg_corr/den);
1572    }
1573    return(0.0);
1574}
1575
/*
 * Signed, squared normalized correlation of two 7x7 float patches.
 *
 * f_img and g_img are indexed as img[row][column]. The patch taken from
 * f_img spans rows y_f-3..y_f+3 and columns x_f-3..x_f+3; the patch taken
 * from g_img spans rows y_g-3..y_g+3 and columns x_g-3..x_g+3.
 * No bounds checking is performed on either image.
 *
 * With c = 49*sum(f*g) - sum(f)*sum(g) and
 * d = (49*sum(f*f) - sum(f)^2) * (49*sum(g*g) - sum(g)^2),
 * the return value is c*c/d carrying the sign of c, or 0 when d is exactly 0.
 */
inline float db_SignedSquareNormCorr7x7_f(float **f_img,float **g_img,int x_f,int y_f,int x_g,int y_g)
{
    const int radius=3;
    const int width=2*radius+1;
    float sum_fg=0.0,sum_ff=0.0,sum_gg=0.0,sum_f=0.0,sum_g=0.0;

    /* Accumulate in row-major order: top row first, left to right within each row */
    for(int dy=-radius;dy<=radius;dy++)
    {
        const float *row_f=f_img[y_f+dy]+(x_f-radius);
        const float *row_g=g_img[y_g+dy]+(x_g-radius);

        for(int i=0;i<width;i++)
        {
            const float fv=row_f[i];
            const float gv=row_g[i];

            sum_fg+=fv*gv;
            sum_ff+=fv*fv;
            sum_gg+=gv*gv;
            sum_f+=fv;
            sum_g+=gv;
        }
    }

    /* 49 is the number of pixels in the patch */
    const float cross=49.0f*sum_fg-sum_f*sum_g;
    const float denom=(49.0f*sum_ff-sum_f*sum_f)*(49.0f*sum_gg-sum_g*sum_g);

    /* A zero denominator yields a score of zero rather than a division */
    if(denom==0.0) return(0.0);

    /* Squaring loses the sign of the correlation, so put it back explicitly */
    if(cross>=0.0) return(cross*cross/denom);
    return(-cross*cross/denom);
}
1657
/*
 * Signed, squared normalized correlation of two 9x9 float patches.
 *
 * f_img and g_img are indexed as img[row][column]. The patch taken from
 * f_img spans rows y_f-4..y_f+4 and columns x_f-4..x_f+4; the patch taken
 * from g_img spans rows y_g-4..y_g+4 and columns x_g-4..x_g+4.
 * No bounds checking is performed on either image.
 *
 * With c = 81*sum(f*g) - sum(f)*sum(g) and
 * d = (81*sum(f*f) - sum(f)^2) * (81*sum(g*g) - sum(g)^2),
 * the return value is c*c/d carrying the sign of c, or 0 when d is exactly 0.
 */
inline float db_SignedSquareNormCorr9x9_f(float **f_img,float **g_img,int x_f,int y_f,int x_g,int y_g)
{
    const int radius=4;
    const int left_f=x_f-radius;
    const int left_g=x_g-radius;
    float acc_fg=0.0,acc_f2=0.0,acc_g2=0.0,acc_f=0.0,acc_g=0.0;

    /* Walk the patch row by row from the top; pixels within a row go left to right */
    for(int row=-radius;row<=radius;row++)
    {
        const float *src_f=f_img[y_f+row]+left_f;
        const float *src_g=g_img[y_g+row]+left_g;

        for(int col=0;col<=2*radius;col++)
        {
            const float a=src_f[col];
            const float b=src_g[col];

            acc_fg+=a*b;
            acc_f2+=a*a;
            acc_g2+=b*b;
            acc_f+=a;
            acc_g+=b;
        }
    }

    /* 81 is the number of pixels in the patch */
    const float numer=81.0f*acc_fg-acc_f*acc_g;
    const float denom=(81.0f*acc_f2-acc_f*acc_f)*(81.0f*acc_g2-acc_g*acc_g);

    /* A zero denominator yields a score of zero rather than a division */
    if(denom==0.0) return(0.0);

    /* Squaring loses the sign of the correlation, so put it back explicitly */
    if(numer>=0.0) return(numer*numer/denom);
    return(-numer*numer/denom);
}
1775
1776inline float db_SignedSquareNormCorr11x11_f(float **f_img,float **g_img,int x_f,int y_f,int x_g,int y_g)
1777{
1778    float *pf,*pg;
1779    float f,g,fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den;
1780    int xm_f,xm_g;
1781
1782    xm_f=x_f-5;
1783    xm_g=x_g-5;
1784    fgsum=0.0; f2sum=0.0; g2sum=0.0; fsum=0.0; gsum=0.0;
1785
1786    pf=f_img[y_f-5]+xm_f; pg=g_img[y_g-5]+xm_g;
1787    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1788    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1789    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1790    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1791    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1792    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1793    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1794    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1795    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1796    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1797    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1798
1799    pf=f_img[y_f-4]+xm_f; pg=g_img[y_g-4]+xm_g;
1800    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1801    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1802    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1803    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1804    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1805    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1806    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1807    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1808    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1809    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1810    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1811
1812    pf=f_img[y_f-3]+xm_f; pg=g_img[y_g-3]+xm_g;
1813    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1814    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1815    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1816    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1817    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1818    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1819    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1820    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1821    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1822    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1823    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1824
1825    pf=f_img[y_f-2]+xm_f; pg=g_img[y_g-2]+xm_g;
1826    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1827    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1828    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1829    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1830    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1831    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1832    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1833    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1834    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1835    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1836    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1837
1838    pf=f_img[y_f-1]+xm_f; pg=g_img[y_g-1]+xm_g;
1839    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1840    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1841    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1842    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1843    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1844    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1845    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1846    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1847    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1848    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1849    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1850
1851    pf=f_img[y_f]+xm_f; pg=g_img[y_g]+xm_g;
1852    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1853    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1854    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1855    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1856    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1857    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1858    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1859    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1860    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1861    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1862    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1863
1864    pf=f_img[y_f+1]+xm_f; pg=g_img[y_g+1]+xm_g;
1865    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1866    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1867    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1868    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1869    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1870    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1871    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1872    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1873    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1874    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1875    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1876
1877    pf=f_img[y_f+2]+xm_f; pg=g_img[y_g+2]+xm_g;
1878    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1879    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1880    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1881    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1882    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1883    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1884    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1885    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1886    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1887    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1888    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1889
1890    pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g;
1891    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1892    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1893    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1894    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1895    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1896    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1897    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1898    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1899    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1900    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1901    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1902
1903    pf=f_img[y_f+4]+xm_f; pg=g_img[y_g+4]+xm_g;
1904    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1905    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1906    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1907    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1908    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1909    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1910    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1911    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1912    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1913    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1914    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1915
1916    pf=f_img[y_f+5]+xm_f; pg=g_img[y_g+5]+xm_g;
1917    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1918    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1919    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1920    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1921    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1922    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1923    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1924    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1925    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1926    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1927    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
1928
1929    fg_corr=121.0f*fgsum-fsum*gsum;
1930    den=(121.0f*f2sum-fsum*fsum)*(121.0f*g2sum-gsum*gsum);
1931    if(den!=0.0)
1932    {
1933        if(fg_corr>=0.0) return(fg_corr*fg_corr/den);
1934        return(-fg_corr*fg_corr/den);
1935    }
1936    return(0.0);
1937}
1938
/*
 * Pre-compute the per-patch statistics of the 11x11 float window centered
 * on (x_f,y_f), i.e. rows y_f-5..y_f+5 and columns x_f-5..x_f+5 of f_img.
 *
 * f_img  image given as an array of row pointers
 * x_f    column of the window center
 * y_f    row of the window center
 * sum    receives the sum of the 121 window values
 * recip  receives 1/(121*sum_of_squares-sum*sum), or 0 when that
 *        denominator is exactly zero
 *
 * No bounds checking is done; the whole window must be readable.
 */
inline void db_SignedSquareNormCorr11x11_Pre_f(float **f_img,int x_f,int y_f,float *sum,float *recip)
{
    const int radius=5;
    const int side=11;
    const int left=x_f-radius;

    /* The top-left value seeds both accumulators, the remaining values are
       added in row-major order so the float rounding sequence is fixed */
    const float *row=f_img[y_f-radius]+left;
    float total=row[0];
    float sq_total=row[0]*row[0];

    for(int c=1;c<side;++c)
    {
        const float v=row[c];
        sq_total+=v*v;
        total+=v;
    }

    for(int dy=1-radius;dy<=radius;++dy)
    {
        row=f_img[y_f+dy]+left;
        for(int c=0;c<side;++c)
        {
            const float v=row[c];
            sq_total+=v*v;
            total+=v;
        }
    }

    *sum=total;

    /* 121 is the number of values in the window */
    const float denom=(121.0f*sq_total-total*total);
    /* The reciprocal is formed in double precision and then narrowed */
    *recip=static_cast<float>((denom!=0.0)?1.0/denom:0.0);
}
2094
/*
 * Copy the 11x11 float window centered on (x_f,y_f) into a contiguous
 * patch buffer and compute its statistics at the same time.
 *
 * patch  receives 128 floats: the 121 window values in row-major order
 *        (rows y_f-5..y_f+5, columns x_f-5..x_f+5) followed by 7 zeros
 * f_img  image given as an array of row pointers
 * x_f    column of the window center
 * y_f    row of the window center
 * sum    receives the sum of the 121 window values
 * recip  receives 1/(121*sum_of_squares-sum*sum), or 0 when that
 *        denominator is exactly zero
 *
 * No bounds checking is done on the image or on the patch buffer.
 */
inline void db_SignedSquareNormCorr11x11_PreAlign_f(float *patch,const float * const *f_img,int x_f,int y_f,float *sum,float *recip)
{
    const int radius=5;
    const int side=11;
    const int padding=7; /* 121 values + 7 zeros = 128 floats */
    const int left=x_f-radius;

    /* The top-left value seeds both accumulators, the remaining values are
       visited in row-major order; each one is read, accumulated and then
       stored before the next one is touched */
    const float *row=f_img[y_f-radius]+left;
    float total=row[0];
    float sq_total=row[0]*row[0];
    *patch++=row[0];

    for(int c=1;c<side;++c)
    {
        const float v=row[c];
        sq_total+=v*v;
        total+=v;
        *patch++=v;
    }

    for(int dy=1-radius;dy<=radius;++dy)
    {
        row=f_img[y_f+dy]+left;
        for(int c=0;c<side;++c)
        {
            const float v=row[c];
            sq_total+=v*v;
            total+=v;
            *patch++=v;
        }
    }

    /* Zero the tail of the patch */
    for(int i=0;i<padding;++i) *patch++=0.0;

    *sum=total;

    const float denom=(121.0f*sq_total-total*total);
    /* The reciprocal is formed in double precision and then narrowed */
    *recip=static_cast<float>((denom!=0.0)?1.0/denom:0.0);
}
2253
/*
 * Finish a signed squared normalized correlation between two 11x11 float
 * windows, one centered on (x_f,y_f) in f_img and one on (x_g,y_g) in g_img.
 *
 * The function forms the sum of the 121 element-wise products, computes
 * corr=121*product_sum-fsum_gsum and returns corr*corr*f_recip_g_recip,
 * negated when corr is not >= 0.
 *
 * fsum_gsum        subtracted from 121*product_sum; presumably the product
 *                  of the two window sums from the Pre step (confirm with
 *                  the caller)
 * f_recip_g_recip  scale applied to the signed square; presumably the
 *                  product of the two recip values from the Pre step
 *                  (confirm with the caller)
 *
 * No bounds checking is done; both windows must be readable.
 */
inline float db_SignedSquareNormCorr11x11_Post_f(float **f_img,float **g_img,int x_f,int y_f,int x_g,int y_g,
                                                float fsum_gsum,float f_recip_g_recip)
{
    const int radius=5;
    const int side=11;
    const int left_f=x_f-radius;
    const int left_g=x_g-radius;

    /* The top-left product seeds the accumulator, the remaining products
       are added in row-major order so the float rounding sequence is fixed */
    const float *f_row=f_img[y_f-radius]+left_f;
    const float *g_row=g_img[y_g-radius]+left_g;
    float dot=f_row[0]*g_row[0];

    for(int c=1;c<side;++c) dot+=f_row[c]*g_row[c];

    for(int dy=1-radius;dy<=radius;++dy)
    {
        f_row=f_img[y_f+dy]+left_f;
        g_row=g_img[y_g+dy]+left_g;
        for(int c=0;c<side;++c) dot+=f_row[c]*g_row[c];
    }

    /* 121 is the number of values in the window */
    const float corr=121.0f*dot-fsum_gsum;

    /* Keep the sign of the correlation on its square */
    return((corr>=0.0)?(corr*corr*f_recip_g_recip):(-corr*corr*f_recip_g_recip));
}
2334
2335inline float db_SignedSquareNormCorr11x11Aligned_Post_f(const float *f_patch,const float *g_patch,float fsum_gsum,float f_recip_g_recip)
2336{
2337    float fgsum,fg_corr;
2338
2339    fgsum=db_ScalarProduct128Aligned16_f(f_patch,g_patch);
2340
2341    fg_corr=121.0f*fgsum-fsum_gsum;
2342    if(fg_corr>=0.0) return(fg_corr*fg_corr*f_recip_g_recip);
2343    return(-fg_corr*fg_corr*f_recip_g_recip);
2344}
2345
2346inline float db_SignedSquareNormCorr15x15_f(float **f_img,float **g_img,int x_f,int y_f,int x_g,int y_g)
2347{
2348    float *pf,*pg;
2349    float f,g,fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den;
2350    int xm_f,xm_g;
2351
2352    xm_f=x_f-7;
2353    xm_g=x_g-7;
2354    fgsum=0.0; f2sum=0.0; g2sum=0.0; fsum=0.0; gsum=0.0;
2355
2356    pf=f_img[y_f-7]+xm_f; pg=g_img[y_g-7]+xm_g;
2357    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2358    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2359    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2360    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2361    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2362    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2363    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2364    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2365    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2366    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2367    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2368    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2369    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2370    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2371    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2372
2373    pf=f_img[y_f-6]+xm_f; pg=g_img[y_g-6]+xm_g;
2374    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2375    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2376    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2377    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2378    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2379    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2380    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2381    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2382    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2383    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2384    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2385    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2386    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2387    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2388    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2389
2390    pf=f_img[y_f-5]+xm_f; pg=g_img[y_g-5]+xm_g;
2391    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2392    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2393    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2394    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2395    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2396    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2397    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2398    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2399    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2400    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2401    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2402    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2403    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2404    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2405    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2406
2407    pf=f_img[y_f-4]+xm_f; pg=g_img[y_g-4]+xm_g;
2408    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2409    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2410    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2411    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2412    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2413    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2414    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2415    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2416    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2417    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2418    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2419    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2420    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2421    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2422    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2423
2424    pf=f_img[y_f-3]+xm_f; pg=g_img[y_g-3]+xm_g;
2425    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2426    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2427    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2428    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2429    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2430    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2431    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2432    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2433    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2434    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2435    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2436    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2437    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2438    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2439    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2440
2441    pf=f_img[y_f-2]+xm_f; pg=g_img[y_g-2]+xm_g;
2442    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2443    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2444    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2445    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2446    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2447    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2448    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2449    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2450    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2451    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2452    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2453    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2454    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2455    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2456    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2457
2458    pf=f_img[y_f-1]+xm_f; pg=g_img[y_g-1]+xm_g;
2459    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2460    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2461    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2462    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2463    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2464    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2465    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2466    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2467    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2468    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2469    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2470    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2471    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2472    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2473    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2474
2475    pf=f_img[y_f]+xm_f; pg=g_img[y_g]+xm_g;
2476    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2477    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2478    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2479    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2480    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2481    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2482    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2483    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2484    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2485    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2486    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2487    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2488    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2489    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2490    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2491
2492    pf=f_img[y_f+1]+xm_f; pg=g_img[y_g+1]+xm_g;
2493    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2494    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2495    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2496    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2497    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2498    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2499    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2500    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2501    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2502    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2503    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2504    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2505    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2506    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2507    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2508
2509    pf=f_img[y_f+2]+xm_f; pg=g_img[y_g+2]+xm_g;
2510    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2511    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2512    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2513    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2514    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2515    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2516    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2517    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2518    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2519    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2520    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2521    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2522    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2523    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2524    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2525
2526    pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g;
2527    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2528    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2529    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2530    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2531    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2532    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2533    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2534    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2535    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2536    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2537    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2538    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2539    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2540    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2541    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2542
2543    pf=f_img[y_f+4]+xm_f; pg=g_img[y_g+4]+xm_g;
2544    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2545    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2546    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2547    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2548    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2549    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2550    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2551    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2552    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2553    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2554    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2555    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2556    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2557    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2558    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2559
2560    pf=f_img[y_f+5]+xm_f; pg=g_img[y_g+5]+xm_g;
2561    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2562    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2563    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2564    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2565    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2566    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2567    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2568    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2569    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2570    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2571    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2572    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2573    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2574    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2575    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2576
2577    pf=f_img[y_f+6]+xm_f; pg=g_img[y_g+6]+xm_g;
2578    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2579    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2580    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2581    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2582    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2583    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2584    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2585    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2586    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2587    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2588    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2589    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2590    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2591    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2592    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2593
2594    pf=f_img[y_f+7]+xm_f; pg=g_img[y_g+7]+xm_g;
2595    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2596    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2597    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2598    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2599    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2600    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2601    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2602    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2603    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2604    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2605    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2606    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2607    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2608    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2609    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
2610
2611    fg_corr=225.0f*fgsum-fsum*gsum;
2612    den=(225.0f*f2sum-fsum*fsum)*(225.0f*g2sum-gsum*gsum);
2613    if(den!=0.0)
2614    {
2615        if(fg_corr>=0.0) return(fg_corr*fg_corr/den);
2616        return(-fg_corr*fg_corr/den);
2617    }
2618    return(0.0);
2619}
2620
2621db_Bucket_f** db_AllocBuckets_f(int nr_h,int nr_v,int bd)
2622{
2623    int i,j;
2624    db_Bucket_f **bp,*b;
2625
2626    b=new db_Bucket_f [(nr_h+2)*(nr_v+2)];
2627    bp=new db_Bucket_f* [(nr_v+2)];
2628    bp=bp+1;
2629    for(i= -1;i<=nr_v;i++)
2630    {
2631        bp[i]=b+1+(nr_h+2)*(i+1);
2632        for(j= -1;j<=nr_h;j++)
2633        {
2634            bp[i][j].ptr=new db_PointInfo_f [bd];
2635        }
2636    }
2637
2638    return(bp);
2639}
2640
2641db_Bucket_u** db_AllocBuckets_u(int nr_h,int nr_v,int bd)
2642{
2643    int i,j;
2644    db_Bucket_u **bp,*b;
2645
2646    b=new db_Bucket_u [(nr_h+2)*(nr_v+2)];
2647    bp=new db_Bucket_u* [(nr_v+2)];
2648    bp=bp+1;
2649    for(i= -1;i<=nr_v;i++)
2650    {
2651        bp[i]=b+1+(nr_h+2)*(i+1);
2652        for(j= -1;j<=nr_h;j++)
2653        {
2654            bp[i][j].ptr=new db_PointInfo_u [bd];
2655        }
2656    }
2657
2658    return(bp);
2659}
2660
2661void db_FreeBuckets_f(db_Bucket_f **bp,int nr_h,int nr_v)
2662{
2663    int i,j;
2664
2665    for(i= -1;i<=nr_v;i++) for(j= -1;j<=nr_h;j++)
2666    {
2667        delete [] bp[i][j].ptr;
2668    }
2669    delete [] (bp[-1]-1);
2670    delete [] (bp-1);
2671}
2672
2673void db_FreeBuckets_u(db_Bucket_u **bp,int nr_h,int nr_v)
2674{
2675    int i,j;
2676
2677    for(i= -1;i<=nr_v;i++) for(j= -1;j<=nr_h;j++)
2678    {
2679        delete [] bp[i][j].ptr;
2680    }
2681    delete [] (bp[-1]-1);
2682    delete [] (bp-1);
2683}
2684
2685void db_EmptyBuckets_f(db_Bucket_f **bp,int nr_h,int nr_v)
2686{
2687    int i,j;
2688    for(i= -1;i<=nr_v;i++) for(j= -1;j<=nr_h;j++) bp[i][j].nr=0;
2689}
2690
2691void db_EmptyBuckets_u(db_Bucket_u **bp,int nr_h,int nr_v)
2692{
2693    int i,j;
2694    for(i= -1;i<=nr_v;i++) for(j= -1;j<=nr_h;j++) bp[i][j].nr=0;
2695}
2696
2697float* db_FillBuckets_f(float *patch_space,const float * const *f_img,db_Bucket_f **bp,int bw,int bh,int nr_h,int nr_v,int bd,const double *x,const double *y,int nr_corners)
2698{
2699    int i,xi,yi,xpos,ypos,nr;
2700    db_Bucket_f *br;
2701    db_PointInfo_f *pir;
2702
2703    db_EmptyBuckets_f(bp,nr_h,nr_v);
2704    for(i=0;i<nr_corners;i++)
2705    {
2706        xi=(int) x[i];
2707        yi=(int) y[i];
2708        xpos=xi/bw;
2709        ypos=yi/bh;
2710        if(xpos>=0 && xpos<nr_h && ypos>=0 && ypos<nr_v)
2711        {
2712            br=&bp[ypos][xpos];
2713            nr=br->nr;
2714            if(nr<bd)
2715            {
2716                pir=&(br->ptr[nr]);
2717                pir->x=xi;
2718                pir->y=yi;
2719                pir->id=i;
2720                pir->pir=0;
2721                pir->patch=patch_space;
2722                br->nr=nr+1;
2723
2724                db_SignedSquareNormCorr11x11_PreAlign_f(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip));
2725                patch_space+=128;
2726            }
2727        }
2728    }
2729    return(patch_space);
2730}
2731
2732short* db_FillBuckets_u(short *patch_space,const unsigned char * const *f_img,db_Bucket_u **bp,int bw,int bh,int nr_h,int nr_v,int bd,const double *x,const double *y,int nr_corners,int use_smaller_matching_window, int use_21)
2733{
2734    int i,xi,yi,xpos,ypos,nr;
2735    db_Bucket_u *br;
2736    db_PointInfo_u *pir;
2737
2738    db_EmptyBuckets_u(bp,nr_h,nr_v);
2739    for(i=0;i<nr_corners;i++)
2740    {
2741        xi=(int)db_roundi(x[i]);
2742        yi=(int)db_roundi(y[i]);
2743        xpos=xi/bw;
2744        ypos=yi/bh;
2745        if(xpos>=0 && xpos<nr_h && ypos>=0 && ypos<nr_v)
2746        {
2747            br=&bp[ypos][xpos];
2748            nr=br->nr;
2749            if(nr<bd)
2750            {
2751                pir=&(br->ptr[nr]);
2752                pir->x=xi;
2753                pir->y=yi;
2754                pir->id=i;
2755                pir->pir=0;
2756                pir->patch=patch_space;
2757                br->nr=nr+1;
2758
2759                if(use_21)
2760                {
2761                    db_SignedSquareNormCorr21x21_PreAlign_u(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip));
2762                    patch_space+=512;
2763                }
2764                else
2765                {
2766                if(!use_smaller_matching_window)
2767                {
2768                    db_SignedSquareNormCorr11x11_PreAlign_u(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip));
2769                    patch_space+=128;
2770                }
2771                else
2772                {
2773                    db_SignedSquareNormCorr5x5_PreAlign_u(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip));
2774                    patch_space+=32;
2775                }
2776                }
2777            }
2778        }
2779    }
2780    return(patch_space);
2781}
2782
2783
2784
2785float* db_FillBucketsPrewarped_f(float *patch_space,const float *const *f_img,db_Bucket_f **bp,int bw,int bh,int nr_h,int nr_v,int bd,const double *x,const double *y,int nr_corners,const double H[9])
2786{
2787    int i,xi,yi,xpos,ypos,nr,wxi,wyi;
2788    db_Bucket_f *br;
2789    db_PointInfo_f *pir;
2790    double xd[2],wx[2];
2791
2792    db_EmptyBuckets_f(bp,nr_h,nr_v);
2793    for(i=0;i<nr_corners;i++)
2794    {
2795        xd[0]=x[i];
2796        xd[1]=y[i];
2797        xi=(int) xd[0];
2798        yi=(int) xd[1];
2799        db_ImageHomographyInhomogenous(wx,H,xd);
2800        wxi=(int) wx[0];
2801        wyi=(int) wx[1];
2802
2803        xpos=((wxi+bw)/bw)-1;
2804        ypos=((wyi+bh)/bh)-1;
2805        if(xpos>= -1 && xpos<=nr_h && ypos>= -1 && ypos<=nr_v)
2806        {
2807            br=&bp[ypos][xpos];
2808            nr=br->nr;
2809            if(nr<bd)
2810            {
2811                pir=&(br->ptr[nr]);
2812                pir->x=wxi;
2813                pir->y=wyi;
2814                pir->id=i;
2815                pir->pir=0;
2816                pir->patch=patch_space;
2817                br->nr=nr+1;
2818
2819                db_SignedSquareNormCorr11x11_PreAlign_f(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip));
2820                patch_space+=128;
2821            }
2822        }
2823    }
2824    return(patch_space);
2825}
2826
2827short* db_FillBucketsPrewarped_u(short *patch_space,const unsigned char * const *f_img,db_Bucket_u **bp,
2828                                 int bw,int bh,int nr_h,int nr_v,int bd,const double *x,const double *y,
2829                                 int nr_corners,const double H[9])
2830{
2831    int i,xi,yi,xpos,ypos,nr,wxi,wyi;
2832    db_Bucket_u *br;
2833    db_PointInfo_u *pir;
2834    double xd[2],wx[2];
2835
2836    db_EmptyBuckets_u(bp,nr_h,nr_v);
2837    for(i=0;i<nr_corners;i++)
2838    {
2839        xd[0]=x[i];
2840        xd[1]=y[i];
2841        xi=(int) db_roundi(xd[0]);
2842        yi=(int) db_roundi(xd[1]);
2843        db_ImageHomographyInhomogenous(wx,H,xd);
2844        wxi=(int) wx[0];
2845        wyi=(int) wx[1];
2846
2847        xpos=((wxi+bw)/bw)-1;
2848        ypos=((wyi+bh)/bh)-1;
2849        if(xpos>= -1 && xpos<=nr_h && ypos>= -1 && ypos<=nr_v)
2850        {
2851            br=&bp[ypos][xpos];
2852            nr=br->nr;
2853            if(nr<bd)
2854            {
2855                pir=&(br->ptr[nr]);
2856                pir->x=wxi;
2857                pir->y=wyi;
2858                pir->id=i;
2859                pir->pir=0;
2860                pir->patch=patch_space;
2861                br->nr=nr+1;
2862
2863                db_SignedSquareNormCorr11x11_PreAlign_u(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip));
2864                patch_space+=128;
2865            }
2866        }
2867    }
2868    return(patch_space);
2869}
2870
2871
2872
2873short* db_FillBucketsPrewarpedAffine_u(short *patch_space,const unsigned char * const *f_img,db_Bucket_u **bp,
2874                                 int bw,int bh,int nr_h,int nr_v,int bd,const double *x,const double *y,
2875                                 int nr_corners,const double H[9],const double Hinv[9],const int warpboundsp[4],
2876                                 int affine)
2877{
2878    int i,xi,yi,xpos,ypos,nr,wxi,wyi;
2879    db_Bucket_u *br;
2880    db_PointInfo_u *pir;
2881    double xd[2],wx[2];
2882
2883    db_EmptyBuckets_u(bp,nr_h,nr_v);
2884    for(i=0;i<nr_corners;i++)
2885    {
2886        xd[0]=x[i];
2887        xd[1]=y[i];
2888        xi=(int) db_roundi(xd[0]);
2889        yi=(int) db_roundi(xd[1]);
2890        db_ImageHomographyInhomogenous(wx,H,xd);
2891        wxi=(int) wx[0];
2892        wyi=(int) wx[1];
2893
2894        xpos=((wxi+bw)/bw)-1;
2895        ypos=((wyi+bh)/bh)-1;
2896
2897
2898        if (xpos>= -1 && xpos<=nr_h && ypos>= -1 && ypos<=nr_v)
2899        {
2900            if( xi>warpboundsp[0] && xi<warpboundsp[1] && yi>warpboundsp[2] && yi<warpboundsp[3])
2901            {
2902
2903                br=&bp[ypos][xpos];
2904                nr=br->nr;
2905                if(nr<bd)
2906                {
2907                    pir=&(br->ptr[nr]);
2908                    pir->x=wxi;
2909                    pir->y=wyi;
2910                    pir->id=i;
2911                    pir->pir=0;
2912                    pir->patch=patch_space;
2913                    br->nr=nr+1;
2914
2915                    db_SignedSquareNormCorr11x11_PreAlign_AffinePatchWarp_u(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip),Hinv,affine);
2916                    patch_space+=128;
2917                }
2918            }
2919        }
2920    }
2921    return(patch_space);
2922}
2923
2924
2925
2926inline void db_MatchPointPair_f(db_PointInfo_f *pir_l,db_PointInfo_f *pir_r,
2927                            unsigned long kA,unsigned long kB)
2928{
2929    int x_l,y_l,x_r,y_r,xm,ym;
2930    double score;
2931
2932    x_l=pir_l->x;
2933    y_l=pir_l->y;
2934    x_r=pir_r->x;
2935    y_r=pir_r->y;
2936    xm=x_l-x_r;
2937    ym=y_l-y_r;
2938    /*Check if disparity is within the maximum disparity
2939    with the formula xm^2*256+ym^2*kA<kB
2940    where kA=256*w^2/h^2
2941    and   kB=256*max_disp^2*w^2*/
2942    if(((xm*xm)<<8)+ym*ym*kA<kB)
2943    {
2944        /*Correlate*/
2945        score=db_SignedSquareNormCorr11x11Aligned_Post_f(pir_l->patch,pir_r->patch,
2946            (pir_l->sum)*(pir_r->sum),
2947            (pir_l->recip)*(pir_r->recip));
2948
2949        if((!(pir_l->pir)) || (score>pir_l->s))
2950        {
2951            /*Update left corner*/
2952            pir_l->s=score;
2953            pir_l->pir=pir_r;
2954        }
2955        if((!(pir_r->pir)) || (score>pir_r->s))
2956        {
2957            /*Update right corner*/
2958            pir_r->s=score;
2959            pir_r->pir=pir_l;
2960        }
2961    }
2962}
2963
2964inline void db_MatchPointPair_u(db_PointInfo_u *pir_l,db_PointInfo_u *pir_r,
2965                            unsigned long kA,unsigned long kB, unsigned int rect_window,bool use_smaller_matching_window, int use_21)
2966{
2967    int xm,ym;
2968    double score;
2969    bool compute_score;
2970
2971
2972    if( rect_window )
2973        compute_score = ((unsigned)db_absi(pir_l->x - pir_r->x)<kA && (unsigned)db_absi(pir_l->y - pir_r->y)<kB);
2974    else
2975    {   /*Check if disparity is within the maximum disparity
2976        with the formula xm^2*256+ym^2*kA<kB
2977        where kA=256*w^2/h^2
2978        and   kB=256*max_disp^2*w^2*/
2979        xm= pir_l->x - pir_r->x;
2980        ym= pir_l->y - pir_r->y;
2981        compute_score = ((xm*xm)<<8)+ym*ym*kA < kB;
2982    }
2983
2984    if ( compute_score )
2985    {
2986        if(use_21)
2987        {
2988            score=db_SignedSquareNormCorr21x21Aligned_Post_s(pir_l->patch,pir_r->patch,
2989                (pir_l->sum)*(pir_r->sum),
2990                (pir_l->recip)*(pir_r->recip));
2991        }
2992        else
2993        {
2994        /*Correlate*/
2995        if(!use_smaller_matching_window)
2996        {
2997            score=db_SignedSquareNormCorr11x11Aligned_Post_s(pir_l->patch,pir_r->patch,
2998                (pir_l->sum)*(pir_r->sum),
2999                (pir_l->recip)*(pir_r->recip));
3000        }
3001        else
3002        {
3003            score=db_SignedSquareNormCorr5x5Aligned_Post_s(pir_l->patch,pir_r->patch,
3004                (pir_l->sum)*(pir_r->sum),
3005                (pir_l->recip)*(pir_r->recip));
3006        }
3007        }
3008
3009        if((!(pir_l->pir)) || (score>pir_l->s))
3010        {
3011            /*Update left corner*/
3012            pir_l->s=score;
3013            pir_l->pir=pir_r;
3014        }
3015        if((!(pir_r->pir)) || (score>pir_r->s))
3016        {
3017            /*Update right corner*/
3018            pir_r->s=score;
3019            pir_r->pir=pir_l;
3020        }
3021    }
3022}
3023
3024inline void db_MatchPointAgainstBucket_f(db_PointInfo_f *pir_l,db_Bucket_f *b_r,
3025                                       unsigned long kA,unsigned long kB)
3026{
3027    int p_r,nr;
3028    db_PointInfo_f *pir_r;
3029
3030    nr=b_r->nr;
3031    pir_r=b_r->ptr;
3032    for(p_r=0;p_r<nr;p_r++) db_MatchPointPair_f(pir_l,pir_r+p_r,kA,kB);
3033}
3034
3035inline void db_MatchPointAgainstBucket_u(db_PointInfo_u *pir_l,db_Bucket_u *b_r,
3036                                       unsigned long kA,unsigned long kB,int rect_window, bool use_smaller_matching_window, int use_21)
3037{
3038    int p_r,nr;
3039    db_PointInfo_u *pir_r;
3040
3041    nr=b_r->nr;
3042    pir_r=b_r->ptr;
3043
3044    for(p_r=0;p_r<nr;p_r++) db_MatchPointPair_u(pir_l,pir_r+p_r,kA,kB, rect_window, use_smaller_matching_window, use_21);
3045
3046}
3047
3048void db_MatchBuckets_f(db_Bucket_f **bp_l,db_Bucket_f **bp_r,int nr_h,int nr_v,
3049                     unsigned long kA,unsigned long kB)
3050{
3051    int i,j,k,a,b,br_nr;
3052    db_Bucket_f *br;
3053    db_PointInfo_f *pir_l;
3054
3055    /*For all buckets*/
3056    for(i=0;i<nr_v;i++) for(j=0;j<nr_h;j++)
3057    {
3058        br=&bp_l[i][j];
3059        br_nr=br->nr;
3060        /*For all points in bucket*/
3061        for(k=0;k<br_nr;k++)
3062        {
3063            pir_l=br->ptr+k;
3064            for(a=i-1;a<=i+1;a++)
3065            {
3066                for(b=j-1;b<=j+1;b++)
3067                {
3068                    db_MatchPointAgainstBucket_f(pir_l,&bp_r[a][b],kA,kB);
3069                }
3070            }
3071        }
3072    }
3073}
3074
3075void db_MatchBuckets_u(db_Bucket_u **bp_l,db_Bucket_u **bp_r,int nr_h,int nr_v,
3076                     unsigned long kA,unsigned long kB,int rect_window,bool use_smaller_matching_window, int use_21)
3077{
3078    int i,j,k,a,b,br_nr;
3079    db_Bucket_u *br;
3080    db_PointInfo_u *pir_l;
3081
3082    /*For all buckets*/
3083    for(i=0;i<nr_v;i++) for(j=0;j<nr_h;j++)
3084    {
3085        br=&bp_l[i][j];
3086        br_nr=br->nr;
3087        /*For all points in bucket*/
3088        for(k=0;k<br_nr;k++)
3089        {
3090            pir_l=br->ptr+k;
3091            for(a=i-1;a<=i+1;a++)
3092            {
3093                for(b=j-1;b<=j+1;b++)
3094                {
3095                    db_MatchPointAgainstBucket_u(pir_l,&bp_r[a][b],kA,kB,rect_window,use_smaller_matching_window, use_21);
3096                }
3097            }
3098        }
3099    }
3100}
3101
3102void db_CollectMatches_f(db_Bucket_f **bp_l,int nr_h,int nr_v,unsigned long target,int *id_l,int *id_r,int *nr_matches)
3103{
3104    int i,j,k,br_nr;
3105    unsigned long count;
3106    db_Bucket_f *br;
3107    db_PointInfo_f *pir,*pir2;
3108
3109    count=0;
3110    /*For all buckets*/
3111    for(i=0;i<nr_v;i++) for(j=0;j<nr_h;j++)
3112    {
3113        br=&bp_l[i][j];
3114        br_nr=br->nr;
3115        /*For all points in bucket*/
3116        for(k=0;k<br_nr;k++)
3117        {
3118            pir=br->ptr+k;
3119            pir2=pir->pir;
3120            if(pir2)
3121            {
3122                /*This point has a best match*/
3123                if((pir2->pir)==pir)
3124                {
3125                    /*We have a mutually consistent match*/
3126                    if(count<target)
3127                    {
3128                        id_l[count]=pir->id;
3129                        id_r[count]=pir2->id;
3130                        count++;
3131                    }
3132                }
3133            }
3134        }
3135    }
3136    *nr_matches=count;
3137}
3138
3139void db_CollectMatches_u(db_Bucket_u **bp_l,int nr_h,int nr_v,unsigned long target,int *id_l,int *id_r,int *nr_matches)
3140{
3141    int i,j,k,br_nr;
3142    unsigned long count;
3143    db_Bucket_u *br;
3144    db_PointInfo_u *pir,*pir2;
3145
3146    count=0;
3147    /*For all buckets*/
3148    for(i=0;i<nr_v;i++) for(j=0;j<nr_h;j++)
3149    {
3150        br=&bp_l[i][j];
3151        br_nr=br->nr;
3152        /*For all points in bucket*/
3153        for(k=0;k<br_nr;k++)
3154        {
3155            pir=br->ptr+k;
3156            pir2=pir->pir;
3157            if(pir2)
3158            {
3159                /*This point has a best match*/
3160                if((pir2->pir)==pir)
3161                {
3162                    /*We have a mutually consistent match*/
3163                    if(count<target)
3164                    {
3165                        id_l[count]=pir->id;
3166                        id_r[count]=pir2->id;
3167                        count++;
3168                    }
3169                }
3170            }
3171        }
3172    }
3173    *nr_matches=count;
3174}
3175
3176db_Matcher_f::db_Matcher_f()
3177{
3178    m_w=0; m_h=0;
3179}
3180
3181db_Matcher_f::~db_Matcher_f()
3182{
3183    Clean();
3184}
3185
3186void db_Matcher_f::Clean()
3187{
3188    if(m_w)
3189    {
3190        /*Free buckets*/
3191        db_FreeBuckets_f(m_bp_l,m_nr_h,m_nr_v);
3192        db_FreeBuckets_f(m_bp_r,m_nr_h,m_nr_v);
3193        /*Free space for patch layouts*/
3194        delete [] m_patch_space;
3195    }
3196    m_w=0; m_h=0;
3197}
3198
3199unsigned long db_Matcher_f::Init(int im_width,int im_height,double max_disparity,int target_nr_corners)
3200{
3201    Clean();
3202    m_w=im_width;
3203    m_h=im_height;
3204    m_bw=db_maxi(1,(int) (max_disparity*((double)im_width)));
3205    m_bh=db_maxi(1,(int) (max_disparity*((double)im_height)));
3206    m_nr_h=1+(im_width-1)/m_bw;
3207    m_nr_v=1+(im_height-1)/m_bh;
3208    m_bd=db_maxi(1,(int)(((double)target_nr_corners)*
3209        max_disparity*max_disparity));
3210    m_target=target_nr_corners;
3211    m_kA=(long)(256.0*((double)(m_w*m_w))/((double)(m_h*m_h)));
3212    m_kB=(long)(256.0*max_disparity*max_disparity*((double)(m_w*m_w)));
3213
3214    /*Alloc bucket structure*/
3215    m_bp_l=db_AllocBuckets_f(m_nr_h,m_nr_v,m_bd);
3216    m_bp_r=db_AllocBuckets_f(m_nr_h,m_nr_v,m_bd);
3217
3218    /*Alloc 16byte-aligned space for patch layouts*/
3219    m_patch_space=new float [2*(m_nr_h+2)*(m_nr_v+2)*m_bd*128+16];
3220    m_aligned_patch_space=db_AlignPointer_f(m_patch_space,16);
3221
3222    return(m_target);
3223}
3224
3225void db_Matcher_f::Match(const float * const *l_img,const float * const *r_img,
3226        const double *x_l,const double *y_l,int nr_l,const double *x_r,const double *y_r,int nr_r,
3227        int *id_l,int *id_r,int *nr_matches,const double H[9])
3228{
3229    float *ps;
3230
3231    /*Insert the corners into bucket structure*/
3232    ps=db_FillBuckets_f(m_aligned_patch_space,l_img,m_bp_l,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,x_l,y_l,nr_l);
3233    if(H==0) db_FillBuckets_f(ps,r_img,m_bp_r,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,x_r,y_r,nr_r);
3234    else db_FillBucketsPrewarped_f(ps,r_img,m_bp_r,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,x_r,y_r,nr_r,H);
3235
3236    /*Compute all the necessary match scores*/
3237    db_MatchBuckets_f(m_bp_l,m_bp_r,m_nr_h,m_nr_v,m_kA,m_kB);
3238
3239    /*Collect the correspondences*/
3240    db_CollectMatches_f(m_bp_l,m_nr_h,m_nr_v,m_target,id_l,id_r,nr_matches);
3241}
3242
3243db_Matcher_u::db_Matcher_u()
3244{
3245    m_w=0; m_h=0;
3246    m_rect_window = 0;
3247    m_bw=m_bh=m_nr_h=m_nr_v=m_bd=m_target=0;
3248    m_bp_l=m_bp_r=0;
3249    m_patch_space=m_aligned_patch_space=0;
3250}
3251
3252db_Matcher_u::db_Matcher_u(const db_Matcher_u& cm)
3253{
3254    Init(cm.m_w, cm.m_h, cm.m_max_disparity, cm.m_target, cm.m_max_disparity_v);
3255}
3256
3257db_Matcher_u& db_Matcher_u::operator= (const db_Matcher_u& cm)
3258{
3259    if ( this == &cm ) return *this;
3260    Init(cm.m_w, cm.m_h, cm.m_max_disparity, cm.m_target, cm.m_max_disparity_v);
3261    return *this;
3262}
3263
3264
3265db_Matcher_u::~db_Matcher_u()
3266{
3267    Clean();
3268}
3269
3270void db_Matcher_u::Clean()
3271{
3272    if(m_w)
3273    {
3274        /*Free buckets*/
3275        db_FreeBuckets_u(m_bp_l,m_nr_h,m_nr_v);
3276        db_FreeBuckets_u(m_bp_r,m_nr_h,m_nr_v);
3277        /*Free space for patch layouts*/
3278        delete [] m_patch_space;
3279    }
3280    m_w=0; m_h=0;
3281}
3282
3283
3284unsigned long db_Matcher_u::Init(int im_width,int im_height,double max_disparity,int target_nr_corners,
3285                                 double max_disparity_v, bool use_smaller_matching_window, int use_21)
3286{
3287    Clean();
3288    m_w=im_width;
3289    m_h=im_height;
3290    m_max_disparity=max_disparity;
3291    m_max_disparity_v=max_disparity_v;
3292
3293    if ( max_disparity_v != DB_DEFAULT_NO_DISPARITY )
3294    {
3295        m_rect_window = 1;
3296
3297        m_bw=db_maxi(1,(int)(max_disparity*((double)im_width)));
3298        m_bh=db_maxi(1,(int)(max_disparity_v*((double)im_height)));
3299
3300        m_bd=db_maxi(1,(int)(((double)target_nr_corners)*max_disparity*max_disparity_v));
3301
3302        m_kA=(int)(max_disparity*m_w);
3303        m_kB=(int)(max_disparity_v*m_h);
3304
3305    } else
3306    {
3307        m_bw=(int)db_maxi(1,(int)(max_disparity*((double)im_width)));
3308        m_bh=(int)db_maxi(1,(int)(max_disparity*((double)im_height)));
3309
3310        m_bd=db_maxi(1,(int)(((double)target_nr_corners)*max_disparity*max_disparity));
3311
3312        m_kA=(long)(256.0*((double)(m_w*m_w))/((double)(m_h*m_h)));
3313        m_kB=(long)(256.0*max_disparity*max_disparity*((double)(m_w*m_w)));
3314    }
3315
3316    m_nr_h=1+(im_width-1)/m_bw;
3317    m_nr_v=1+(im_height-1)/m_bh;
3318
3319    m_target=target_nr_corners;
3320
3321    /*Alloc bucket structure*/
3322    m_bp_l=db_AllocBuckets_u(m_nr_h,m_nr_v,m_bd);
3323    m_bp_r=db_AllocBuckets_u(m_nr_h,m_nr_v,m_bd);
3324
3325    m_use_smaller_matching_window = use_smaller_matching_window;
3326    m_use_21 = use_21;
3327
3328    if(m_use_21)
3329    {
3330        /*Alloc 64byte-aligned space for patch layouts*/
3331        m_patch_space=new short [2*(m_nr_h+2)*(m_nr_v+2)*m_bd*512+64];
3332        m_aligned_patch_space=db_AlignPointer_s(m_patch_space,64);
3333    }
3334    else
3335    {
3336    if(!m_use_smaller_matching_window)
3337    {
3338        /*Alloc 16byte-aligned space for patch layouts*/
3339        m_patch_space=new short [2*(m_nr_h+2)*(m_nr_v+2)*m_bd*128+16];
3340        m_aligned_patch_space=db_AlignPointer_s(m_patch_space,16);
3341    }
3342    else
3343    {
3344        /*Alloc 4byte-aligned space for patch layouts*/
3345        m_patch_space=new short [2*(m_nr_h+2)*(m_nr_v+2)*m_bd*32+4];
3346        m_aligned_patch_space=db_AlignPointer_s(m_patch_space,4);
3347    }
3348    }
3349
3350    return(m_target);
3351}
3352
3353void db_Matcher_u::Match(const unsigned char * const *l_img,const unsigned char * const *r_img,
3354        const double *x_l,const double *y_l,int nr_l,const double *x_r,const double *y_r,int nr_r,
3355        int *id_l,int *id_r,int *nr_matches,const double H[9],int affine)
3356{
3357    short *ps;
3358
3359    /*Insert the corners into bucket structure*/
3360    ps=db_FillBuckets_u(m_aligned_patch_space,l_img,m_bp_l,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,x_l,y_l,nr_l,m_use_smaller_matching_window,m_use_21);
3361    if(H==0)
3362        db_FillBuckets_u(ps,r_img,m_bp_r,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,x_r,y_r,nr_r,m_use_smaller_matching_window,m_use_21);
3363    else
3364    {
3365        if (affine)
3366        {
3367            double Hinv[9];
3368            db_InvertAffineTransform(Hinv,H);
3369            float r_w, c_w;
3370            float stretch_x[2];
3371            float stretch_y[2];
3372            AffineWarpPointOffset(r_w,c_w,Hinv, 5,5);
3373            stretch_x[0]=db_absf(c_w);stretch_y[0]=db_absf(r_w);
3374            AffineWarpPointOffset(r_w,c_w,Hinv, 5,-5);
3375            stretch_x[1]=db_absf(c_w);stretch_y[1]=db_absf(r_w);
3376            int max_stretxh_x=(int) (db_maxd(stretch_x[0],stretch_x[1]));
3377            int max_stretxh_y=(int) (db_maxd(stretch_y[0],stretch_y[1]));
3378            int warpbounds[4]={max_stretxh_x,m_w-1-max_stretxh_x,max_stretxh_y,m_h-1-max_stretxh_y};
3379
3380            for (int r=-5;r<=5;r++){
3381                for (int c=-5;c<=5;c++){
3382                    AffineWarpPointOffset(r_w,c_w,Hinv,r,c);
3383                    AffineWarpPoint_BL_LUT_y[r+5][c+5]=r_w;
3384                    AffineWarpPoint_BL_LUT_x[r+5][c+5]=c_w;
3385
3386                    AffineWarpPoint_NN_LUT_y[r+5][c+5]=db_roundi(r_w);
3387                    AffineWarpPoint_NN_LUT_x[r+5][c+5]=db_roundi(c_w);
3388
3389                }
3390            }
3391
3392            db_FillBucketsPrewarpedAffine_u(ps,r_img,m_bp_r,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,
3393                x_r,y_r,nr_r,H,Hinv,warpbounds,affine);
3394        }
3395        else
3396            db_FillBucketsPrewarped_u(ps,r_img,m_bp_r,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,x_r,y_r,nr_r,H);
3397    }
3398
3399
3400    /*Compute all the necessary match scores*/
3401    db_MatchBuckets_u(m_bp_l,m_bp_r,m_nr_h,m_nr_v,m_kA,m_kB, m_rect_window,m_use_smaller_matching_window,m_use_21);
3402
3403    /*Collect the correspondences*/
3404    db_CollectMatches_u(m_bp_l,m_nr_h,m_nr_v,m_target,id_l,id_r,nr_matches);
3405}
3406
3407int db_Matcher_u::IsAllocated()
3408{
3409    return (int)(m_w != 0);
3410}
3411