dbreg.h revision e295e32b68cf04f0d99138bf4a6d25555f3aef99
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18#pragma once
19
20#ifdef _WIN32
21#ifdef DBREG_EXPORTS
22#define DBREG_API __declspec(dllexport)
23#else
24#define DBREG_API __declspec(dllimport)
25#endif
26#else
27#define DBREG_API
28#endif
29
30// @jke - the next few lines are for extracting timing data.  TODO: Remove after test
31#define PROFILE 0
32
33#include "dbstabsmooth.h"
34
35#include <db_feature_detection.h>
36#include <db_feature_matching.h>
37#include <db_rob_image_homography.h>
38
39#if PROFILE
40    #include <sys/time.h>
41#endif
42
43/*! \mainpage db_FrameToReferenceRegistration
44
45 \section intro Introduction
46
47 db_FrameToReferenceRegistration provides a simple interface to a set of sophisticated algorithms for stabilizing
48 video sequences.  As its name suggests, the class is used to compute parameters that will allow us to warp incoming video
49 frames and register them with respect to a so-called <i>reference</i> frame.  The reference frame is simply the first
50 frame of a sequence; the registration process is that of estimating the parameters of a warp that can be applied to
51 subsequent frames to make those frames align with the reference.  A video made up of these warped frames will be more
52 stable than the input video.
53
54 For more technical information on the internal structure of the algorithms used within the db_FrameToRegistration class,
55 please follow this <a href="../Sarnoff image registration.docx">link</a>.
56
57 \section usage Usage
58 In addition to the class constructor, there are two main functions of db_FrameToReferenceRegistration that are of
59 interest to the programmer.  db_FrameToReferenceRegistration::Init(...) is used to initialize the parameters of the
60 registration algorithm. db_FrameToReferenceRegistration::AddFrame(...) is the method by which each new video frame
61 is introduced to the registration algorithm, and produces the estimated registration warp parameters.
62
63 The following example illustrates how the major methods of the class db_FrameToReferenceRegistration can be used together
64 to calculate the registration parameters for an image sequence.  In the example, the calls to the methods of
65 db_FrameToReferenceRegistration match those found in the API, but supporting code should be considered pseudo-code.
66 For a more complete example, please consult the source code for dbregtest.
67
68
69    \code
70    // feature-based image registration class:
71    db_FrameToReferenceRegistration reg;
72
73    // Image data
74    const unsigned char * const * image_storage;
75
76    // The 3x3 frame to reference registration parameters
77    double frame_to_ref_homography[9];
78
79    // a counter to count the number of frames processed.
80    unsigned long frame_counter;
81    // ...
82
83    // main loop - keep going while there are images to process.
84    while (ImagesAreAvailable)
85    {
86        // Call functions to place latest data into image_storage
87        // ...
88
89        // if the registration object is not yet initialized, then do so
90        // The arguments to this function are explained in the accompanying
91        // html API documentation
92        if (!reg.Initialized())
93        {
94            reg.Init(w,h,motion_model_type,25,linear_polish,quarter_resolution,
95                   DB_POINT_STANDARDDEV,reference_update_period,
96                   do_motion_smoothing,motion_smoothing_gain,
97                   DB_DEFAULT_NR_SAMPLES,DB_DEFAULT_CHUNK_SIZE,
98                   nr_corners,max_disparity);
99        }
100
101        // Present the new image data to the registration algorithm,
102        // with the result being stored in the frame_to_ref_homography
103        // variable.
104        reg.AddFrame(image_storage,frame_to_ref_homography);
105
106        // frame_to_ref_homography now contains the stabilizing transform
107        // use this to warp the latest image for display, etc.
108
109        // if this is the first frame, we need to tell the registration
110        // class to store the image as its reference.  Otherwise, AddFrame
111        // takes care of that.
112        if (frame_counter == 0)
113        {
114            reg.UpdateReference(image_storage);
115        }
116
117        // increment the frame counter
118        frame_counter++;
119    }
120
121    \endcode
122
123 */
124
125/*!
126 * Performs feature-based frame to reference image registration.
127 */
128class DBREG_API db_FrameToReferenceRegistration
129{
130public:
131    db_FrameToReferenceRegistration(void);
132    ~db_FrameToReferenceRegistration();
133
134    /*!
135     * Set parameters and allocate memory. Note: The default values of these parameters have been set to the values used for the android implementation (i.e. the demo APK).
136     * \param width         image width
137     * \param height        image height
138     * \param homography_type see definitions in \ref LMRobImageHomography
139     * \param max_iterations    max number of polishing steps
140     * \param linear_polish     whether to perform a linear polishing step after RANSAC
141     * \param quarter_resolution    whether to process input images at quarter resolution (for computational efficiency)
142     * \param scale         Cauchy scale coefficient (see db_ExpCauchyReprojectionError() )
143     * \param reference_update_period   how often to update the alignment reference (in units of number of frames)
144     * \param do_motion_smoothing   whether to perform display reference smoothing
145     * \param motion_smoothing_gain weight factor to reflect how fast the display reference must follow the current frame if motion smoothing is enabled
146     * \param nr_samples        number of times to compute a hypothesis
147     * \param chunk_size        size of cost chunks
148     * \param cd_target_nr_corners  target number of corners for corner detector
149     * \param cm_max_disparity      maximum disparity search range for corner matcher (in units of ratio of image width)
150     * \param cm_use_smaller_matching_window    if set to true, uses a correlation window of 5x5 instead of the default 11x11
151     * \param cd_nr_horz_blocks     the number of horizontal blocks for the corner detector to partition the image
152     * \param cd_nr_vert_blocks     the number of vertical blocks for the corner detector to partition the image
153    */
154    void Init(int width, int height,
155          int       homography_type = DB_HOMOGRAPHY_TYPE_DEFAULT,
156          int       max_iterations = DB_DEFAULT_MAX_ITERATIONS,
157          bool      linear_polish = false,
158          bool   quarter_resolution = true,
159          double  scale = DB_POINT_STANDARDDEV,
160          unsigned int reference_update_period = 3,
161          bool   do_motion_smoothing = false,
162          double motion_smoothing_gain = 0.75,
163          int   nr_samples = DB_DEFAULT_NR_SAMPLES,
164          int   chunk_size = DB_DEFAULT_CHUNK_SIZE,
165          int    cd_target_nr_corners = 500,
166          double cm_max_disparity = 0.2,
167          bool   cm_use_smaller_matching_window = false,
168          int    cd_nr_horz_blocks = 5,
169          int    cd_nr_vert_blocks = 5);
170
171    /*!
172     * Reset the transformation type that is being use to perform alignment. Use this to change the alignment type at run time.
173     * \param homography_type   the type of transformation to use for performing alignment (see definitions in \ref LMRobImageHomography)
174    */
175    void ResetHomographyType(int homography_type) { m_homography_type = homography_type; }
176
177    /*!
178     * Enable/Disable motion smoothing. Use this to turn motion smoothing on/off at run time.
179     * \param enable    flag indicating whether to turn the motion smoothing on or off.
180    */
181    void ResetSmoothing(bool enable) { m_do_motion_smoothing = enable; }
182
183    /*!
184     * Align an inspection image to an existing reference image, update the reference image if due and perform motion smoothing if enabled.
185     * \param im                new inspection image
186     * \param H             computed transformation from reference to inspection coordinate frame. Identity is returned if no reference frame was set.
187     * \param force_reference   make this the new reference image
188     */
189    int AddFrame(const unsigned char * const * im, double H[9], bool force_reference=false, bool prewarp=false);
190
191    /*!
192     * Returns true if Init() was run.
193     */
194    bool Initialized() const { return m_initialized; }
195
196    /*!
197     * Returns true if the current frame is being used as the alignment reference.
198    */
199    bool IsCurrentReference() const { return m_current_is_reference; }
200
201    /*!
202     * Returns true if we need to call UpdateReference now.
203     */
204    bool NeedReferenceUpdate();
205
206    /*!
207     * Returns the pointer reference to the alignment reference image data
208    */
209    unsigned char ** GetReferenceImage() { return m_reference_image; }
210
211    /*!
212     * Returns the pointer reference to the double array containing the homogeneous coordinates for the matched reference image corners.
213    */
214    double * GetRefCorners() { return m_corners_ref; }
215    /*!
216     * Returns the pointer reference to the double array containing the homogeneous coordinates for the matched inspection image corners.
217    */
218    double * GetInsCorners() { return m_corners_ins; }
219    /*!
220     * Returns the number of correspondences between the reference and inspection images.
221    */
222    int GetNrMatches() { return m_nr_matches; }
223
224    /*!
225     * Returns the pointer to an array of indices that were found to be RANSAC inliers from the matched corner lists.
226    */
227    int* GetInliers() { return m_inlier_indices; }
228
229    /*!
230     * Returns the number of inliers from the RANSAC matching step.
231    */
232    int  GetNrInliers() { return m_num_inlier_indices; }
233
234    //std::vector<int>& GetInliers();
235    //void Polish(std::vector<int> &inlier_indices);
236
237    /*!
238     * Perform a linear polishing step by re-estimating the alignment transformation using the RANSAC inliers.
239     * \param inlier_indices    pointer to an array of indices that were found to be RANSAC inliers from the matched corner lists.
240     * \param num_inlier_indices    number of inliers i.e. the length of the array passed as the first argument.
241    */
242    void Polish(int *inlier_indices, int &num_inlier_indices);
243
244    /*!
245     * Reset the motion smoothing parameters to their initial values.
246    */
247    void ResetMotionSmoothingParameters() { m_stab_smoother.Init(); }
248
249    /*!
250     * Update the alignment reference image to the specified image.
251     * \param im    pointer to the image data to be used as the new alignment reference.
252     * \param subsample boolean flag to control whether the function should internally subsample the provided image to the size provided in the Init() function.
253    */
254    int UpdateReference(const unsigned char * const * im, bool subsample = true, bool detect_corners = true);
255
256    /*!
257     * Returns the transformation from the display reference to the alignment reference frame
258    */
259    void Get_H_dref_to_ref(double H[9]);
260    /*!
261     * Returns the transformation from the display reference to the inspection reference frame
262    */
263    void Get_H_dref_to_ins(double H[9]);
264    /*!
265     * Set the transformation from the display reference to the inspection reference frame
266     * \param H the transformation to set
267    */
268    void Set_H_dref_to_ins(double H[9]);
269
270    /*!
271     * Reset the display reference to the current frame.
272    */
273    void ResetDisplayReference();
274
275    /*!
276     * Estimate a secondary motion model starting from the specified transformation.
277     * \param H the primary motion model to start from
278    */
279    void EstimateSecondaryModel(double H[9]);
280
281    /*!
282     *
283    */
284    void SelectOutliers();
285
286    char *profile_string;
287
288protected:
289    void Clean();
290    void GenerateQuarterResImage(const unsigned char* const * im);
291
292    int     m_im_width;
293    int     m_im_height;
294
295    // RANSAC and refinement parameters:
296    int m_homography_type;
297    int     m_max_iterations;
298    double  m_scale;
299    int     m_nr_samples;
300    int     m_chunk_size;
301    double  m_outlier_t2;
302
303    // Whether to fit a linear model to just the inliers at the end
304    bool   m_linear_polish;
305    double m_polish_C[36];
306    double m_polish_D[6];
307
308    // local state
309    bool m_current_is_reference;
310    bool m_initialized;
311
312    // inspection to reference homography:
313    double m_H_ref_to_ins[9];
314    double m_H_dref_to_ref[9];
315
316    // feature extraction and matching:
317    db_CornerDetector_u m_cd;
318    db_Matcher_u        m_cm;
319
320    // length of corner arrays:
321    unsigned long m_max_nr_corners;
322
323    // corner locations of reference image features:
324    double * m_x_corners_ref;
325    double * m_y_corners_ref;
326    int  m_nr_corners_ref;
327
328    // corner locations of inspection image features:
329    double * m_x_corners_ins;
330    double * m_y_corners_ins;
331    int      m_nr_corners_ins;
332
333    // length of match index arrays:
334    unsigned long m_max_nr_matches;
335
336    // match indices:
337    int * m_match_index_ref;
338    int * m_match_index_ins;
339    int   m_nr_matches;
340
341    // pointer to internal copy of the reference image:
342    unsigned char ** m_reference_image;
343
344    // pointer to internal copy of last aligned inspection image:
345    unsigned char ** m_aligned_ins_image;
346
347    // pointer to quarter resolution image, if used.
348    unsigned char** m_quarter_res_image;
349
350    // temporary storage for the quarter resolution image processing
351    unsigned char** m_horz_smooth_subsample_image;
352
353    // temporary space for homography computation:
354    double * m_temp_double;
355    int * m_temp_int;
356
357    // homogenous image point arrays:
358    double * m_corners_ref;
359    double * m_corners_ins;
360
361    // Indices of the points within the match lists
362    int * m_inlier_indices;
363    int m_num_inlier_indices;
364
365    //void ComputeInliers(double H[9], std::vector<int> &inlier_indices);
366    void ComputeInliers(double H[9]);
367
368    // cost arrays:
369    void ComputeCostArray();
370    bool m_sq_cost_computed;
371    double * m_sq_cost;
372
373    // cost histogram:
374    void ComputeCostHistogram();
375    int *m_cost_histogram;
376
377    void SetOutlierThreshold();
378
379    // utility function for smoothing the motion parameters.
380    void SmoothMotion(void);
381
382private:
383    double m_K[9];
384    const int m_over_allocation;
385
386    bool m_reference_set;
387
388    // Maximum number of inliers seen until now w.r.t the current reference frame
389    int m_max_inlier_count;
390
391    // Number of cost histogram bins:
392    int m_nr_bins;
393    // All costs above this threshold get put into the last bin:
394    int m_max_cost_pix;
395
396    // whether to quarter the image resolution for processing, or not
397    bool m_quarter_resolution;
398
399    // the period (in number of frames) for reference update.
400    unsigned int m_reference_update_period;
401
402    // the number of frames processed so far.
403    unsigned int m_nr_frames_processed;
404
405    // smoother for motion transformations
406    db_StabilizationSmoother m_stab_smoother;
407
408    // boolean to control whether motion smoothing occurs (or not)
409    bool m_do_motion_smoothing;
410
411    // double to set the gain for motion smoothing
412    double m_motion_smoothing_gain;
413};
414/*!
415 Create look-up tables to undistort images. Only Bougeut (Matlab toolkit)
416 is currently supported. Can be used with db_WarpImageLut_u().
417 \code
418    xd = H*xs;
419    xd = xd/xd(3);
420 \endcode
421 \param lut_x   pre-allocated float image
422 \param lut_y   pre-allocated float image
423 \param w       width
424 \param h       height
425 \param H       image homography from source to destination
426 */
427inline void db_GenerateHomographyLut(float ** lut_x,float ** lut_y,int w,int h,const double H[9])
428{
429    assert(lut_x && lut_y);
430    double x[3] = {0.0,0.0,1.0};
431    double xb[3];
432
433/*
434    double xl[3];
435
436    // Determine the output coordinate system ROI
437    double Hinv[9];
438    db_InvertAffineTransform(Hinv,H);
439    db_Multiply3x3_3x1(xl, Hinv, x);
440    xl[0] = db_SafeDivision(xl[0],xl[2]);
441    xl[1] = db_SafeDivision(xl[1],xl[2]);
442*/
443
444    for ( int i = 0; i < w; ++i )
445        for ( int j = 0; j < h; ++j )
446        {
447            x[0] = double(i);
448            x[1] = double(j);
449            db_Multiply3x3_3x1(xb, H, x);
450            xb[0] = db_SafeDivision(xb[0],xb[2]);
451            xb[1] = db_SafeDivision(xb[1],xb[2]);
452
453            lut_x[j][i] = float(xb[0]);
454            lut_y[j][i] = float(xb[1]);
455        }
456}
457
458/*!
459 * Perform a look-up table warp for packed RGB ([rgbrgbrgb...]) images.
460 * The LUTs must be float images of the same size as source image.
461 * The source value x_s is determined from destination (x_d,y_d) through lut_x
462 * and y_s is determined from lut_y:
463   \code
464   x_s = lut_x[y_d][x_d];
465   y_s = lut_y[y_d][x_d];
466   \endcode
467
468 * \param src   source image (w*3 by h)
469 * \param dst   destination image (w*3 by h)
470 * \param w     width
471 * \param h     height
472 * \param lut_x LUT for x
473 * \param lut_y LUT for y
474 */
475inline void db_WarpImageLutFast_rgb(const unsigned char * const * src, unsigned char ** dst, int w, int h,
476                                  const float * const * lut_x, const float * const * lut_y)
477{
478    assert(src && dst);
479    int xd=0, yd=0;
480
481    for ( int i = 0; i < w; ++i )
482        for ( int j = 0; j < h; ++j )
483        {
484            xd = static_cast<unsigned int>(lut_x[j][i]);
485            yd = static_cast<unsigned int>(lut_y[j][i]);
486            if ( xd >= w || yd >= h ||
487                 xd < 0 || yd < 0)
488            {
489                dst[j][3*i  ] = 0;
490                dst[j][3*i+1] = 0;
491                dst[j][3*i+2] = 0;
492            }
493            else
494            {
495                dst[j][3*i  ] = src[yd][3*xd  ];
496                dst[j][3*i+1] = src[yd][3*xd+1];
497                dst[j][3*i+2] = src[yd][3*xd+2];
498            }
499        }
500}
501
502inline unsigned char db_BilinearInterpolationRGB(double y, double x, const unsigned char * const * v, int offset)
503{
504         int floor_x=(int) x;
505         int floor_y=(int) y;
506
507         int ceil_x=floor_x+1;
508         int ceil_y=floor_y+1;
509
510         unsigned char f00 = v[floor_y][3*floor_x+offset];
511         unsigned char f01 = v[floor_y][3*ceil_x+offset];
512         unsigned char f10 = v[ceil_y][3*floor_x+offset];
513         unsigned char f11 = v[ceil_y][3*ceil_x+offset];
514
515         double xl = x-floor_x;
516         double yl = y-floor_y;
517
518         return (unsigned char)(f00*(1-yl)*(1-xl) + f10*yl*(1-xl) + f01*(1-yl)*xl + f11*yl*xl);
519}
520
521inline void db_WarpImageLutBilinear_rgb(const unsigned char * const * src, unsigned char ** dst, int w, int h,
522                                  const float * const * lut_x, const float * const * lut_y)
523{
524    assert(src && dst);
525    double xd=0.0, yd=0.0;
526
527    for ( int i = 0; i < w; ++i )
528        for ( int j = 0; j < h; ++j )
529        {
530            xd = static_cast<double>(lut_x[j][i]);
531            yd = static_cast<double>(lut_y[j][i]);
532            if ( xd > w-2 || yd > h-2 ||
533                 xd < 0.0 || yd < 0.0)
534            {
535                dst[j][3*i  ] = 0;
536                dst[j][3*i+1] = 0;
537                dst[j][3*i+2] = 0;
538            }
539            else
540            {
541                dst[j][3*i  ] = db_BilinearInterpolationRGB(yd,xd,src,0);
542                dst[j][3*i+1] = db_BilinearInterpolationRGB(yd,xd,src,1);
543                dst[j][3*i+2] = db_BilinearInterpolationRGB(yd,xd,src,2);
544            }
545        }
546}
547
548inline double SquaredInhomogenousHomographyError(double y[3],double H[9],double x[3]){
549    double x0,x1,x2,mult;
550    double sd;
551
552    x0=H[0]*x[0]+H[1]*x[1]+H[2];
553    x1=H[3]*x[0]+H[4]*x[1]+H[5];
554    x2=H[6]*x[0]+H[7]*x[1]+H[8];
555    mult=1.0/((x2!=0.0)?x2:1.0);
556    sd=(y[0]-x0*mult)*(y[0]-x0*mult)+(y[1]-x1*mult)*(y[1]-x1*mult);
557
558    return(sd);
559}
560
561
562// functions related to profiling
563#if PROFILE
564
565/* return current time in milliseconds */
566static double
567now_ms(void)
568{
569    //struct timespec res;
570    struct timeval res;
571    //clock_gettime(CLOCK_REALTIME, &res);
572    gettimeofday(&res, NULL);
573    return 1000.0*res.tv_sec + (double)res.tv_usec/1e3;
574}
575
576#endif
577