dbreg.h revision e295e32b68cf04f0d99138bf4a6d25555f3aef99
1/* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 18#pragma once 19 20#ifdef _WIN32 21#ifdef DBREG_EXPORTS 22#define DBREG_API __declspec(dllexport) 23#else 24#define DBREG_API __declspec(dllimport) 25#endif 26#else 27#define DBREG_API 28#endif 29 30// @jke - the next few lines are for extracting timing data. TODO: Remove after test 31#define PROFILE 0 32 33#include "dbstabsmooth.h" 34 35#include <db_feature_detection.h> 36#include <db_feature_matching.h> 37#include <db_rob_image_homography.h> 38 39#if PROFILE 40 #include <sys/time.h> 41#endif 42 43/*! \mainpage db_FrameToReferenceRegistration 44 45 \section intro Introduction 46 47 db_FrameToReferenceRegistration provides a simple interface to a set of sophisticated algorithms for stabilizing 48 video sequences. As its name suggests, the class is used to compute parameters that will allow us to warp incoming video 49 frames and register them with respect to a so-called <i>reference</i> frame. The reference frame is simply the first 50 frame of a sequence; the registration process is that of estimating the parameters of a warp that can be applied to 51 subsequent frames to make those frames align with the reference. A video made up of these warped frames will be more 52 stable than the input video. 53 54 For more technical information on the internal structure of the algorithms used within the db_FrameToRegistration class, 55 please follow this <a href="../Sarnoff image registration.docx">link</a>. 56 57 \section usage Usage 58 In addition to the class constructor, there are two main functions of db_FrameToReferenceRegistration that are of 59 interest to the programmer. db_FrameToReferenceRegistration::Init(...) is used to initialize the parameters of the 60 registration algorithm. db_FrameToReferenceRegistration::AddFrame(...) is the method by which each new video frame 61 is introduced to the registration algorithm, and produces the estimated registration warp parameters. 62 63 The following example illustrates how the major methods of the class db_FrameToReferenceRegistration can be used together 64 to calculate the registration parameters for an image sequence. In the example, the calls to the methods of 65 db_FrameToReferenceRegistration match those found in the API, but supporting code should be considered pseudo-code. 66 For a more complete example, please consult the source code for dbregtest. 67 68 69 \code 70 // feature-based image registration class: 71 db_FrameToReferenceRegistration reg; 72 73 // Image data 74 const unsigned char * const * image_storage; 75 76 // The 3x3 frame to reference registration parameters 77 double frame_to_ref_homography[9]; 78 79 // a counter to count the number of frames processed. 80 unsigned long frame_counter; 81 // ... 82 83 // main loop - keep going while there are images to process. 84 while (ImagesAreAvailable) 85 { 86 // Call functions to place latest data into image_storage 87 // ... 88 89 // if the registration object is not yet initialized, then do so 90 // The arguments to this function are explained in the accompanying 91 // html API documentation 92 if (!reg.Initialized()) 93 { 94 reg.Init(w,h,motion_model_type,25,linear_polish,quarter_resolution, 95 DB_POINT_STANDARDDEV,reference_update_period, 96 do_motion_smoothing,motion_smoothing_gain, 97 DB_DEFAULT_NR_SAMPLES,DB_DEFAULT_CHUNK_SIZE, 98 nr_corners,max_disparity); 99 } 100 101 // Present the new image data to the registration algorithm, 102 // with the result being stored in the frame_to_ref_homography 103 // variable. 104 reg.AddFrame(image_storage,frame_to_ref_homography); 105 106 // frame_to_ref_homography now contains the stabilizing transform 107 // use this to warp the latest image for display, etc. 108 109 // if this is the first frame, we need to tell the registration 110 // class to store the image as its reference. Otherwise, AddFrame 111 // takes care of that. 112 if (frame_counter == 0) 113 { 114 reg.UpdateReference(image_storage); 115 } 116 117 // increment the frame counter 118 frame_counter++; 119 } 120 121 \endcode 122 123 */ 124 125/*! 126 * Performs feature-based frame to reference image registration. 127 */ 128class DBREG_API db_FrameToReferenceRegistration 129{ 130public: 131 db_FrameToReferenceRegistration(void); 132 ~db_FrameToReferenceRegistration(); 133 134 /*! 135 * Set parameters and allocate memory. Note: The default values of these parameters have been set to the values used for the android implementation (i.e. the demo APK). 136 * \param width image width 137 * \param height image height 138 * \param homography_type see definitions in \ref LMRobImageHomography 139 * \param max_iterations max number of polishing steps 140 * \param linear_polish whether to perform a linear polishing step after RANSAC 141 * \param quarter_resolution whether to process input images at quarter resolution (for computational efficiency) 142 * \param scale Cauchy scale coefficient (see db_ExpCauchyReprojectionError() ) 143 * \param reference_update_period how often to update the alignment reference (in units of number of frames) 144 * \param do_motion_smoothing whether to perform display reference smoothing 145 * \param motion_smoothing_gain weight factor to reflect how fast the display reference must follow the current frame if motion smoothing is enabled 146 * \param nr_samples number of times to compute a hypothesis 147 * \param chunk_size size of cost chunks 148 * \param cd_target_nr_corners target number of corners for corner detector 149 * \param cm_max_disparity maximum disparity search range for corner matcher (in units of ratio of image width) 150 * \param cm_use_smaller_matching_window if set to true, uses a correlation window of 5x5 instead of the default 11x11 151 * \param cd_nr_horz_blocks the number of horizontal blocks for the corner detector to partition the image 152 * \param cd_nr_vert_blocks the number of vertical blocks for the corner detector to partition the image 153 */ 154 void Init(int width, int height, 155 int homography_type = DB_HOMOGRAPHY_TYPE_DEFAULT, 156 int max_iterations = DB_DEFAULT_MAX_ITERATIONS, 157 bool linear_polish = false, 158 bool quarter_resolution = true, 159 double scale = DB_POINT_STANDARDDEV, 160 unsigned int reference_update_period = 3, 161 bool do_motion_smoothing = false, 162 double motion_smoothing_gain = 0.75, 163 int nr_samples = DB_DEFAULT_NR_SAMPLES, 164 int chunk_size = DB_DEFAULT_CHUNK_SIZE, 165 int cd_target_nr_corners = 500, 166 double cm_max_disparity = 0.2, 167 bool cm_use_smaller_matching_window = false, 168 int cd_nr_horz_blocks = 5, 169 int cd_nr_vert_blocks = 5); 170 171 /*! 172 * Reset the transformation type that is being use to perform alignment. Use this to change the alignment type at run time. 173 * \param homography_type the type of transformation to use for performing alignment (see definitions in \ref LMRobImageHomography) 174 */ 175 void ResetHomographyType(int homography_type) { m_homography_type = homography_type; } 176 177 /*! 178 * Enable/Disable motion smoothing. Use this to turn motion smoothing on/off at run time. 179 * \param enable flag indicating whether to turn the motion smoothing on or off. 180 */ 181 void ResetSmoothing(bool enable) { m_do_motion_smoothing = enable; } 182 183 /*! 184 * Align an inspection image to an existing reference image, update the reference image if due and perform motion smoothing if enabled. 185 * \param im new inspection image 186 * \param H computed transformation from reference to inspection coordinate frame. Identity is returned if no reference frame was set. 187 * \param force_reference make this the new reference image 188 */ 189 int AddFrame(const unsigned char * const * im, double H[9], bool force_reference=false, bool prewarp=false); 190 191 /*! 192 * Returns true if Init() was run. 193 */ 194 bool Initialized() const { return m_initialized; } 195 196 /*! 197 * Returns true if the current frame is being used as the alignment reference. 198 */ 199 bool IsCurrentReference() const { return m_current_is_reference; } 200 201 /*! 202 * Returns true if we need to call UpdateReference now. 203 */ 204 bool NeedReferenceUpdate(); 205 206 /*! 207 * Returns the pointer reference to the alignment reference image data 208 */ 209 unsigned char ** GetReferenceImage() { return m_reference_image; } 210 211 /*! 212 * Returns the pointer reference to the double array containing the homogeneous coordinates for the matched reference image corners. 213 */ 214 double * GetRefCorners() { return m_corners_ref; } 215 /*! 216 * Returns the pointer reference to the double array containing the homogeneous coordinates for the matched inspection image corners. 217 */ 218 double * GetInsCorners() { return m_corners_ins; } 219 /*! 220 * Returns the number of correspondences between the reference and inspection images. 221 */ 222 int GetNrMatches() { return m_nr_matches; } 223 224 /*! 225 * Returns the pointer to an array of indices that were found to be RANSAC inliers from the matched corner lists. 226 */ 227 int* GetInliers() { return m_inlier_indices; } 228 229 /*! 230 * Returns the number of inliers from the RANSAC matching step. 231 */ 232 int GetNrInliers() { return m_num_inlier_indices; } 233 234 //std::vector<int>& GetInliers(); 235 //void Polish(std::vector<int> &inlier_indices); 236 237 /*! 238 * Perform a linear polishing step by re-estimating the alignment transformation using the RANSAC inliers. 239 * \param inlier_indices pointer to an array of indices that were found to be RANSAC inliers from the matched corner lists. 240 * \param num_inlier_indices number of inliers i.e. the length of the array passed as the first argument. 241 */ 242 void Polish(int *inlier_indices, int &num_inlier_indices); 243 244 /*! 245 * Reset the motion smoothing parameters to their initial values. 246 */ 247 void ResetMotionSmoothingParameters() { m_stab_smoother.Init(); } 248 249 /*! 250 * Update the alignment reference image to the specified image. 251 * \param im pointer to the image data to be used as the new alignment reference. 252 * \param subsample boolean flag to control whether the function should internally subsample the provided image to the size provided in the Init() function. 253 */ 254 int UpdateReference(const unsigned char * const * im, bool subsample = true, bool detect_corners = true); 255 256 /*! 257 * Returns the transformation from the display reference to the alignment reference frame 258 */ 259 void Get_H_dref_to_ref(double H[9]); 260 /*! 261 * Returns the transformation from the display reference to the inspection reference frame 262 */ 263 void Get_H_dref_to_ins(double H[9]); 264 /*! 265 * Set the transformation from the display reference to the inspection reference frame 266 * \param H the transformation to set 267 */ 268 void Set_H_dref_to_ins(double H[9]); 269 270 /*! 271 * Reset the display reference to the current frame. 272 */ 273 void ResetDisplayReference(); 274 275 /*! 276 * Estimate a secondary motion model starting from the specified transformation. 277 * \param H the primary motion model to start from 278 */ 279 void EstimateSecondaryModel(double H[9]); 280 281 /*! 282 * 283 */ 284 void SelectOutliers(); 285 286 char *profile_string; 287 288protected: 289 void Clean(); 290 void GenerateQuarterResImage(const unsigned char* const * im); 291 292 int m_im_width; 293 int m_im_height; 294 295 // RANSAC and refinement parameters: 296 int m_homography_type; 297 int m_max_iterations; 298 double m_scale; 299 int m_nr_samples; 300 int m_chunk_size; 301 double m_outlier_t2; 302 303 // Whether to fit a linear model to just the inliers at the end 304 bool m_linear_polish; 305 double m_polish_C[36]; 306 double m_polish_D[6]; 307 308 // local state 309 bool m_current_is_reference; 310 bool m_initialized; 311 312 // inspection to reference homography: 313 double m_H_ref_to_ins[9]; 314 double m_H_dref_to_ref[9]; 315 316 // feature extraction and matching: 317 db_CornerDetector_u m_cd; 318 db_Matcher_u m_cm; 319 320 // length of corner arrays: 321 unsigned long m_max_nr_corners; 322 323 // corner locations of reference image features: 324 double * m_x_corners_ref; 325 double * m_y_corners_ref; 326 int m_nr_corners_ref; 327 328 // corner locations of inspection image features: 329 double * m_x_corners_ins; 330 double * m_y_corners_ins; 331 int m_nr_corners_ins; 332 333 // length of match index arrays: 334 unsigned long m_max_nr_matches; 335 336 // match indices: 337 int * m_match_index_ref; 338 int * m_match_index_ins; 339 int m_nr_matches; 340 341 // pointer to internal copy of the reference image: 342 unsigned char ** m_reference_image; 343 344 // pointer to internal copy of last aligned inspection image: 345 unsigned char ** m_aligned_ins_image; 346 347 // pointer to quarter resolution image, if used. 348 unsigned char** m_quarter_res_image; 349 350 // temporary storage for the quarter resolution image processing 351 unsigned char** m_horz_smooth_subsample_image; 352 353 // temporary space for homography computation: 354 double * m_temp_double; 355 int * m_temp_int; 356 357 // homogenous image point arrays: 358 double * m_corners_ref; 359 double * m_corners_ins; 360 361 // Indices of the points within the match lists 362 int * m_inlier_indices; 363 int m_num_inlier_indices; 364 365 //void ComputeInliers(double H[9], std::vector<int> &inlier_indices); 366 void ComputeInliers(double H[9]); 367 368 // cost arrays: 369 void ComputeCostArray(); 370 bool m_sq_cost_computed; 371 double * m_sq_cost; 372 373 // cost histogram: 374 void ComputeCostHistogram(); 375 int *m_cost_histogram; 376 377 void SetOutlierThreshold(); 378 379 // utility function for smoothing the motion parameters. 380 void SmoothMotion(void); 381 382private: 383 double m_K[9]; 384 const int m_over_allocation; 385 386 bool m_reference_set; 387 388 // Maximum number of inliers seen until now w.r.t the current reference frame 389 int m_max_inlier_count; 390 391 // Number of cost histogram bins: 392 int m_nr_bins; 393 // All costs above this threshold get put into the last bin: 394 int m_max_cost_pix; 395 396 // whether to quarter the image resolution for processing, or not 397 bool m_quarter_resolution; 398 399 // the period (in number of frames) for reference update. 400 unsigned int m_reference_update_period; 401 402 // the number of frames processed so far. 403 unsigned int m_nr_frames_processed; 404 405 // smoother for motion transformations 406 db_StabilizationSmoother m_stab_smoother; 407 408 // boolean to control whether motion smoothing occurs (or not) 409 bool m_do_motion_smoothing; 410 411 // double to set the gain for motion smoothing 412 double m_motion_smoothing_gain; 413}; 414/*! 415 Create look-up tables to undistort images. Only Bougeut (Matlab toolkit) 416 is currently supported. Can be used with db_WarpImageLut_u(). 417 \code 418 xd = H*xs; 419 xd = xd/xd(3); 420 \endcode 421 \param lut_x pre-allocated float image 422 \param lut_y pre-allocated float image 423 \param w width 424 \param h height 425 \param H image homography from source to destination 426 */ 427inline void db_GenerateHomographyLut(float ** lut_x,float ** lut_y,int w,int h,const double H[9]) 428{ 429 assert(lut_x && lut_y); 430 double x[3] = {0.0,0.0,1.0}; 431 double xb[3]; 432 433/* 434 double xl[3]; 435 436 // Determine the output coordinate system ROI 437 double Hinv[9]; 438 db_InvertAffineTransform(Hinv,H); 439 db_Multiply3x3_3x1(xl, Hinv, x); 440 xl[0] = db_SafeDivision(xl[0],xl[2]); 441 xl[1] = db_SafeDivision(xl[1],xl[2]); 442*/ 443 444 for ( int i = 0; i < w; ++i ) 445 for ( int j = 0; j < h; ++j ) 446 { 447 x[0] = double(i); 448 x[1] = double(j); 449 db_Multiply3x3_3x1(xb, H, x); 450 xb[0] = db_SafeDivision(xb[0],xb[2]); 451 xb[1] = db_SafeDivision(xb[1],xb[2]); 452 453 lut_x[j][i] = float(xb[0]); 454 lut_y[j][i] = float(xb[1]); 455 } 456} 457 458/*! 459 * Perform a look-up table warp for packed RGB ([rgbrgbrgb...]) images. 460 * The LUTs must be float images of the same size as source image. 461 * The source value x_s is determined from destination (x_d,y_d) through lut_x 462 * and y_s is determined from lut_y: 463 \code 464 x_s = lut_x[y_d][x_d]; 465 y_s = lut_y[y_d][x_d]; 466 \endcode 467 468 * \param src source image (w*3 by h) 469 * \param dst destination image (w*3 by h) 470 * \param w width 471 * \param h height 472 * \param lut_x LUT for x 473 * \param lut_y LUT for y 474 */ 475inline void db_WarpImageLutFast_rgb(const unsigned char * const * src, unsigned char ** dst, int w, int h, 476 const float * const * lut_x, const float * const * lut_y) 477{ 478 assert(src && dst); 479 int xd=0, yd=0; 480 481 for ( int i = 0; i < w; ++i ) 482 for ( int j = 0; j < h; ++j ) 483 { 484 xd = static_cast<unsigned int>(lut_x[j][i]); 485 yd = static_cast<unsigned int>(lut_y[j][i]); 486 if ( xd >= w || yd >= h || 487 xd < 0 || yd < 0) 488 { 489 dst[j][3*i ] = 0; 490 dst[j][3*i+1] = 0; 491 dst[j][3*i+2] = 0; 492 } 493 else 494 { 495 dst[j][3*i ] = src[yd][3*xd ]; 496 dst[j][3*i+1] = src[yd][3*xd+1]; 497 dst[j][3*i+2] = src[yd][3*xd+2]; 498 } 499 } 500} 501 502inline unsigned char db_BilinearInterpolationRGB(double y, double x, const unsigned char * const * v, int offset) 503{ 504 int floor_x=(int) x; 505 int floor_y=(int) y; 506 507 int ceil_x=floor_x+1; 508 int ceil_y=floor_y+1; 509 510 unsigned char f00 = v[floor_y][3*floor_x+offset]; 511 unsigned char f01 = v[floor_y][3*ceil_x+offset]; 512 unsigned char f10 = v[ceil_y][3*floor_x+offset]; 513 unsigned char f11 = v[ceil_y][3*ceil_x+offset]; 514 515 double xl = x-floor_x; 516 double yl = y-floor_y; 517 518 return (unsigned char)(f00*(1-yl)*(1-xl) + f10*yl*(1-xl) + f01*(1-yl)*xl + f11*yl*xl); 519} 520 521inline void db_WarpImageLutBilinear_rgb(const unsigned char * const * src, unsigned char ** dst, int w, int h, 522 const float * const * lut_x, const float * const * lut_y) 523{ 524 assert(src && dst); 525 double xd=0.0, yd=0.0; 526 527 for ( int i = 0; i < w; ++i ) 528 for ( int j = 0; j < h; ++j ) 529 { 530 xd = static_cast<double>(lut_x[j][i]); 531 yd = static_cast<double>(lut_y[j][i]); 532 if ( xd > w-2 || yd > h-2 || 533 xd < 0.0 || yd < 0.0) 534 { 535 dst[j][3*i ] = 0; 536 dst[j][3*i+1] = 0; 537 dst[j][3*i+2] = 0; 538 } 539 else 540 { 541 dst[j][3*i ] = db_BilinearInterpolationRGB(yd,xd,src,0); 542 dst[j][3*i+1] = db_BilinearInterpolationRGB(yd,xd,src,1); 543 dst[j][3*i+2] = db_BilinearInterpolationRGB(yd,xd,src,2); 544 } 545 } 546} 547 548inline double SquaredInhomogenousHomographyError(double y[3],double H[9],double x[3]){ 549 double x0,x1,x2,mult; 550 double sd; 551 552 x0=H[0]*x[0]+H[1]*x[1]+H[2]; 553 x1=H[3]*x[0]+H[4]*x[1]+H[5]; 554 x2=H[6]*x[0]+H[7]*x[1]+H[8]; 555 mult=1.0/((x2!=0.0)?x2:1.0); 556 sd=(y[0]-x0*mult)*(y[0]-x0*mult)+(y[1]-x1*mult)*(y[1]-x1*mult); 557 558 return(sd); 559} 560 561 562// functions related to profiling 563#if PROFILE 564 565/* return current time in milliseconds */ 566static double 567now_ms(void) 568{ 569 //struct timespec res; 570 struct timeval res; 571 //clock_gettime(CLOCK_REALTIME, &res); 572 gettimeofday(&res, NULL); 573 return 1000.0*res.tv_sec + (double)res.tv_usec/1e3; 574} 575 576#endif 577