1/*M/////////////////////////////////////////////////////////////////////////////////////// 2// 3// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4// 5// By downloading, copying, installing or using the software you agree to this license. 6// If you do not agree to this license, do not download, install, 7// copy or use the software. 8// 9// 10// License Agreement 11// For Open Source Computer Vision Library 12// 13// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14// Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15// Third party copyrights are property of their respective owners. 16// 17// Redistribution and use in source and binary forms, with or without modification, 18// are permitted provided that the following conditions are met: 19// 20// * Redistribution's of source code must retain the above copyright notice, 21// this list of conditions and the following disclaimer. 22// 23// * Redistribution's in binary form must reproduce the above copyright notice, 24// this list of conditions and the following disclaimer in the documentation 25// and/or other materials provided with the distribution. 26// 27// * The name of the copyright holders may not be used to endorse or promote products 28// derived from this software without specific prior written permission. 29// 30// This software is provided by the copyright holders and contributors "as is" and 31// any express or implied warranties, including, but not limited to, the implied 32// warranties of merchantability and fitness for a particular purpose are disclaimed. 33// In no event shall the Intel Corporation or contributors be liable for any direct, 34// indirect, incidental, special, exemplary, or consequential damages 35// (including, but not limited to, procurement of substitute goods or services; 36// loss of use, data, or profits; or business interruption) however caused 37// and on any theory of liability, whether in contract, strict liability, 38// or tort (including negligence or otherwise) arising in any way out of 39// the use of this software, even if advised of the possibility of such damage. 40// 41//M*/ 42 43#include "precomp.hpp" 44 45using namespace cv; 46using namespace cv::cuda; 47 48#if !defined HAVE_CUDA || defined(CUDA_DISABLER) 49 50Ptr<FarnebackOpticalFlow> cv::cuda::FarnebackOpticalFlow::create(int, double, bool, int, int, int, double, int) { throw_no_cuda(); return Ptr<BroxOpticalFlow>(); } 51 52#else 53 54#define MIN_SIZE 32 55 56// CUDA resize() is fast, but it differs from the CPU analog. Disabling this flag 57// leads to an inefficient code. It's for debug purposes only. 58#define ENABLE_CUDA_RESIZE 1 59 60namespace cv { namespace cuda { namespace device { namespace optflow_farneback 61{ 62 void setPolynomialExpansionConsts( 63 int polyN, const float *g, const float *xg, const float *xxg, 64 float ig11, float ig03, float ig33, float ig55); 65 66 void polynomialExpansionGpu(const PtrStepSzf &src, int polyN, PtrStepSzf dst, cudaStream_t stream); 67 68 void setUpdateMatricesConsts(); 69 70 void updateMatricesGpu( 71 const PtrStepSzf flowx, const PtrStepSzf flowy, const PtrStepSzf R0, const PtrStepSzf R1, 72 PtrStepSzf M, cudaStream_t stream); 73 74 void updateFlowGpu( 75 const PtrStepSzf M, PtrStepSzf flowx, PtrStepSzf flowy, cudaStream_t stream); 76 77 void boxFilter5Gpu(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream); 78 79 void boxFilter5Gpu_CC11(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream); 80 81 void setGaussianBlurKernel(const float *gKer, int ksizeHalf); 82 83 void gaussianBlurGpu( 84 const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, int borderType, cudaStream_t stream); 85 86 void gaussianBlur5Gpu( 87 const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, int borderType, cudaStream_t stream); 88 89 void gaussianBlur5Gpu_CC11( 90 const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, int borderType, cudaStream_t stream); 91 92}}}} 93 94namespace 95{ 96 class FarnebackOpticalFlowImpl : public FarnebackOpticalFlow 97 { 98 public: 99 FarnebackOpticalFlowImpl(int numLevels, double pyrScale, bool fastPyramids, int winSize, 100 int numIters, int polyN, double polySigma, int flags) : 101 numLevels_(numLevels), pyrScale_(pyrScale), fastPyramids_(fastPyramids), winSize_(winSize), 102 numIters_(numIters), polyN_(polyN), polySigma_(polySigma), flags_(flags) 103 { 104 } 105 106 virtual int getNumLevels() const { return numLevels_; } 107 virtual void setNumLevels(int numLevels) { numLevels_ = numLevels; } 108 109 virtual double getPyrScale() const { return pyrScale_; } 110 virtual void setPyrScale(double pyrScale) { pyrScale_ = pyrScale; } 111 112 virtual bool getFastPyramids() const { return fastPyramids_; } 113 virtual void setFastPyramids(bool fastPyramids) { fastPyramids_ = fastPyramids; } 114 115 virtual int getWinSize() const { return winSize_; } 116 virtual void setWinSize(int winSize) { winSize_ = winSize; } 117 118 virtual int getNumIters() const { return numIters_; } 119 virtual void setNumIters(int numIters) { numIters_ = numIters; } 120 121 virtual int getPolyN() const { return polyN_; } 122 virtual void setPolyN(int polyN) { polyN_ = polyN; } 123 124 virtual double getPolySigma() const { return polySigma_; } 125 virtual void setPolySigma(double polySigma) { polySigma_ = polySigma; } 126 127 virtual int getFlags() const { return flags_; } 128 virtual void setFlags(int flags) { flags_ = flags; } 129 130 virtual void calc(InputArray I0, InputArray I1, InputOutputArray flow, Stream& stream); 131 132 private: 133 int numLevels_; 134 double pyrScale_; 135 bool fastPyramids_; 136 int winSize_; 137 int numIters_; 138 int polyN_; 139 double polySigma_; 140 int flags_; 141 142 private: 143 void prepareGaussian( 144 int n, double sigma, float *g, float *xg, float *xxg, 145 double &ig11, double &ig03, double &ig33, double &ig55); 146 147 void setPolynomialExpansionConsts(int n, double sigma); 148 149 void updateFlow_boxFilter( 150 const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat &flowy, 151 GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]); 152 153 void updateFlow_gaussianBlur( 154 const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy, 155 GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]); 156 157 void calcImpl(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &stream); 158 159 GpuMat frames_[2]; 160 GpuMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2]; 161 std::vector<GpuMat> pyramid0_, pyramid1_; 162 }; 163 164 void FarnebackOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOutputArray _flow, Stream& stream) 165 { 166 const GpuMat frame0 = _frame0.getGpuMat(); 167 const GpuMat frame1 = _frame1.getGpuMat(); 168 169 BufferPool pool(stream); 170 GpuMat flowx = pool.getBuffer(frame0.size(), CV_32FC1); 171 GpuMat flowy = pool.getBuffer(frame0.size(), CV_32FC1); 172 173 calcImpl(frame0, frame1, flowx, flowy, stream); 174 175 GpuMat flows[] = {flowx, flowy}; 176 cuda::merge(flows, 2, _flow, stream); 177 } 178 179 GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat& mat) 180 { 181 if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols) 182 return mat(Rect(0, 0, cols, rows)); 183 184 return mat = GpuMat(rows, cols, type); 185 } 186 187 void FarnebackOpticalFlowImpl::prepareGaussian( 188 int n, double sigma, float *g, float *xg, float *xxg, 189 double &ig11, double &ig03, double &ig33, double &ig55) 190 { 191 double s = 0.; 192 for (int x = -n; x <= n; x++) 193 { 194 g[x] = (float)std::exp(-x*x/(2*sigma*sigma)); 195 s += g[x]; 196 } 197 198 s = 1./s; 199 for (int x = -n; x <= n; x++) 200 { 201 g[x] = (float)(g[x]*s); 202 xg[x] = (float)(x*g[x]); 203 xxg[x] = (float)(x*x*g[x]); 204 } 205 206 Mat_<double> G(6, 6); 207 G.setTo(0); 208 209 for (int y = -n; y <= n; y++) 210 { 211 for (int x = -n; x <= n; x++) 212 { 213 G(0,0) += g[y]*g[x]; 214 G(1,1) += g[y]*g[x]*x*x; 215 G(3,3) += g[y]*g[x]*x*x*x*x; 216 G(5,5) += g[y]*g[x]*x*x*y*y; 217 } 218 } 219 220 //G[0][0] = 1.; 221 G(2,2) = G(0,3) = G(0,4) = G(3,0) = G(4,0) = G(1,1); 222 G(4,4) = G(3,3); 223 G(3,4) = G(4,3) = G(5,5); 224 225 // invG: 226 // [ x e e ] 227 // [ y ] 228 // [ y ] 229 // [ e z ] 230 // [ e z ] 231 // [ u ] 232 Mat_<double> invG = G.inv(DECOMP_CHOLESKY); 233 234 ig11 = invG(1,1); 235 ig03 = invG(0,3); 236 ig33 = invG(3,3); 237 ig55 = invG(5,5); 238 } 239 240 void FarnebackOpticalFlowImpl::setPolynomialExpansionConsts(int n, double sigma) 241 { 242 std::vector<float> buf(n*6 + 3); 243 float* g = &buf[0] + n; 244 float* xg = g + n*2 + 1; 245 float* xxg = xg + n*2 + 1; 246 247 if (sigma < FLT_EPSILON) 248 sigma = n*0.3; 249 250 double ig11, ig03, ig33, ig55; 251 prepareGaussian(n, sigma, g, xg, xxg, ig11, ig03, ig33, ig55); 252 253 device::optflow_farneback::setPolynomialExpansionConsts(n, g, xg, xxg, static_cast<float>(ig11), static_cast<float>(ig03), static_cast<float>(ig33), static_cast<float>(ig55)); 254 } 255 256 void FarnebackOpticalFlowImpl::updateFlow_boxFilter( 257 const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat &flowy, 258 GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]) 259 { 260 if (deviceSupports(FEATURE_SET_COMPUTE_12)) 261 device::optflow_farneback::boxFilter5Gpu(M, blockSize/2, bufM, StreamAccessor::getStream(streams[0])); 262 else 263 device::optflow_farneback::boxFilter5Gpu_CC11(M, blockSize/2, bufM, StreamAccessor::getStream(streams[0])); 264 swap(M, bufM); 265 266 for (int i = 1; i < 5; ++i) 267 streams[i].waitForCompletion(); 268 device::optflow_farneback::updateFlowGpu(M, flowx, flowy, StreamAccessor::getStream(streams[0])); 269 270 if (updateMatrices) 271 device::optflow_farneback::updateMatricesGpu(flowx, flowy, R0, R1, M, StreamAccessor::getStream(streams[0])); 272 } 273 274 void FarnebackOpticalFlowImpl::updateFlow_gaussianBlur( 275 const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy, 276 GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]) 277 { 278 if (deviceSupports(FEATURE_SET_COMPUTE_12)) 279 device::optflow_farneback::gaussianBlur5Gpu( 280 M, blockSize/2, bufM, BORDER_REPLICATE, StreamAccessor::getStream(streams[0])); 281 else 282 device::optflow_farneback::gaussianBlur5Gpu_CC11( 283 M, blockSize/2, bufM, BORDER_REPLICATE, StreamAccessor::getStream(streams[0])); 284 swap(M, bufM); 285 286 device::optflow_farneback::updateFlowGpu(M, flowx, flowy, StreamAccessor::getStream(streams[0])); 287 288 if (updateMatrices) 289 device::optflow_farneback::updateMatricesGpu(flowx, flowy, R0, R1, M, StreamAccessor::getStream(streams[0])); 290 } 291 292 void FarnebackOpticalFlowImpl::calcImpl(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &stream) 293 { 294 CV_Assert(frame0.channels() == 1 && frame1.channels() == 1); 295 CV_Assert(frame0.size() == frame1.size()); 296 CV_Assert(polyN_ == 5 || polyN_ == 7); 297 CV_Assert(!fastPyramids_ || std::abs(pyrScale_ - 0.5) < 1e-6); 298 299 Stream streams[5]; 300 if (stream) 301 streams[0] = stream; 302 303 Size size = frame0.size(); 304 GpuMat prevFlowX, prevFlowY, curFlowX, curFlowY; 305 306 flowx.create(size, CV_32F); 307 flowy.create(size, CV_32F); 308 GpuMat flowx0 = flowx; 309 GpuMat flowy0 = flowy; 310 311 // Crop unnecessary levels 312 double scale = 1; 313 int numLevelsCropped = 0; 314 for (; numLevelsCropped < numLevels_; numLevelsCropped++) 315 { 316 scale *= pyrScale_; 317 if (size.width*scale < MIN_SIZE || size.height*scale < MIN_SIZE) 318 break; 319 } 320 321 frame0.convertTo(frames_[0], CV_32F, streams[0]); 322 frame1.convertTo(frames_[1], CV_32F, streams[1]); 323 324 if (fastPyramids_) 325 { 326 // Build Gaussian pyramids using pyrDown() 327 pyramid0_.resize(numLevelsCropped + 1); 328 pyramid1_.resize(numLevelsCropped + 1); 329 pyramid0_[0] = frames_[0]; 330 pyramid1_[0] = frames_[1]; 331 for (int i = 1; i <= numLevelsCropped; ++i) 332 { 333 cuda::pyrDown(pyramid0_[i - 1], pyramid0_[i], streams[0]); 334 cuda::pyrDown(pyramid1_[i - 1], pyramid1_[i], streams[1]); 335 } 336 } 337 338 setPolynomialExpansionConsts(polyN_, polySigma_); 339 device::optflow_farneback::setUpdateMatricesConsts(); 340 341 for (int k = numLevelsCropped; k >= 0; k--) 342 { 343 streams[0].waitForCompletion(); 344 345 scale = 1; 346 for (int i = 0; i < k; i++) 347 scale *= pyrScale_; 348 349 double sigma = (1./scale - 1) * 0.5; 350 int smoothSize = cvRound(sigma*5) | 1; 351 smoothSize = std::max(smoothSize, 3); 352 353 int width = cvRound(size.width*scale); 354 int height = cvRound(size.height*scale); 355 356 if (fastPyramids_) 357 { 358 width = pyramid0_[k].cols; 359 height = pyramid0_[k].rows; 360 } 361 362 if (k > 0) 363 { 364 curFlowX.create(height, width, CV_32F); 365 curFlowY.create(height, width, CV_32F); 366 } 367 else 368 { 369 curFlowX = flowx0; 370 curFlowY = flowy0; 371 } 372 373 if (!prevFlowX.data) 374 { 375 if (flags_ & OPTFLOW_USE_INITIAL_FLOW) 376 { 377 cuda::resize(flowx0, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]); 378 cuda::resize(flowy0, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]); 379 curFlowX.convertTo(curFlowX, curFlowX.depth(), scale, streams[0]); 380 curFlowY.convertTo(curFlowY, curFlowY.depth(), scale, streams[1]); 381 } 382 else 383 { 384 curFlowX.setTo(0, streams[0]); 385 curFlowY.setTo(0, streams[1]); 386 } 387 } 388 else 389 { 390 cuda::resize(prevFlowX, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]); 391 cuda::resize(prevFlowY, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]); 392 curFlowX.convertTo(curFlowX, curFlowX.depth(), 1./pyrScale_, streams[0]); 393 curFlowY.convertTo(curFlowY, curFlowY.depth(), 1./pyrScale_, streams[1]); 394 } 395 396 GpuMat M = allocMatFromBuf(5*height, width, CV_32F, M_); 397 GpuMat bufM = allocMatFromBuf(5*height, width, CV_32F, bufM_); 398 GpuMat R[2] = 399 { 400 allocMatFromBuf(5*height, width, CV_32F, R_[0]), 401 allocMatFromBuf(5*height, width, CV_32F, R_[1]) 402 }; 403 404 if (fastPyramids_) 405 { 406 device::optflow_farneback::polynomialExpansionGpu(pyramid0_[k], polyN_, R[0], StreamAccessor::getStream(streams[0])); 407 device::optflow_farneback::polynomialExpansionGpu(pyramid1_[k], polyN_, R[1], StreamAccessor::getStream(streams[1])); 408 } 409 else 410 { 411 GpuMat blurredFrame[2] = 412 { 413 allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[0]), 414 allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[1]) 415 }; 416 GpuMat pyrLevel[2] = 417 { 418 allocMatFromBuf(height, width, CV_32F, pyrLevel_[0]), 419 allocMatFromBuf(height, width, CV_32F, pyrLevel_[1]) 420 }; 421 422 Mat g = getGaussianKernel(smoothSize, sigma, CV_32F); 423 device::optflow_farneback::setGaussianBlurKernel(g.ptr<float>(smoothSize/2), smoothSize/2); 424 425 for (int i = 0; i < 2; i++) 426 { 427 device::optflow_farneback::gaussianBlurGpu( 428 frames_[i], smoothSize/2, blurredFrame[i], BORDER_REFLECT101, StreamAccessor::getStream(streams[i])); 429 cuda::resize(blurredFrame[i], pyrLevel[i], Size(width, height), 0.0, 0.0, INTER_LINEAR, streams[i]); 430 device::optflow_farneback::polynomialExpansionGpu(pyrLevel[i], polyN_, R[i], StreamAccessor::getStream(streams[i])); 431 } 432 } 433 434 streams[1].waitForCompletion(); 435 device::optflow_farneback::updateMatricesGpu(curFlowX, curFlowY, R[0], R[1], M, StreamAccessor::getStream(streams[0])); 436 437 if (flags_ & OPTFLOW_FARNEBACK_GAUSSIAN) 438 { 439 Mat g = getGaussianKernel(winSize_, winSize_/2*0.3f, CV_32F); 440 device::optflow_farneback::setGaussianBlurKernel(g.ptr<float>(winSize_/2), winSize_/2); 441 } 442 for (int i = 0; i < numIters_; i++) 443 { 444 if (flags_ & OPTFLOW_FARNEBACK_GAUSSIAN) 445 updateFlow_gaussianBlur(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize_, i < numIters_-1, streams); 446 else 447 updateFlow_boxFilter(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize_, i < numIters_-1, streams); 448 } 449 450 prevFlowX = curFlowX; 451 prevFlowY = curFlowY; 452 } 453 454 flowx = curFlowX; 455 flowy = curFlowY; 456 457 if (!stream) 458 streams[0].waitForCompletion(); 459 } 460} 461 462Ptr<FarnebackOpticalFlow> cv::cuda::FarnebackOpticalFlow::create(int numLevels, double pyrScale, bool fastPyramids, int winSize, 463 int numIters, int polyN, double polySigma, int flags) 464{ 465 return makePtr<FarnebackOpticalFlowImpl>(numLevels, pyrScale, fastPyramids, winSize, 466 numIters, polyN, polySigma, flags); 467} 468 469#endif 470