1/*M/////////////////////////////////////////////////////////////////////////////////////// 2// 3// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4// 5// By downloading, copying, installing or using the software you agree to this license. 6// If you do not agree to this license, do not download, install, 7// copy or use the software. 8// 9// 10// License Agreement 11// For Open Source Computer Vision Library 12// 13// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14// Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15// Third party copyrights are property of their respective owners. 16// 17// Redistribution and use in source and binary forms, with or without modification, 18// are permitted provided that the following conditions are met: 19// 20// * Redistribution's of source code must retain the above copyright notice, 21// this list of conditions and the following disclaimer. 22// 23// * Redistribution's in binary form must reproduce the above copyright notice, 24// this list of conditions and the following disclaimer in the documentation 25// and/or other materials provided with the distribution. 26// 27// * The name of the copyright holders may not be used to endorse or promote products 28// derived from this software without specific prior written permission. 29// 30// This software is provided by the copyright holders and contributors "as is" and 31// any express or implied warranties, including, but not limited to, the implied 32// warranties of merchantability and fitness for a particular purpose are disclaimed. 
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"

using namespace cv;
using namespace cv::cuda;

#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

// CUDA support compiled out: the factory just reports the missing backend.
Ptr<cv::cuda::DescriptorMatcher> cv::cuda::DescriptorMatcher::createBFMatcher(int) { throw_no_cuda(); return Ptr<cv::cuda::DescriptorMatcher>(); }

#else /* !defined (HAVE_CUDA) */

// Forward declarations of the device-side entry points (implemented in the
// corresponding .cu files). Each family comes in an L1 / L2 / Hamming variant,
// instantiated per descriptor element type T, and in two flavours:
//   - single train set (per-query mask),
//   - collection of train sets (per-image masks, extra imgIdx output).
// All of them take an optional cudaStream_t for asynchronous execution.
namespace cv { namespace cuda { namespace device
{
    namespace bf_match
    {
        // 1-to-1 best match against a single train set.
        template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
                                               const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
                                               cudaStream_t stream);
        template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
                                               const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
                                               cudaStream_t stream);
        template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
                                                    const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
                                                    cudaStream_t stream);

        // 1-to-1 best match against a packed collection of train sets;
        // additionally reports which image each match came from (imgIdx).
        template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
                                               const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
                                               cudaStream_t stream);
        template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
                                               const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
                                               cudaStream_t stream);
        template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
                                                    const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
                                                    cudaStream_t stream);
    }

    namespace bf_knnmatch
    {
        // k-nearest matches against a single train set. trainIdx/distance are
        // typed as raw byte steps (PtrStepSzb) because the element layout
        // depends on k (2-channel packed for k == 2, plain matrices otherwise).
        template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
                                               const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
                                               cudaStream_t stream);
        template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
                                               const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
                                               cudaStream_t stream);
        template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
                                                    const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
                                                    cudaStream_t stream);

        // 2-nearest matches (k == 2 only) against a packed collection of train sets.
        template <typename T> void match2L1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
                                                const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
                                                cudaStream_t stream);
        template <typename T> void match2L2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
                                                const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
                                                cudaStream_t stream);
        template <typename T> void match2Hamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
                                                     const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
                                                     cudaStream_t stream);
    }

    namespace bf_radius_match
    {
        // All matches within maxDistance against a single train set; nMatches
        // receives the per-query match count (may exceed the row capacity,
        // callers clamp when reading back).
        template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
                                               const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
                                               cudaStream_t stream);
        template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
                                               const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
                                               cudaStream_t stream);
        template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
                                                    const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
                                                    cudaStream_t stream);

        // Radius match against n train sets passed as host arrays of device
        // headers (note: raw pointers here, unlike the uploaded collections above).
        template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
                                               const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
                                               cudaStream_t stream);

        template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
                                               const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
                                               cudaStream_t stream);

        template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
                                                    const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
                                                    cudaStream_t stream);
    }
}}}

namespace
{
    // Packs a vector of train-descriptor GpuMats (and optional per-image masks)
    // into flat device arrays of PtrStepSzb/PtrStepb headers, so a kernel can
    // iterate the whole collection. The headers are first laid out in a CPU Mat
    // of raw bytes (CV_8UC(sizeof(header))) and then uploaded in one transfer.
    // With non-empty masks, each mask is validated against its train matrix.
    static void makeGpuCollection(const std::vector<GpuMat>& trainDescCollection,
                                  const std::vector<GpuMat>& masks,
                                  GpuMat& trainCollection,
                                  GpuMat& maskCollection)
    {
        if (trainDescCollection.empty())
            return;

        if (masks.empty())
        {
            Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepSzb)));

            PtrStepSzb* trainCollectionCPU_ptr = trainCollectionCPU.ptr<PtrStepSzb>();

            for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr)
                *trainCollectionCPU_ptr = trainDescCollection[i];

            trainCollection.upload(trainCollectionCPU);
            maskCollection.release();
        }
        else
        {
            CV_Assert( masks.size() == trainDescCollection.size() );

            Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepSzb)));
            Mat maskCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepb)));

            PtrStepSzb* trainCollectionCPU_ptr = trainCollectionCPU.ptr<PtrStepSzb>();
            PtrStepb* maskCollectionCPU_ptr = maskCollectionCPU.ptr<PtrStepb>();

            for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr, ++maskCollectionCPU_ptr)
            {
                const GpuMat& train = trainDescCollection[i];
                const GpuMat& mask = masks[i];

                // Each mask row-count must equal the number of query rows it is
                // applied to; its column count must match the train row count.
                CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.cols == train.rows) );

                *trainCollectionCPU_ptr = train;
                *maskCollectionCPU_ptr = mask;
            }

            trainCollection.upload(trainCollectionCPU);
            maskCollection.upload(maskCollectionCPU);
        }
    }

    // Brute-force GPU descriptor matcher. Supports L1, L2 and Hamming norms;
    // dispatch to the right kernel instantiation is done via per-norm caller
    // tables indexed by query.depth(). Synchronous match/knnMatch/radiusMatch
    // calls are implemented as (Async kernel launch + Convert of the downloaded
    // GPU result).
    class BFMatcher_Impl : public cv::cuda::DescriptorMatcher
    {
    public:
        explicit BFMatcher_Impl(int norm) : norm_(norm)
        {
            CV_Assert( norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING );
        }

        // Brute-force matching applies masks directly in the kernels.
        virtual bool isMaskSupported() const { return true; }

        virtual void add(const std::vector<GpuMat>& descriptors)
        {
            trainDescCollection_.insert(trainDescCollection_.end(), descriptors.begin(), descriptors.end());
        }

        virtual const std::vector<GpuMat>& getTrainDescriptors() const
        {
            return trainDescCollection_;
        }

        virtual void clear()
        {
            trainDescCollection_.clear();
        }

        virtual bool empty() const
        {
            return trainDescCollection_.empty();
        }

        // Brute force needs no training; intentionally a no-op.
        virtual void train()
        {
        }

        virtual void match(InputArray queryDescriptors, InputArray trainDescriptors,
                           std::vector<DMatch>& matches,
                           InputArray mask = noArray());

        virtual void match(InputArray queryDescriptors,
                           std::vector<DMatch>& matches,
                           const std::vector<GpuMat>& masks = std::vector<GpuMat>());

        virtual void matchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
                                OutputArray matches,
                                InputArray mask = noArray(),
                                Stream& stream = Stream::Null());

        virtual void matchAsync(InputArray queryDescriptors,
                                OutputArray matches,
                                const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
                                Stream& stream = Stream::Null());

        virtual void matchConvert(InputArray gpu_matches,
                                  std::vector<DMatch>& matches);

        virtual void knnMatch(InputArray queryDescriptors, InputArray trainDescriptors,
                              std::vector<std::vector<DMatch> >& matches,
                              int k,
                              InputArray mask = noArray(),
                              bool compactResult = false);

        virtual void knnMatch(InputArray queryDescriptors,
                              std::vector<std::vector<DMatch> >& matches,
                              int k,
                              const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
                              bool compactResult = false);

        virtual void knnMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
                                   OutputArray matches,
                                   int k,
                                   InputArray mask = noArray(),
                                   Stream& stream = Stream::Null());

        virtual void knnMatchAsync(InputArray queryDescriptors,
                                   OutputArray matches,
                                   int k,
                                   const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
                                   Stream& stream = Stream::Null());

        virtual void knnMatchConvert(InputArray gpu_matches,
                                     std::vector< std::vector<DMatch> >& matches,
                                     bool compactResult = false);

        virtual void radiusMatch(InputArray queryDescriptors, InputArray trainDescriptors,
                                 std::vector<std::vector<DMatch> >& matches,
                                 float maxDistance,
                                 InputArray mask = noArray(),
                                 bool compactResult = false);

        virtual void radiusMatch(InputArray queryDescriptors,
                                 std::vector<std::vector<DMatch> >& matches,
                                 float maxDistance,
                                 const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
                                 bool compactResult = false);

        virtual void radiusMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
                                      OutputArray matches,
                                      float maxDistance,
                                      InputArray mask = noArray(),
                                      Stream& stream = Stream::Null());

        virtual void radiusMatchAsync(InputArray queryDescriptors,
                                      OutputArray matches,
                                      float maxDistance,
                                      const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
                                      Stream& stream = Stream::Null());

        virtual void radiusMatchConvert(InputArray gpu_matches,
                                        std::vector< std::vector<DMatch> >& matches,
                                        bool compactResult = false);

    private:
        int norm_;                               // NORM_L1, NORM_L2 or NORM_HAMMING
        std::vector<GpuMat> trainDescCollection_; // train sets added via add()
    };

    //
    // 1 to 1 match
    //

    // Synchronous wrapper: launch on the default (null) stream, then convert
    // the downloaded result into DMatch form.
    void BFMatcher_Impl::match(InputArray _queryDescriptors, InputArray _trainDescriptors,
                               std::vector<DMatch>& matches,
                               InputArray _mask)
    {
        GpuMat d_matches;
        matchAsync(_queryDescriptors, _trainDescriptors, d_matches, _mask);
        matchConvert(d_matches, matches);
    }

    // Synchronous wrapper over the collection (added train sets) variant.
    void BFMatcher_Impl::match(InputArray _queryDescriptors,
                               std::vector<DMatch>& matches,
                               const std::vector<GpuMat>& masks)
    {
        GpuMat d_matches;
        matchAsync(_queryDescriptors, d_matches, masks);
        matchConvert(d_matches, matches);
    }

    // Best match of each query row against a single train set.
    // Output layout (2 x nQuery, CV_32SC1): row 0 = trainIdx (int),
    // row 1 = distance (float bits stored in the int matrix — matchConvert
    // reinterprets that row as float).
    void BFMatcher_Impl::matchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors,
                                    OutputArray _matches,
                                    InputArray _mask,
                                    Stream& stream)
    {
        using namespace cv::cuda::device::bf_match;

        const GpuMat query = _queryDescriptors.getGpuMat();
        const GpuMat train = _trainDescriptors.getGpuMat();
        const GpuMat mask = _mask.getGpuMat();

        if (query.empty() || train.empty())
        {
            _matches.release();
            return;
        }

        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
        CV_Assert( train.cols == query.cols && train.type() == query.type() );
        CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) );

        typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
                                 const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
                                 cudaStream_t stream);

        // Caller tables indexed by query.depth(); 0 entries mark depth/norm
        // combinations with no kernel instantiation.
        static const caller_t callersL1[] =
        {
            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
            matchL1_gpu<int>, matchL1_gpu<float>
        };
        static const caller_t callersL2[] =
        {
            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
        };
        static const caller_t callersHamming[] =
        {
            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
        };

        const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

        const caller_t func = callers[query.depth()];
        if (func == 0)
        {
            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
        }

        const int nQuery = query.rows;

        _matches.create(2, nQuery, CV_32SC1);
        GpuMat matches = _matches.getGpuMat();

        // Row views into the output: trainIdx over row 0, distance over row 1.
        GpuMat trainIdx(1, nQuery, CV_32SC1, matches.ptr(0));
        GpuMat distance(1, nQuery, CV_32FC1, matches.ptr(1));

        func(query, train, mask, trainIdx, distance, StreamAccessor::getStream(stream));
    }

    // Best match of each query row against the whole added train collection.
    // Output layout (3 x nQuery, CV_32SC1): trainIdx / imgIdx / distance rows.
    void BFMatcher_Impl::matchAsync(InputArray _queryDescriptors,
                                    OutputArray _matches,
                                    const std::vector<GpuMat>& masks,
                                    Stream& stream)
    {
        using namespace cv::cuda::device::bf_match;

        const GpuMat query = _queryDescriptors.getGpuMat();

        if (query.empty() || trainDescCollection_.empty())
        {
            _matches.release();
            return;
        }

        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );

        GpuMat trainCollection, maskCollection;
        makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection);

        typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
                                 const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
                                 cudaStream_t stream);

        static const caller_t callersL1[] =
        {
            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
            matchL1_gpu<int>, matchL1_gpu<float>
        };
        static const caller_t callersL2[] =
        {
            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
        };
        static const caller_t callersHamming[] =
        {
            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
        };

        const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

        const caller_t func = callers[query.depth()];
        if (func == 0)
        {
            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
        }

        const int nQuery = query.rows;

        _matches.create(3, nQuery, CV_32SC1);
        GpuMat matches = _matches.getGpuMat();

        GpuMat trainIdx(1, nQuery, CV_32SC1, matches.ptr(0));
        GpuMat imgIdx(1, nQuery, CV_32SC1, matches.ptr(1));
        GpuMat distance(1, nQuery, CV_32FC1, matches.ptr(2));

        func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream));
    }

    // Converts the raw 1-to-1 match output (GPU or CPU Mat, 2 or 3 rows as
    // produced by the matchAsync overloads above) into a DMatch vector.
    // Entries with trainIdx == -1 (masked / unmatched queries) are skipped.
    void BFMatcher_Impl::matchConvert(InputArray _gpu_matches,
                                      std::vector<DMatch>& matches)
    {
        Mat gpu_matches;
        if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT)
        {
            _gpu_matches.getGpuMat().download(gpu_matches);
        }
        else
        {
            gpu_matches = _gpu_matches.getMat();
        }

        if (gpu_matches.empty())
        {
            matches.clear();
            return;
        }

        CV_Assert( (gpu_matches.type() == CV_32SC1) && (gpu_matches.rows == 2 || gpu_matches.rows == 3) );

        const int nQuery = gpu_matches.cols;

        matches.clear();
        matches.reserve(nQuery);

        const int* trainIdxPtr = NULL;
        const int* imgIdxPtr = NULL;
        const float* distancePtr = NULL;

        // 2 rows = single-train layout (no imgIdx); 3 rows = collection layout.
        // The distance row is float data stored in a CV_32SC1 Mat, hence the
        // deliberate ptr<float>() reinterpretation.
        if (gpu_matches.rows == 2)
        {
            trainIdxPtr = gpu_matches.ptr<int>(0);
            distancePtr = gpu_matches.ptr<float>(1);
        }
        else
        {
            trainIdxPtr = gpu_matches.ptr<int>(0);
            imgIdxPtr = gpu_matches.ptr<int>(1);
            distancePtr = gpu_matches.ptr<float>(2);
        }

        for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
        {
            const int trainIdx = trainIdxPtr[queryIdx];
            if (trainIdx == -1)
                continue;

            const int imgIdx = imgIdxPtr ? imgIdxPtr[queryIdx] : 0;
            const float distance = distancePtr[queryIdx];

            DMatch m(queryIdx, trainIdx, imgIdx, distance);

            matches.push_back(m);
        }
    }

    //
    // knn match
    //

    // Synchronous k-NN against a single train set.
    void BFMatcher_Impl::knnMatch(InputArray _queryDescriptors, InputArray _trainDescriptors,
                                  std::vector<std::vector<DMatch> >& matches,
                                  int k,
                                  InputArray _mask,
                                  bool compactResult)
    {
        GpuMat d_matches;
        knnMatchAsync(_queryDescriptors, _trainDescriptors, d_matches, k, _mask);
        knnMatchConvert(d_matches, matches, compactResult);
    }

    // Synchronous k-NN against the added collection. k == 2 runs the dedicated
    // match2 kernels; any other k falls back to per-image single-set k-NN whose
    // sorted per-image results are merged and truncated to the best k.
    void BFMatcher_Impl::knnMatch(InputArray _queryDescriptors,
                                  std::vector<std::vector<DMatch> >& matches,
                                  int k,
                                  const std::vector<GpuMat>& masks,
                                  bool compactResult)
    {
        if (k == 2)
        {
            GpuMat d_matches;
            knnMatchAsync(_queryDescriptors, d_matches, k, masks);
            knnMatchConvert(d_matches, matches, compactResult);
        }
        else
        {
            const GpuMat query = _queryDescriptors.getGpuMat();

            if (query.empty() || trainDescCollection_.empty())
            {
                matches.clear();
                return;
            }

            CV_Assert( query.channels() == 1 && query.depth() < CV_64F );

            std::vector< std::vector<DMatch> > curMatches;
            std::vector<DMatch> temp;
            temp.reserve(2 * k);

            matches.resize(query.rows);
            for (size_t i = 0; i < matches.size(); ++i)
                matches[i].reserve(k);

            for (size_t imgIdx = 0; imgIdx < trainDescCollection_.size(); ++imgIdx)
            {
                knnMatch(query, trainDescCollection_[imgIdx], curMatches, k, masks.empty() ? GpuMat() : masks[imgIdx]);

                for (int queryIdx = 0; queryIdx < query.rows; ++queryIdx)
                {
                    std::vector<DMatch>& localMatch = curMatches[queryIdx];
                    std::vector<DMatch>& globalMatch = matches[queryIdx];

                    // Tag this image's matches with its index in the collection.
                    for (size_t i = 0; i < localMatch.size(); ++i)
                        localMatch[i].imgIdx = imgIdx;

                    // Merge two distance-sorted runs, then keep the best k.
                    temp.clear();
                    std::merge(globalMatch.begin(), globalMatch.end(), localMatch.begin(), localMatch.end(), std::back_inserter(temp));

                    globalMatch.clear();
                    const size_t count = std::min(static_cast<size_t>(k), temp.size());
                    std::copy(temp.begin(), temp.begin() + count, std::back_inserter(globalMatch));
                }
            }

            if (compactResult)
            {
                // NOTE(review): std::mem_fun_ref was removed in C++17; fine only
                // while this TU is built as C++11/14 — confirm build settings.
                std::vector< std::vector<DMatch> >::iterator new_end = std::remove_if(matches.begin(), matches.end(), std::mem_fun_ref(&std::vector<DMatch>::empty));
                matches.erase(new_end, matches.end());
            }
        }
    }

    // k-NN against a single train set.
    // k == 2: output is 2 x nQuery CV_32SC2 (packed pair per query:
    //   row 0 = trainIdx pairs, row 1 = distance pairs reinterpreted as float2).
    // otherwise: output is (2*nQuery) x k CV_32SC1 — a trainIdx matrix stacked
    //   over a distance matrix, plus a pooled nQuery x nTrain allDist scratch.
    void BFMatcher_Impl::knnMatchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors,
                                       OutputArray _matches,
                                       int k,
                                       InputArray _mask,
                                       Stream& stream)
    {
        using namespace cv::cuda::device::bf_knnmatch;

        const GpuMat query = _queryDescriptors.getGpuMat();
        const GpuMat train = _trainDescriptors.getGpuMat();
        const GpuMat mask = _mask.getGpuMat();

        if (query.empty() || train.empty())
        {
            _matches.release();
            return;
        }

        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
        CV_Assert( train.cols == query.cols && train.type() == query.type() );
        CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) );

        typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
                                 const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
                                 cudaStream_t stream);

        static const caller_t callersL1[] =
        {
            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
            matchL1_gpu<int>, matchL1_gpu<float>
        };
        static const caller_t callersL2[] =
        {
            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
        };
        static const caller_t callersHamming[] =
        {
            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
        };

        const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

        const caller_t func = callers[query.depth()];
        if (func == 0)
        {
            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
        }

        const int nQuery = query.rows;
        const int nTrain = train.rows;

        GpuMat trainIdx, distance, allDist;
        if (k == 2)
        {
            _matches.create(2, nQuery, CV_32SC2);
            GpuMat matches = _matches.getGpuMat();

            trainIdx = GpuMat(1, nQuery, CV_32SC2, matches.ptr(0));
            distance = GpuMat(1, nQuery, CV_32FC2, matches.ptr(1));
        }
        else
        {
            _matches.create(2 * nQuery, k, CV_32SC1);
            GpuMat matches = _matches.getGpuMat();

            trainIdx = GpuMat(nQuery, k, CV_32SC1, matches.ptr(0), matches.step);
            distance = GpuMat(nQuery, k, CV_32FC1, matches.ptr(nQuery), matches.step);

            // Temporary full distance matrix, borrowed from the stream's pool.
            BufferPool pool(stream);
            allDist = pool.getBuffer(nQuery, nTrain, CV_32FC1);
        }

        // -1 marks "no match"; the convert step relies on this sentinel.
        trainIdx.setTo(Scalar::all(-1), stream);

        func(query, train, k, mask, trainIdx, distance, allDist, StreamAccessor::getStream(stream));
    }

    // 2-NN (only) against the added collection.
    // Output: 3 x nQuery CV_32SC2 — trainIdx / imgIdx / distance pair rows.
    void BFMatcher_Impl::knnMatchAsync(InputArray _queryDescriptors,
                                       OutputArray _matches,
                                       int k,
                                       const std::vector<GpuMat>& masks,
                                       Stream& stream)
    {
        using namespace cv::cuda::device::bf_knnmatch;

        if (k != 2)
        {
            CV_Error(Error::StsNotImplemented, "only k=2 mode is supported for now");
        }

        const GpuMat query = _queryDescriptors.getGpuMat();

        if (query.empty() || trainDescCollection_.empty())
        {
            _matches.release();
            return;
        }

        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );

        GpuMat trainCollection, maskCollection;
        makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection);

        typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
                                 const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
                                 cudaStream_t stream);

        static const caller_t callersL1[] =
        {
            match2L1_gpu<unsigned char>, 0/*match2L1_gpu<signed char>*/,
            match2L1_gpu<unsigned short>, match2L1_gpu<short>,
            match2L1_gpu<int>, match2L1_gpu<float>
        };
        static const caller_t callersL2[] =
        {
            0/*match2L2_gpu<unsigned char>*/, 0/*match2L2_gpu<signed char>*/,
            0/*match2L2_gpu<unsigned short>*/, 0/*match2L2_gpu<short>*/,
            0/*match2L2_gpu<int>*/, match2L2_gpu<float>
        };
        static const caller_t callersHamming[] =
        {
            match2Hamming_gpu<unsigned char>, 0/*match2Hamming_gpu<signed char>*/,
            match2Hamming_gpu<unsigned short>, 0/*match2Hamming_gpu<short>*/,
            match2Hamming_gpu<int>, 0/*match2Hamming_gpu<float>*/
        };

        const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

        const caller_t func = callers[query.depth()];
        if (func == 0)
        {
            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
        }

        const int nQuery = query.rows;

        _matches.create(3, nQuery, CV_32SC2);
        GpuMat matches = _matches.getGpuMat();

        GpuMat trainIdx(1, nQuery, CV_32SC2, matches.ptr(0));
        GpuMat imgIdx(1, nQuery, CV_32SC2, matches.ptr(1));
        GpuMat distance(1, nQuery, CV_32FC2, matches.ptr(2));

        trainIdx.setTo(Scalar::all(-1), stream);

        func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream));
    }

    // Converts the raw k-NN output (either packed CV_32SC2 k==2 layout or the
    // stacked CV_32SC1 matrices) into per-query DMatch lists. trainIdx == -1
    // entries are treated as absent matches.
    void BFMatcher_Impl::knnMatchConvert(InputArray _gpu_matches,
                                         std::vector< std::vector<DMatch> >& matches,
                                         bool compactResult)
    {
        Mat gpu_matches;
        if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT)
        {
            _gpu_matches.getGpuMat().download(gpu_matches);
        }
        else
        {
            gpu_matches = _gpu_matches.getMat();
        }

        if (gpu_matches.empty())
        {
            matches.clear();
            return;
        }

        CV_Assert( ((gpu_matches.type() == CV_32SC2) && (gpu_matches.rows == 2 || gpu_matches.rows == 3)) ||
                   (gpu_matches.type() == CV_32SC1) );

        int nQuery = -1, k = -1;

        const int* trainIdxPtr = NULL;
        const int* imgIdxPtr = NULL;
        const float* distancePtr = NULL;

        if (gpu_matches.type() == CV_32SC2)
        {
            // Packed k == 2 layout: each 2-channel element holds the pair for
            // one query; walking the row with scalar int/float pointers visits
            // both neighbours in order.
            nQuery = gpu_matches.cols;
            k = 2;

            if (gpu_matches.rows == 2)
            {
                trainIdxPtr = gpu_matches.ptr<int>(0);
                distancePtr = gpu_matches.ptr<float>(1);
            }
            else
            {
                trainIdxPtr = gpu_matches.ptr<int>(0);
                imgIdxPtr = gpu_matches.ptr<int>(1);
                distancePtr = gpu_matches.ptr<float>(2);
            }
        }
        else
        {
            // Stacked layout from the k != 2 single-train path:
            // rows [0, nQuery) hold trainIdx, rows [nQuery, 2*nQuery) distance.
            nQuery = gpu_matches.rows / 2;
            k = gpu_matches.cols;

            trainIdxPtr = gpu_matches.ptr<int>(0);
            distancePtr = gpu_matches.ptr<float>(nQuery);
        }

        matches.clear();
        matches.reserve(nQuery);

        for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
        {
            matches.push_back(std::vector<DMatch>());
            std::vector<DMatch>& curMatches = matches.back();
            curMatches.reserve(k);

            for (int i = 0; i < k; ++i)
            {
                const int trainIdx = *trainIdxPtr;
                // NOTE(review): `continue` here skips the pointer increments
                // below, so once a -1 slot is hit the read position no longer
                // advances for the remaining slots of this query. That keeps
                // the cursors aligned only if -1 slots never precede valid
                // ones in the kernel output — verify against the .cu writers.
                if (trainIdx == -1)
                    continue;

                const int imgIdx = imgIdxPtr ? *imgIdxPtr : 0;
                const float distance = *distancePtr;

                DMatch m(queryIdx, trainIdx, imgIdx, distance);

                curMatches.push_back(m);

                ++trainIdxPtr;
                ++distancePtr;
                if (imgIdxPtr)
                    ++imgIdxPtr;
            }

            if (compactResult && curMatches.empty())
            {
                matches.pop_back();
            }
        }
    }

    //
    // radius match
    //

    // Synchronous radius match against a single train set.
    void BFMatcher_Impl::radiusMatch(InputArray _queryDescriptors, InputArray _trainDescriptors,
                                     std::vector<std::vector<DMatch> >& matches,
                                     float maxDistance,
                                     InputArray _mask,
                                     bool compactResult)
    {
        GpuMat d_matches;
        radiusMatchAsync(_queryDescriptors, _trainDescriptors, d_matches, maxDistance, _mask);
        radiusMatchConvert(d_matches, matches, compactResult);
    }

    // Synchronous radius match against the added collection.
    void BFMatcher_Impl::radiusMatch(InputArray _queryDescriptors,
                                     std::vector<std::vector<DMatch> >& matches,
                                     float maxDistance,
                                     const std::vector<GpuMat>& masks,
                                     bool compactResult)
    {
        GpuMat d_matches;
        radiusMatchAsync(_queryDescriptors, d_matches, maxDistance, masks);
        radiusMatchConvert(d_matches, matches, compactResult);
    }

    // Radius match against a single train set.
    // Output (CV_32SC1, 2*nQuery + 1 rows x `cols`):
    //   rows [0, nQuery)         trainIdx matrix,
    //   rows [nQuery, 2*nQuery)  distance matrix (float bits),
    //   row  2*nQuery            per-query match counts (nMatches).
    // The CV_32SC1 tag is what radiusMatchConvert uses to detect this layout.
    void BFMatcher_Impl::radiusMatchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors,
                                          OutputArray _matches,
                                          float maxDistance,
                                          InputArray _mask,
                                          Stream& stream)
    {
        using namespace cv::cuda::device::bf_radius_match;

        const GpuMat query = _queryDescriptors.getGpuMat();
        const GpuMat train = _trainDescriptors.getGpuMat();
        const GpuMat mask = _mask.getGpuMat();

        if (query.empty() || train.empty())
        {
            _matches.release();
            return;
        }

        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
        CV_Assert( train.cols == query.cols && train.type() == query.type() );
        CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) );

        typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
                                 const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
                                 cudaStream_t stream);

        static const caller_t callersL1[] =
        {
            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
            matchL1_gpu<int>, matchL1_gpu<float>
        };
        static const caller_t callersL2[] =
        {
            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
        };
        static const caller_t callersHamming[] =
        {
            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
        };

        const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

        const caller_t func = callers[query.depth()];
        if (func == 0)
        {
            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
        }

        const int nQuery = query.rows;
        const int nTrain = train.rows;

        // Heuristic capacity per query row; the convert step clamps nMatches
        // to this column count when reading results back.
        const int cols = std::max((nTrain / 100), nQuery);

        _matches.create(2 * nQuery + 1, cols, CV_32SC1);
        GpuMat matches = _matches.getGpuMat();

        GpuMat trainIdx(nQuery, cols, CV_32SC1, matches.ptr(0), matches.step);
        GpuMat distance(nQuery, cols, CV_32FC1, matches.ptr(nQuery), matches.step);
        GpuMat nMatches(1, nQuery, CV_32SC1, matches.ptr(2 * nQuery));

        nMatches.setTo(Scalar::all(0), stream);

        func(query, train, maxDistance, mask, trainIdx, distance, nMatches, StreamAccessor::getStream(stream));
    }

    // Radius match against the added collection.
    // Output (CV_32FC1, 3*nQuery + 1 rows x nQuery):
    //   trainIdx / imgIdx / distance matrices stacked, then the nMatches row.
    // The CV_32FC1 tag distinguishes this layout in radiusMatchConvert.
    void BFMatcher_Impl::radiusMatchAsync(InputArray _queryDescriptors,
                                          OutputArray _matches,
                                          float maxDistance,
                                          const std::vector<GpuMat>& masks,
                                          Stream& stream)
    {
        using namespace cv::cuda::device::bf_radius_match;

        const GpuMat query = _queryDescriptors.getGpuMat();

        if (query.empty() || trainDescCollection_.empty())
        {
            _matches.release();
            return;
        }

        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );

        // NOTE(review): the uploaded trainCollection/maskCollection are never
        // passed to the kernel below — it takes the host-side trains_/masks_
        // header arrays instead. The call still performs the mask-vs-train
        // CV_Assert validation, but the uploads look redundant; confirm
        // against the bf_radius_match kernel interface before removing.
        GpuMat trainCollection, maskCollection;
        makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection);

        typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
                                 const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
                                 cudaStream_t stream);

        static const caller_t callersL1[] =
        {
            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
            matchL1_gpu<int>, matchL1_gpu<float>
        };
        static const caller_t callersL2[] =
        {
            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
        };
        static const caller_t callersHamming[] =
        {
            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
        };

        const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

        const caller_t func = callers[query.depth()];
        if (func == 0)
        {
            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
        }

        const int nQuery = query.rows;

        _matches.create(3 * nQuery + 1, nQuery, CV_32FC1);
        GpuMat matches = _matches.getGpuMat();

        GpuMat trainIdx(nQuery, nQuery, CV_32SC1, matches.ptr(0), matches.step);
        GpuMat imgIdx(nQuery, nQuery, CV_32SC1, matches.ptr(nQuery), matches.step);
        GpuMat distance(nQuery, nQuery, CV_32FC1, matches.ptr(2 * nQuery), matches.step);
        GpuMat nMatches(1, nQuery, CV_32SC1, matches.ptr(3 * nQuery));

        nMatches.setTo(Scalar::all(0), stream);

        // Host-side arrays of device-memory headers, consumed directly by the
        // radius-match kernels (see caller_t signature above).
        std::vector<PtrStepSzb> trains_(trainDescCollection_.begin(), trainDescCollection_.end());
        std::vector<PtrStepSzb> masks_(masks.begin(), masks.end());

        func(query, &trains_[0], static_cast<int>(trains_.size()), maxDistance, masks_.size() == 0 ? 0 : &masks_[0],
             trainIdx, imgIdx, distance, nMatches, StreamAccessor::getStream(stream));
    }

    // Converts raw radius-match output into per-query DMatch lists. The Mat
    // type tags the layout: CV_32SC1 = single-train (2*nQuery + 1 rows),
    // CV_32FC1 = collection (3*nQuery + 1 rows, with an imgIdx block).
    void BFMatcher_Impl::radiusMatchConvert(InputArray _gpu_matches,
                                            std::vector< std::vector<DMatch> >& matches,
                                            bool compactResult)
    {
        Mat gpu_matches;
        if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT)
        {
            _gpu_matches.getGpuMat().download(gpu_matches);
        }
        else
        {
            gpu_matches = _gpu_matches.getMat();
        }

        if (gpu_matches.empty())
        {
            matches.clear();
            return;
        }

        CV_Assert( gpu_matches.type() == CV_32SC1 || gpu_matches.type() == CV_32FC1 );

        int nQuery = -1;

        const int* trainIdxPtr = NULL;
        const int* imgIdxPtr = NULL;
        const float* distancePtr = NULL;
        const int* nMatchesPtr = NULL;

        if (gpu_matches.type() == CV_32SC1)
        {
            nQuery = (gpu_matches.rows - 1) / 2;

            trainIdxPtr = gpu_matches.ptr<int>(0);
            distancePtr = gpu_matches.ptr<float>(nQuery);
            nMatchesPtr = gpu_matches.ptr<int>(2 * nQuery);
        }
        else
        {
            nQuery = (gpu_matches.rows - 1) / 3;

            trainIdxPtr = gpu_matches.ptr<int>(0);
            imgIdxPtr = gpu_matches.ptr<int>(nQuery);
            distancePtr = gpu_matches.ptr<float>(2 * nQuery);
            nMatchesPtr = gpu_matches.ptr<int>(3 * nQuery);
        }

        matches.clear();
        matches.reserve(nQuery);

        for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
        {
            // The kernel may count more matches than a row can store; clamp to
            // the actual row capacity.
            const int nMatched = std::min(nMatchesPtr[queryIdx], gpu_matches.cols);

            if (nMatched == 0)
            {
                if (!compactResult)
                {
                    matches.push_back(std::vector<DMatch>());
                }
            }
            else
            {
                matches.push_back(std::vector<DMatch>(nMatched));
                std::vector<DMatch>& curMatches = matches.back();

                for (int i = 0; i < nMatched; ++i)
                {
                    const int trainIdx = trainIdxPtr[i];

                    const int imgIdx = imgIdxPtr ?
imgIdxPtr[i] : 0; 1054 const float distance = distancePtr[i]; 1055 1056 DMatch m(queryIdx, trainIdx, imgIdx, distance); 1057 1058 curMatches[i] = m; 1059 } 1060 1061 std::sort(curMatches.begin(), curMatches.end()); 1062 } 1063 1064 trainIdxPtr += gpu_matches.cols; 1065 distancePtr += gpu_matches.cols; 1066 if (imgIdxPtr) 1067 imgIdxPtr += gpu_matches.cols; 1068 } 1069 } 1070} 1071 1072Ptr<cv::cuda::DescriptorMatcher> cv::cuda::DescriptorMatcher::createBFMatcher(int norm) 1073{ 1074 return makePtr<BFMatcher_Impl>(norm); 1075} 1076 1077#endif /* !defined (HAVE_CUDA) */ 1078