/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"

using namespace cv;
using namespace cv::cuda;

#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

Ptr<cv::cuda::DescriptorMatcher> cv::cuda::DescriptorMatcher::createBFMatcher(int) { throw_no_cuda(); return Ptr<cv::cuda::DescriptorMatcher>(); }

#else /* !defined (HAVE_CUDA) */

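// Forward declarations of the kernel launchers. The definitions live in this
// module's accompanying CUDA sources (the bf_match / bf_knnmatch /
// bf_radius_match .cu files) and are instantiated per descriptor element type.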
namespace cv { namespace cuda { namespace device
{
    namespace bf_match
    {
        template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
            const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
            cudaStream_t stream);
        template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
            const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
            cudaStream_t stream);
        template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
            const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
            cudaStream_t stream);

        template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
            const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
            cudaStream_t stream);
        template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
            const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
            cudaStream_t stream);
        template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
            const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
            cudaStream_t stream);
    }

    namespace bf_knnmatch
    {
        template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
            const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
            cudaStream_t stream);
        template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
            const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
            cudaStream_t stream);
        template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
            const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
            cudaStream_t stream);

        template <typename T> void match2L1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
            const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
            cudaStream_t stream);
        template <typename T> void match2L2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
            const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
            cudaStream_t stream);
        template <typename T> void match2Hamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
            const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
            cudaStream_t stream);
    }

    namespace bf_radius_match
    {
        template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
            const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
            cudaStream_t stream);
        template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
            const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
            cudaStream_t stream);
        template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
            const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
            cudaStream_t stream);

        template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
            const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
            cudaStream_t stream);

        template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
            const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
            cudaStream_t stream);

        template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
            const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
            cudaStream_t stream);
    }
}}}

namespace
{
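    // Packs a set of per-image descriptor matrices (and optional masks) into
    // single-row GpuMats of PtrStepSzb / PtrStepb headers, so the multi-image
    // kernels can index every train image from one device-side array.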
    static void makeGpuCollection(const std::vector<GpuMat>& trainDescCollection,
                                  const std::vector<GpuMat>& masks,
                                  GpuMat& trainCollection,
                                  GpuMat& maskCollection)
    {
        if (trainDescCollection.empty())
            return;

        if (masks.empty())
        {
            Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepSzb)));

            PtrStepSzb* trainCollectionCPU_ptr = trainCollectionCPU.ptr<PtrStepSzb>();

            for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr)
                *trainCollectionCPU_ptr = trainDescCollection[i];

            trainCollection.upload(trainCollectionCPU);
            maskCollection.release();
        }
        else
        {
            CV_Assert( masks.size() == trainDescCollection.size() );

            Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepSzb)));
            Mat maskCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepb)));

            PtrStepSzb* trainCollectionCPU_ptr = trainCollectionCPU.ptr<PtrStepSzb>();
            PtrStepb* maskCollectionCPU_ptr = maskCollectionCPU.ptr<PtrStepb>();

            for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr, ++maskCollectionCPU_ptr)
            {
                const GpuMat& train = trainDescCollection[i];
                const GpuMat& mask = masks[i];

                CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.cols == train.rows) );

                *trainCollectionCPU_ptr = train;
                *maskCollectionCPU_ptr = mask;
            }

            trainCollection.upload(trainCollectionCPU);
            maskCollection.upload(maskCollectionCPU);
        }
    }

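    // Brute-force descriptor matcher: every query descriptor is compared against
    // every train descriptor under the chosen norm (L1, L2 or Hamming). The
    // synchronous methods are thin wrappers that run the *Async variant on the
    // null stream and then decode the packed GPU result on the host.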
    class BFMatcher_Impl : public cv::cuda::DescriptorMatcher
    {
    public:
        explicit BFMatcher_Impl(int norm) : norm_(norm)
        {
            CV_Assert( norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING );
        }

        virtual bool isMaskSupported() const { return true; }

        virtual void add(const std::vector<GpuMat>& descriptors)
        {
            trainDescCollection_.insert(trainDescCollection_.end(), descriptors.begin(), descriptors.end());
        }

        virtual const std::vector<GpuMat>& getTrainDescriptors() const
        {
            return trainDescCollection_;
        }

        virtual void clear()
        {
            trainDescCollection_.clear();
        }

        virtual bool empty() const
        {
            return trainDescCollection_.empty();
        }

        virtual void train()
        {
        }

        virtual void match(InputArray queryDescriptors, InputArray trainDescriptors,
                           std::vector<DMatch>& matches,
                           InputArray mask = noArray());

        virtual void match(InputArray queryDescriptors,
                           std::vector<DMatch>& matches,
                           const std::vector<GpuMat>& masks = std::vector<GpuMat>());

        virtual void matchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
                                OutputArray matches,
                                InputArray mask = noArray(),
                                Stream& stream = Stream::Null());

        virtual void matchAsync(InputArray queryDescriptors,
                                OutputArray matches,
                                const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
                                Stream& stream = Stream::Null());

        virtual void matchConvert(InputArray gpu_matches,
                                  std::vector<DMatch>& matches);

        virtual void knnMatch(InputArray queryDescriptors, InputArray trainDescriptors,
                              std::vector<std::vector<DMatch> >& matches,
                              int k,
                              InputArray mask = noArray(),
                              bool compactResult = false);

        virtual void knnMatch(InputArray queryDescriptors,
                              std::vector<std::vector<DMatch> >& matches,
                              int k,
                              const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
                              bool compactResult = false);

        virtual void knnMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
                                   OutputArray matches,
                                   int k,
                                   InputArray mask = noArray(),
                                   Stream& stream = Stream::Null());

        virtual void knnMatchAsync(InputArray queryDescriptors,
                                   OutputArray matches,
                                   int k,
                                   const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
                                   Stream& stream = Stream::Null());

        virtual void knnMatchConvert(InputArray gpu_matches,
                                     std::vector< std::vector<DMatch> >& matches,
                                     bool compactResult = false);

        virtual void radiusMatch(InputArray queryDescriptors, InputArray trainDescriptors,
                                 std::vector<std::vector<DMatch> >& matches,
                                 float maxDistance,
                                 InputArray mask = noArray(),
                                 bool compactResult = false);

        virtual void radiusMatch(InputArray queryDescriptors,
                                 std::vector<std::vector<DMatch> >& matches,
                                 float maxDistance,
                                 const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
                                 bool compactResult = false);

        virtual void radiusMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
                                      OutputArray matches,
                                      float maxDistance,
                                      InputArray mask = noArray(),
                                      Stream& stream = Stream::Null());

        virtual void radiusMatchAsync(InputArray queryDescriptors,
                                      OutputArray matches,
                                      float maxDistance,
                                      const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
                                      Stream& stream = Stream::Null());

        virtual void radiusMatchConvert(InputArray gpu_matches,
                                        std::vector< std::vector<DMatch> >& matches,
                                        bool compactResult = false);

    private:
        int norm_;
        std::vector<GpuMat> trainDescCollection_;
    };

    //
    // 1 to 1 match
    //
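    // Result layout produced by matchAsync: a CV_32SC1 GpuMat with one column
    // per query descriptor. The single-train overload packs two rows (trainIdx,
    // distance stored as float bits); the collection overload packs three rows
    // (trainIdx, imgIdx, distance). matchConvert decodes either form by row count.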

    void BFMatcher_Impl::match(InputArray _queryDescriptors, InputArray _trainDescriptors,
                               std::vector<DMatch>& matches,
                               InputArray _mask)
    {
        GpuMat d_matches;
        matchAsync(_queryDescriptors, _trainDescriptors, d_matches, _mask);
        matchConvert(d_matches, matches);
    }

    void BFMatcher_Impl::match(InputArray _queryDescriptors,
                               std::vector<DMatch>& matches,
                               const std::vector<GpuMat>& masks)
    {
        GpuMat d_matches;
        matchAsync(_queryDescriptors, d_matches, masks);
        matchConvert(d_matches, matches);
    }

    void BFMatcher_Impl::matchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors,
                                    OutputArray _matches,
                                    InputArray _mask,
                                    Stream& stream)
    {
        using namespace cv::cuda::device::bf_match;

        const GpuMat query = _queryDescriptors.getGpuMat();
        const GpuMat train = _trainDescriptors.getGpuMat();
        const GpuMat mask = _mask.getGpuMat();

        if (query.empty() || train.empty())
        {
            _matches.release();
            return;
        }

        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
        CV_Assert( train.cols == query.cols && train.type() == query.type() );
        CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) );

        typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
                                 const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
                                 cudaStream_t stream);

        static const caller_t callersL1[] =
        {
            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
            matchL1_gpu<int>, matchL1_gpu<float>
        };
        static const caller_t callersL2[] =
        {
            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
        };
        static const caller_t callersHamming[] =
        {
            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
        };

        const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

        const caller_t func = callers[query.depth()];
        if (func == 0)
        {
            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
        }

        const int nQuery = query.rows;

        _matches.create(2, nQuery, CV_32SC1);
        GpuMat matches = _matches.getGpuMat();

        GpuMat trainIdx(1, nQuery, CV_32SC1, matches.ptr(0));
        GpuMat distance(1, nQuery, CV_32FC1, matches.ptr(1));

        func(query, train, mask, trainIdx, distance, StreamAccessor::getStream(stream));
    }

    void BFMatcher_Impl::matchAsync(InputArray _queryDescriptors,
                                    OutputArray _matches,
                                    const std::vector<GpuMat>& masks,
                                    Stream& stream)
    {
        using namespace cv::cuda::device::bf_match;

        const GpuMat query = _queryDescriptors.getGpuMat();

        if (query.empty() || trainDescCollection_.empty())
        {
            _matches.release();
            return;
        }

        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );

        GpuMat trainCollection, maskCollection;
        makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection);

        typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
                                 const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
                                 cudaStream_t stream);

        static const caller_t callersL1[] =
        {
            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
            matchL1_gpu<int>, matchL1_gpu<float>
        };
        static const caller_t callersL2[] =
        {
            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
        };
        static const caller_t callersHamming[] =
        {
            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
        };

        const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

        const caller_t func = callers[query.depth()];
        if (func == 0)
        {
            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
        }

        const int nQuery = query.rows;

        _matches.create(3, nQuery, CV_32SC1);
        GpuMat matches = _matches.getGpuMat();

        GpuMat trainIdx(1, nQuery, CV_32SC1, matches.ptr(0));
        GpuMat imgIdx(1, nQuery, CV_32SC1, matches.ptr(1));
        GpuMat distance(1, nQuery, CV_32FC1, matches.ptr(2));

        func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream));
    }

    void BFMatcher_Impl::matchConvert(InputArray _gpu_matches,
                                      std::vector<DMatch>& matches)
    {
        Mat gpu_matches;
        if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT)
        {
            _gpu_matches.getGpuMat().download(gpu_matches);
        }
        else
        {
            gpu_matches = _gpu_matches.getMat();
        }

        if (gpu_matches.empty())
        {
            matches.clear();
            return;
        }

        CV_Assert( (gpu_matches.type() == CV_32SC1) && (gpu_matches.rows == 2 || gpu_matches.rows == 3) );

        const int nQuery = gpu_matches.cols;

        matches.clear();
        matches.reserve(nQuery);

        const int* trainIdxPtr = NULL;
        const int* imgIdxPtr = NULL;
        const float* distancePtr = NULL;

        if (gpu_matches.rows == 2)
        {
            trainIdxPtr = gpu_matches.ptr<int>(0);
            distancePtr = gpu_matches.ptr<float>(1);
        }
        else
        {
            trainIdxPtr = gpu_matches.ptr<int>(0);
            imgIdxPtr = gpu_matches.ptr<int>(1);
            distancePtr = gpu_matches.ptr<float>(2);
        }

        for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
        {
            const int trainIdx = trainIdxPtr[queryIdx];
            if (trainIdx == -1)
                continue;

            const int imgIdx = imgIdxPtr ? imgIdxPtr[queryIdx] : 0;
            const float distance = distancePtr[queryIdx];

            DMatch m(queryIdx, trainIdx, imgIdx, distance);

            matches.push_back(m);
        }
    }

    //
    // knn match
    //
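    // Result layouts produced by knnMatchAsync. For k == 2 the result is packed
    // as 2 rows (or 3 rows, with a train collection) of nQuery CV_32SC2 pairs:
    // trainIdx pairs, then distance pairs (and imgIdx pairs for a collection).
    // For any other k, the single-train overload packs a (2 * nQuery) x k
    // CV_32SC1 matrix: the first nQuery rows hold trainIdx, the remaining rows
    // hold distances. knnMatchConvert tells the encodings apart by matrix type.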

    void BFMatcher_Impl::knnMatch(InputArray _queryDescriptors, InputArray _trainDescriptors,
                                  std::vector<std::vector<DMatch> >& matches,
                                  int k,
                                  InputArray _mask,
                                  bool compactResult)
    {
        GpuMat d_matches;
        knnMatchAsync(_queryDescriptors, _trainDescriptors, d_matches, k, _mask);
        knnMatchConvert(d_matches, matches, compactResult);
    }

    void BFMatcher_Impl::knnMatch(InputArray _queryDescriptors,
                                  std::vector<std::vector<DMatch> >& matches,
                                  int k,
                                  const std::vector<GpuMat>& masks,
                                  bool compactResult)
    {
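        // The GPU collection kernels only support k == 2. For any other k, fall
        // back to matching against each train image separately and merging the
        // sorted per-image results on the host, keeping the k best per query.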
        if (k == 2)
        {
            GpuMat d_matches;
            knnMatchAsync(_queryDescriptors, d_matches, k, masks);
            knnMatchConvert(d_matches, matches, compactResult);
        }
        else
        {
            const GpuMat query = _queryDescriptors.getGpuMat();

            if (query.empty() || trainDescCollection_.empty())
            {
                matches.clear();
                return;
            }

            CV_Assert( query.channels() == 1 && query.depth() < CV_64F );

            std::vector< std::vector<DMatch> > curMatches;
            std::vector<DMatch> temp;
            temp.reserve(2 * k);

            matches.resize(query.rows);
            for (size_t i = 0; i < matches.size(); ++i)
                matches[i].reserve(k);

            for (size_t imgIdx = 0; imgIdx < trainDescCollection_.size(); ++imgIdx)
            {
                knnMatch(query, trainDescCollection_[imgIdx], curMatches, k, masks.empty() ? GpuMat() : masks[imgIdx]);

                for (int queryIdx = 0; queryIdx < query.rows; ++queryIdx)
                {
                    std::vector<DMatch>& localMatch = curMatches[queryIdx];
                    std::vector<DMatch>& globalMatch = matches[queryIdx];

                    for (size_t i = 0; i < localMatch.size(); ++i)
                        localMatch[i].imgIdx = static_cast<int>(imgIdx);

                    temp.clear();
                    std::merge(globalMatch.begin(), globalMatch.end(), localMatch.begin(), localMatch.end(), std::back_inserter(temp));

                    globalMatch.clear();
                    const size_t count = std::min(static_cast<size_t>(k), temp.size());
                    std::copy(temp.begin(), temp.begin() + count, std::back_inserter(globalMatch));
                }
            }

            if (compactResult)
            {
                std::vector< std::vector<DMatch> >::iterator new_end =
                    std::remove_if(matches.begin(), matches.end(),
                                   [](const std::vector<DMatch>& e) { return e.empty(); });
                matches.erase(new_end, matches.end());
            }
        }
    }

    void BFMatcher_Impl::knnMatchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors,
                                       OutputArray _matches,
                                       int k,
                                       InputArray _mask,
                                       Stream& stream)
    {
        using namespace cv::cuda::device::bf_knnmatch;

        const GpuMat query = _queryDescriptors.getGpuMat();
        const GpuMat train = _trainDescriptors.getGpuMat();
        const GpuMat mask = _mask.getGpuMat();

        if (query.empty() || train.empty())
        {
            _matches.release();
            return;
        }

        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
        CV_Assert( train.cols == query.cols && train.type() == query.type() );
        CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) );

        typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
                                 const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
                                 cudaStream_t stream);

        static const caller_t callersL1[] =
        {
            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
            matchL1_gpu<int>, matchL1_gpu<float>
        };
        static const caller_t callersL2[] =
        {
            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
        };
        static const caller_t callersHamming[] =
        {
            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
        };

        const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

        const caller_t func = callers[query.depth()];
        if (func == 0)
        {
            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
        }

        const int nQuery = query.rows;
        const int nTrain = train.rows;

        GpuMat trainIdx, distance, allDist;
        if (k == 2)
        {
            _matches.create(2, nQuery, CV_32SC2);
            GpuMat matches = _matches.getGpuMat();

            trainIdx = GpuMat(1, nQuery, CV_32SC2, matches.ptr(0));
            distance = GpuMat(1, nQuery, CV_32FC2, matches.ptr(1));
        }
        else
        {
            _matches.create(2 * nQuery, k, CV_32SC1);
            GpuMat matches = _matches.getGpuMat();

            trainIdx = GpuMat(nQuery, k, CV_32SC1, matches.ptr(0), matches.step);
            distance = GpuMat(nQuery, k, CV_32FC1, matches.ptr(nQuery), matches.step);

            BufferPool pool(stream);
            allDist = pool.getBuffer(nQuery, nTrain, CV_32FC1);
        }

        trainIdx.setTo(Scalar::all(-1), stream);

        func(query, train, k, mask, trainIdx, distance, allDist, StreamAccessor::getStream(stream));
    }

    void BFMatcher_Impl::knnMatchAsync(InputArray _queryDescriptors,
                                       OutputArray _matches,
                                       int k,
                                       const std::vector<GpuMat>& masks,
                                       Stream& stream)
    {
        using namespace cv::cuda::device::bf_knnmatch;

        if (k != 2)
        {
            CV_Error(Error::StsNotImplemented, "only k=2 mode is supported for now");
        }

        const GpuMat query = _queryDescriptors.getGpuMat();

        if (query.empty() || trainDescCollection_.empty())
        {
            _matches.release();
            return;
        }

        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );

        GpuMat trainCollection, maskCollection;
        makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection);

        typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
                                 const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
                                 cudaStream_t stream);

        static const caller_t callersL1[] =
        {
            match2L1_gpu<unsigned char>, 0/*match2L1_gpu<signed char>*/,
            match2L1_gpu<unsigned short>, match2L1_gpu<short>,
            match2L1_gpu<int>, match2L1_gpu<float>
        };
        static const caller_t callersL2[] =
        {
            0/*match2L2_gpu<unsigned char>*/, 0/*match2L2_gpu<signed char>*/,
            0/*match2L2_gpu<unsigned short>*/, 0/*match2L2_gpu<short>*/,
            0/*match2L2_gpu<int>*/, match2L2_gpu<float>
        };
        static const caller_t callersHamming[] =
        {
            match2Hamming_gpu<unsigned char>, 0/*match2Hamming_gpu<signed char>*/,
            match2Hamming_gpu<unsigned short>, 0/*match2Hamming_gpu<short>*/,
            match2Hamming_gpu<int>, 0/*match2Hamming_gpu<float>*/
        };

        const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

        const caller_t func = callers[query.depth()];
        if (func == 0)
        {
            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
        }

        const int nQuery = query.rows;

        _matches.create(3, nQuery, CV_32SC2);
        GpuMat matches = _matches.getGpuMat();

        GpuMat trainIdx(1, nQuery, CV_32SC2, matches.ptr(0));
        GpuMat imgIdx(1, nQuery, CV_32SC2, matches.ptr(1));
        GpuMat distance(1, nQuery, CV_32FC2, matches.ptr(2));

        trainIdx.setTo(Scalar::all(-1), stream);

        func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream));
    }

    void BFMatcher_Impl::knnMatchConvert(InputArray _gpu_matches,
                                         std::vector< std::vector<DMatch> >& matches,
                                         bool compactResult)
    {
        Mat gpu_matches;
        if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT)
        {
            _gpu_matches.getGpuMat().download(gpu_matches);
        }
        else
        {
            gpu_matches = _gpu_matches.getMat();
        }

        if (gpu_matches.empty())
        {
            matches.clear();
            return;
        }

        CV_Assert( ((gpu_matches.type() == CV_32SC2) && (gpu_matches.rows == 2 || gpu_matches.rows == 3)) ||
                   (gpu_matches.type() == CV_32SC1) );

        int nQuery = -1, k = -1;

        const int* trainIdxPtr = NULL;
        const int* imgIdxPtr = NULL;
        const float* distancePtr = NULL;

        if (gpu_matches.type() == CV_32SC2)
        {
            nQuery = gpu_matches.cols;
            k = 2;

            if (gpu_matches.rows == 2)
            {
                trainIdxPtr = gpu_matches.ptr<int>(0);
                distancePtr = gpu_matches.ptr<float>(1);
            }
            else
            {
                trainIdxPtr = gpu_matches.ptr<int>(0);
                imgIdxPtr = gpu_matches.ptr<int>(1);
                distancePtr = gpu_matches.ptr<float>(2);
            }
        }
        else
        {
            nQuery = gpu_matches.rows / 2;
            k = gpu_matches.cols;

            trainIdxPtr = gpu_matches.ptr<int>(0);
            distancePtr = gpu_matches.ptr<float>(nQuery);
        }

        matches.clear();
        matches.reserve(nQuery);

        for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
        {
            matches.push_back(std::vector<DMatch>());
            std::vector<DMatch>& curMatches = matches.back();
            curMatches.reserve(k);

            for (int i = 0; i < k; ++i)
            {
                const int trainIdx = *trainIdxPtr;
                if (trainIdx == -1)
                    continue;

                const int imgIdx = imgIdxPtr ? *imgIdxPtr : 0;
                const float distance = *distancePtr;

                DMatch m(queryIdx, trainIdx, imgIdx, distance);

                curMatches.push_back(m);

                ++trainIdxPtr;
                ++distancePtr;
                if (imgIdxPtr)
                    ++imgIdxPtr;
            }

            if (compactResult && curMatches.empty())
            {
                matches.pop_back();
            }
        }
    }

    //
    // radius match
    //
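    // Result layouts produced by radiusMatchAsync. Single train image: a
    // (2 * nQuery + 1)-row CV_32SC1 matrix -- nQuery rows of trainIdx, nQuery
    // rows of distances, plus one row of per-query match counts. Train
    // collection: a (3 * nQuery + 1)-row matrix with an extra imgIdx block,
    // deliberately typed CV_32FC1 so radiusMatchConvert can distinguish the two
    // encodings by element type alone.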

    void BFMatcher_Impl::radiusMatch(InputArray _queryDescriptors, InputArray _trainDescriptors,
                                     std::vector<std::vector<DMatch> >& matches,
                                     float maxDistance,
                                     InputArray _mask,
                                     bool compactResult)
    {
        GpuMat d_matches;
        radiusMatchAsync(_queryDescriptors, _trainDescriptors, d_matches, maxDistance, _mask);
        radiusMatchConvert(d_matches, matches, compactResult);
    }

    void BFMatcher_Impl::radiusMatch(InputArray _queryDescriptors,
                                     std::vector<std::vector<DMatch> >& matches,
                                     float maxDistance,
                                     const std::vector<GpuMat>& masks,
                                     bool compactResult)
    {
        GpuMat d_matches;
        radiusMatchAsync(_queryDescriptors, d_matches, maxDistance, masks);
        radiusMatchConvert(d_matches, matches, compactResult);
    }

    void BFMatcher_Impl::radiusMatchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors,
                                          OutputArray _matches,
                                          float maxDistance,
                                          InputArray _mask,
                                          Stream& stream)
    {
        using namespace cv::cuda::device::bf_radius_match;

        const GpuMat query = _queryDescriptors.getGpuMat();
        const GpuMat train = _trainDescriptors.getGpuMat();
        const GpuMat mask = _mask.getGpuMat();

        if (query.empty() || train.empty())
        {
            _matches.release();
            return;
        }

        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
        CV_Assert( train.cols == query.cols && train.type() == query.type() );
        CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) );

        typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
                                 const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
                                 cudaStream_t stream);

        static const caller_t callersL1[] =
        {
            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
            matchL1_gpu<int>, matchL1_gpu<float>
        };
        static const caller_t callersL2[] =
        {
            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
        };
        static const caller_t callersHamming[] =
        {
            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
        };

        const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

        const caller_t func = callers[query.depth()];
        if (func == 0)
        {
            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
        }

        const int nQuery = query.rows;
        const int nTrain = train.rows;

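        // Heuristic buffer width per query: nTrain / 100 (presumably around 1%
        // of train descriptors are expected within the radius), but at least
        // nQuery columns. radiusMatchConvert later clamps each per-query match
        // count to this width, so surplus matches are dropped.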
        const int cols = std::max((nTrain / 100), nQuery);

        _matches.create(2 * nQuery + 1, cols, CV_32SC1);
        GpuMat matches = _matches.getGpuMat();

        GpuMat trainIdx(nQuery, cols, CV_32SC1, matches.ptr(0), matches.step);
        GpuMat distance(nQuery, cols, CV_32FC1, matches.ptr(nQuery), matches.step);
        GpuMat nMatches(1, nQuery, CV_32SC1, matches.ptr(2 * nQuery));

        nMatches.setTo(Scalar::all(0), stream);

        func(query, train, maxDistance, mask, trainIdx, distance, nMatches, StreamAccessor::getStream(stream));
    }

    void BFMatcher_Impl::radiusMatchAsync(InputArray _queryDescriptors,
                                          OutputArray _matches,
                                          float maxDistance,
                                          const std::vector<GpuMat>& masks,
                                          Stream& stream)
    {
        using namespace cv::cuda::device::bf_radius_match;

        const GpuMat query = _queryDescriptors.getGpuMat();

        if (query.empty() || trainDescCollection_.empty())
        {
            _matches.release();
            return;
        }

        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );

        GpuMat trainCollection, maskCollection;
        makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection);

        typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
                                 const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
                                 cudaStream_t stream);

        static const caller_t callersL1[] =
        {
            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
            matchL1_gpu<int>, matchL1_gpu<float>
        };
        static const caller_t callersL2[] =
        {
            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
        };
        static const caller_t callersHamming[] =
        {
            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
        };

        const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

        const caller_t func = callers[query.depth()];
        if (func == 0)
        {
            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
        }

        const int nQuery = query.rows;

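        // NOTE: CV_32FC1 rather than CV_32SC1 -- the element type is what lets
        // radiusMatchConvert recognize this as the collection layout (see the
        // CV_Assert there). The row blocks below reinterpret the storage as the
        // appropriate int or float views.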
        _matches.create(3 * nQuery + 1, nQuery, CV_32FC1);
        GpuMat matches = _matches.getGpuMat();

        GpuMat trainIdx(nQuery, nQuery, CV_32SC1, matches.ptr(0), matches.step);
        GpuMat imgIdx(nQuery, nQuery, CV_32SC1, matches.ptr(nQuery), matches.step);
        GpuMat distance(nQuery, nQuery, CV_32FC1, matches.ptr(2 * nQuery), matches.step);
        GpuMat nMatches(1, nQuery, CV_32SC1, matches.ptr(3 * nQuery));

        nMatches.setTo(Scalar::all(0), stream);

        std::vector<PtrStepSzb> trains_(trainDescCollection_.begin(), trainDescCollection_.end());
        std::vector<PtrStepSzb> masks_(masks.begin(), masks.end());

        func(query, &trains_[0], static_cast<int>(trains_.size()), maxDistance, masks_.size() == 0 ? 0 : &masks_[0],
            trainIdx, imgIdx, distance, nMatches, StreamAccessor::getStream(stream));
    }

    void BFMatcher_Impl::radiusMatchConvert(InputArray _gpu_matches,
                                            std::vector< std::vector<DMatch> >& matches,
                                            bool compactResult)
    {
        Mat gpu_matches;
        if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT)
        {
            _gpu_matches.getGpuMat().download(gpu_matches);
        }
        else
        {
            gpu_matches = _gpu_matches.getMat();
        }

        if (gpu_matches.empty())
        {
            matches.clear();
            return;
        }

        CV_Assert( gpu_matches.type() == CV_32SC1 || gpu_matches.type() == CV_32FC1 );

        int nQuery = -1;

        const int* trainIdxPtr = NULL;
        const int* imgIdxPtr = NULL;
        const float* distancePtr = NULL;
        const int* nMatchesPtr = NULL;

        if (gpu_matches.type() == CV_32SC1)
        {
            nQuery = (gpu_matches.rows - 1) / 2;

            trainIdxPtr = gpu_matches.ptr<int>(0);
            distancePtr = gpu_matches.ptr<float>(nQuery);
            nMatchesPtr = gpu_matches.ptr<int>(2 * nQuery);
        }
        else
        {
            nQuery = (gpu_matches.rows - 1) / 3;

            trainIdxPtr = gpu_matches.ptr<int>(0);
            imgIdxPtr = gpu_matches.ptr<int>(nQuery);
            distancePtr = gpu_matches.ptr<float>(2 * nQuery);
            nMatchesPtr = gpu_matches.ptr<int>(3 * nQuery);
        }

        matches.clear();
        matches.reserve(nQuery);

        for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
        {
            const int nMatched = std::min(nMatchesPtr[queryIdx], gpu_matches.cols);

            if (nMatched == 0)
            {
                if (!compactResult)
                {
                    matches.push_back(std::vector<DMatch>());
                }
            }
            else
            {
                matches.push_back(std::vector<DMatch>(nMatched));
                std::vector<DMatch>& curMatches = matches.back();

                for (int i = 0; i < nMatched; ++i)
                {
                    const int trainIdx = trainIdxPtr[i];

                    const int imgIdx = imgIdxPtr ? imgIdxPtr[i] : 0;
                    const float distance = distancePtr[i];

                    DMatch m(queryIdx, trainIdx, imgIdx, distance);

                    curMatches[i] = m;
                }

                std::sort(curMatches.begin(), curMatches.end());
            }

            trainIdxPtr += gpu_matches.cols;
            distancePtr += gpu_matches.cols;
            if (imgIdxPtr)
                imgIdxPtr += gpu_matches.cols;
        }
    }
}

Ptr<cv::cuda::DescriptorMatcher> cv::cuda::DescriptorMatcher::createBFMatcher(int norm)
{
    return makePtr<BFMatcher_Impl>(norm);
}
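
// Example usage (a sketch; d_query and d_train are hypothetical GpuMats of
// binary descriptors such as ORB, already uploaded to the device):
//
//   cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
//       cv::cuda::DescriptorMatcher::createBFMatcher(cv::NORM_HAMMING);
//   std::vector<cv::DMatch> matches;
//   matcher->match(d_query, d_train, matches);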

#endif /* !defined (HAVE_CUDA) */
