1#include <stdexcept>
2#include "opencv2/imgproc.hpp"
3#include "opencv2/highgui.hpp"
4#include "opencv2/calib3d.hpp"
5#include "opencv2/video.hpp"
6#include "opencv2/cudalegacy.hpp"
7#include "opencv2/cudaimgproc.hpp"
8#include "opencv2/cudaarithm.hpp"
9#include "opencv2/cudawarping.hpp"
10#include "opencv2/cudafeatures2d.hpp"
11#include "opencv2/cudafilters.hpp"
12#include "opencv2/cudaoptflow.hpp"
13#include "opencv2/cudabgsegm.hpp"
14
15#include "performance.h"
16
17#include "opencv2/opencv_modules.hpp"
18
19#ifdef HAVE_OPENCV_XFEATURES2D
20#include "opencv2/xfeatures2d/cuda.hpp"
21#include "opencv2/xfeatures2d/nonfree.hpp"
22#endif
23
24#ifdef HAVE_OPENCV_BGSEGM
25#include "opencv2/bgsegm.hpp"
26#endif
27
28using namespace std;
29using namespace cv;
30
31
32TEST(matchTemplate)
33{
34    Mat src, templ, dst;
35    gen(src, 3000, 3000, CV_32F, 0, 1);
36
37    cuda::GpuMat d_src(src), d_templ, d_dst;
38
39    Ptr<cuda::TemplateMatching> alg = cuda::createTemplateMatching(src.type(), TM_CCORR);
40
41    for (int templ_size = 5; templ_size < 200; templ_size *= 5)
42    {
43        SUBTEST << src.cols << 'x' << src.rows << ", 32FC1" << ", templ " << templ_size << 'x' << templ_size << ", CCORR";
44
45        gen(templ, templ_size, templ_size, CV_32F, 0, 1);
46        matchTemplate(src, templ, dst, TM_CCORR);
47
48        CPU_ON;
49        matchTemplate(src, templ, dst, TM_CCORR);
50        CPU_OFF;
51
52        d_templ.upload(templ);
53        alg->match(d_src, d_templ, d_dst);
54
55        CUDA_ON;
56        alg->match(d_src, d_templ, d_dst);
57        CUDA_OFF;
58    }
59}
60
61
62TEST(minMaxLoc)
63{
64    Mat src;
65    cuda::GpuMat d_src;
66
67    double min_val, max_val;
68    Point min_loc, max_loc;
69
70    for (int size = 2000; size <= 8000; size *= 2)
71    {
72        SUBTEST << size << 'x' << size << ", 32F";
73
74        gen(src, size, size, CV_32F, 0, 1);
75
76        CPU_ON;
77        minMaxLoc(src, &min_val, &max_val, &min_loc, &max_loc);
78        CPU_OFF;
79
80        d_src.upload(src);
81
82        CUDA_ON;
83        cuda::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc);
84        CUDA_OFF;
85    }
86}
87
88
89TEST(remap)
90{
91    Mat src, dst, xmap, ymap;
92    cuda::GpuMat d_src, d_dst, d_xmap, d_ymap;
93
94    int interpolation = INTER_LINEAR;
95    int borderMode = BORDER_REPLICATE;
96
97    for (int size = 1000; size <= 4000; size *= 2)
98    {
99        SUBTEST << size << 'x' << size << ", 8UC4, INTER_LINEAR, BORDER_REPLICATE";
100
101        gen(src, size, size, CV_8UC4, 0, 256);
102
103        xmap.create(size, size, CV_32F);
104        ymap.create(size, size, CV_32F);
105        for (int i = 0; i < size; ++i)
106        {
107            float* xmap_row = xmap.ptr<float>(i);
108            float* ymap_row = ymap.ptr<float>(i);
109            for (int j = 0; j < size; ++j)
110            {
111                xmap_row[j] = (j - size * 0.5f) * 0.75f + size * 0.5f;
112                ymap_row[j] = (i - size * 0.5f) * 0.75f + size * 0.5f;
113            }
114        }
115
116        remap(src, dst, xmap, ymap, interpolation, borderMode);
117
118        CPU_ON;
119        remap(src, dst, xmap, ymap, interpolation, borderMode);
120        CPU_OFF;
121
122        d_src.upload(src);
123        d_xmap.upload(xmap);
124        d_ymap.upload(ymap);
125
126        cuda::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode);
127
128        CUDA_ON;
129        cuda::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode);
130        CUDA_OFF;
131    }
132}
133
134
135TEST(dft)
136{
137    Mat src, dst;
138    cuda::GpuMat d_src, d_dst;
139
140    for (int size = 1000; size <= 4000; size *= 2)
141    {
142        SUBTEST << size << 'x' << size << ", 32FC2, complex-to-complex";
143
144        gen(src, size, size, CV_32FC2, Scalar::all(0), Scalar::all(1));
145
146        dft(src, dst);
147
148        CPU_ON;
149        dft(src, dst);
150        CPU_OFF;
151
152        d_src.upload(src);
153
154        cuda::dft(d_src, d_dst, Size(size, size));
155
156        CUDA_ON;
157        cuda::dft(d_src, d_dst, Size(size, size));
158        CUDA_OFF;
159    }
160}
161
162
163TEST(cornerHarris)
164{
165    Mat src, dst;
166    cuda::GpuMat d_src, d_dst;
167
168    for (int size = 1000; size <= 4000; size *= 2)
169    {
170        SUBTEST << size << 'x' << size << ", 32FC1, BORDER_REFLECT101";
171
172        gen(src, size, size, CV_32F, 0, 1);
173
174        cornerHarris(src, dst, 5, 7, 0.1, BORDER_REFLECT101);
175
176        CPU_ON;
177        cornerHarris(src, dst, 5, 7, 0.1, BORDER_REFLECT101);
178        CPU_OFF;
179
180        d_src.upload(src);
181
182        Ptr<cuda::CornernessCriteria> harris = cuda::createHarrisCorner(src.type(), 5, 7, 0.1, BORDER_REFLECT101);
183
184        harris->compute(d_src, d_dst);
185
186        CUDA_ON;
187        harris->compute(d_src, d_dst);
188        CUDA_OFF;
189    }
190}
191
192
193TEST(integral)
194{
195    Mat src, sum;
196    cuda::GpuMat d_src, d_sum;
197
198    for (int size = 1000; size <= 4000; size *= 2)
199    {
200        SUBTEST << size << 'x' << size << ", 8UC1";
201
202        gen(src, size, size, CV_8U, 0, 256);
203
204        integral(src, sum);
205
206        CPU_ON;
207        integral(src, sum);
208        CPU_OFF;
209
210        d_src.upload(src);
211
212        cuda::integral(d_src, d_sum);
213
214        CUDA_ON;
215        cuda::integral(d_src, d_sum);
216        CUDA_OFF;
217    }
218}
219
220
221TEST(norm)
222{
223    Mat src;
224    cuda::GpuMat d_src, d_buf;
225
226    for (int size = 2000; size <= 4000; size += 1000)
227    {
228        SUBTEST << size << 'x' << size << ", 32FC4, NORM_INF";
229
230        gen(src, size, size, CV_32FC4, Scalar::all(0), Scalar::all(1));
231
232        norm(src, NORM_INF);
233
234        CPU_ON;
235        norm(src, NORM_INF);
236        CPU_OFF;
237
238        d_src.upload(src);
239
240        cuda::norm(d_src, NORM_INF, d_buf);
241
242        CUDA_ON;
243        cuda::norm(d_src, NORM_INF, d_buf);
244        CUDA_OFF;
245    }
246}
247
248
249TEST(meanShift)
250{
251    int sp = 10, sr = 10;
252
253    Mat src, dst;
254    cuda::GpuMat d_src, d_dst;
255
256    for (int size = 400; size <= 800; size *= 2)
257    {
258        SUBTEST << size << 'x' << size << ", 8UC3 vs 8UC4";
259
260        gen(src, size, size, CV_8UC3, Scalar::all(0), Scalar::all(256));
261
262        pyrMeanShiftFiltering(src, dst, sp, sr);
263
264        CPU_ON;
265        pyrMeanShiftFiltering(src, dst, sp, sr);
266        CPU_OFF;
267
268        gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256));
269
270        d_src.upload(src);
271
272        cuda::meanShiftFiltering(d_src, d_dst, sp, sr);
273
274        CUDA_ON;
275        cuda::meanShiftFiltering(d_src, d_dst, sp, sr);
276        CUDA_OFF;
277    }
278}
279
280#ifdef HAVE_OPENCV_XFEATURES2D
281
282TEST(SURF)
283{
284    Mat src = imread(abspath("../data/aloeL.jpg"), IMREAD_GRAYSCALE);
285    if (src.empty()) throw runtime_error("can't open ../data/aloeL.jpg");
286
287    Ptr<Feature2D> surf = xfeatures2d::SURF::create();
288    vector<KeyPoint> keypoints;
289    Mat descriptors;
290
291    surf->detectAndCompute(src, Mat(), keypoints, descriptors);
292
293    CPU_ON;
294    surf->detectAndCompute(src, Mat(), keypoints, descriptors);
295    CPU_OFF;
296
297    cuda::SURF_CUDA d_surf;
298    cuda::GpuMat d_src(src);
299    cuda::GpuMat d_keypoints;
300    cuda::GpuMat d_descriptors;
301
302    d_surf(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
303
304    CUDA_ON;
305    d_surf(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
306    CUDA_OFF;
307}
308
309#endif
310
311
312TEST(FAST)
313{
314    Mat src = imread(abspath("../data/aloeL.jpg"), IMREAD_GRAYSCALE);
315    if (src.empty()) throw runtime_error("can't open ../data/aloeL.jpg");
316
317    vector<KeyPoint> keypoints;
318
319    FAST(src, keypoints, 20);
320
321    CPU_ON;
322    FAST(src, keypoints, 20);
323    CPU_OFF;
324
325    cv::Ptr<cv::cuda::FastFeatureDetector> d_FAST = cv::cuda::FastFeatureDetector::create(20);
326    cuda::GpuMat d_src(src);
327    cuda::GpuMat d_keypoints;
328
329    d_FAST->detectAsync(d_src, d_keypoints);
330
331    CUDA_ON;
332    d_FAST->detectAsync(d_src, d_keypoints);
333    CUDA_OFF;
334}
335
336
337TEST(ORB)
338{
339    Mat src = imread(abspath("../data/aloeL.jpg"), IMREAD_GRAYSCALE);
340    if (src.empty()) throw runtime_error("can't open ../data/aloeL.jpg");
341
342    Ptr<ORB> orb = ORB::create(4000);
343
344    vector<KeyPoint> keypoints;
345    Mat descriptors;
346
347    orb->detectAndCompute(src, Mat(), keypoints, descriptors);
348
349    CPU_ON;
350    orb->detectAndCompute(src, Mat(), keypoints, descriptors);
351    CPU_OFF;
352
353    Ptr<cuda::ORB> d_orb = cuda::ORB::create();
354    cuda::GpuMat d_src(src);
355    cuda::GpuMat d_keypoints;
356    cuda::GpuMat d_descriptors;
357
358    d_orb->detectAndComputeAsync(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
359
360    CUDA_ON;
361    d_orb->detectAndComputeAsync(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
362    CUDA_OFF;
363}
364
365
366TEST(BruteForceMatcher)
367{
368    // Init CPU matcher
369
370    int desc_len = 64;
371
372    BFMatcher matcher(NORM_L2);
373
374    Mat query;
375    gen(query, 3000, desc_len, CV_32F, 0, 1);
376
377    Mat train;
378    gen(train, 3000, desc_len, CV_32F, 0, 1);
379
380    // Init CUDA matcher
381
382    Ptr<cuda::DescriptorMatcher> d_matcher = cuda::DescriptorMatcher::createBFMatcher(NORM_L2);
383
384    cuda::GpuMat d_query(query);
385    cuda::GpuMat d_train(train);
386
387    // Output
388    vector< vector<DMatch> > matches(2);
389    cuda::GpuMat d_matches;
390
391    SUBTEST << "match";
392
393    matcher.match(query, train, matches[0]);
394
395    CPU_ON;
396    matcher.match(query, train, matches[0]);
397    CPU_OFF;
398
399    d_matcher->matchAsync(d_query, d_train, d_matches);
400
401    CUDA_ON;
402    d_matcher->matchAsync(d_query, d_train, d_matches);
403    CUDA_OFF;
404
405    SUBTEST << "knnMatch";
406
407    matcher.knnMatch(query, train, matches, 2);
408
409    CPU_ON;
410    matcher.knnMatch(query, train, matches, 2);
411    CPU_OFF;
412
413    d_matcher->knnMatchAsync(d_query, d_train, d_matches, 2);
414
415    CUDA_ON;
416    d_matcher->knnMatchAsync(d_query, d_train, d_matches, 2);
417    CUDA_OFF;
418
419    SUBTEST << "radiusMatch";
420
421    float max_distance = 2.0f;
422
423    matcher.radiusMatch(query, train, matches, max_distance);
424
425    CPU_ON;
426    matcher.radiusMatch(query, train, matches, max_distance);
427    CPU_OFF;
428
429    d_matcher->radiusMatchAsync(d_query, d_train, d_matches, max_distance);
430
431    CUDA_ON;
432    d_matcher->radiusMatchAsync(d_query, d_train, d_matches, max_distance);
433    CUDA_OFF;
434}
435
436
437TEST(magnitude)
438{
439    Mat x, y, mag;
440    cuda::GpuMat d_x, d_y, d_mag;
441
442    for (int size = 2000; size <= 4000; size += 1000)
443    {
444        SUBTEST << size << 'x' << size << ", 32FC1";
445
446        gen(x, size, size, CV_32F, 0, 1);
447        gen(y, size, size, CV_32F, 0, 1);
448
449        magnitude(x, y, mag);
450
451        CPU_ON;
452        magnitude(x, y, mag);
453        CPU_OFF;
454
455        d_x.upload(x);
456        d_y.upload(y);
457
458        cuda::magnitude(d_x, d_y, d_mag);
459
460        CUDA_ON;
461        cuda::magnitude(d_x, d_y, d_mag);
462        CUDA_OFF;
463    }
464}
465
466
467TEST(add)
468{
469    Mat src1, src2, dst;
470    cuda::GpuMat d_src1, d_src2, d_dst;
471
472    for (int size = 2000; size <= 4000; size += 1000)
473    {
474        SUBTEST << size << 'x' << size << ", 32FC1";
475
476        gen(src1, size, size, CV_32F, 0, 1);
477        gen(src2, size, size, CV_32F, 0, 1);
478
479        add(src1, src2, dst);
480
481        CPU_ON;
482        add(src1, src2, dst);
483        CPU_OFF;
484
485        d_src1.upload(src1);
486        d_src2.upload(src2);
487
488        cuda::add(d_src1, d_src2, d_dst);
489
490        CUDA_ON;
491        cuda::add(d_src1, d_src2, d_dst);
492        CUDA_OFF;
493    }
494}
495
496
497TEST(log)
498{
499    Mat src, dst;
500    cuda::GpuMat d_src, d_dst;
501
502    for (int size = 2000; size <= 4000; size += 1000)
503    {
504        SUBTEST << size << 'x' << size << ", 32F";
505
506        gen(src, size, size, CV_32F, 1, 10);
507
508        log(src, dst);
509
510        CPU_ON;
511        log(src, dst);
512        CPU_OFF;
513
514        d_src.upload(src);
515
516        cuda::log(d_src, d_dst);
517
518        CUDA_ON;
519        cuda::log(d_src, d_dst);
520        CUDA_OFF;
521    }
522}
523
524
525TEST(mulSpectrums)
526{
527    Mat src1, src2, dst;
528    cuda::GpuMat d_src1, d_src2, d_dst;
529
530    for (int size = 2000; size <= 4000; size += 1000)
531    {
532        SUBTEST << size << 'x' << size;
533
534        gen(src1, size, size, CV_32FC2, Scalar::all(0), Scalar::all(1));
535        gen(src2, size, size, CV_32FC2, Scalar::all(0), Scalar::all(1));
536
537        mulSpectrums(src1, src2, dst, 0, true);
538
539        CPU_ON;
540        mulSpectrums(src1, src2, dst, 0, true);
541        CPU_OFF;
542
543        d_src1.upload(src1);
544        d_src2.upload(src2);
545
546        cuda::mulSpectrums(d_src1, d_src2, d_dst, 0, true);
547
548        CUDA_ON;
549        cuda::mulSpectrums(d_src1, d_src2, d_dst, 0, true);
550        CUDA_OFF;
551    }
552}
553
554
555TEST(resize)
556{
557    Mat src, dst;
558    cuda::GpuMat d_src, d_dst;
559
560    for (int size = 1000; size <= 3000; size += 1000)
561    {
562        SUBTEST << size << 'x' << size << ", 8UC4, up";
563
564        gen(src, size, size, CV_8UC4, 0, 256);
565
566        resize(src, dst, Size(), 2.0, 2.0);
567
568        CPU_ON;
569        resize(src, dst, Size(), 2.0, 2.0);
570        CPU_OFF;
571
572        d_src.upload(src);
573
574        cuda::resize(d_src, d_dst, Size(), 2.0, 2.0);
575
576        CUDA_ON;
577        cuda::resize(d_src, d_dst, Size(), 2.0, 2.0);
578        CUDA_OFF;
579    }
580
581    for (int size = 1000; size <= 3000; size += 1000)
582    {
583        SUBTEST << size << 'x' << size << ", 8UC4, down";
584
585        gen(src, size, size, CV_8UC4, 0, 256);
586
587        resize(src, dst, Size(), 0.5, 0.5);
588
589        CPU_ON;
590        resize(src, dst, Size(), 0.5, 0.5);
591        CPU_OFF;
592
593        d_src.upload(src);
594
595        cuda::resize(d_src, d_dst, Size(), 0.5, 0.5);
596
597        CUDA_ON;
598        cuda::resize(d_src, d_dst, Size(), 0.5, 0.5);
599        CUDA_OFF;
600    }
601}
602
603
604TEST(cvtColor)
605{
606    Mat src, dst;
607    cuda::GpuMat d_src, d_dst;
608
609    gen(src, 4000, 4000, CV_8UC1, 0, 255);
610    d_src.upload(src);
611
612    SUBTEST << "4000x4000, 8UC1, COLOR_GRAY2BGRA";
613
614    cvtColor(src, dst, COLOR_GRAY2BGRA, 4);
615
616    CPU_ON;
617    cvtColor(src, dst, COLOR_GRAY2BGRA, 4);
618    CPU_OFF;
619
620    cuda::cvtColor(d_src, d_dst, COLOR_GRAY2BGRA, 4);
621
622    CUDA_ON;
623    cuda::cvtColor(d_src, d_dst, COLOR_GRAY2BGRA, 4);
624    CUDA_OFF;
625
626    cv::swap(src, dst);
627    d_src.swap(d_dst);
628
629    SUBTEST << "4000x4000, 8UC3 vs 8UC4, COLOR_BGR2YCrCb";
630
631    cvtColor(src, dst, COLOR_BGR2YCrCb);
632
633    CPU_ON;
634    cvtColor(src, dst, COLOR_BGR2YCrCb);
635    CPU_OFF;
636
637    cuda::cvtColor(d_src, d_dst, COLOR_BGR2YCrCb, 4);
638
639    CUDA_ON;
640    cuda::cvtColor(d_src, d_dst, COLOR_BGR2YCrCb, 4);
641    CUDA_OFF;
642
643    cv::swap(src, dst);
644    d_src.swap(d_dst);
645
646    SUBTEST << "4000x4000, 8UC4, COLOR_YCrCb2BGR";
647
648    cvtColor(src, dst, COLOR_YCrCb2BGR, 4);
649
650    CPU_ON;
651    cvtColor(src, dst, COLOR_YCrCb2BGR, 4);
652    CPU_OFF;
653
654    cuda::cvtColor(d_src, d_dst, COLOR_YCrCb2BGR, 4);
655
656    CUDA_ON;
657    cuda::cvtColor(d_src, d_dst, COLOR_YCrCb2BGR, 4);
658    CUDA_OFF;
659
660    cv::swap(src, dst);
661    d_src.swap(d_dst);
662
663    SUBTEST << "4000x4000, 8UC3 vs 8UC4, COLOR_BGR2XYZ";
664
665    cvtColor(src, dst, COLOR_BGR2XYZ);
666
667    CPU_ON;
668    cvtColor(src, dst, COLOR_BGR2XYZ);
669    CPU_OFF;
670
671    cuda::cvtColor(d_src, d_dst, COLOR_BGR2XYZ, 4);
672
673    CUDA_ON;
674    cuda::cvtColor(d_src, d_dst, COLOR_BGR2XYZ, 4);
675    CUDA_OFF;
676
677    cv::swap(src, dst);
678    d_src.swap(d_dst);
679
680    SUBTEST << "4000x4000, 8UC4, COLOR_XYZ2BGR";
681
682    cvtColor(src, dst, COLOR_XYZ2BGR, 4);
683
684    CPU_ON;
685    cvtColor(src, dst, COLOR_XYZ2BGR, 4);
686    CPU_OFF;
687
688    cuda::cvtColor(d_src, d_dst, COLOR_XYZ2BGR, 4);
689
690    CUDA_ON;
691    cuda::cvtColor(d_src, d_dst, COLOR_XYZ2BGR, 4);
692    CUDA_OFF;
693
694    cv::swap(src, dst);
695    d_src.swap(d_dst);
696
697    SUBTEST << "4000x4000, 8UC3 vs 8UC4, COLOR_BGR2HSV";
698
699    cvtColor(src, dst, COLOR_BGR2HSV);
700
701    CPU_ON;
702    cvtColor(src, dst, COLOR_BGR2HSV);
703    CPU_OFF;
704
705    cuda::cvtColor(d_src, d_dst, COLOR_BGR2HSV, 4);
706
707    CUDA_ON;
708    cuda::cvtColor(d_src, d_dst, COLOR_BGR2HSV, 4);
709    CUDA_OFF;
710
711    cv::swap(src, dst);
712    d_src.swap(d_dst);
713
714    SUBTEST << "4000x4000, 8UC4, COLOR_HSV2BGR";
715
716    cvtColor(src, dst, COLOR_HSV2BGR, 4);
717
718    CPU_ON;
719    cvtColor(src, dst, COLOR_HSV2BGR, 4);
720    CPU_OFF;
721
722    cuda::cvtColor(d_src, d_dst, COLOR_HSV2BGR, 4);
723
724    CUDA_ON;
725    cuda::cvtColor(d_src, d_dst, COLOR_HSV2BGR, 4);
726    CUDA_OFF;
727
728    cv::swap(src, dst);
729    d_src.swap(d_dst);
730}
731
732
733TEST(erode)
734{
735    Mat src, dst, ker;
736    cuda::GpuMat d_src, d_buf, d_dst;
737
738    for (int size = 2000; size <= 4000; size += 1000)
739    {
740        SUBTEST << size << 'x' << size;
741
742        gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256));
743        ker = getStructuringElement(MORPH_RECT, Size(3, 3));
744
745        erode(src, dst, ker);
746
747        CPU_ON;
748        erode(src, dst, ker);
749        CPU_OFF;
750
751        d_src.upload(src);
752
753        Ptr<cuda::Filter> erode = cuda::createMorphologyFilter(MORPH_ERODE, d_src.type(), ker);
754
755        erode->apply(d_src, d_dst);
756
757        CUDA_ON;
758        erode->apply(d_src, d_dst);
759        CUDA_OFF;
760    }
761}
762
763TEST(threshold)
764{
765    Mat src, dst;
766    cuda::GpuMat d_src, d_dst;
767
768    for (int size = 2000; size <= 4000; size += 1000)
769    {
770        SUBTEST << size << 'x' << size << ", 8UC1, THRESH_BINARY";
771
772        gen(src, size, size, CV_8U, 0, 100);
773
774        threshold(src, dst, 50.0, 0.0, THRESH_BINARY);
775
776        CPU_ON;
777        threshold(src, dst, 50.0, 0.0, THRESH_BINARY);
778        CPU_OFF;
779
780        d_src.upload(src);
781
782        cuda::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY);
783
784        CUDA_ON;
785        cuda::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY);
786        CUDA_OFF;
787    }
788
789    for (int size = 2000; size <= 4000; size += 1000)
790    {
791        SUBTEST << size << 'x' << size << ", 32FC1, THRESH_TRUNC [NPP]";
792
793        gen(src, size, size, CV_32FC1, 0, 100);
794
795        threshold(src, dst, 50.0, 0.0, THRESH_TRUNC);
796
797        CPU_ON;
798        threshold(src, dst, 50.0, 0.0, THRESH_TRUNC);
799        CPU_OFF;
800
801        d_src.upload(src);
802
803        cuda::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC);
804
805        CUDA_ON;
806        cuda::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC);
807        CUDA_OFF;
808    }
809}
810
811TEST(pow)
812{
813    Mat src, dst;
814    cuda::GpuMat d_src, d_dst;
815
816    for (int size = 1000; size <= 4000; size += 1000)
817    {
818        SUBTEST << size << 'x' << size << ", 32F";
819
820        gen(src, size, size, CV_32F, 0, 100);
821
822        pow(src, -2.0, dst);
823
824        CPU_ON;
825        pow(src, -2.0, dst);
826        CPU_OFF;
827
828        d_src.upload(src);
829
830        cuda::pow(d_src, -2.0, d_dst);
831
832        CUDA_ON;
833        cuda::pow(d_src, -2.0, d_dst);
834        CUDA_OFF;
835    }
836}
837
838
839TEST(projectPoints)
840{
841    Mat src;
842    vector<Point2f> dst;
843    cuda::GpuMat d_src, d_dst;
844
845    Mat rvec; gen(rvec, 1, 3, CV_32F, 0, 1);
846    Mat tvec; gen(tvec, 1, 3, CV_32F, 0, 1);
847    Mat camera_mat; gen(camera_mat, 3, 3, CV_32F, 0, 1);
848    camera_mat.at<float>(0, 1) = 0.f;
849    camera_mat.at<float>(1, 0) = 0.f;
850    camera_mat.at<float>(2, 0) = 0.f;
851    camera_mat.at<float>(2, 1) = 0.f;
852
853    for (int size = (int)1e6, count = 0; size >= 1e5 && count < 5; size = int(size / 1.4), count++)
854    {
855        SUBTEST << size;
856
857        gen(src, 1, size, CV_32FC3, Scalar::all(0), Scalar::all(10));
858
859        projectPoints(src, rvec, tvec, camera_mat, Mat::zeros(1, 8, CV_32F), dst);
860
861        CPU_ON;
862        projectPoints(src, rvec, tvec, camera_mat, Mat::zeros(1, 8, CV_32F), dst);
863        CPU_OFF;
864
865        d_src.upload(src);
866
867        cuda::projectPoints(d_src, rvec, tvec, camera_mat, Mat(), d_dst);
868
869        CUDA_ON;
870        cuda::projectPoints(d_src, rvec, tvec, camera_mat, Mat(), d_dst);
871        CUDA_OFF;
872    }
873}
874
875
876static void InitSolvePnpRansac()
877{
878    Mat object; gen(object, 1, 4, CV_32FC3, Scalar::all(0), Scalar::all(100));
879    Mat image; gen(image, 1, 4, CV_32FC2, Scalar::all(0), Scalar::all(100));
880    Mat rvec, tvec;
881    cuda::solvePnPRansac(object, image, Mat::eye(3, 3, CV_32F), Mat(), rvec, tvec);
882}
883
884
885TEST(solvePnPRansac)
886{
887    InitSolvePnpRansac();
888
889    for (int num_points = 5000; num_points <= 300000; num_points = int(num_points * 3.76))
890    {
891        SUBTEST << num_points;
892
893        Mat object; gen(object, 1, num_points, CV_32FC3, Scalar::all(10), Scalar::all(100));
894        Mat image; gen(image, 1, num_points, CV_32FC2, Scalar::all(10), Scalar::all(100));
895        Mat camera_mat; gen(camera_mat, 3, 3, CV_32F, 0.5, 1);
896        camera_mat.at<float>(0, 1) = 0.f;
897        camera_mat.at<float>(1, 0) = 0.f;
898        camera_mat.at<float>(2, 0) = 0.f;
899        camera_mat.at<float>(2, 1) = 0.f;
900
901        Mat rvec, tvec;
902        const int num_iters = 200;
903        const float max_dist = 2.0f;
904        vector<int> inliers_cpu, inliers_gpu;
905
906        CPU_ON;
907        solvePnPRansac(object, image, camera_mat, Mat::zeros(1, 8, CV_32F), rvec, tvec, false, num_iters,
908                       max_dist, int(num_points * 0.05), inliers_cpu);
909        CPU_OFF;
910
911        CUDA_ON;
912        cuda::solvePnPRansac(object, image, camera_mat, Mat::zeros(1, 8, CV_32F), rvec, tvec, false, num_iters,
913                            max_dist, int(num_points * 0.05), &inliers_gpu);
914        CUDA_OFF;
915    }
916}
917
918TEST(GaussianBlur)
919{
920    for (int size = 1000; size <= 4000; size += 1000)
921    {
922        SUBTEST << size << 'x' << size << ", 8UC4";
923
924        Mat src, dst;
925
926        gen(src, size, size, CV_8UC4, 0, 256);
927
928        GaussianBlur(src, dst, Size(3, 3), 1);
929
930        CPU_ON;
931        GaussianBlur(src, dst, Size(3, 3), 1);
932        CPU_OFF;
933
934        cuda::GpuMat d_src(src);
935        cuda::GpuMat d_dst(src.size(), src.type());
936        cuda::GpuMat d_buf;
937
938        cv::Ptr<cv::cuda::Filter> gauss = cv::cuda::createGaussianFilter(d_src.type(), -1, cv::Size(3, 3), 1);
939
940        gauss->apply(d_src, d_dst);
941
942        CUDA_ON;
943        gauss->apply(d_src, d_dst);
944        CUDA_OFF;
945    }
946}
947
948TEST(filter2D)
949{
950    for (int size = 512; size <= 2048; size *= 2)
951    {
952        Mat src;
953        gen(src, size, size, CV_8UC4, 0, 256);
954
955        for (int ksize = 3; ksize <= 16; ksize += 2)
956        {
957            SUBTEST << "ksize = " << ksize << ", " << size << 'x' << size << ", 8UC4";
958
959            Mat kernel;
960            gen(kernel, ksize, ksize, CV_32FC1, 0.0, 1.0);
961
962            Mat dst;
963            cv::filter2D(src, dst, -1, kernel);
964
965            CPU_ON;
966            cv::filter2D(src, dst, -1, kernel);
967            CPU_OFF;
968
969            cuda::GpuMat d_src(src);
970            cuda::GpuMat d_dst;
971
972            Ptr<cuda::Filter> filter2D = cuda::createLinearFilter(d_src.type(), -1, kernel);
973            filter2D->apply(d_src, d_dst);
974
975            CUDA_ON;
976            filter2D->apply(d_src, d_dst);
977            CUDA_OFF;
978        }
979    }
980}
981
982TEST(pyrDown)
983{
984    for (int size = 4000; size >= 1000; size -= 1000)
985    {
986        SUBTEST << size << 'x' << size << ", 8UC4";
987
988        Mat src, dst;
989        gen(src, size, size, CV_8UC4, 0, 256);
990
991        pyrDown(src, dst);
992
993        CPU_ON;
994        pyrDown(src, dst);
995        CPU_OFF;
996
997        cuda::GpuMat d_src(src);
998        cuda::GpuMat d_dst;
999
1000        cuda::pyrDown(d_src, d_dst);
1001
1002        CUDA_ON;
1003        cuda::pyrDown(d_src, d_dst);
1004        CUDA_OFF;
1005    }
1006}
1007
1008TEST(pyrUp)
1009{
1010    for (int size = 2000; size >= 1000; size -= 1000)
1011    {
1012        SUBTEST << size << 'x' << size << ", 8UC4";
1013
1014        Mat src, dst;
1015
1016        gen(src, size, size, CV_8UC4, 0, 256);
1017
1018        pyrUp(src, dst);
1019
1020        CPU_ON;
1021        pyrUp(src, dst);
1022        CPU_OFF;
1023
1024        cuda::GpuMat d_src(src);
1025        cuda::GpuMat d_dst;
1026
1027        cuda::pyrUp(d_src, d_dst);
1028
1029        CUDA_ON;
1030        cuda::pyrUp(d_src, d_dst);
1031        CUDA_OFF;
1032    }
1033}
1034
1035
1036TEST(equalizeHist)
1037{
1038    for (int size = 1000; size < 4000; size += 1000)
1039    {
1040        SUBTEST << size << 'x' << size;
1041
1042        Mat src, dst;
1043
1044        gen(src, size, size, CV_8UC1, 0, 256);
1045
1046        equalizeHist(src, dst);
1047
1048        CPU_ON;
1049        equalizeHist(src, dst);
1050        CPU_OFF;
1051
1052        cuda::GpuMat d_src(src);
1053        cuda::GpuMat d_dst;
1054
1055        cuda::equalizeHist(d_src, d_dst);
1056
1057        CUDA_ON;
1058        cuda::equalizeHist(d_src, d_dst);
1059        CUDA_OFF;
1060    }
1061}
1062
1063
1064TEST(Canny)
1065{
1066    Mat img = imread(abspath("../data/aloeL.jpg"), IMREAD_GRAYSCALE);
1067
1068    if (img.empty()) throw runtime_error("can't open ../data/aloeL.jpg");
1069
1070    Mat edges(img.size(), CV_8UC1);
1071
1072    CPU_ON;
1073    Canny(img, edges, 50.0, 100.0);
1074    CPU_OFF;
1075
1076    cuda::GpuMat d_img(img);
1077    cuda::GpuMat d_edges;
1078
1079    Ptr<cuda::CannyEdgeDetector> canny = cuda::createCannyEdgeDetector(50.0, 100.0);
1080
1081    canny->detect(d_img, d_edges);
1082
1083    CUDA_ON;
1084    canny->detect(d_img, d_edges);
1085    CUDA_OFF;
1086}
1087
1088
1089TEST(reduce)
1090{
1091    for (int size = 1000; size < 4000; size += 1000)
1092    {
1093        Mat src;
1094        gen(src, size, size, CV_32F, 0, 255);
1095
1096        Mat dst0;
1097        Mat dst1;
1098
1099        cuda::GpuMat d_src(src);
1100        cuda::GpuMat d_dst0;
1101        cuda::GpuMat d_dst1;
1102
1103        SUBTEST << size << 'x' << size << ", dim = 0";
1104
1105        reduce(src, dst0, 0, REDUCE_MIN);
1106
1107        CPU_ON;
1108        reduce(src, dst0, 0, REDUCE_MIN);
1109        CPU_OFF;
1110
1111        cuda::reduce(d_src, d_dst0, 0, REDUCE_MIN);
1112
1113        CUDA_ON;
1114        cuda::reduce(d_src, d_dst0, 0, REDUCE_MIN);
1115        CUDA_OFF;
1116
1117        SUBTEST << size << 'x' << size << ", dim = 1";
1118
1119        reduce(src, dst1, 1, REDUCE_MIN);
1120
1121        CPU_ON;
1122        reduce(src, dst1, 1, REDUCE_MIN);
1123        CPU_OFF;
1124
1125        cuda::reduce(d_src, d_dst1, 1, REDUCE_MIN);
1126
1127        CUDA_ON;
1128        cuda::reduce(d_src, d_dst1, 1, REDUCE_MIN);
1129        CUDA_OFF;
1130    }
1131}
1132
1133
1134TEST(gemm)
1135{
1136    Mat src1, src2, src3, dst;
1137    cuda::GpuMat d_src1, d_src2, d_src3, d_dst;
1138
1139    for (int size = 512; size <= 1024; size *= 2)
1140    {
1141        SUBTEST << size << 'x' << size;
1142
1143        gen(src1, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10));
1144        gen(src2, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10));
1145        gen(src3, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10));
1146
1147        gemm(src1, src2, 1.0, src3, 1.0, dst);
1148
1149        CPU_ON;
1150        gemm(src1, src2, 1.0, src3, 1.0, dst);
1151        CPU_OFF;
1152
1153        d_src1.upload(src1);
1154        d_src2.upload(src2);
1155        d_src3.upload(src3);
1156
1157        cuda::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
1158
1159        CUDA_ON;
1160        cuda::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
1161        CUDA_OFF;
1162    }
1163}
1164
1165TEST(GoodFeaturesToTrack)
1166{
1167    Mat src = imread(abspath("../data/aloeL.jpg"), IMREAD_GRAYSCALE);
1168    if (src.empty()) throw runtime_error("can't open ../data/aloeL.jpg");
1169
1170    vector<Point2f> pts;
1171
1172    goodFeaturesToTrack(src, pts, 8000, 0.01, 0.0);
1173
1174    CPU_ON;
1175    goodFeaturesToTrack(src, pts, 8000, 0.01, 0.0);
1176    CPU_OFF;
1177
1178    Ptr<cuda::CornersDetector> detector = cuda::createGoodFeaturesToTrackDetector(src.type(), 8000, 0.01, 0.0);
1179
1180    cuda::GpuMat d_src(src);
1181    cuda::GpuMat d_pts;
1182
1183    detector->detect(d_src, d_pts);
1184
1185    CUDA_ON;
1186    detector->detect(d_src, d_pts);
1187    CUDA_OFF;
1188}
1189
1190#ifdef HAVE_OPENCV_BGSEGM
1191
1192TEST(MOG)
1193{
1194    const std::string inputFile = abspath("../data/768x576.avi");
1195
1196    cv::VideoCapture cap(inputFile);
1197    if (!cap.isOpened()) throw runtime_error("can't open ../data/768x576.avi");
1198
1199    cv::Mat frame;
1200    cap >> frame;
1201
1202    cv::Ptr<cv::BackgroundSubtractor> mog = cv::bgsegm::createBackgroundSubtractorMOG();
1203    cv::Mat foreground;
1204
1205    mog->apply(frame, foreground, 0.01);
1206
1207    while (!TestSystem::instance().stop())
1208    {
1209        cap >> frame;
1210
1211        TestSystem::instance().cpuOn();
1212
1213        mog->apply(frame, foreground, 0.01);
1214
1215        TestSystem::instance().cpuOff();
1216    }
1217    TestSystem::instance().cpuComplete();
1218
1219    cap.open(inputFile);
1220
1221    cap >> frame;
1222
1223    cv::cuda::GpuMat d_frame(frame);
1224    cv::Ptr<cv::BackgroundSubtractor> d_mog = cv::cuda::createBackgroundSubtractorMOG();
1225    cv::cuda::GpuMat d_foreground;
1226
1227    d_mog->apply(d_frame, d_foreground, 0.01);
1228
1229    while (!TestSystem::instance().stop())
1230    {
1231        cap >> frame;
1232        d_frame.upload(frame);
1233
1234        TestSystem::instance().gpuOn();
1235
1236        d_mog->apply(d_frame, d_foreground, 0.01);
1237
1238        TestSystem::instance().gpuOff();
1239    }
1240    TestSystem::instance().gpuComplete();
1241}
1242
1243#endif
1244
1245TEST(MOG2)
1246{
1247    const std::string inputFile = abspath("../data/768x576.avi");
1248
1249    cv::VideoCapture cap(inputFile);
1250    if (!cap.isOpened()) throw runtime_error("can't open ../data/768x576.avi");
1251
1252    cv::Mat frame;
1253    cap >> frame;
1254
1255    cv::Ptr<cv::BackgroundSubtractor> mog2 = cv::createBackgroundSubtractorMOG2();
1256    cv::Mat foreground;
1257    cv::Mat background;
1258
1259    mog2->apply(frame, foreground);
1260    mog2->getBackgroundImage(background);
1261
1262    while (!TestSystem::instance().stop())
1263    {
1264        cap >> frame;
1265
1266        TestSystem::instance().cpuOn();
1267
1268        mog2->apply(frame, foreground);
1269        mog2->getBackgroundImage(background);
1270
1271        TestSystem::instance().cpuOff();
1272    }
1273    TestSystem::instance().cpuComplete();
1274
1275    cap.open(inputFile);
1276
1277    cap >> frame;
1278
1279    cv::Ptr<cv::BackgroundSubtractor> d_mog2 = cv::cuda::createBackgroundSubtractorMOG2();
1280    cv::cuda::GpuMat d_frame(frame);
1281    cv::cuda::GpuMat d_foreground;
1282    cv::cuda::GpuMat d_background;
1283
1284    d_mog2->apply(d_frame, d_foreground);
1285    d_mog2->getBackgroundImage(d_background);
1286
1287    while (!TestSystem::instance().stop())
1288    {
1289        cap >> frame;
1290        d_frame.upload(frame);
1291
1292        TestSystem::instance().gpuOn();
1293
1294        d_mog2->apply(d_frame, d_foreground);
1295        d_mog2->getBackgroundImage(d_background);
1296
1297        TestSystem::instance().gpuOff();
1298    }
1299    TestSystem::instance().gpuComplete();
1300}
1301