/* This sample demonstrates working on one piece of data using two GPUs.
   It splits the input into two parts and processes them separately on
   different GPUs. */

// Disable some warnings which are caused by CUDA headers
#if defined(_MSC_VER)
#pragma warning(disable: 4201 4408 4100)
#endif

#include <cstdlib>
#include <iostream>
#include "cvconfig.h"
#include "opencv2/core/core.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/cudastereo.hpp"

#ifdef HAVE_TBB
#  include "tbb/tbb_stddef.h"
#  if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
#    include "tbb/tbb.h"
#    include "tbb/task.h"
#    undef min
#    undef max
#  else
#    undef HAVE_TBB
#  endif
#endif

#if !defined(HAVE_CUDA) || !defined(HAVE_TBB) || defined(__arm__)

int main()
{
#if !defined(HAVE_CUDA)
    std::cout << "CUDA support is required (CMake key 'WITH_CUDA' must be true).\n";
#endif

#if !defined(HAVE_TBB)
    std::cout << "TBB support is required (CMake key 'WITH_TBB' must be true).\n";
#endif

#if defined(__arm__)
    std::cout << "This sample is not supported with the ARM CUDA library.\n";
#endif

    return 0;
}

#else

#include <cuda.h>
#include <cuda_runtime.h>

using namespace std;
using namespace cv;
using namespace cv::cuda;

struct Worker { void operator()(int device_id) const; };
void destroyContexts();

#define safeCall(expr) safeCall_(expr, #expr, __FILE__, __LINE__)
inline void safeCall_(CUresult code, const char* expr, const char* file, int line)
{
    if (code != CUDA_SUCCESS)
    {
        std::cout << "CUDA driver API error: code " << code << ", expr " << expr
            << ", file " << file << ", line " << line << endl;
        destroyContexts();
        exit(-1);
    }
}
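
// Note: safeCall() stringifies the wrapped expression via the # operator, so a
// failing driver call is reported together with its file and line; it also
// destroys both contexts before exiting so a failed call does not leak them.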

// Each GPU is associated with its own context
CUcontext contexts[2];

inline void contextOn(int id)
{
    safeCall(cuCtxPushCurrent(contexts[id]));
}

inline void contextOff()
{
    CUcontext prev_context;
    safeCall(cuCtxPopCurrent(&prev_context));
}
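
// contextOn()/contextOff() bracket every block of work that touches a given
// GPU: OpenCV's CUDA routines operate on whichever driver context is current
// on the calling thread, so each thread pushes the right context before using
// the corresponding GpuMats and pops it when done.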

// Per-GPU data
GpuMat d_left[2];
GpuMat d_right[2];
Ptr<cuda::StereoBM> bm[2];
GpuMat d_result[2];

static void printHelp()
{
    std::cout << "Usage: driver_api_stereo_multi_gpu --left <left_image> --right <right_image>\n";
}

int main(int argc, char** argv)
{
    if (argc < 5)
    {
        printHelp();
        return -1;
    }

    int num_devices = getCudaEnabledDeviceCount();
    if (num_devices < 2)
    {
        std::cout << "Two or more GPUs are required\n";
        return -1;
    }

    for (int i = 0; i < num_devices; ++i)
    {
        cv::cuda::printShortCudaDeviceInfo(i);

        DeviceInfo dev_info(i);
        if (!dev_info.isCompatible())
        {
            std::cout << "GPU module isn't built for GPU #" << i << " ("
                 << dev_info.name() << ", CC " << dev_info.majorVersion()
                 << '.' << dev_info.minorVersion() << ")\n";
            return -1;
        }
    }

    // Load input data
    Mat left, right;
    for (int i = 1; i < argc; ++i)
    {
        if (string(argv[i]) == "--left")
        {
            left = imread(argv[++i], cv::IMREAD_GRAYSCALE);
            CV_Assert(!left.empty());
        }
        else if (string(argv[i]) == "--right")
        {
            right = imread(argv[++i], cv::IMREAD_GRAYSCALE);
            CV_Assert(!right.empty());
        }
        else if (string(argv[i]) == "--help")
        {
            printHelp();
            return -1;
        }
    }
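
    // Sanity check: both --left and --right must have been supplied, otherwise
    // the rowRange() splits below would operate on empty matrices.
    if (left.empty() || right.empty())
    {
        printHelp();
        return -1;
    }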

    // Init CUDA Driver API
    safeCall(cuInit(0));

    // Create context for GPU #0
    CUdevice device;
    safeCall(cuDeviceGet(&device, 0));
    safeCall(cuCtxCreate(&contexts[0], 0, device));
    contextOff();

    // Create context for GPU #1
    safeCall(cuDeviceGet(&device, 1));
    safeCall(cuCtxCreate(&contexts[1], 0, device));
    contextOff();
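
    // cuCtxCreate() makes the new context current on this thread, so each one
    // is popped immediately; the contexts then float and can be pushed later
    // by whichever thread (main or TBB worker) needs that GPU.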

    // Split source images for processing on GPU #0
    contextOn(0);
    d_left[0].upload(left.rowRange(0, left.rows / 2));
    d_right[0].upload(right.rowRange(0, right.rows / 2));
    bm[0] = cuda::createStereoBM();
    contextOff();

    // Split source images for processing on GPU #1
    contextOn(1);
    d_left[1].upload(left.rowRange(left.rows / 2, left.rows));
    d_right[1].upload(right.rowRange(right.rows / 2, right.rows));
    bm[1] = cuda::createStereoBM();
    contextOff();

    // Execute the calculation in two threads using two GPUs
    int devices[] = {0, 1};
    tbb::parallel_do(devices, devices + 2, Worker());
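
    // tbb::parallel_do() invokes Worker() once per device id, typically on two
    // TBB worker threads, so both halves are processed concurrently. (Newer
    // TBB/oneTBB versions deprecate parallel_do in favor of
    // tbb::parallel_for_each, which accepts the same iterator-pair form.)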

    // Download/show the result and release GPU #0 resources
    contextOn(0);
    imshow("GPU #0 result", Mat(d_result[0]));
    d_left[0].release();
    d_right[0].release();
    d_result[0].release();
    bm[0].release();
    contextOff();

    // Download/show the result and release GPU #1 resources
    contextOn(1);
    imshow("GPU #1 result", Mat(d_result[1]));
    d_left[1].release();
    d_right[1].release();
    d_result[1].release();
    bm[1].release();
    contextOff();
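
    // Note: Mat(d_result[i]) downloads the disparity map to host memory, and
    // both the download and the GpuMat releases must happen while the owning
    // context is current, before destroyContexts() is called.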

    waitKey();
    destroyContexts();
    return 0;
}


void Worker::operator()(int device_id) const
{
    contextOn(device_id);

    bm[device_id]->compute(d_left[device_id], d_right[device_id], d_result[device_id]);

    std::cout << "GPU #" << device_id << " (" << DeviceInfo(device_id).name()
        << "): finished\n";

    contextOff();
}


void destroyContexts()
{
    safeCall(cuCtxDestroy(contexts[0]));
    safeCall(cuCtxDestroy(contexts[1]));
}

#endif