1/* This sample demonstrates working on one piece of data using two GPUs. 2 It splits input into two parts and processes them separately on different 3 GPUs. */ 4 5// Disable some warnings which are caused with CUDA headers 6#if defined(_MSC_VER) 7#pragma warning(disable: 4201 4408 4100) 8#endif 9 10#include <iostream> 11#include "cvconfig.h" 12#include "opencv2/core/core.hpp" 13#include "opencv2/highgui/highgui.hpp" 14#include "opencv2/cudastereo.hpp" 15 16#ifdef HAVE_TBB 17# include "tbb/tbb_stddef.h" 18# if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202 19# include "tbb/tbb.h" 20# include "tbb/task.h" 21# undef min 22# undef max 23# else 24# undef HAVE_TBB 25# endif 26#endif 27 28#if !defined(HAVE_CUDA) || !defined(HAVE_TBB) || defined(__arm__) 29 30int main() 31{ 32#if !defined(HAVE_CUDA) 33 std::cout << "CUDA support is required (CMake key 'WITH_CUDA' must be true).\n"; 34#endif 35 36#if !defined(HAVE_TBB) 37 std::cout << "TBB support is required (CMake key 'WITH_TBB' must be true).\n"; 38#endif 39 40#if defined(__arm__) 41 std::cout << "Unsupported for ARM CUDA library." << std::endl; 42#endif 43 44 return 0; 45} 46 47#else 48 49#include <cuda.h> 50#include <cuda_runtime.h> 51 52using namespace std; 53using namespace cv; 54using namespace cv::cuda; 55 56struct Worker { void operator()(int device_id) const; }; 57void destroyContexts(); 58 59#define safeCall(expr) safeCall_(expr, #expr, __FILE__, __LINE__) 60inline void safeCall_(int code, const char* expr, const char* file, int line) 61{ 62 if (code != CUDA_SUCCESS) 63 { 64 std::cout << "CUDA driver API error: code " << code << ", expr " << expr 65 << ", file " << file << ", line " << line << endl; 66 destroyContexts(); 67 exit(-1); 68 } 69} 70 71// Each GPU is associated with its own context 72CUcontext contexts[2]; 73 74void inline contextOn(int id) 75{ 76 safeCall(cuCtxPushCurrent(contexts[id])); 77} 78 79void inline contextOff() 80{ 81 CUcontext prev_context; 82 safeCall(cuCtxPopCurrent(&prev_context)); 83} 84 85// GPUs data 86GpuMat d_left[2]; 87GpuMat d_right[2]; 88Ptr<cuda::StereoBM> bm[2]; 89GpuMat d_result[2]; 90 91static void printHelp() 92{ 93 std::cout << "Usage: driver_api_stereo_multi_gpu --left <left_image> --right <right_image>\n"; 94} 95 96int main(int argc, char** argv) 97{ 98 if (argc < 5) 99 { 100 printHelp(); 101 return -1; 102 } 103 104 int num_devices = getCudaEnabledDeviceCount(); 105 if (num_devices < 2) 106 { 107 std::cout << "Two or more GPUs are required\n"; 108 return -1; 109 } 110 111 for (int i = 0; i < num_devices; ++i) 112 { 113 cv::cuda::printShortCudaDeviceInfo(i); 114 115 DeviceInfo dev_info(i); 116 if (!dev_info.isCompatible()) 117 { 118 std::cout << "GPU module isn't built for GPU #" << i << " (" 119 << dev_info.name() << ", CC " << dev_info.majorVersion() 120 << dev_info.minorVersion() << "\n"; 121 return -1; 122 } 123 } 124 125 // Load input data 126 Mat left, right; 127 for (int i = 1; i < argc; ++i) 128 { 129 if (string(argv[i]) == "--left") 130 { 131 left = imread(argv[++i], cv::IMREAD_GRAYSCALE); 132 CV_Assert(!left.empty()); 133 } 134 else if (string(argv[i]) == "--right") 135 { 136 right = imread(argv[++i], cv::IMREAD_GRAYSCALE); 137 CV_Assert(!right.empty()); 138 } 139 else if (string(argv[i]) == "--help") 140 { 141 printHelp(); 142 return -1; 143 } 144 } 145 146 147 // Init CUDA Driver API 148 safeCall(cuInit(0)); 149 150 // Create context for GPU #0 151 CUdevice device; 152 safeCall(cuDeviceGet(&device, 0)); 153 safeCall(cuCtxCreate(&contexts[0], 0, device)); 154 contextOff(); 155 156 // Create context for GPU #1 157 safeCall(cuDeviceGet(&device, 1)); 158 safeCall(cuCtxCreate(&contexts[1], 0, device)); 159 contextOff(); 160 161 // Split source images for processing on GPU #0 162 contextOn(0); 163 d_left[0].upload(left.rowRange(0, left.rows / 2)); 164 d_right[0].upload(right.rowRange(0, right.rows / 2)); 165 bm[0] = cuda::createStereoBM(); 166 contextOff(); 167 168 // Split source images for processing on the GPU #1 169 contextOn(1); 170 d_left[1].upload(left.rowRange(left.rows / 2, left.rows)); 171 d_right[1].upload(right.rowRange(right.rows / 2, right.rows)); 172 bm[1] = cuda::createStereoBM(); 173 contextOff(); 174 175 // Execute calculation in two threads using two GPUs 176 int devices[] = {0, 1}; 177 tbb::parallel_do(devices, devices + 2, Worker()); 178 179 // Release the first GPU resources 180 contextOn(0); 181 imshow("GPU #0 result", Mat(d_result[0])); 182 d_left[0].release(); 183 d_right[0].release(); 184 d_result[0].release(); 185 bm[0].release(); 186 contextOff(); 187 188 // Release the second GPU resources 189 contextOn(1); 190 imshow("GPU #1 result", Mat(d_result[1])); 191 d_left[1].release(); 192 d_right[1].release(); 193 d_result[1].release(); 194 bm[1].release(); 195 contextOff(); 196 197 waitKey(); 198 destroyContexts(); 199 return 0; 200} 201 202 203void Worker::operator()(int device_id) const 204{ 205 contextOn(device_id); 206 207 bm[device_id]->compute(d_left[device_id], d_right[device_id], d_result[device_id]); 208 209 std::cout << "GPU #" << device_id << " (" << DeviceInfo().name() 210 << "): finished\n"; 211 212 contextOff(); 213} 214 215 216void destroyContexts() 217{ 218 safeCall(cuCtxDestroy(contexts[0])); 219 safeCall(cuCtxDestroy(contexts[1])); 220} 221 222#endif 223