1/* This sample demonstrates the way you can perform independed tasks 2 on the different GPUs */ 3 4// Disable some warnings which are caused with CUDA headers 5#if defined(_MSC_VER) 6#pragma warning(disable: 4201 4408 4100) 7#endif 8 9#include <iostream> 10#include "cvconfig.h" 11#include "opencv2/core/core.hpp" 12#include "opencv2/cudaarithm.hpp" 13 14#ifdef HAVE_TBB 15# include "tbb/tbb_stddef.h" 16# if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202 17# include "tbb/tbb.h" 18# include "tbb/task.h" 19# undef min 20# undef max 21# else 22# undef HAVE_TBB 23# endif 24#endif 25 26#if !defined(HAVE_CUDA) || !defined(HAVE_TBB) || defined(__arm__) 27 28int main() 29{ 30#if !defined(HAVE_CUDA) 31 std::cout << "CUDA support is required (CMake key 'WITH_CUDA' must be true).\n"; 32#endif 33 34#if !defined(HAVE_TBB) 35 std::cout << "TBB support is required (CMake key 'WITH_TBB' must be true).\n"; 36#endif 37 38#if defined(__arm__) 39 std::cout << "Unsupported for ARM CUDA library." << std::endl; 40#endif 41 42 return 0; 43} 44 45#else 46 47#include <cuda.h> 48#include <cuda_runtime.h> 49 50using namespace std; 51using namespace cv; 52using namespace cv::cuda; 53 54struct Worker { void operator()(int device_id) const; }; 55void destroyContexts(); 56 57#define safeCall(expr) safeCall_(expr, #expr, __FILE__, __LINE__) 58inline void safeCall_(int code, const char* expr, const char* file, int line) 59{ 60 if (code != CUDA_SUCCESS) 61 { 62 std::cout << "CUDA driver API error: code " << code << ", expr " << expr 63 << ", file " << file << ", line " << line << endl; 64 destroyContexts(); 65 exit(-1); 66 } 67} 68 69// Each GPU is associated with its own context 70CUcontext contexts[2]; 71 72int main() 73{ 74 int num_devices = getCudaEnabledDeviceCount(); 75 if (num_devices < 2) 76 { 77 std::cout << "Two or more GPUs are required\n"; 78 return -1; 79 } 80 81 for (int i = 0; i < num_devices; ++i) 82 { 83 cv::cuda::printShortCudaDeviceInfo(i); 84 85 DeviceInfo dev_info(i); 86 if (!dev_info.isCompatible()) 87 { 88 std::cout << "CUDA module isn't built for GPU #" << i << " (" 89 << dev_info.name() << ", CC " << dev_info.majorVersion() 90 << dev_info.minorVersion() << "\n"; 91 return -1; 92 } 93 } 94 95 // Init CUDA Driver API 96 safeCall(cuInit(0)); 97 98 // Create context for GPU #0 99 CUdevice device; 100 safeCall(cuDeviceGet(&device, 0)); 101 safeCall(cuCtxCreate(&contexts[0], 0, device)); 102 103 CUcontext prev_context; 104 safeCall(cuCtxPopCurrent(&prev_context)); 105 106 // Create context for GPU #1 107 safeCall(cuDeviceGet(&device, 1)); 108 safeCall(cuCtxCreate(&contexts[1], 0, device)); 109 110 safeCall(cuCtxPopCurrent(&prev_context)); 111 112 // Execute calculation in two threads using two GPUs 113 int devices[] = {0, 1}; 114 tbb::parallel_do(devices, devices + 2, Worker()); 115 116 destroyContexts(); 117 return 0; 118} 119 120 121void Worker::operator()(int device_id) const 122{ 123 // Set the proper context 124 safeCall(cuCtxPushCurrent(contexts[device_id])); 125 126 Mat src(1000, 1000, CV_32F); 127 Mat dst; 128 129 RNG rng(0); 130 rng.fill(src, RNG::UNIFORM, 0, 1); 131 132 // CPU works 133 cv::transpose(src, dst); 134 135 // GPU works 136 GpuMat d_src(src); 137 GpuMat d_dst; 138 cuda::transpose(d_src, d_dst); 139 140 // Check results 141 bool passed = cv::norm(dst - Mat(d_dst), NORM_INF) < 1e-3; 142 std::cout << "GPU #" << device_id << " (" << DeviceInfo().name() << "): " 143 << (passed ? "passed" : "FAILED") << endl; 144 145 // Deallocate data here, otherwise deallocation will be performed 146 // after context is extracted from the stack 147 d_src.release(); 148 d_dst.release(); 149 150 CUcontext prev_context; 151 safeCall(cuCtxPopCurrent(&prev_context)); 152} 153 154 155void destroyContexts() 156{ 157 safeCall(cuCtxDestroy(contexts[0])); 158 safeCall(cuCtxDestroy(contexts[1])); 159} 160 161#endif 162