/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_CORE_CUDA_HPP__
#define __OPENCV_CORE_CUDA_HPP__

#ifndef __cplusplus
#  error cuda.hpp header must be compiled as C++
#endif

#include "opencv2/core.hpp"
#include "opencv2/core/cuda_types.hpp"

/**
  @defgroup cuda CUDA-accelerated Computer Vision
  @{
    @defgroup cudacore Core part
    @{
      @defgroup cudacore_init Initialization and Information
      @defgroup cudacore_struct Data Structures
    @}
  @}
 */

namespace cv { namespace cuda {

//! @addtogroup cudacore_struct
//! @{

//===================================================================================
// GpuMat
//===================================================================================

/** @brief Base storage class for GPU memory with reference counting.

Its interface matches the Mat interface with the following limitations:

-   no arbitrary dimensions support (only 2D)
-   no functions that return references to their data (because references on GPU are not valid for
    CPU)
-   no expression templates technique support

Beware that the latter limitation may lead to overloaded matrix operators that cause memory
allocations. The GpuMat class is convertible to cuda::PtrStepSz and cuda::PtrStep so it can be
passed directly to the kernel.

@note In contrast with Mat, in most cases GpuMat::isContinuous() == false. This means that rows are
aligned to a size depending on the hardware. Single-row GpuMat is always a continuous matrix.

@note It is not recommended to leave static or global GpuMat variables allocated, that is, to rely
on their destructors. The destruction order of such variables and the CUDA context is undefined. The
GPU memory release function returns an error if the CUDA context has already been destroyed.

@sa Mat
 */
class CV_EXPORTS GpuMat
{
public:
    class CV_EXPORTS Allocator
    {
    public:
        virtual ~Allocator() {}

        // an allocator must fill the data, step and refcount fields
        virtual bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize) = 0;
        virtual void free(GpuMat* mat) = 0;
    };

    //! default allocator
    static Allocator* defaultAllocator();
    static void setDefaultAllocator(Allocator* allocator);

    //! default constructor
    explicit GpuMat(Allocator* allocator = defaultAllocator());

    //! constructs GpuMat of the specified size and type
    GpuMat(int rows, int cols, int type, Allocator* allocator = defaultAllocator());
    GpuMat(Size size, int type, Allocator* allocator = defaultAllocator());

    //! constructs GpuMat and fills it with the specified value s
    GpuMat(int rows, int cols, int type, Scalar s, Allocator* allocator = defaultAllocator());
    GpuMat(Size size, int type, Scalar s, Allocator* allocator = defaultAllocator());

    //! copy constructor
    GpuMat(const GpuMat& m);

    //! constructor for GpuMat headers pointing to user-allocated data
    GpuMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP);
    GpuMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP);

    //! creates a GpuMat header for a part of the bigger matrix
    GpuMat(const GpuMat& m, Range rowRange, Range colRange);
    GpuMat(const GpuMat& m, Rect roi);

    //! builds GpuMat from host memory (Blocking call)
    explicit GpuMat(InputArray arr, Allocator* allocator = defaultAllocator());

    //! destructor - calls release()
    ~GpuMat();

    //! assignment operators
    GpuMat& operator =(const GpuMat& m);

    //! allocates new GpuMat data unless the GpuMat already has the specified size and type
    void create(int rows, int cols, int type);
    void create(Size size, int type);

    //! decreases the reference counter and deallocates the data when the reference counter reaches 0
    void release();

    //! swaps with other smart pointer
    void swap(GpuMat& mat);

    //! uploads data from host memory to GpuMat (Blocking call)
    void upload(InputArray arr);

    //! uploads data from host memory to GpuMat (Non-Blocking call)
    void upload(InputArray arr, Stream& stream);

    //! downloads data from device to host memory (Blocking call)
    void download(OutputArray dst) const;

    //! downloads data from device to host memory (Non-Blocking call)
    void download(OutputArray dst, Stream& stream) const;

    //! returns a deep copy of the GpuMat, i.e. the data is copied
    GpuMat clone() const;

    //! copies the GpuMat content to device memory (Blocking call)
    void copyTo(OutputArray dst) const;

    //! copies the GpuMat content to device memory (Non-Blocking call)
    void copyTo(OutputArray dst, Stream& stream) const;

    //! copies those GpuMat elements to "dst" that are marked with non-zero mask elements (Blocking call)
    void copyTo(OutputArray dst, InputArray mask) const;

    //! copies those GpuMat elements to "dst" that are marked with non-zero mask elements (Non-Blocking call)
    void copyTo(OutputArray dst, InputArray mask, Stream& stream) const;

    //! sets all of the GpuMat elements to s (Blocking call)
    GpuMat& setTo(Scalar s);

    //! sets all of the GpuMat elements to s (Non-Blocking call)
    GpuMat& setTo(Scalar s, Stream& stream);

    //! sets some of the GpuMat elements to s, according to the mask (Blocking call)
    GpuMat& setTo(Scalar s, InputArray mask);

    //! sets some of the GpuMat elements to s, according to the mask (Non-Blocking call)
    GpuMat& setTo(Scalar s, InputArray mask, Stream& stream);

    //! converts GpuMat to another datatype (Blocking call)
    void convertTo(OutputArray dst, int rtype) const;

    //! converts GpuMat to another datatype (Non-Blocking call)
    void convertTo(OutputArray dst, int rtype, Stream& stream) const;

    //! converts GpuMat to another datatype with scaling (Blocking call)
    void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const;

    //! converts GpuMat to another datatype with scaling (Non-Blocking call)
    void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const;

    //! converts GpuMat to another datatype with scaling (Non-Blocking call)
    void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const;

    void assignTo(GpuMat& m, int type=-1) const;

    //! returns pointer to y-th row
    uchar* ptr(int y = 0);
    const uchar* ptr(int y = 0) const;

    //! template version of the above method
    template<typename _Tp> _Tp* ptr(int y = 0);
    template<typename _Tp> const _Tp* ptr(int y = 0) const;

    template <typename _Tp> operator PtrStepSz<_Tp>() const;
    template <typename _Tp> operator PtrStep<_Tp>() const;

    //! returns a new GpuMat header for the specified row
    GpuMat row(int y) const;

    //! returns a new GpuMat header for the specified column
    GpuMat col(int x) const;

    //! ... for the specified row span
    GpuMat rowRange(int startrow, int endrow) const;
    GpuMat rowRange(Range r) const;

    //! ... for the specified column span
    GpuMat colRange(int startcol, int endcol) const;
    GpuMat colRange(Range r) const;

    //! extracts a rectangular sub-GpuMat (this is a generalized form of row, rowRange etc.)
    GpuMat operator ()(Range rowRange, Range colRange) const;
    GpuMat operator ()(Rect roi) const;

    //! creates alternative GpuMat header for the same data, with different
    //! number of channels and/or different number of rows
    GpuMat reshape(int cn, int rows = 0) const;

    //! locates GpuMat header within a parent GpuMat
    void locateROI(Size& wholeSize, Point& ofs) const;

    //! moves/resizes the current GpuMat ROI inside the parent GpuMat
    GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright);

    //! returns true iff the GpuMat data is continuous
    //! (i.e. when there are no gaps between successive rows)
    bool isContinuous() const;

    //! returns element size in bytes
    size_t elemSize() const;

    //! returns the size of one element channel in bytes
    size_t elemSize1() const;

    //! returns element type
    int type() const;

    //! returns element depth
    int depth() const;

    //! returns number of channels
    int channels() const;

    //! returns step/elemSize1()
    size_t step1() const;

    //! returns GpuMat size : width == number of columns, height == number of rows
    Size size() const;

    //! returns true if GpuMat data is NULL
    bool empty() const;

    /*! includes several bit-fields:
    - the magic signature
    - continuity flag
    - depth
    - number of channels
    */
    int flags;

    //! the number of rows and columns
    int rows, cols;

    //! a distance between successive rows in bytes; includes the gap if any
    size_t step;

    //! pointer to the data
    uchar* data;

    //! pointer to the reference counter;
    //! when GpuMat points to user-allocated data, the pointer is NULL
    int* refcount;

    //! helper fields used in locateROI and adjustROI
    uchar* datastart;
    const uchar* dataend;

    //! allocator
    Allocator* allocator;
};

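// A minimal usage sketch (illustrative, not part of the API declared above): upload a
// host matrix, convert it on the device, and download the result. It assumes a
// CUDA-capable device is available at run time.
//
//     cv::Mat h_src(480, 640, CV_8UC1, cv::Scalar(127));   // host image
//     cv::cuda::GpuMat d_src, d_dst;
//
//     d_src.upload(h_src);                                  // blocking host -> device copy
//     d_src.convertTo(d_dst, CV_32F, 1.0 / 255.0);          // device-side conversion with scaling
//
//     cv::Mat h_dst;
//     d_dst.download(h_dst);                                // blocking device -> host copy
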
/** @brief Creates a continuous matrix.

@param rows Row count.
@param cols Column count.
@param type Type of the matrix.
@param arr Destination matrix. This parameter changes only if it has a proper type and area (
\f$\texttt{rows} \times \texttt{cols}\f$ ).

A matrix is called continuous if its elements are stored continuously, that is, without gaps at the
end of each row.
 */
CV_EXPORTS void createContinuous(int rows, int cols, int type, OutputArray arr);

/** @brief Ensures that the size of a matrix is big enough and the matrix has a proper type.

@param rows Minimum desired number of rows.
@param cols Minimum desired number of columns.
@param type Desired matrix type.
@param arr Destination matrix.

The function does not reallocate memory if the matrix has proper attributes already.
 */
CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr);

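// Illustrative sketch of the two helpers above: both take an OutputArray, so a GpuMat
// can be passed directly. createContinuous produces a matrix without gaps at the end
// of each row; ensureSizeIsEnough reallocates only when the existing buffer is too
// small or has the wrong type.
//
//     cv::cuda::GpuMat buf;
//     cv::cuda::createContinuous(480, 640, CV_8UC1, buf);   // rows are packed
//     CV_Assert(buf.isContinuous());
//
//     cv::cuda::ensureSizeIsEnough(240, 320, CV_8UC1, buf); // reuses the allocation
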
//! BufferPool management (must be called before Stream creation)
CV_EXPORTS void setBufferPoolUsage(bool on);
CV_EXPORTS void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount);

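// Illustrative sketch: the buffer pool must be enabled and configured before the first
// Stream is constructed. The numbers below (64 MB stacks, 2 stacks per device) are
// arbitrary example values, not recommended defaults.
//
//     cv::cuda::setBufferPoolUsage(true);                           // enable pooling
//     cv::cuda::setBufferPoolConfig(cv::cuda::getDevice(),
//                                   64 * 1024 * 1024, 2);           // stackSize, stackCount
//
//     cv::cuda::Stream stream;   // streams created after this point can use the pool
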
//===================================================================================
// HostMem
//===================================================================================

/** @brief Class with reference counting wrapping special memory type allocation functions from CUDA.

Its interface is also Mat-like but with additional memory type parameters.

-   **PAGE_LOCKED** sets a page locked memory type used commonly for fast and asynchronous
    uploading/downloading data from/to GPU.
-   **SHARED** specifies a zero copy memory allocation that enables mapping the host memory to GPU
    address space, if supported.
-   **WRITE_COMBINED** sets the write combined buffer that is not cached by CPU. Such buffers are
    used to supply GPU with data when GPU only reads it. The advantage is a better CPU cache
    utilization.

@note Allocation size of such memory types is usually limited. For more details, see *CUDA 2.2
Pinned Memory APIs* document or *CUDA C Programming Guide*.
 */
class CV_EXPORTS HostMem
{
public:
    enum AllocType { PAGE_LOCKED = 1, SHARED = 2, WRITE_COMBINED = 4 };

    static MatAllocator* getAllocator(AllocType alloc_type = PAGE_LOCKED);

    explicit HostMem(AllocType alloc_type = PAGE_LOCKED);

    HostMem(const HostMem& m);

    HostMem(int rows, int cols, int type, AllocType alloc_type = PAGE_LOCKED);
    HostMem(Size size, int type, AllocType alloc_type = PAGE_LOCKED);

    //! creates from host memory, copying the data
    explicit HostMem(InputArray arr, AllocType alloc_type = PAGE_LOCKED);

    ~HostMem();

    HostMem& operator =(const HostMem& m);

    //! swaps with other smart pointer
    void swap(HostMem& b);

    //! returns a deep copy of the matrix, i.e. the data is copied
    HostMem clone() const;

    //! allocates new matrix data unless the matrix already has the specified size and type.
    void create(int rows, int cols, int type);
    void create(Size size, int type);

    //! creates alternative HostMem header for the same data, with different
    //! number of channels and/or different number of rows
    HostMem reshape(int cn, int rows = 0) const;

    //! decrements the reference counter and releases the memory if needed.
    void release();

    //! returns a matrix header with disabled reference counting for HostMem data.
    Mat createMatHeader() const;

    /** @brief Maps CPU memory to GPU address space and creates the cuda::GpuMat header without reference counting
    for it.

    This can be done only if memory was allocated with the SHARED flag and if it is supported by the
    hardware. Laptops often share video and CPU memory, so address spaces can be mapped, which
    eliminates an extra copy.
     */
    GpuMat createGpuMatHeader() const;

    // Please see cv::Mat for descriptions
    bool isContinuous() const;
    size_t elemSize() const;
    size_t elemSize1() const;
    int type() const;
    int depth() const;
    int channels() const;
    size_t step1() const;
    Size size() const;
    bool empty() const;

    // Please see cv::Mat for descriptions
    int flags;
    int rows, cols;
    size_t step;

    uchar* data;
    int* refcount;

    uchar* datastart;
    const uchar* dataend;

    AllocType alloc_type;
};

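// Illustrative sketch: page-locked host memory is what allows the non-blocking
// upload/download overloads to run truly asynchronously. createMatHeader() exposes the
// same buffer as a cv::Mat without copying.
//
//     cv::cuda::HostMem h_pinned(480, 640, CV_8UC1, cv::cuda::HostMem::PAGE_LOCKED);
//     cv::Mat h_view = h_pinned.createMatHeader();   // fill h_view on the CPU ...
//
//     cv::cuda::Stream stream;
//     cv::cuda::GpuMat d_img;
//     d_img.upload(h_view, stream);                  // asynchronous copy from pinned memory
//     stream.waitForCompletion();
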
/** @brief Page-locks the memory of a matrix and maps it for the device(s).

@param m Input matrix.
 */
CV_EXPORTS void registerPageLocked(Mat& m);

/** @brief Unmaps the memory of a matrix and makes it pageable again.

@param m Input matrix.
 */
CV_EXPORTS void unregisterPageLocked(Mat& m);

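// Illustrative sketch: memory that was allocated elsewhere (for example by a regular
// cv::Mat) can be pinned in place so that asynchronous transfers can use it directly.
//
//     cv::Mat frame(720, 1280, CV_8UC3);
//     cv::cuda::registerPageLocked(frame);       // pin the existing allocation
//     // ... asynchronous uploads/downloads involving 'frame' ...
//     cv::cuda::unregisterPageLocked(frame);     // unpin before the Mat is released
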
//===================================================================================
// Stream
//===================================================================================

/** @brief This class encapsulates a queue of asynchronous calls.

@note Currently, you may face problems if an operation is enqueued twice with different data. Some
functions use constant GPU memory, and the next call may update the memory before the previous one
has finished. However, calling different operations asynchronously is safe because each operation
has its own constant buffer. Memory copy/upload/download/set operations to the buffers you hold are
also safe.
 */
class CV_EXPORTS Stream
{
    typedef void (Stream::*bool_type)() const;
    void this_type_does_not_support_comparisons() const {}

public:
    typedef void (*StreamCallback)(int status, void* userData);

    //! creates a new asynchronous stream
    Stream();

    /** @brief Returns true if the current stream queue is finished. Otherwise, it returns false.
    */
    bool queryIfComplete() const;

    /** @brief Blocks the current CPU thread until all operations in the stream are complete.
    */
    void waitForCompletion();

    /** @brief Makes a compute stream wait on an event.
    */
    void waitEvent(const Event& event);

    /** @brief Adds a callback to be called on the host after all currently enqueued items in the stream have
    completed.

    @note Callbacks must not make any CUDA API calls. Callbacks must not perform any synchronization
    that may depend on outstanding device work or other callbacks that are not mandated to run earlier.
    Callbacks without a mandated order (in independent streams) execute in undefined order and may be
    serialized.
     */
    void enqueueHostCallback(StreamCallback callback, void* userData);

    //! returns Stream object for the default CUDA stream
    static Stream& Null();

    //! returns true if stream object is not default (!= 0)
    operator bool_type() const;

    class Impl;

private:
    Ptr<Impl> impl_;
    Stream(const Ptr<Impl>& impl);

    friend struct StreamAccessor;
    friend class BufferPool;
    friend class DefaultDeviceInitializer;
};

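// Illustrative sketch: operations enqueued on one stream execute in order on the device
// but do not block the calling CPU thread until waitForCompletion() (or an Event) is
// used. h_a and h_b stand for previously prepared host matrices; page-locked host
// memory (see HostMem above) keeps the copies truly asynchronous.
//
//     cv::cuda::Stream stream;
//     cv::cuda::GpuMat d_a, d_b;
//
//     d_a.upload(h_a, stream);                          // enqueue host -> device copy
//     d_a.convertTo(d_b, CV_32F, 1.0 / 255.0, stream);  // enqueue conversion
//     d_b.download(h_b, stream);                        // enqueue device -> host copy
//
//     // ... CPU work can proceed here while the GPU is busy ...
//     stream.waitForCompletion();                       // h_b is valid only after this
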
class CV_EXPORTS Event
{
public:
    enum CreateFlags
    {
        DEFAULT        = 0x00,  /**< Default event flag */
        BLOCKING_SYNC  = 0x01,  /**< Event uses blocking synchronization */
        DISABLE_TIMING = 0x02,  /**< Event will not record timing data */
        INTERPROCESS   = 0x04   /**< Event is suitable for interprocess use. DISABLE_TIMING must be set */
    };

    explicit Event(CreateFlags flags = DEFAULT);

    //! records an event
    void record(Stream& stream = Stream::Null());

    //! queries an event's status
    bool queryIfComplete() const;

    //! waits for an event to complete
    void waitForCompletion();

    //! computes the elapsed time between events
    static float elapsedTime(const Event& start, const Event& end);

    class Impl;

private:
    Ptr<Impl> impl_;

    friend struct EventAccessor;
};

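// Illustrative sketch: timing a stretch of asynchronous work with a pair of events.
// The default creation flags record timing data, so elapsedTime() can be used.
//
//     cv::cuda::Stream stream;
//     cv::cuda::Event start, stop;
//
//     start.record(stream);
//     // ... enqueue asynchronous operations on 'stream' ...
//     stop.record(stream);
//
//     stop.waitForCompletion();
//     float ms = cv::cuda::Event::elapsedTime(start, stop);   // milliseconds
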
//! @} cudacore_struct

//===================================================================================
// Initialization & Info
//===================================================================================

//! @addtogroup cudacore_init
//! @{

/** @brief Returns the number of installed CUDA-enabled devices.

Use this function before any other CUDA function calls. If OpenCV is compiled without CUDA support,
this function returns 0.
 */
CV_EXPORTS int getCudaEnabledDeviceCount();

/** @brief Sets a device and initializes it for the current thread.

@param device System index of a CUDA device starting with 0.

If the call of this function is omitted, a default device is initialized at the first CUDA usage.
 */
CV_EXPORTS void setDevice(int device);

/** @brief Returns the current device index set by cuda::setDevice or initialized by default.
 */
CV_EXPORTS int getDevice();

/** @brief Explicitly destroys and cleans up all resources associated with the current device in the current
process.

Any subsequent API call to this device will reinitialize the device.
 */
CV_EXPORTS void resetDevice();

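// Illustrative sketch: a typical start-up sequence that checks for CUDA support before
// selecting a device.
//
//     int n = cv::cuda::getCudaEnabledDeviceCount();
//     if (n == 0)
//     {
//         // OpenCV was built without CUDA support or no device is present:
//         // fall back to a CPU code path.
//     }
//     else
//     {
//         cv::cuda::setDevice(0);                  // use the first device
//         CV_Assert(cv::cuda::getDevice() == 0);
//     }
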
/** @brief Enumeration providing CUDA computing features.
 */
enum FeatureSet
{
    FEATURE_SET_COMPUTE_10 = 10,
    FEATURE_SET_COMPUTE_11 = 11,
    FEATURE_SET_COMPUTE_12 = 12,
    FEATURE_SET_COMPUTE_13 = 13,
    FEATURE_SET_COMPUTE_20 = 20,
    FEATURE_SET_COMPUTE_21 = 21,
    FEATURE_SET_COMPUTE_30 = 30,
    FEATURE_SET_COMPUTE_32 = 32,
    FEATURE_SET_COMPUTE_35 = 35,
    FEATURE_SET_COMPUTE_50 = 50,

    GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11,
    SHARED_ATOMICS = FEATURE_SET_COMPUTE_12,
    NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13,
    WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30,
    DYNAMIC_PARALLELISM = FEATURE_SET_COMPUTE_35
};

//! checks whether the current device supports the given feature
CV_EXPORTS bool deviceSupports(FeatureSet feature_set);

/** @brief Class providing a set of static methods to check what NVIDIA\* card architecture the CUDA module was
built for.

According to the CUDA C Programming Guide Version 3.2: "PTX code produced for some specific compute
capability can always be compiled to binary code of greater or equal compute capability".
 */
class CV_EXPORTS TargetArchs
{
public:
    /** @brief The following method checks whether the module was built with the support of the given feature:

    @param feature_set Features to be checked. See cuda::FeatureSet.
     */
    static bool builtWith(FeatureSet feature_set);

    /** @brief There is a set of methods to check whether the module contains intermediate (PTX) or binary CUDA
    code for the given architecture(s):

    @param major Major compute capability version.
    @param minor Minor compute capability version.
     */
    static bool has(int major, int minor);
    static bool hasPtx(int major, int minor);
    static bool hasBin(int major, int minor);

    static bool hasEqualOrLessPtx(int major, int minor);
    static bool hasEqualOrGreater(int major, int minor);
    static bool hasEqualOrGreaterPtx(int major, int minor);
    static bool hasEqualOrGreaterBin(int major, int minor);
};

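// Illustrative sketch: a feature is usable only if the running device supports it and
// the module was built with code for it, so the two checks are usually combined.
//
//     bool can_use_shuffle =
//         cv::cuda::deviceSupports(cv::cuda::WARP_SHUFFLE_FUNCTIONS) &&
//         cv::cuda::TargetArchs::builtWith(cv::cuda::WARP_SHUFFLE_FUNCTIONS);
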
/** @brief Class providing functionality for querying the specified GPU properties.
 */
class CV_EXPORTS DeviceInfo
{
public:
    //! creates DeviceInfo object for the current GPU
    DeviceInfo();

    /** @brief The constructors.

    @param device_id System index of the CUDA device starting with 0.

    Constructs the DeviceInfo object for the specified device. If the device_id parameter is omitted, it
    constructs an object for the current device.
     */
    DeviceInfo(int device_id);

    /** @brief Returns system index of the CUDA device starting with 0.
    */
    int deviceID() const;

    //! ASCII string identifying device
    const char* name() const;

    //! global memory available on device in bytes
    size_t totalGlobalMem() const;

    //! shared memory available per block in bytes
    size_t sharedMemPerBlock() const;

    //! 32-bit registers available per block
    int regsPerBlock() const;

    //! warp size in threads
    int warpSize() const;

    //! maximum pitch in bytes allowed by memory copies
    size_t memPitch() const;

    //! maximum number of threads per block
    int maxThreadsPerBlock() const;

    //! maximum size of each dimension of a block
    Vec3i maxThreadsDim() const;

    //! maximum size of each dimension of a grid
    Vec3i maxGridSize() const;

    //! clock frequency in kilohertz
    int clockRate() const;

    //! constant memory available on device in bytes
    size_t totalConstMem() const;

    //! major compute capability
    int majorVersion() const;

    //! minor compute capability
    int minorVersion() const;

    //! alignment requirement for textures
    size_t textureAlignment() const;

    //! pitch alignment requirement for texture references bound to pitched memory
    size_t texturePitchAlignment() const;

    //! number of multiprocessors on device
    int multiProcessorCount() const;

    //! specifies whether there is a run-time limit on kernels
    bool kernelExecTimeoutEnabled() const;

    //! device is integrated as opposed to discrete
    bool integrated() const;

    //! device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
    bool canMapHostMemory() const;

    enum ComputeMode
    {
        ComputeModeDefault,         /**< default compute mode (Multiple threads can use cudaSetDevice with this device) */
        ComputeModeExclusive,       /**< compute-exclusive-thread mode (Only one thread in one process will be able to use cudaSetDevice with this device) */
        ComputeModeProhibited,      /**< compute-prohibited mode (No threads can use cudaSetDevice with this device) */
        ComputeModeExclusiveProcess /**< compute-exclusive-process mode (Many threads in one process will be able to use cudaSetDevice with this device) */
    };

    //! compute mode
    ComputeMode computeMode() const;

    //! maximum 1D texture size
    int maxTexture1D() const;

    //! maximum 1D mipmapped texture size
    int maxTexture1DMipmap() const;

    //! maximum size for 1D textures bound to linear memory
    int maxTexture1DLinear() const;

    //! maximum 2D texture dimensions
    Vec2i maxTexture2D() const;

    //! maximum 2D mipmapped texture dimensions
    Vec2i maxTexture2DMipmap() const;

    //! maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory
    Vec3i maxTexture2DLinear() const;

    //! maximum 2D texture dimensions if texture gather operations have to be performed
    Vec2i maxTexture2DGather() const;

    //! maximum 3D texture dimensions
    Vec3i maxTexture3D() const;

    //! maximum Cubemap texture dimensions
    int maxTextureCubemap() const;

    //! maximum 1D layered texture dimensions
    Vec2i maxTexture1DLayered() const;

    //! maximum 2D layered texture dimensions
    Vec3i maxTexture2DLayered() const;

    //! maximum Cubemap layered texture dimensions
    Vec2i maxTextureCubemapLayered() const;

    //! maximum 1D surface size
    int maxSurface1D() const;

    //! maximum 2D surface dimensions
    Vec2i maxSurface2D() const;

    //! maximum 3D surface dimensions
    Vec3i maxSurface3D() const;

    //! maximum 1D layered surface dimensions
    Vec2i maxSurface1DLayered() const;

    //! maximum 2D layered surface dimensions
    Vec3i maxSurface2DLayered() const;

    //! maximum Cubemap surface dimensions
    int maxSurfaceCubemap() const;

    //! maximum Cubemap layered surface dimensions
    Vec2i maxSurfaceCubemapLayered() const;

    //! alignment requirements for surfaces
    size_t surfaceAlignment() const;

    //! device can possibly execute multiple kernels concurrently
    bool concurrentKernels() const;

    //! device has ECC support enabled
    bool ECCEnabled() const;

    //! PCI bus ID of the device
    int pciBusID() const;

    //! PCI device ID of the device
    int pciDeviceID() const;

    //! PCI domain ID of the device
    int pciDomainID() const;

    //! true if device is a Tesla device using TCC driver, false otherwise
    bool tccDriver() const;

    //! number of asynchronous engines
    int asyncEngineCount() const;

    //! device shares a unified address space with the host
    bool unifiedAddressing() const;

    //! peak memory clock frequency in kilohertz
    int memoryClockRate() const;

    //! global memory bus width in bits
    int memoryBusWidth() const;

    //! size of L2 cache in bytes
    int l2CacheSize() const;

    //! maximum resident threads per multiprocessor
    int maxThreadsPerMultiProcessor() const;

    //! gets free and total device memory
    void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
    size_t freeMemory() const;
    size_t totalMemory() const;

    /** @brief Provides information on CUDA feature support.

    @param feature_set Features to be checked. See cuda::FeatureSet.

    This function returns true if the device has the specified CUDA feature. Otherwise, it returns false.
     */
    bool supports(FeatureSet feature_set) const;

    /** @brief Checks the CUDA module and device compatibility.

    This function returns true if the CUDA module can be run on the specified device. Otherwise, it
    returns false.
     */
    bool isCompatible() const;

private:
    int device_id_;
};

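// Illustrative sketch: enumerating the installed devices and printing a few of the
// properties declared above (assumes <iostream> is included by the caller).
//
//     int count = cv::cuda::getCudaEnabledDeviceCount();
//     for (int id = 0; id < count; ++id)
//     {
//         cv::cuda::DeviceInfo info(id);
//         std::cout << id << ": " << info.name()
//                   << ", compute " << info.majorVersion() << "." << info.minorVersion()
//                   << ", compatible: " << (info.isCompatible() ? "yes" : "no")
//                   << std::endl;
//     }
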
CV_EXPORTS void printCudaDeviceInfo(int device);
CV_EXPORTS void printShortCudaDeviceInfo(int device);

//! @} cudacore_init

}} // namespace cv { namespace cuda {


#include "opencv2/core/cuda.inl.hpp"

#endif /* __OPENCV_CORE_CUDA_HPP__ */