1#if defined _MSC_VER && _MSC_VER >= 1400
2#pragma warning( disable : 4201 4408 4127 4100)
3#endif
4
5#include "cvconfig.h"
6#include <iostream>
7#include <iomanip>
8#include <cstdio>
9#include "opencv2/core/cuda.hpp"
10#include "opencv2/cudalegacy.hpp"
11#include "opencv2/highgui.hpp"
12#include "opencv2/imgproc.hpp"
13#include "opencv2/objdetect.hpp"
14#include "opencv2/objdetect/objdetect_c.h"
15
16using namespace std;
17using namespace cv;
18
19
20#if !defined(HAVE_CUDA) || defined(__arm__)
21
// Fallback entry point used when the sample cannot run in this build:
// it only reports why (no CUDA support and/or an ARM target) and exits
// with status 0.
int main( int, const char** )
{
#if !defined(HAVE_CUDA)
    const char* const noCudaMsg = "CUDA support is required (CMake key 'WITH_CUDA' must be true).";
    std::cout << noCudaMsg << std::endl;
#endif

#if defined(__arm__)
    const char* const armMsg = "Unsupported for ARM CUDA library.";
    std::cout << armMsg << std::endl;
#endif

    return 0;
}
34
35#else
36
37
38const Size2i preferredVideoFrameSize(640, 480);
39const cv::String wndTitle = "NVIDIA Computer Vision :: Haar Classifiers Cascade";
40
41
42static void matPrint(Mat &img, int lineOffsY, Scalar fontColor, const string &ss)
43{
44    int fontFace = FONT_HERSHEY_DUPLEX;
45    double fontScale = 0.8;
46    int fontThickness = 2;
47    Size fontSize = cv::getTextSize("T[]", fontFace, fontScale, fontThickness, 0);
48
49    Point org;
50    org.x = 1;
51    org.y = 3 * fontSize.height * (lineOffsY + 1) / 2;
52    putText(img, ss, org, fontFace, fontScale, Scalar(0,0,0), 5*fontThickness/2, 16);
53    putText(img, ss, org, fontFace, fontScale, fontColor, fontThickness, 16);
54}
55
56
57static void displayState(Mat &canvas, bool bHelp, bool bGpu, bool bLargestFace, bool bFilter, double fps)
58{
59    Scalar fontColorRed(0,0,255);
60    Scalar fontColorNV(0,185,118);
61
62    ostringstream ss;
63    ss << "FPS = " << setprecision(1) << fixed << fps;
64    matPrint(canvas, 0, fontColorRed, ss.str());
65    ss.str("");
66    ss << "[" << canvas.cols << "x" << canvas.rows << "], " <<
67        (bGpu ? "GPU, " : "CPU, ") <<
68        (bLargestFace ? "OneFace, " : "MultiFace, ") <<
69        (bFilter ? "Filter:ON" : "Filter:OFF");
70    matPrint(canvas, 1, fontColorRed, ss.str());
71
72    if (bHelp)
73    {
74        matPrint(canvas, 2, fontColorNV, "Space - switch GPU / CPU");
75        matPrint(canvas, 3, fontColorNV, "M - switch OneFace / MultiFace");
76        matPrint(canvas, 4, fontColorNV, "F - toggle rectangles Filter");
77        matPrint(canvas, 5, fontColorNV, "H - toggle hotkeys help");
78    }
79    else
80    {
81        matPrint(canvas, 2, fontColorNV, "H - toggle hotkeys help");
82    }
83}
84
85
86static NCVStatus process(Mat *srcdst,
87                  Ncv32u width, Ncv32u height,
88                  NcvBool bFilterRects, NcvBool bLargestFace,
89                  HaarClassifierCascadeDescriptor &haar,
90                  NCVVector<HaarStage64> &d_haarStages, NCVVector<HaarClassifierNode128> &d_haarNodes,
91                  NCVVector<HaarFeature64> &d_haarFeatures, NCVVector<HaarStage64> &h_haarStages,
92                  INCVMemAllocator &gpuAllocator,
93                  INCVMemAllocator &cpuAllocator,
94                  cudaDeviceProp &devProp)
95{
96    ncvAssertReturn(!((srcdst == NULL) ^ gpuAllocator.isCounting()), NCV_NULL_PTR);
97
98    NCVStatus ncvStat;
99
100    NCV_SET_SKIP_COND(gpuAllocator.isCounting());
101
102    NCVMatrixAlloc<Ncv8u> d_src(gpuAllocator, width, height);
103    ncvAssertReturn(d_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
104    NCVMatrixAlloc<Ncv8u> h_src(cpuAllocator, width, height);
105    ncvAssertReturn(h_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
106    NCVVectorAlloc<NcvRect32u> d_rects(gpuAllocator, 100);
107    ncvAssertReturn(d_rects.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
108
109    NCV_SKIP_COND_BEGIN
110
111    for (Ncv32u i=0; i<(Ncv32u)srcdst->rows; i++)
112    {
113        memcpy(h_src.ptr() + i * h_src.stride(), srcdst->ptr(i), srcdst->cols);
114    }
115
116    ncvStat = h_src.copySolid(d_src, 0);
117    ncvAssertReturnNcvStat(ncvStat);
118    ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
119
120    NCV_SKIP_COND_END
121
122    NcvSize32u roi;
123    roi.width = d_src.width();
124    roi.height = d_src.height();
125
126    Ncv32u numDetections;
127    ncvStat = ncvDetectObjectsMultiScale_device(
128        d_src, roi, d_rects, numDetections, haar, h_haarStages,
129        d_haarStages, d_haarNodes, d_haarFeatures,
130        haar.ClassifierSize,
131        (bFilterRects || bLargestFace) ? 4 : 0,
132        1.2f, 1,
133        (bLargestFace ? NCVPipeObjDet_FindLargestObject : 0)
134        | NCVPipeObjDet_VisualizeInPlace,
135        gpuAllocator, cpuAllocator, devProp, 0);
136    ncvAssertReturnNcvStat(ncvStat);
137    ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
138
139    NCV_SKIP_COND_BEGIN
140
141    ncvStat = d_src.copySolid(h_src, 0);
142    ncvAssertReturnNcvStat(ncvStat);
143    ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
144
145    for (Ncv32u i=0; i<(Ncv32u)srcdst->rows; i++)
146    {
147        memcpy(srcdst->ptr(i), h_src.ptr() + i * h_src.stride(), srcdst->cols);
148    }
149
150    NCV_SKIP_COND_END
151
152    return NCV_SUCCESS;
153}
154
155
156int main(int argc, const char** argv)
157{
158    cout << "OpenCV / NVIDIA Computer Vision" << endl;
159    cout << "Face Detection in video and live feed" << endl;
160    cout << "Syntax: exename <cascade_file> <image_or_video_or_cameraid>" << endl;
161    cout << "=========================================" << endl;
162
163    ncvAssertPrintReturn(cv::cuda::getCudaEnabledDeviceCount() != 0, "No GPU found or the library is compiled without CUDA support", -1);
164    ncvAssertPrintReturn(argc == 3, "Invalid number of arguments", -1);
165
166    cv::cuda::printShortCudaDeviceInfo(cv::cuda::getDevice());
167
168    string cascadeName = argv[1];
169    string inputName = argv[2];
170
171    NCVStatus ncvStat;
172    NcvBool bQuit = false;
173    VideoCapture capture;
174    Size2i frameSize;
175
176    //open content source
177    Mat image = imread(inputName);
178    Mat frame;
179    if (!image.empty())
180    {
181        frameSize.width = image.cols;
182        frameSize.height = image.rows;
183    }
184    else
185    {
186        if (!capture.open(inputName))
187        {
188            int camid = -1;
189
190            istringstream ss(inputName);
191            int x = 0;
192            ss >> x;
193
194            ncvAssertPrintReturn(capture.open(camid) != 0, "Can't open source", -1);
195        }
196
197        capture >> frame;
198        ncvAssertPrintReturn(!frame.empty(), "Empty video source", -1);
199
200        frameSize.width = frame.cols;
201        frameSize.height = frame.rows;
202    }
203
204    NcvBool bUseGPU = true;
205    NcvBool bLargestObject = false;
206    NcvBool bFilterRects = true;
207    NcvBool bHelpScreen = false;
208
209    CascadeClassifier classifierOpenCV;
210    ncvAssertPrintReturn(classifierOpenCV.load(cascadeName) != 0, "Error (in OpenCV) opening classifier", -1);
211
212    int devId;
213    ncvAssertCUDAReturn(cudaGetDevice(&devId), -1);
214    cudaDeviceProp devProp;
215    ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), -1);
216    cout << "Using GPU: " << devId << "(" << devProp.name <<
217            "), arch=" << devProp.major << "." << devProp.minor << endl;
218
219    //==============================================================================
220    //
221    // Load the classifier from file (assuming its size is about 1 mb)
222    // using a simple allocator
223    //
224    //==============================================================================
225
226    NCVMemNativeAllocator gpuCascadeAllocator(NCVMemoryTypeDevice, static_cast<Ncv32u>(devProp.textureAlignment));
227    ncvAssertPrintReturn(gpuCascadeAllocator.isInitialized(), "Error creating cascade GPU allocator", -1);
228    NCVMemNativeAllocator cpuCascadeAllocator(NCVMemoryTypeHostPinned, static_cast<Ncv32u>(devProp.textureAlignment));
229    ncvAssertPrintReturn(cpuCascadeAllocator.isInitialized(), "Error creating cascade CPU allocator", -1);
230
231    Ncv32u haarNumStages, haarNumNodes, haarNumFeatures;
232    ncvStat = ncvHaarGetClassifierSize(cascadeName, haarNumStages, haarNumNodes, haarNumFeatures);
233    ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error reading classifier size (check the file)", -1);
234
235    NCVVectorAlloc<HaarStage64> h_haarStages(cpuCascadeAllocator, haarNumStages);
236    ncvAssertPrintReturn(h_haarStages.isMemAllocated(), "Error in cascade CPU allocator", -1);
237    NCVVectorAlloc<HaarClassifierNode128> h_haarNodes(cpuCascadeAllocator, haarNumNodes);
238    ncvAssertPrintReturn(h_haarNodes.isMemAllocated(), "Error in cascade CPU allocator", -1);
239    NCVVectorAlloc<HaarFeature64> h_haarFeatures(cpuCascadeAllocator, haarNumFeatures);
240
241    ncvAssertPrintReturn(h_haarFeatures.isMemAllocated(), "Error in cascade CPU allocator", -1);
242
243    HaarClassifierCascadeDescriptor haar;
244    ncvStat = ncvHaarLoadFromFile_host(cascadeName, haar, h_haarStages, h_haarNodes, h_haarFeatures);
245    ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error loading classifier", -1);
246
247    NCVVectorAlloc<HaarStage64> d_haarStages(gpuCascadeAllocator, haarNumStages);
248    ncvAssertPrintReturn(d_haarStages.isMemAllocated(), "Error in cascade GPU allocator", -1);
249    NCVVectorAlloc<HaarClassifierNode128> d_haarNodes(gpuCascadeAllocator, haarNumNodes);
250    ncvAssertPrintReturn(d_haarNodes.isMemAllocated(), "Error in cascade GPU allocator", -1);
251    NCVVectorAlloc<HaarFeature64> d_haarFeatures(gpuCascadeAllocator, haarNumFeatures);
252    ncvAssertPrintReturn(d_haarFeatures.isMemAllocated(), "Error in cascade GPU allocator", -1);
253
254    ncvStat = h_haarStages.copySolid(d_haarStages, 0);
255    ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", -1);
256    ncvStat = h_haarNodes.copySolid(d_haarNodes, 0);
257    ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", -1);
258    ncvStat = h_haarFeatures.copySolid(d_haarFeatures, 0);
259    ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", -1);
260
261    //==============================================================================
262    //
263    // Calculate memory requirements and create real allocators
264    //
265    //==============================================================================
266
267    NCVMemStackAllocator gpuCounter(static_cast<Ncv32u>(devProp.textureAlignment));
268    ncvAssertPrintReturn(gpuCounter.isInitialized(), "Error creating GPU memory counter", -1);
269    NCVMemStackAllocator cpuCounter(static_cast<Ncv32u>(devProp.textureAlignment));
270    ncvAssertPrintReturn(cpuCounter.isInitialized(), "Error creating CPU memory counter", -1);
271
272    ncvStat = process(NULL, frameSize.width, frameSize.height,
273                      false, false, haar,
274                      d_haarStages, d_haarNodes,
275                      d_haarFeatures, h_haarStages,
276                      gpuCounter, cpuCounter, devProp);
277    ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error in memory counting pass", -1);
278
279    NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, gpuCounter.maxSize(), static_cast<Ncv32u>(devProp.textureAlignment));
280    ncvAssertPrintReturn(gpuAllocator.isInitialized(), "Error creating GPU memory allocator", -1);
281    NCVMemStackAllocator cpuAllocator(NCVMemoryTypeHostPinned, cpuCounter.maxSize(), static_cast<Ncv32u>(devProp.textureAlignment));
282    ncvAssertPrintReturn(cpuAllocator.isInitialized(), "Error creating CPU memory allocator", -1);
283
284    printf("Initialized for frame size [%dx%d]\n", frameSize.width, frameSize.height);
285
286    //==============================================================================
287    //
288    // Main processing loop
289    //
290    //==============================================================================
291
292    namedWindow(wndTitle, 1);
293    Mat frameDisp;
294
295    do
296    {
297        Mat gray;
298        cvtColor((image.empty() ? frame : image), gray, cv::COLOR_BGR2GRAY);
299
300        //
301        // process
302        //
303
304        NcvSize32u minSize = haar.ClassifierSize;
305        if (bLargestObject)
306        {
307            Ncv32u ratioX = preferredVideoFrameSize.width / minSize.width;
308            Ncv32u ratioY = preferredVideoFrameSize.height / minSize.height;
309            Ncv32u ratioSmallest = min(ratioX, ratioY);
310            ratioSmallest = max((Ncv32u)(ratioSmallest / 2.5f), (Ncv32u)1);
311            minSize.width *= ratioSmallest;
312            minSize.height *= ratioSmallest;
313        }
314
315        Ncv32f avgTime;
316        NcvTimer timer = ncvStartTimer();
317
318        if (bUseGPU)
319        {
320            ncvStat = process(&gray, frameSize.width, frameSize.height,
321                              bFilterRects, bLargestObject, haar,
322                              d_haarStages, d_haarNodes,
323                              d_haarFeatures, h_haarStages,
324                              gpuAllocator, cpuAllocator, devProp);
325            ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error in memory counting pass", -1);
326        }
327        else
328        {
329            vector<Rect> rectsOpenCV;
330
331            classifierOpenCV.detectMultiScale(
332                gray,
333                rectsOpenCV,
334                1.2f,
335                bFilterRects ? 4 : 0,
336                (bLargestObject ? CV_HAAR_FIND_BIGGEST_OBJECT : 0)
337                | CV_HAAR_SCALE_IMAGE,
338                Size(minSize.width, minSize.height));
339
340            for (size_t rt = 0; rt < rectsOpenCV.size(); ++rt)
341                rectangle(gray, rectsOpenCV[rt], Scalar(255));
342        }
343
344        avgTime = (Ncv32f)ncvEndQueryTimerMs(timer);
345
346        cvtColor(gray, frameDisp, cv::COLOR_GRAY2BGR);
347        displayState(frameDisp, bHelpScreen, bUseGPU, bLargestObject, bFilterRects, 1000.0f / avgTime);
348        imshow(wndTitle, frameDisp);
349
350        //handle input
351        switch (cv::waitKey(3))
352        {
353        case ' ':
354            bUseGPU = !bUseGPU;
355            break;
356        case 'm':
357        case 'M':
358            bLargestObject = !bLargestObject;
359            break;
360        case 'f':
361        case 'F':
362            bFilterRects = !bFilterRects;
363            break;
364        case 'h':
365        case 'H':
366            bHelpScreen = !bHelpScreen;
367            break;
368        case 27:
369            bQuit = true;
370            break;
371        }
372
373        // For camera and video file, capture the next image
374        if (capture.isOpened())
375        {
376            capture >> frame;
377            if (frame.empty())
378            {
379                break;
380            }
381        }
382    } while (!bQuit);
383
384    cv::destroyWindow(wndTitle);
385
386    return 0;
387}
388
389#endif //!defined(HAVE_CUDA)
390