1#include <iomanip>
2#include <stdexcept>
3#include <string>
4#include "performance.h"
5#include "opencv2/core/cuda.hpp"
6
7using namespace std;
8using namespace cv;
9using namespace cv::cuda;
10
11void TestSystem::run()
12{
13    if (is_list_mode_)
14    {
15        for (vector<Runnable*>::iterator it = tests_.begin(); it != tests_.end(); ++it)
16            cout << (*it)->name() << endl;
17
18        return;
19    }
20
21    // Run test initializers
22    for (vector<Runnable*>::iterator it = inits_.begin(); it != inits_.end(); ++it)
23    {
24        if ((*it)->name().find(test_filter_, 0) != string::npos)
25            (*it)->run();
26    }
27
28    printHeading();
29
30    // Run tests
31    for (vector<Runnable*>::iterator it = tests_.begin(); it != tests_.end(); ++it)
32    {
33        try
34        {
35            if ((*it)->name().find(test_filter_, 0) != string::npos)
36            {
37                cout << endl << (*it)->name() << ":\n";
38                (*it)->run();
39                finishCurrentSubtest();
40            }
41        }
42        catch (const Exception&)
43        {
44            // Message is printed via callback
45            resetCurrentSubtest();
46        }
47        catch (const runtime_error& e)
48        {
49            printError(e.what());
50            resetCurrentSubtest();
51        }
52    }
53
54    printSummary();
55}
56
57
58void TestSystem::finishCurrentSubtest()
59{
60    if (cur_subtest_is_empty_)
61        // There is no need to print subtest statistics
62        return;
63
64    double cpu_time = cpu_elapsed_ / getTickFrequency() * 1000.0;
65    double gpu_time = gpu_elapsed_ / getTickFrequency() * 1000.0;
66
67    double speedup = static_cast<double>(cpu_elapsed_) / std::max(1.0, gpu_elapsed_);
68    speedup_total_ += speedup;
69
70    printMetrics(cpu_time, gpu_time, speedup);
71
72    num_subtests_called_++;
73    resetCurrentSubtest();
74}
75
76
77double TestSystem::meanTime(const vector<int64> &samples)
78{
79    double sum = accumulate(samples.begin(), samples.end(), 0.);
80    if (samples.size() > 1)
81        return (sum - samples[0]) / (samples.size() - 1);
82    return sum;
83}
84
85
86void TestSystem::printHeading()
87{
88    cout << endl;
89    cout << setiosflags(ios_base::left);
90    cout << TAB << setw(10) << "CPU, ms" << setw(10) << "GPU, ms"
91        << setw(14) << "SPEEDUP"
92        << "DESCRIPTION\n";
93    cout << resetiosflags(ios_base::left);
94}
95
96
97void TestSystem::printSummary()
98{
99    cout << setiosflags(ios_base::fixed);
100    cout << "\naverage GPU speedup: x"
101        << setprecision(3) << speedup_total_ / std::max(1, num_subtests_called_)
102        << endl;
103    cout << resetiosflags(ios_base::fixed);
104}
105
106
107void TestSystem::printMetrics(double cpu_time, double gpu_time, double speedup)
108{
109    cout << TAB << setiosflags(ios_base::left);
110    stringstream stream;
111
112    stream << cpu_time;
113    cout << setw(10) << stream.str();
114
115    stream.str("");
116    stream << gpu_time;
117    cout << setw(10) << stream.str();
118
119    stream.str("");
120    stream << "x" << setprecision(3) << speedup;
121    cout << setw(14) << stream.str();
122
123    cout << cur_subtest_description_.str();
124    cout << resetiosflags(ios_base::left) << endl;
125}
126
127
128void TestSystem::printError(const std::string& msg)
129{
130    cout << TAB << "[error: " << msg << "] " << cur_subtest_description_.str() << endl;
131}
132
133
134void gen(Mat& mat, int rows, int cols, int type, Scalar low, Scalar high)
135{
136    mat.create(rows, cols, type);
137    RNG rng(0);
138    rng.fill(mat, RNG::UNIFORM, low, high);
139}
140
141
142string abspath(const string& relpath)
143{
144    return TestSystem::instance().workingDir() + relpath;
145}
146
147
148static int cvErrorCallback(int /*status*/, const char* /*func_name*/,
149                             const char* err_msg, const char* /*file_name*/,
150                             int /*line*/, void* /*userdata*/)
151{
152    TestSystem::instance().printError(err_msg);
153    return 0;
154}
155
156
157int main(int argc, const char* argv[])
158{
159    int num_devices = getCudaEnabledDeviceCount();
160    if (num_devices == 0)
161    {
162        cerr << "No GPU found or the library was compiled without CUDA support";
163        return -1;
164    }
165
166    redirectError(cvErrorCallback);
167
168    const char* keys =
169       "{ h  help    |       | print help message }"
170       "{ f  filter  |       | filter for test }"
171       "{ w  workdir |       | set working directory }"
172       "{ l  list    |       | show all tests }"
173       "{ d  device  | 0     | device id }"
174       "{ i  iters   | 10    | iteration count }";
175
176    CommandLineParser cmd(argc, argv, keys);
177
178    if (cmd.has("help") || !cmd.check())
179    {
180        cmd.printMessage();
181        cmd.printErrors();
182        return 0;
183    }
184
185
186    int device = cmd.get<int>("device");
187    if (device < 0 || device >= num_devices)
188    {
189        cerr << "Invalid device ID" << endl;
190        return -1;
191    }
192    DeviceInfo dev_info(device);
193    if (!dev_info.isCompatible())
194    {
195        cerr << "CUDA module isn't built for GPU #" << device << " " << dev_info.name() << ", CC " << dev_info.majorVersion() << '.' << dev_info.minorVersion() << endl;
196        return -1;
197    }
198    setDevice(device);
199    printShortCudaDeviceInfo(device);
200
201    string filter = cmd.get<string>("filter");
202    string workdir = cmd.get<string>("workdir");
203    bool list = cmd.has("list");
204    int iters = cmd.get<int>("iters");
205
206    if (!filter.empty())
207        TestSystem::instance().setTestFilter(filter);
208
209    if (!workdir.empty())
210    {
211        if (workdir[workdir.size() - 1] != '/' && workdir[workdir.size() - 1] != '\\')
212            workdir += '/';
213
214        TestSystem::instance().setWorkingDir(workdir);
215    }
216
217    if (list)
218        TestSystem::instance().setListMode(true);
219
220    TestSystem::instance().setNumIters(iters);
221
222    cout << "\nNote: the timings for GPU don't include data transfer" << endl;
223
224    TestSystem::instance().run();
225
226    return 0;
227}
228