1#include <opencv2/opencv.hpp>
2
3#include <string>
4#include <iostream>
5#include <fstream>
6#include <vector>
7
8#include <time.h>
9
10using namespace cv;
11using namespace cv::ml;
12using namespace std;
13
14void get_svm_detector(const Ptr<SVM>& svm, vector< float > & hog_detector );
15void convert_to_ml(const std::vector< cv::Mat > & train_samples, cv::Mat& trainData );
16void load_images( const string & prefix, const string & filename, vector< Mat > & img_lst );
17void sample_neg( const vector< Mat > & full_neg_lst, vector< Mat > & neg_lst, const Size & size );
18Mat get_hogdescriptor_visu(const Mat& color_origImg, vector<float>& descriptorValues, const Size & size );
19void compute_hog( const vector< Mat > & img_lst, vector< Mat > & gradient_lst, const Size & size );
20void train_svm( const vector< Mat > & gradient_lst, const vector< int > & labels );
21void draw_locations( Mat & img, const vector< Rect > & locations, const Scalar & color );
22void test_it( const Size & size );
23
24void get_svm_detector(const Ptr<SVM>& svm, vector< float > & hog_detector )
25{
26    // get the support vectors
27    Mat sv = svm->getSupportVectors();
28    const int sv_total = sv.rows;
29    // get the decision function
30    Mat alpha, svidx;
31    double rho = svm->getDecisionFunction(0, alpha, svidx);
32
33    CV_Assert( alpha.total() == 1 && svidx.total() == 1 && sv_total == 1 );
34    CV_Assert( (alpha.type() == CV_64F && alpha.at<double>(0) == 1.) ||
35               (alpha.type() == CV_32F && alpha.at<float>(0) == 1.f) );
36    CV_Assert( sv.type() == CV_32F );
37    hog_detector.clear();
38
39    hog_detector.resize(sv.cols + 1);
40    memcpy(&hog_detector[0], sv.ptr(), sv.cols*sizeof(hog_detector[0]));
41    hog_detector[sv.cols] = (float)-rho;
42}
43
44
45/*
46* Convert training/testing set to be used by OpenCV Machine Learning algorithms.
47* TrainData is a matrix of size (#samples x max(#cols,#rows) per samples), in 32FC1.
48* Transposition of samples are made if needed.
49*/
50void convert_to_ml(const std::vector< cv::Mat > & train_samples, cv::Mat& trainData )
51{
52    //--Convert data
53    const int rows = (int)train_samples.size();
54    const int cols = (int)std::max( train_samples[0].cols, train_samples[0].rows );
55    cv::Mat tmp(1, cols, CV_32FC1); //< used for transposition if needed
56    trainData = cv::Mat(rows, cols, CV_32FC1 );
57    vector< Mat >::const_iterator itr = train_samples.begin();
58    vector< Mat >::const_iterator end = train_samples.end();
59    for( int i = 0 ; itr != end ; ++itr, ++i )
60    {
61        CV_Assert( itr->cols == 1 ||
62            itr->rows == 1 );
63        if( itr->cols == 1 )
64        {
65            transpose( *(itr), tmp );
66            tmp.copyTo( trainData.row( i ) );
67        }
68        else if( itr->rows == 1 )
69        {
70            itr->copyTo( trainData.row( i ) );
71        }
72    }
73}
74
75void load_images( const string & prefix, const string & filename, vector< Mat > & img_lst )
76{
77    string line;
78    ifstream file;
79
80    file.open( (prefix+filename).c_str() );
81    if( !file.is_open() )
82    {
83        cerr << "Unable to open the list of images from " << filename << " filename." << endl;
84        exit( -1 );
85    }
86
87    bool end_of_parsing = false;
88    while( !end_of_parsing )
89    {
90        getline( file, line );
91        if( line == "" ) // no more file to read
92        {
93            end_of_parsing = true;
94            break;
95        }
96        Mat img = imread( (prefix+line).c_str() ); // load the image
97        if( img.empty() ) // invalid image, just skip it.
98            continue;
99#ifdef _DEBUG
100        imshow( "image", img );
101        waitKey( 10 );
102#endif
103        img_lst.push_back( img.clone() );
104    }
105}
106
107void sample_neg( const vector< Mat > & full_neg_lst, vector< Mat > & neg_lst, const Size & size )
108{
109    Rect box;
110    box.width = size.width;
111    box.height = size.height;
112
113    const int size_x = box.width;
114    const int size_y = box.height;
115
116    srand( (unsigned int)time( NULL ) );
117
118    vector< Mat >::const_iterator img = full_neg_lst.begin();
119    vector< Mat >::const_iterator end = full_neg_lst.end();
120    for( ; img != end ; ++img )
121    {
122        box.x = rand() % (img->cols - size_x);
123        box.y = rand() % (img->rows - size_y);
124        Mat roi = (*img)(box);
125        neg_lst.push_back( roi.clone() );
126#ifdef _DEBUG
127        imshow( "img", roi.clone() );
128        waitKey( 10 );
129#endif
130    }
131}
132
133// From http://www.juergenwiki.de/work/wiki/doku.php?id=public:hog_descriptor_computation_and_visualization
134Mat get_hogdescriptor_visu(const Mat& color_origImg, vector<float>& descriptorValues, const Size & size )
135{
136    const int DIMX = size.width;
137    const int DIMY = size.height;
138    float zoomFac = 3;
139    Mat visu;
140    resize(color_origImg, visu, Size( (int)(color_origImg.cols*zoomFac), (int)(color_origImg.rows*zoomFac) ) );
141
142    int cellSize        = 8;
143    int gradientBinSize = 9;
144    float radRangeForOneBin = (float)(CV_PI/(float)gradientBinSize); // dividing 180 into 9 bins, how large (in rad) is one bin?
145
146    // prepare data structure: 9 orientation / gradient strenghts for each cell
147    int cells_in_x_dir = DIMX / cellSize;
148    int cells_in_y_dir = DIMY / cellSize;
149    float*** gradientStrengths = new float**[cells_in_y_dir];
150    int** cellUpdateCounter   = new int*[cells_in_y_dir];
151    for (int y=0; y<cells_in_y_dir; y++)
152    {
153        gradientStrengths[y] = new float*[cells_in_x_dir];
154        cellUpdateCounter[y] = new int[cells_in_x_dir];
155        for (int x=0; x<cells_in_x_dir; x++)
156        {
157            gradientStrengths[y][x] = new float[gradientBinSize];
158            cellUpdateCounter[y][x] = 0;
159
160            for (int bin=0; bin<gradientBinSize; bin++)
161                gradientStrengths[y][x][bin] = 0.0;
162        }
163    }
164
165    // nr of blocks = nr of cells - 1
166    // since there is a new block on each cell (overlapping blocks!) but the last one
167    int blocks_in_x_dir = cells_in_x_dir - 1;
168    int blocks_in_y_dir = cells_in_y_dir - 1;
169
170    // compute gradient strengths per cell
171    int descriptorDataIdx = 0;
172    int cellx = 0;
173    int celly = 0;
174
175    for (int blockx=0; blockx<blocks_in_x_dir; blockx++)
176    {
177        for (int blocky=0; blocky<blocks_in_y_dir; blocky++)
178        {
179            // 4 cells per block ...
180            for (int cellNr=0; cellNr<4; cellNr++)
181            {
182                // compute corresponding cell nr
183                cellx = blockx;
184                celly = blocky;
185                if (cellNr==1) celly++;
186                if (cellNr==2) cellx++;
187                if (cellNr==3)
188                {
189                    cellx++;
190                    celly++;
191                }
192
193                for (int bin=0; bin<gradientBinSize; bin++)
194                {
195                    float gradientStrength = descriptorValues[ descriptorDataIdx ];
196                    descriptorDataIdx++;
197
198                    gradientStrengths[celly][cellx][bin] += gradientStrength;
199
200                } // for (all bins)
201
202
203                // note: overlapping blocks lead to multiple updates of this sum!
204                // we therefore keep track how often a cell was updated,
205                // to compute average gradient strengths
206                cellUpdateCounter[celly][cellx]++;
207
208            } // for (all cells)
209
210
211        } // for (all block x pos)
212    } // for (all block y pos)
213
214
215    // compute average gradient strengths
216    for (celly=0; celly<cells_in_y_dir; celly++)
217    {
218        for (cellx=0; cellx<cells_in_x_dir; cellx++)
219        {
220
221            float NrUpdatesForThisCell = (float)cellUpdateCounter[celly][cellx];
222
223            // compute average gradient strenghts for each gradient bin direction
224            for (int bin=0; bin<gradientBinSize; bin++)
225            {
226                gradientStrengths[celly][cellx][bin] /= NrUpdatesForThisCell;
227            }
228        }
229    }
230
231    // draw cells
232    for (celly=0; celly<cells_in_y_dir; celly++)
233    {
234        for (cellx=0; cellx<cells_in_x_dir; cellx++)
235        {
236            int drawX = cellx * cellSize;
237            int drawY = celly * cellSize;
238
239            int mx = drawX + cellSize/2;
240            int my = drawY + cellSize/2;
241
242            rectangle(visu, Point((int)(drawX*zoomFac), (int)(drawY*zoomFac)), Point((int)((drawX+cellSize)*zoomFac), (int)((drawY+cellSize)*zoomFac)), Scalar(100,100,100), 1);
243
244            // draw in each cell all 9 gradient strengths
245            for (int bin=0; bin<gradientBinSize; bin++)
246            {
247                float currentGradStrength = gradientStrengths[celly][cellx][bin];
248
249                // no line to draw?
250                if (currentGradStrength==0)
251                    continue;
252
253                float currRad = bin * radRangeForOneBin + radRangeForOneBin/2;
254
255                float dirVecX = cos( currRad );
256                float dirVecY = sin( currRad );
257                float maxVecLen = (float)(cellSize/2.f);
258                float scale = 2.5; // just a visualization scale, to see the lines better
259
260                // compute line coordinates
261                float x1 = mx - dirVecX * currentGradStrength * maxVecLen * scale;
262                float y1 = my - dirVecY * currentGradStrength * maxVecLen * scale;
263                float x2 = mx + dirVecX * currentGradStrength * maxVecLen * scale;
264                float y2 = my + dirVecY * currentGradStrength * maxVecLen * scale;
265
266                // draw gradient visualization
267                line(visu, Point((int)(x1*zoomFac),(int)(y1*zoomFac)), Point((int)(x2*zoomFac),(int)(y2*zoomFac)), Scalar(0,255,0), 1);
268
269            } // for (all bins)
270
271        } // for (cellx)
272    } // for (celly)
273
274
275    // don't forget to free memory allocated by helper data structures!
276    for (int y=0; y<cells_in_y_dir; y++)
277    {
278        for (int x=0; x<cells_in_x_dir; x++)
279        {
280            delete[] gradientStrengths[y][x];
281        }
282        delete[] gradientStrengths[y];
283        delete[] cellUpdateCounter[y];
284    }
285    delete[] gradientStrengths;
286    delete[] cellUpdateCounter;
287
288    return visu;
289
290} // get_hogdescriptor_visu
291
292void compute_hog( const vector< Mat > & img_lst, vector< Mat > & gradient_lst, const Size & size )
293{
294    HOGDescriptor hog;
295    hog.winSize = size;
296    Mat gray;
297    vector< Point > location;
298    vector< float > descriptors;
299
300    vector< Mat >::const_iterator img = img_lst.begin();
301    vector< Mat >::const_iterator end = img_lst.end();
302    for( ; img != end ; ++img )
303    {
304        cvtColor( *img, gray, COLOR_BGR2GRAY );
305        hog.compute( gray, descriptors, Size( 8, 8 ), Size( 0, 0 ), location );
306        gradient_lst.push_back( Mat( descriptors ).clone() );
307#ifdef _DEBUG
308        imshow( "gradient", get_hogdescriptor_visu( img->clone(), descriptors, size ) );
309        waitKey( 10 );
310#endif
311    }
312}
313
314void train_svm( const vector< Mat > & gradient_lst, const vector< int > & labels )
315{
316
317    Mat train_data;
318    convert_to_ml( gradient_lst, train_data );
319
320    clog << "Start training...";
321    Ptr<SVM> svm = SVM::create();
322    /* Default values to train SVM */
323    svm->setCoef0(0.0);
324    svm->setDegree(3);
325    svm->setTermCriteria(TermCriteria( CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, 1e-3 ));
326    svm->setGamma(0);
327    svm->setKernel(SVM::LINEAR);
328    svm->setNu(0.5);
329    svm->setP(0.1); // for EPSILON_SVR, epsilon in loss function?
330    svm->setC(0.01); // From paper, soft classifier
331    svm->setType(SVM::EPS_SVR); // C_SVC; // EPSILON_SVR; // may be also NU_SVR; // do regression task
332    svm->train(train_data, ROW_SAMPLE, Mat(labels));
333    clog << "...[done]" << endl;
334
335    svm->save( "my_people_detector.yml" );
336}
337
338void draw_locations( Mat & img, const vector< Rect > & locations, const Scalar & color )
339{
340    if( !locations.empty() )
341    {
342        vector< Rect >::const_iterator loc = locations.begin();
343        vector< Rect >::const_iterator end = locations.end();
344        for( ; loc != end ; ++loc )
345        {
346            rectangle( img, *loc, color, 2 );
347        }
348    }
349}
350
351void test_it( const Size & size )
352{
353    char key = 27;
354    Scalar reference( 0, 255, 0 );
355    Scalar trained( 0, 0, 255 );
356    Mat img, draw;
357    Ptr<SVM> svm;
358    HOGDescriptor hog;
359    HOGDescriptor my_hog;
360    my_hog.winSize = size;
361    VideoCapture video;
362    vector< Rect > locations;
363
364    // Load the trained SVM.
365    svm = StatModel::load<SVM>( "my_people_detector.yml" );
366    // Set the trained svm to my_hog
367    vector< float > hog_detector;
368    get_svm_detector( svm, hog_detector );
369    my_hog.setSVMDetector( hog_detector );
370    // Set the people detector.
371    hog.setSVMDetector( hog.getDefaultPeopleDetector() );
372    // Open the camera.
373    video.open(0);
374    if( !video.isOpened() )
375    {
376        cerr << "Unable to open the device 0" << endl;
377        exit( -1 );
378    }
379
380    bool end_of_process = false;
381    while( !end_of_process )
382    {
383        video >> img;
384        if( img.empty() )
385            break;
386
387        draw = img.clone();
388
389        locations.clear();
390        hog.detectMultiScale( img, locations );
391        draw_locations( draw, locations, reference );
392
393        locations.clear();
394        my_hog.detectMultiScale( img, locations );
395        draw_locations( draw, locations, trained );
396
397        imshow( "Video", draw );
398        key = (char)waitKey( 10 );
399        if( 27 == key )
400            end_of_process = true;
401    }
402}
403
404int main( int argc, char** argv )
405{
406    if( argc != 5 )
407    {
408        cout << "Wrong number of parameters." << endl
409            << "Usage: " << argv[0] << " pos_dir pos.lst neg_dir neg.lst" << endl
410            << "example: " << argv[0] << " /INRIA_dataset/ Train/pos.lst /INRIA_dataset/ Train/neg.lst" << endl;
411        exit( -1 );
412    }
413    vector< Mat > pos_lst;
414    vector< Mat > full_neg_lst;
415    vector< Mat > neg_lst;
416    vector< Mat > gradient_lst;
417    vector< int > labels;
418
419    load_images( argv[1], argv[2], pos_lst );
420    labels.assign( pos_lst.size(), +1 );
421    const unsigned int old = (unsigned int)labels.size();
422    load_images( argv[3], argv[4], full_neg_lst );
423    sample_neg( full_neg_lst, neg_lst, Size( 96,160 ) );
424    labels.insert( labels.end(), neg_lst.size(), -1 );
425    CV_Assert( old < labels.size() );
426
427    compute_hog( pos_lst, gradient_lst, Size( 96, 160 ) );
428    compute_hog( neg_lst, gradient_lst, Size( 96, 160 ) );
429
430    train_svm( gradient_lst, labels );
431
432    test_it( Size( 96, 160 ) ); // change with your parameters
433
434    return 0;
435}
436