1#include <opencv2/opencv.hpp> 2 3#include <string> 4#include <iostream> 5#include <fstream> 6#include <vector> 7 8#include <time.h> 9 10using namespace cv; 11using namespace cv::ml; 12using namespace std; 13 14void get_svm_detector(const Ptr<SVM>& svm, vector< float > & hog_detector ); 15void convert_to_ml(const std::vector< cv::Mat > & train_samples, cv::Mat& trainData ); 16void load_images( const string & prefix, const string & filename, vector< Mat > & img_lst ); 17void sample_neg( const vector< Mat > & full_neg_lst, vector< Mat > & neg_lst, const Size & size ); 18Mat get_hogdescriptor_visu(const Mat& color_origImg, vector<float>& descriptorValues, const Size & size ); 19void compute_hog( const vector< Mat > & img_lst, vector< Mat > & gradient_lst, const Size & size ); 20void train_svm( const vector< Mat > & gradient_lst, const vector< int > & labels ); 21void draw_locations( Mat & img, const vector< Rect > & locations, const Scalar & color ); 22void test_it( const Size & size ); 23 24void get_svm_detector(const Ptr<SVM>& svm, vector< float > & hog_detector ) 25{ 26 // get the support vectors 27 Mat sv = svm->getSupportVectors(); 28 const int sv_total = sv.rows; 29 // get the decision function 30 Mat alpha, svidx; 31 double rho = svm->getDecisionFunction(0, alpha, svidx); 32 33 CV_Assert( alpha.total() == 1 && svidx.total() == 1 && sv_total == 1 ); 34 CV_Assert( (alpha.type() == CV_64F && alpha.at<double>(0) == 1.) || 35 (alpha.type() == CV_32F && alpha.at<float>(0) == 1.f) ); 36 CV_Assert( sv.type() == CV_32F ); 37 hog_detector.clear(); 38 39 hog_detector.resize(sv.cols + 1); 40 memcpy(&hog_detector[0], sv.ptr(), sv.cols*sizeof(hog_detector[0])); 41 hog_detector[sv.cols] = (float)-rho; 42} 43 44 45/* 46* Convert training/testing set to be used by OpenCV Machine Learning algorithms. 47* TrainData is a matrix of size (#samples x max(#cols,#rows) per samples), in 32FC1. 48* Transposition of samples are made if needed. 49*/ 50void convert_to_ml(const std::vector< cv::Mat > & train_samples, cv::Mat& trainData ) 51{ 52 //--Convert data 53 const int rows = (int)train_samples.size(); 54 const int cols = (int)std::max( train_samples[0].cols, train_samples[0].rows ); 55 cv::Mat tmp(1, cols, CV_32FC1); //< used for transposition if needed 56 trainData = cv::Mat(rows, cols, CV_32FC1 ); 57 vector< Mat >::const_iterator itr = train_samples.begin(); 58 vector< Mat >::const_iterator end = train_samples.end(); 59 for( int i = 0 ; itr != end ; ++itr, ++i ) 60 { 61 CV_Assert( itr->cols == 1 || 62 itr->rows == 1 ); 63 if( itr->cols == 1 ) 64 { 65 transpose( *(itr), tmp ); 66 tmp.copyTo( trainData.row( i ) ); 67 } 68 else if( itr->rows == 1 ) 69 { 70 itr->copyTo( trainData.row( i ) ); 71 } 72 } 73} 74 75void load_images( const string & prefix, const string & filename, vector< Mat > & img_lst ) 76{ 77 string line; 78 ifstream file; 79 80 file.open( (prefix+filename).c_str() ); 81 if( !file.is_open() ) 82 { 83 cerr << "Unable to open the list of images from " << filename << " filename." << endl; 84 exit( -1 ); 85 } 86 87 bool end_of_parsing = false; 88 while( !end_of_parsing ) 89 { 90 getline( file, line ); 91 if( line == "" ) // no more file to read 92 { 93 end_of_parsing = true; 94 break; 95 } 96 Mat img = imread( (prefix+line).c_str() ); // load the image 97 if( img.empty() ) // invalid image, just skip it. 98 continue; 99#ifdef _DEBUG 100 imshow( "image", img ); 101 waitKey( 10 ); 102#endif 103 img_lst.push_back( img.clone() ); 104 } 105} 106 107void sample_neg( const vector< Mat > & full_neg_lst, vector< Mat > & neg_lst, const Size & size ) 108{ 109 Rect box; 110 box.width = size.width; 111 box.height = size.height; 112 113 const int size_x = box.width; 114 const int size_y = box.height; 115 116 srand( (unsigned int)time( NULL ) ); 117 118 vector< Mat >::const_iterator img = full_neg_lst.begin(); 119 vector< Mat >::const_iterator end = full_neg_lst.end(); 120 for( ; img != end ; ++img ) 121 { 122 box.x = rand() % (img->cols - size_x); 123 box.y = rand() % (img->rows - size_y); 124 Mat roi = (*img)(box); 125 neg_lst.push_back( roi.clone() ); 126#ifdef _DEBUG 127 imshow( "img", roi.clone() ); 128 waitKey( 10 ); 129#endif 130 } 131} 132 133// From http://www.juergenwiki.de/work/wiki/doku.php?id=public:hog_descriptor_computation_and_visualization 134Mat get_hogdescriptor_visu(const Mat& color_origImg, vector<float>& descriptorValues, const Size & size ) 135{ 136 const int DIMX = size.width; 137 const int DIMY = size.height; 138 float zoomFac = 3; 139 Mat visu; 140 resize(color_origImg, visu, Size( (int)(color_origImg.cols*zoomFac), (int)(color_origImg.rows*zoomFac) ) ); 141 142 int cellSize = 8; 143 int gradientBinSize = 9; 144 float radRangeForOneBin = (float)(CV_PI/(float)gradientBinSize); // dividing 180 into 9 bins, how large (in rad) is one bin? 145 146 // prepare data structure: 9 orientation / gradient strenghts for each cell 147 int cells_in_x_dir = DIMX / cellSize; 148 int cells_in_y_dir = DIMY / cellSize; 149 float*** gradientStrengths = new float**[cells_in_y_dir]; 150 int** cellUpdateCounter = new int*[cells_in_y_dir]; 151 for (int y=0; y<cells_in_y_dir; y++) 152 { 153 gradientStrengths[y] = new float*[cells_in_x_dir]; 154 cellUpdateCounter[y] = new int[cells_in_x_dir]; 155 for (int x=0; x<cells_in_x_dir; x++) 156 { 157 gradientStrengths[y][x] = new float[gradientBinSize]; 158 cellUpdateCounter[y][x] = 0; 159 160 for (int bin=0; bin<gradientBinSize; bin++) 161 gradientStrengths[y][x][bin] = 0.0; 162 } 163 } 164 165 // nr of blocks = nr of cells - 1 166 // since there is a new block on each cell (overlapping blocks!) but the last one 167 int blocks_in_x_dir = cells_in_x_dir - 1; 168 int blocks_in_y_dir = cells_in_y_dir - 1; 169 170 // compute gradient strengths per cell 171 int descriptorDataIdx = 0; 172 int cellx = 0; 173 int celly = 0; 174 175 for (int blockx=0; blockx<blocks_in_x_dir; blockx++) 176 { 177 for (int blocky=0; blocky<blocks_in_y_dir; blocky++) 178 { 179 // 4 cells per block ... 180 for (int cellNr=0; cellNr<4; cellNr++) 181 { 182 // compute corresponding cell nr 183 cellx = blockx; 184 celly = blocky; 185 if (cellNr==1) celly++; 186 if (cellNr==2) cellx++; 187 if (cellNr==3) 188 { 189 cellx++; 190 celly++; 191 } 192 193 for (int bin=0; bin<gradientBinSize; bin++) 194 { 195 float gradientStrength = descriptorValues[ descriptorDataIdx ]; 196 descriptorDataIdx++; 197 198 gradientStrengths[celly][cellx][bin] += gradientStrength; 199 200 } // for (all bins) 201 202 203 // note: overlapping blocks lead to multiple updates of this sum! 204 // we therefore keep track how often a cell was updated, 205 // to compute average gradient strengths 206 cellUpdateCounter[celly][cellx]++; 207 208 } // for (all cells) 209 210 211 } // for (all block x pos) 212 } // for (all block y pos) 213 214 215 // compute average gradient strengths 216 for (celly=0; celly<cells_in_y_dir; celly++) 217 { 218 for (cellx=0; cellx<cells_in_x_dir; cellx++) 219 { 220 221 float NrUpdatesForThisCell = (float)cellUpdateCounter[celly][cellx]; 222 223 // compute average gradient strenghts for each gradient bin direction 224 for (int bin=0; bin<gradientBinSize; bin++) 225 { 226 gradientStrengths[celly][cellx][bin] /= NrUpdatesForThisCell; 227 } 228 } 229 } 230 231 // draw cells 232 for (celly=0; celly<cells_in_y_dir; celly++) 233 { 234 for (cellx=0; cellx<cells_in_x_dir; cellx++) 235 { 236 int drawX = cellx * cellSize; 237 int drawY = celly * cellSize; 238 239 int mx = drawX + cellSize/2; 240 int my = drawY + cellSize/2; 241 242 rectangle(visu, Point((int)(drawX*zoomFac), (int)(drawY*zoomFac)), Point((int)((drawX+cellSize)*zoomFac), (int)((drawY+cellSize)*zoomFac)), Scalar(100,100,100), 1); 243 244 // draw in each cell all 9 gradient strengths 245 for (int bin=0; bin<gradientBinSize; bin++) 246 { 247 float currentGradStrength = gradientStrengths[celly][cellx][bin]; 248 249 // no line to draw? 250 if (currentGradStrength==0) 251 continue; 252 253 float currRad = bin * radRangeForOneBin + radRangeForOneBin/2; 254 255 float dirVecX = cos( currRad ); 256 float dirVecY = sin( currRad ); 257 float maxVecLen = (float)(cellSize/2.f); 258 float scale = 2.5; // just a visualization scale, to see the lines better 259 260 // compute line coordinates 261 float x1 = mx - dirVecX * currentGradStrength * maxVecLen * scale; 262 float y1 = my - dirVecY * currentGradStrength * maxVecLen * scale; 263 float x2 = mx + dirVecX * currentGradStrength * maxVecLen * scale; 264 float y2 = my + dirVecY * currentGradStrength * maxVecLen * scale; 265 266 // draw gradient visualization 267 line(visu, Point((int)(x1*zoomFac),(int)(y1*zoomFac)), Point((int)(x2*zoomFac),(int)(y2*zoomFac)), Scalar(0,255,0), 1); 268 269 } // for (all bins) 270 271 } // for (cellx) 272 } // for (celly) 273 274 275 // don't forget to free memory allocated by helper data structures! 276 for (int y=0; y<cells_in_y_dir; y++) 277 { 278 for (int x=0; x<cells_in_x_dir; x++) 279 { 280 delete[] gradientStrengths[y][x]; 281 } 282 delete[] gradientStrengths[y]; 283 delete[] cellUpdateCounter[y]; 284 } 285 delete[] gradientStrengths; 286 delete[] cellUpdateCounter; 287 288 return visu; 289 290} // get_hogdescriptor_visu 291 292void compute_hog( const vector< Mat > & img_lst, vector< Mat > & gradient_lst, const Size & size ) 293{ 294 HOGDescriptor hog; 295 hog.winSize = size; 296 Mat gray; 297 vector< Point > location; 298 vector< float > descriptors; 299 300 vector< Mat >::const_iterator img = img_lst.begin(); 301 vector< Mat >::const_iterator end = img_lst.end(); 302 for( ; img != end ; ++img ) 303 { 304 cvtColor( *img, gray, COLOR_BGR2GRAY ); 305 hog.compute( gray, descriptors, Size( 8, 8 ), Size( 0, 0 ), location ); 306 gradient_lst.push_back( Mat( descriptors ).clone() ); 307#ifdef _DEBUG 308 imshow( "gradient", get_hogdescriptor_visu( img->clone(), descriptors, size ) ); 309 waitKey( 10 ); 310#endif 311 } 312} 313 314void train_svm( const vector< Mat > & gradient_lst, const vector< int > & labels ) 315{ 316 317 Mat train_data; 318 convert_to_ml( gradient_lst, train_data ); 319 320 clog << "Start training..."; 321 Ptr<SVM> svm = SVM::create(); 322 /* Default values to train SVM */ 323 svm->setCoef0(0.0); 324 svm->setDegree(3); 325 svm->setTermCriteria(TermCriteria( CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, 1e-3 )); 326 svm->setGamma(0); 327 svm->setKernel(SVM::LINEAR); 328 svm->setNu(0.5); 329 svm->setP(0.1); // for EPSILON_SVR, epsilon in loss function? 330 svm->setC(0.01); // From paper, soft classifier 331 svm->setType(SVM::EPS_SVR); // C_SVC; // EPSILON_SVR; // may be also NU_SVR; // do regression task 332 svm->train(train_data, ROW_SAMPLE, Mat(labels)); 333 clog << "...[done]" << endl; 334 335 svm->save( "my_people_detector.yml" ); 336} 337 338void draw_locations( Mat & img, const vector< Rect > & locations, const Scalar & color ) 339{ 340 if( !locations.empty() ) 341 { 342 vector< Rect >::const_iterator loc = locations.begin(); 343 vector< Rect >::const_iterator end = locations.end(); 344 for( ; loc != end ; ++loc ) 345 { 346 rectangle( img, *loc, color, 2 ); 347 } 348 } 349} 350 351void test_it( const Size & size ) 352{ 353 char key = 27; 354 Scalar reference( 0, 255, 0 ); 355 Scalar trained( 0, 0, 255 ); 356 Mat img, draw; 357 Ptr<SVM> svm; 358 HOGDescriptor hog; 359 HOGDescriptor my_hog; 360 my_hog.winSize = size; 361 VideoCapture video; 362 vector< Rect > locations; 363 364 // Load the trained SVM. 365 svm = StatModel::load<SVM>( "my_people_detector.yml" ); 366 // Set the trained svm to my_hog 367 vector< float > hog_detector; 368 get_svm_detector( svm, hog_detector ); 369 my_hog.setSVMDetector( hog_detector ); 370 // Set the people detector. 371 hog.setSVMDetector( hog.getDefaultPeopleDetector() ); 372 // Open the camera. 373 video.open(0); 374 if( !video.isOpened() ) 375 { 376 cerr << "Unable to open the device 0" << endl; 377 exit( -1 ); 378 } 379 380 bool end_of_process = false; 381 while( !end_of_process ) 382 { 383 video >> img; 384 if( img.empty() ) 385 break; 386 387 draw = img.clone(); 388 389 locations.clear(); 390 hog.detectMultiScale( img, locations ); 391 draw_locations( draw, locations, reference ); 392 393 locations.clear(); 394 my_hog.detectMultiScale( img, locations ); 395 draw_locations( draw, locations, trained ); 396 397 imshow( "Video", draw ); 398 key = (char)waitKey( 10 ); 399 if( 27 == key ) 400 end_of_process = true; 401 } 402} 403 404int main( int argc, char** argv ) 405{ 406 if( argc != 5 ) 407 { 408 cout << "Wrong number of parameters." << endl 409 << "Usage: " << argv[0] << " pos_dir pos.lst neg_dir neg.lst" << endl 410 << "example: " << argv[0] << " /INRIA_dataset/ Train/pos.lst /INRIA_dataset/ Train/neg.lst" << endl; 411 exit( -1 ); 412 } 413 vector< Mat > pos_lst; 414 vector< Mat > full_neg_lst; 415 vector< Mat > neg_lst; 416 vector< Mat > gradient_lst; 417 vector< int > labels; 418 419 load_images( argv[1], argv[2], pos_lst ); 420 labels.assign( pos_lst.size(), +1 ); 421 const unsigned int old = (unsigned int)labels.size(); 422 load_images( argv[3], argv[4], full_neg_lst ); 423 sample_neg( full_neg_lst, neg_lst, Size( 96,160 ) ); 424 labels.insert( labels.end(), neg_lst.size(), -1 ); 425 CV_Assert( old < labels.size() ); 426 427 compute_hog( pos_lst, gradient_lst, Size( 96, 160 ) ); 428 compute_hog( neg_lst, gradient_lst, Size( 96, 160 ) ); 429 430 train_svm( gradient_lst, labels ); 431 432 test_it( Size( 96, 160 ) ); // change with your parameters 433 434 return 0; 435} 436