1/*M/////////////////////////////////////////////////////////////////////////////////////// 2// 3// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4// 5// By downloading, copying, installing or using the software you agree to this license. 6// If you do not agree to this license, do not download, install, 7// copy or use the software. 8// 9// 10// Intel License Agreement 11// 12// Copyright (C) 2000, Intel Corporation, all rights reserved. 13// Third party copyrights are property of their respective owners. 14// 15// Redistribution and use in source and binary forms, with or without modification, 16// are permitted provided that the following conditions are met: 17// 18// * Redistribution's of source code must retain the above copyright notice, 19// this list of conditions and the following disclaimer. 20// 21// * Redistribution's in binary form must reproduce the above copyright notice, 22// this list of conditions and the following disclaimer in the documentation 23// and/or other materials provided with the distribution. 24// 25// * The name of Intel Corporation may not be used to endorse or promote products 26// derived from this software without specific prior written permission. 27// 28// This software is provided by the copyright holders and contributors "as is" and 29// any express or implied warranties, including, but not limited to, the implied 30// warranties of merchantability and fitness for a particular purpose are disclaimed. 31// In no event shall the Intel Corporation or contributors be liable for any direct, 32// indirect, incidental, special, exemplary, or consequential damages 33// (including, but not limited to, procurement of substitute goods or services; 34// loss of use, data, or profits; or business interruption) however caused 35// and on any theory of liability, whether in contract, strict liability, 36// or tort (including negligence or otherwise) arising in any way out of 37// the use of this software, even if advised of the possibility of such damage. 38// 39//M*/ 40 41#include "_ml.h" 42 43CvANN_MLP_TrainParams::CvANN_MLP_TrainParams() 44{ 45 term_crit = cvTermCriteria( CV_TERMCRIT_ITER + CV_TERMCRIT_EPS, 1000, 0.01 ); 46 train_method = RPROP; 47 bp_dw_scale = bp_moment_scale = 0.1; 48 rp_dw0 = 0.1; rp_dw_plus = 1.2; rp_dw_minus = 0.5; 49 rp_dw_min = FLT_EPSILON; rp_dw_max = 50.; 50} 51 52 53CvANN_MLP_TrainParams::CvANN_MLP_TrainParams( CvTermCriteria _term_crit, 54 int _train_method, 55 double _param1, double _param2 ) 56{ 57 term_crit = _term_crit; 58 train_method = _train_method; 59 bp_dw_scale = bp_moment_scale = 0.1; 60 rp_dw0 = 1.; rp_dw_plus = 1.2; rp_dw_minus = 0.5; 61 rp_dw_min = FLT_EPSILON; rp_dw_max = 50.; 62 63 if( train_method == RPROP ) 64 { 65 rp_dw0 = _param1; 66 if( rp_dw0 < FLT_EPSILON ) 67 rp_dw0 = 1.; 68 rp_dw_min = _param2; 69 rp_dw_min = MAX( rp_dw_min, 0 ); 70 } 71 else if( train_method == BACKPROP ) 72 { 73 bp_dw_scale = _param1; 74 if( bp_dw_scale <= 0 ) 75 bp_dw_scale = 0.1; 76 bp_dw_scale = MAX( bp_dw_scale, 1e-3 ); 77 bp_dw_scale = MIN( bp_dw_scale, 1 ); 78 bp_moment_scale = _param2; 79 if( bp_moment_scale < 0 ) 80 bp_moment_scale = 0.1; 81 bp_moment_scale = MIN( bp_moment_scale, 1 ); 82 } 83 else 84 train_method = RPROP; 85} 86 87 88CvANN_MLP_TrainParams::~CvANN_MLP_TrainParams() 89{ 90} 91 92 93CvANN_MLP::CvANN_MLP() 94{ 95 layer_sizes = wbuf = 0; 96 min_val = max_val = min_val1 = max_val1 = 0.; 97 weights = 0; 98 rng = cvRNG(-1); 99 default_model_name = "my_nn"; 100 clear(); 101} 102 103 104CvANN_MLP::CvANN_MLP( const CvMat* _layer_sizes, 105 int _activ_func, 106 double _f_param1, double _f_param2 ) 107{ 108 layer_sizes = wbuf = 0; 109 min_val = max_val = min_val1 = max_val1 = 0.; 110 weights = 0; 111 rng = cvRNG(-1); 112 default_model_name = "my_nn"; 113 create( _layer_sizes, _activ_func, _f_param1, _f_param2 ); 114} 115 116 117CvANN_MLP::~CvANN_MLP() 118{ 119 clear(); 120} 121 122 123void CvANN_MLP::clear() 124{ 125 cvReleaseMat( &layer_sizes ); 126 cvReleaseMat( &wbuf ); 127 cvFree( &weights ); 128 activ_func = SIGMOID_SYM; 129 f_param1 = f_param2 = 1; 130 max_buf_sz = 1 << 12; 131} 132 133 134void CvANN_MLP::set_activ_func( int _activ_func, double _f_param1, double _f_param2 ) 135{ 136 CV_FUNCNAME( "CvANN_MLP::set_activ_func" ); 137 138 __BEGIN__; 139 140 if( _activ_func < 0 || _activ_func > GAUSSIAN ) 141 CV_ERROR( CV_StsOutOfRange, "Unknown activation function" ); 142 143 activ_func = _activ_func; 144 145 switch( activ_func ) 146 { 147 case SIGMOID_SYM: 148 max_val = 0.95; min_val = -max_val; 149 max_val1 = 0.98; min_val1 = -max_val1; 150 if( fabs(_f_param1) < FLT_EPSILON ) 151 _f_param1 = 2./3; 152 if( fabs(_f_param2) < FLT_EPSILON ) 153 _f_param2 = 1.7159; 154 break; 155 case GAUSSIAN: 156 max_val = 1.; min_val = 0.05; 157 max_val1 = 1.; min_val1 = 0.02; 158 if( fabs(_f_param1) < FLT_EPSILON ) 159 _f_param1 = 1.; 160 if( fabs(_f_param2) < FLT_EPSILON ) 161 _f_param2 = 1.; 162 break; 163 default: 164 min_val = max_val = min_val1 = max_val1 = 0.; 165 _f_param1 = 1.; 166 _f_param2 = 0.; 167 } 168 169 f_param1 = _f_param1; 170 f_param2 = _f_param2; 171 172 __END__; 173} 174 175 176void CvANN_MLP::init_weights() 177{ 178 int i, j, k; 179 180 for( i = 1; i < layer_sizes->cols; i++ ) 181 { 182 int n1 = layer_sizes->data.i[i-1]; 183 int n2 = layer_sizes->data.i[i]; 184 double val = 0, G = n2 > 2 ? 0.7*pow((double)n1,1./(n2-1)) : 1.; 185 double* w = weights[i]; 186 187 // initialize weights using Nguyen-Widrow algorithm 188 for( j = 0; j < n2; j++ ) 189 { 190 double s = 0; 191 for( k = 0; k <= n1; k++ ) 192 { 193 val = cvRandReal(&rng)*2-1.; 194 w[k*n2 + j] = val; 195 s += val; 196 } 197 198 if( i < layer_sizes->cols - 1 ) 199 { 200 s = 1./(s - val); 201 for( k = 0; k <= n1; k++ ) 202 w[k*n2 + j] *= s; 203 w[n1*n2 + j] *= G*(-1+j*2./n2); 204 } 205 } 206 } 207} 208 209 210void CvANN_MLP::create( const CvMat* _layer_sizes, int _activ_func, 211 double _f_param1, double _f_param2 ) 212{ 213 CV_FUNCNAME( "CvANN_MLP::create" ); 214 215 __BEGIN__; 216 217 int i, l_step, l_count, buf_sz = 0; 218 int *l_src, *l_dst; 219 220 clear(); 221 222 if( !CV_IS_MAT(_layer_sizes) || 223 _layer_sizes->cols != 1 && _layer_sizes->rows != 1 || 224 CV_MAT_TYPE(_layer_sizes->type) != CV_32SC1 ) 225 CV_ERROR( CV_StsBadArg, 226 "The array of layer neuron counters must be an integer vector" ); 227 228 CV_CALL( set_activ_func( _activ_func, _f_param1, _f_param2 )); 229 230 l_count = _layer_sizes->rows + _layer_sizes->cols - 1; 231 l_src = _layer_sizes->data.i; 232 l_step = CV_IS_MAT_CONT(_layer_sizes->type) ? 1 : 233 _layer_sizes->step / sizeof(l_src[0]); 234 CV_CALL( layer_sizes = cvCreateMat( 1, l_count, CV_32SC1 )); 235 l_dst = layer_sizes->data.i; 236 237 max_count = 0; 238 for( i = 0; i < l_count; i++ ) 239 { 240 int n = l_src[i*l_step]; 241 if( n < 1 + (0 < i && i < l_count-1)) 242 CV_ERROR( CV_StsOutOfRange, 243 "there should be at least one input and one output " 244 "and every hidden layer must have more than 1 neuron" ); 245 l_dst[i] = n; 246 max_count = MAX( max_count, n ); 247 if( i > 0 ) 248 buf_sz += (l_dst[i-1]+1)*n; 249 } 250 251 buf_sz += (l_dst[0] + l_dst[l_count-1]*2)*2; 252 253 CV_CALL( wbuf = cvCreateMat( 1, buf_sz, CV_64F )); 254 CV_CALL( weights = (double**)cvAlloc( (l_count+1)*sizeof(weights[0]) )); 255 256 weights[0] = wbuf->data.db; 257 weights[1] = weights[0] + l_dst[0]*2; 258 for( i = 1; i < l_count; i++ ) 259 weights[i+1] = weights[i] + (l_dst[i-1] + 1)*l_dst[i]; 260 weights[l_count+1] = weights[l_count] + l_dst[l_count-1]*2; 261 262 __END__; 263} 264 265 266float CvANN_MLP::predict( const CvMat* _inputs, CvMat* _outputs ) const 267{ 268 CV_FUNCNAME( "CvANN_MLP::predict" ); 269 270 __BEGIN__; 271 272 double* buf; 273 int i, j, n, dn = 0, l_count, dn0, buf_sz, min_buf_sz; 274 275 if( !layer_sizes ) 276 CV_ERROR( CV_StsError, "The network has not been initialized" ); 277 278 if( !CV_IS_MAT(_inputs) || !CV_IS_MAT(_outputs) || 279 !CV_ARE_TYPES_EQ(_inputs,_outputs) || 280 CV_MAT_TYPE(_inputs->type) != CV_32FC1 && 281 CV_MAT_TYPE(_inputs->type) != CV_64FC1 || 282 _inputs->rows != _outputs->rows ) 283 CV_ERROR( CV_StsBadArg, "Both input and output must be floating-point matrices " 284 "of the same type and have the same number of rows" ); 285 286 if( _inputs->cols != layer_sizes->data.i[0] ) 287 CV_ERROR( CV_StsBadSize, "input matrix must have the same number of columns as " 288 "the number of neurons in the input layer" ); 289 290 if( _outputs->cols != layer_sizes->data.i[layer_sizes->cols - 1] ) 291 CV_ERROR( CV_StsBadSize, "output matrix must have the same number of columns as " 292 "the number of neurons in the output layer" ); 293 n = dn0 = _inputs->rows; 294 min_buf_sz = 2*max_count; 295 buf_sz = n*min_buf_sz; 296 297 if( buf_sz > max_buf_sz ) 298 { 299 dn0 = max_buf_sz/min_buf_sz; 300 dn0 = MAX( dn0, 1 ); 301 buf_sz = dn0*min_buf_sz; 302 } 303 304 buf = (double*)cvStackAlloc( buf_sz*sizeof(buf[0]) ); 305 l_count = layer_sizes->cols; 306 307 for( i = 0; i < n; i += dn ) 308 { 309 CvMat hdr[2], _w, *layer_in = &hdr[0], *layer_out = &hdr[1], *temp; 310 dn = MIN( dn0, n - i ); 311 312 cvGetRows( _inputs, layer_in, i, i + dn ); 313 cvInitMatHeader( layer_out, dn, layer_in->cols, CV_64F, buf ); 314 315 scale_input( layer_in, layer_out ); 316 CV_SWAP( layer_in, layer_out, temp ); 317 318 for( j = 1; j < l_count; j++ ) 319 { 320 double* data = buf + (j&1 ? max_count*dn0 : 0); 321 int cols = layer_sizes->data.i[j]; 322 323 cvInitMatHeader( layer_out, dn, cols, CV_64F, data ); 324 cvInitMatHeader( &_w, layer_in->cols, layer_out->cols, CV_64F, weights[j] ); 325 cvGEMM( layer_in, &_w, 1, 0, 0, layer_out ); 326 calc_activ_func( layer_out, _w.data.db + _w.rows*_w.cols ); 327 328 CV_SWAP( layer_in, layer_out, temp ); 329 } 330 331 cvGetRows( _outputs, layer_out, i, i + dn ); 332 scale_output( layer_in, layer_out ); 333 } 334 335 __END__; 336 337 return 0.f; 338} 339 340 341void CvANN_MLP::scale_input( const CvMat* _src, CvMat* _dst ) const 342{ 343 int i, j, cols = _src->cols; 344 double* dst = _dst->data.db; 345 const double* w = weights[0]; 346 int step = _src->step; 347 348 if( CV_MAT_TYPE( _src->type ) == CV_32F ) 349 { 350 const float* src = _src->data.fl; 351 step /= sizeof(src[0]); 352 353 for( i = 0; i < _src->rows; i++, src += step, dst += cols ) 354 for( j = 0; j < cols; j++ ) 355 dst[j] = src[j]*w[j*2] + w[j*2+1]; 356 } 357 else 358 { 359 const double* src = _src->data.db; 360 step /= sizeof(src[0]); 361 362 for( i = 0; i < _src->rows; i++, src += step, dst += cols ) 363 for( j = 0; j < cols; j++ ) 364 dst[j] = src[j]*w[j*2] + w[j*2+1]; 365 } 366} 367 368 369void CvANN_MLP::scale_output( const CvMat* _src, CvMat* _dst ) const 370{ 371 int i, j, cols = _src->cols; 372 const double* src = _src->data.db; 373 const double* w = weights[layer_sizes->cols]; 374 int step = _dst->step; 375 376 if( CV_MAT_TYPE( _dst->type ) == CV_32F ) 377 { 378 float* dst = _dst->data.fl; 379 step /= sizeof(dst[0]); 380 381 for( i = 0; i < _src->rows; i++, src += cols, dst += step ) 382 for( j = 0; j < cols; j++ ) 383 dst[j] = (float)(src[j]*w[j*2] + w[j*2+1]); 384 } 385 else 386 { 387 double* dst = _dst->data.db; 388 step /= sizeof(dst[0]); 389 390 for( i = 0; i < _src->rows; i++, src += cols, dst += step ) 391 for( j = 0; j < cols; j++ ) 392 dst[j] = src[j]*w[j*2] + w[j*2+1]; 393 } 394} 395 396 397void CvANN_MLP::calc_activ_func( CvMat* sums, const double* bias ) const 398{ 399 int i, j, n = sums->rows, cols = sums->cols; 400 double* data = sums->data.db; 401 double scale = 0, scale2 = f_param2; 402 403 switch( activ_func ) 404 { 405 case IDENTITY: 406 scale = 1.; 407 break; 408 case SIGMOID_SYM: 409 scale = -f_param1; 410 break; 411 case GAUSSIAN: 412 scale = -f_param1*f_param1; 413 break; 414 default: 415 ; 416 } 417 418 assert( CV_IS_MAT_CONT(sums->type) ); 419 420 if( activ_func != GAUSSIAN ) 421 { 422 for( i = 0; i < n; i++, data += cols ) 423 for( j = 0; j < cols; j++ ) 424 data[j] = (data[j] + bias[j])*scale; 425 426 if( activ_func == IDENTITY ) 427 return; 428 } 429 else 430 { 431 for( i = 0; i < n; i++, data += cols ) 432 for( j = 0; j < cols; j++ ) 433 { 434 double t = data[j] + bias[j]; 435 data[j] = t*t*scale; 436 } 437 } 438 439 cvExp( sums, sums ); 440 441 n *= cols; 442 data -= n; 443 444 switch( activ_func ) 445 { 446 case SIGMOID_SYM: 447 for( i = 0; i <= n - 4; i += 4 ) 448 { 449 double x0 = 1.+data[i], x1 = 1.+data[i+1], x2 = 1.+data[i+2], x3 = 1.+data[i+3]; 450 double a = x0*x1, b = x2*x3, d = scale2/(a*b), t0, t1; 451 a *= d; b *= d; 452 t0 = (2 - x0)*b*x1; t1 = (2 - x1)*b*x0; 453 data[i] = t0; data[i+1] = t1; 454 t0 = (2 - x2)*a*x3; t1 = (2 - x3)*a*x2; 455 data[i+2] = t0; data[i+3] = t1; 456 } 457 458 for( ; i < n; i++ ) 459 { 460 double t = scale2*(1. - data[i])/(1. + data[i]); 461 data[i] = t; 462 } 463 break; 464 465 case GAUSSIAN: 466 for( i = 0; i < n; i++ ) 467 data[i] = scale2*data[i]; 468 break; 469 470 default: 471 ; 472 } 473} 474 475 476void CvANN_MLP::calc_activ_func_deriv( CvMat* _xf, CvMat* _df, 477 const double* bias ) const 478{ 479 int i, j, n = _xf->rows, cols = _xf->cols; 480 double* xf = _xf->data.db; 481 double* df = _df->data.db; 482 double scale, scale2 = f_param2; 483 assert( CV_IS_MAT_CONT( _xf->type & _df->type ) ); 484 485 if( activ_func == IDENTITY ) 486 { 487 for( i = 0; i < n; i++, xf += cols, df += cols ) 488 for( j = 0; j < cols; j++ ) 489 { 490 xf[j] += bias[j]; 491 df[j] = 1; 492 } 493 return; 494 } 495 else if( activ_func == GAUSSIAN ) 496 { 497 scale = -f_param1*f_param1; 498 scale2 *= scale; 499 for( i = 0; i < n; i++, xf += cols, df += cols ) 500 for( j = 0; j < cols; j++ ) 501 { 502 double t = xf[j] + bias[j]; 503 df[j] = t*2*scale2; 504 xf[j] = t*t*scale; 505 } 506 } 507 else 508 { 509 scale = -f_param1; 510 for( i = 0; i < n; i++, xf += cols, df += cols ) 511 for( j = 0; j < cols; j++ ) 512 xf[j] = (xf[j] + bias[j])*scale; 513 } 514 515 cvExp( _xf, _xf ); 516 517 n *= cols; 518 xf -= n; df -= n; 519 520 // ((1+exp(-ax))^-1)'=a*((1+exp(-ax))^-2)*exp(-ax); 521 // ((1-exp(-ax))/(1+exp(-ax)))'=(a*exp(-ax)*(1+exp(-ax)) + a*exp(-ax)*(1-exp(-ax)))/(1+exp(-ax))^2= 522 // 2*a*exp(-ax)/(1+exp(-ax))^2 523 switch( activ_func ) 524 { 525 case SIGMOID_SYM: 526 scale *= -2*f_param2; 527 for( i = 0; i <= n - 4; i += 4 ) 528 { 529 double x0 = 1.+xf[i], x1 = 1.+xf[i+1], x2 = 1.+xf[i+2], x3 = 1.+xf[i+3]; 530 double a = x0*x1, b = x2*x3, d = 1./(a*b), t0, t1; 531 a *= d; b *= d; 532 533 t0 = b*x1; t1 = b*x0; 534 df[i] = scale*xf[i]*t0*t0; 535 df[i+1] = scale*xf[i+1]*t1*t1; 536 t0 *= scale2*(2 - x0); t1 *= scale2*(2 - x1); 537 xf[i] = t0; xf[i+1] = t1; 538 539 t0 = a*x3; t1 = a*x2; 540 df[i+2] = scale*xf[i+2]*t0*t0; 541 df[i+3] = scale*xf[i+3]*t1*t1; 542 t0 *= scale2*(2 - x2); t1 *= scale2*(2 - x3); 543 xf[i+2] = t0; xf[i+3] = t1; 544 } 545 546 for( ; i < n; i++ ) 547 { 548 double t0 = 1./(1. + xf[i]); 549 double t1 = scale*xf[i]*t0*t0; 550 t0 *= scale2*(1. - xf[i]); 551 df[i] = t1; 552 xf[i] = t0; 553 } 554 break; 555 556 case GAUSSIAN: 557 for( i = 0; i < n; i++ ) 558 df[i] *= xf[i]; 559 break; 560 default: 561 ; 562 } 563} 564 565 566void CvANN_MLP::calc_input_scale( const CvVectors* vecs, int flags ) 567{ 568 bool reset_weights = (flags & UPDATE_WEIGHTS) == 0; 569 bool no_scale = (flags & NO_INPUT_SCALE) != 0; 570 double* scale = weights[0]; 571 int count = vecs->count; 572 573 if( reset_weights ) 574 { 575 int i, j, vcount = layer_sizes->data.i[0]; 576 int type = vecs->type; 577 double a = no_scale ? 1. : 0.; 578 579 for( j = 0; j < vcount; j++ ) 580 scale[2*j] = a, scale[j*2+1] = 0.; 581 582 if( no_scale ) 583 return; 584 585 for( i = 0; i < count; i++ ) 586 { 587 const float* f = vecs->data.fl[i]; 588 const double* d = vecs->data.db[i]; 589 for( j = 0; j < vcount; j++ ) 590 { 591 double t = type == CV_32F ? (double)f[j] : d[j]; 592 scale[j*2] += t; 593 scale[j*2+1] += t*t; 594 } 595 } 596 597 for( j = 0; j < vcount; j++ ) 598 { 599 double s = scale[j*2], s2 = scale[j*2+1]; 600 double m = s/count, sigma2 = s2/count - m*m; 601 scale[j*2] = sigma2 < DBL_EPSILON ? 1 : 1./sqrt(sigma2); 602 scale[j*2+1] = -m*scale[j*2]; 603 } 604 } 605} 606 607 608void CvANN_MLP::calc_output_scale( const CvVectors* vecs, int flags ) 609{ 610 int i, j, vcount = layer_sizes->data.i[layer_sizes->cols-1]; 611 int type = vecs->type; 612 double m = min_val, M = max_val, m1 = min_val1, M1 = max_val1; 613 bool reset_weights = (flags & UPDATE_WEIGHTS) == 0; 614 bool no_scale = (flags & NO_OUTPUT_SCALE) != 0; 615 int l_count = layer_sizes->cols; 616 double* scale = weights[l_count]; 617 double* inv_scale = weights[l_count+1]; 618 int count = vecs->count; 619 620 CV_FUNCNAME( "CvANN_MLP::calc_output_scale" ); 621 622 __BEGIN__; 623 624 if( reset_weights ) 625 { 626 double a0 = no_scale ? 1 : DBL_MAX, b0 = no_scale ? 0 : -DBL_MAX; 627 628 for( j = 0; j < vcount; j++ ) 629 { 630 scale[2*j] = inv_scale[2*j] = a0; 631 scale[j*2+1] = inv_scale[2*j+1] = b0; 632 } 633 634 if( no_scale ) 635 EXIT; 636 } 637 638 for( i = 0; i < count; i++ ) 639 { 640 const float* f = vecs->data.fl[i]; 641 const double* d = vecs->data.db[i]; 642 643 for( j = 0; j < vcount; j++ ) 644 { 645 double t = type == CV_32F ? (double)f[j] : d[j]; 646 647 if( reset_weights ) 648 { 649 double mj = scale[j*2], Mj = scale[j*2+1]; 650 if( mj > t ) mj = t; 651 if( Mj < t ) Mj = t; 652 653 scale[j*2] = mj; 654 scale[j*2+1] = Mj; 655 } 656 else 657 { 658 t = t*scale[j*2] + scale[2*j+1]; 659 if( t < m1 || t > M1 ) 660 CV_ERROR( CV_StsOutOfRange, 661 "Some of new output training vector components run exceed the original range too much" ); 662 } 663 } 664 } 665 666 if( reset_weights ) 667 for( j = 0; j < vcount; j++ ) 668 { 669 // map mj..Mj to m..M 670 double mj = scale[j*2], Mj = scale[j*2+1]; 671 double a, b; 672 double delta = Mj - mj; 673 if( delta < DBL_EPSILON ) 674 a = 1, b = (M + m - Mj - mj)*0.5; 675 else 676 a = (M - m)/delta, b = m - mj*a; 677 inv_scale[j*2] = a; inv_scale[j*2+1] = b; 678 a = 1./a; b = -b*a; 679 scale[j*2] = a; scale[j*2+1] = b; 680 } 681 682 __END__; 683} 684 685 686bool CvANN_MLP::prepare_to_train( const CvMat* _inputs, const CvMat* _outputs, 687 const CvMat* _sample_weights, const CvMat* _sample_idx, 688 CvVectors* _ivecs, CvVectors* _ovecs, double** _sw, int _flags ) 689{ 690 bool ok = false; 691 CvMat* sample_idx = 0; 692 CvVectors ivecs, ovecs; 693 double* sw = 0; 694 int count = 0; 695 696 CV_FUNCNAME( "CvANN_MLP::prepare_to_train" ); 697 698 ivecs.data.ptr = ovecs.data.ptr = 0; 699 assert( _ivecs && _ovecs ); 700 701 __BEGIN__; 702 703 const int* sidx = 0; 704 int i, sw_type = 0, sw_count = 0; 705 int sw_step = 0; 706 double sw_sum = 0; 707 708 if( !layer_sizes ) 709 CV_ERROR( CV_StsError, 710 "The network has not been created. Use method create or the appropriate constructor" ); 711 712 if( !CV_IS_MAT(_inputs) || CV_MAT_TYPE(_inputs->type) != CV_32FC1 && 713 CV_MAT_TYPE(_inputs->type) != CV_64FC1 || _inputs->cols != layer_sizes->data.i[0] ) 714 CV_ERROR( CV_StsBadArg, 715 "input training data should be a floating-point matrix with" 716 "the number of rows equal to the number of training samples and " 717 "the number of columns equal to the size of 0-th (input) layer" ); 718 719 if( !CV_IS_MAT(_outputs) || CV_MAT_TYPE(_outputs->type) != CV_32FC1 && 720 CV_MAT_TYPE(_outputs->type) != CV_64FC1 || 721 _outputs->cols != layer_sizes->data.i[layer_sizes->cols - 1] ) 722 CV_ERROR( CV_StsBadArg, 723 "output training data should be a floating-point matrix with" 724 "the number of rows equal to the number of training samples and " 725 "the number of columns equal to the size of last (output) layer" ); 726 727 if( _inputs->rows != _outputs->rows ) 728 CV_ERROR( CV_StsUnmatchedSizes, "The numbers of input and output samples do not match" ); 729 730 if( _sample_idx ) 731 { 732 CV_CALL( sample_idx = cvPreprocessIndexArray( _sample_idx, _inputs->rows )); 733 sidx = sample_idx->data.i; 734 count = sample_idx->cols + sample_idx->rows - 1; 735 } 736 else 737 count = _inputs->rows; 738 739 if( _sample_weights ) 740 { 741 if( !CV_IS_MAT(_sample_weights) ) 742 CV_ERROR( CV_StsBadArg, "sample_weights (if passed) must be a valid matrix" ); 743 744 sw_type = CV_MAT_TYPE(_sample_weights->type); 745 sw_count = _sample_weights->cols + _sample_weights->rows - 1; 746 747 if( sw_type != CV_32FC1 && sw_type != CV_64FC1 || 748 _sample_weights->cols != 1 && _sample_weights->rows != 1 || 749 sw_count != count && sw_count != _inputs->rows ) 750 CV_ERROR( CV_StsBadArg, 751 "sample_weights must be 1d floating-point vector containing weights " 752 "of all or selected training samples" ); 753 754 sw_step = CV_IS_MAT_CONT(_sample_weights->type) ? 1 : 755 _sample_weights->step/CV_ELEM_SIZE(sw_type); 756 757 CV_CALL( sw = (double*)cvAlloc( count*sizeof(sw[0]) )); 758 } 759 760 CV_CALL( ivecs.data.ptr = (uchar**)cvAlloc( count*sizeof(ivecs.data.ptr[0]) )); 761 CV_CALL( ovecs.data.ptr = (uchar**)cvAlloc( count*sizeof(ovecs.data.ptr[0]) )); 762 763 ivecs.type = CV_MAT_TYPE(_inputs->type); 764 ovecs.type = CV_MAT_TYPE(_outputs->type); 765 ivecs.count = ovecs.count = count; 766 767 for( i = 0; i < count; i++ ) 768 { 769 int idx = sidx ? sidx[i] : i; 770 ivecs.data.ptr[i] = _inputs->data.ptr + idx*_inputs->step; 771 ovecs.data.ptr[i] = _outputs->data.ptr + idx*_outputs->step; 772 if( sw ) 773 { 774 int si = sw_count == count ? i : idx; 775 double w = sw_type == CV_32FC1 ? 776 (double)_sample_weights->data.fl[si*sw_step] : 777 _sample_weights->data.db[si*sw_step]; 778 sw[i] = w; 779 if( w < 0 ) 780 CV_ERROR( CV_StsOutOfRange, "some of sample weights are negative" ); 781 sw_sum += w; 782 } 783 } 784 785 // normalize weights 786 if( sw ) 787 { 788 sw_sum = sw_sum > DBL_EPSILON ? 1./sw_sum : 0; 789 for( i = 0; i < count; i++ ) 790 sw[i] *= sw_sum; 791 } 792 793 calc_input_scale( &ivecs, _flags ); 794 CV_CALL( calc_output_scale( &ovecs, _flags )); 795 796 ok = true; 797 798 __END__; 799 800 if( !ok ) 801 { 802 cvFree( &ivecs.data.ptr ); 803 cvFree( &ovecs.data.ptr ); 804 cvFree( &sw ); 805 } 806 807 cvReleaseMat( &sample_idx ); 808 *_ivecs = ivecs; 809 *_ovecs = ovecs; 810 *_sw = sw; 811 812 return ok; 813} 814 815 816int CvANN_MLP::train( const CvMat* _inputs, const CvMat* _outputs, 817 const CvMat* _sample_weights, const CvMat* _sample_idx, 818 CvANN_MLP_TrainParams _params, int flags ) 819{ 820 const int MAX_ITER = 1000; 821 const double DEFAULT_EPSILON = FLT_EPSILON; 822 823 double* sw = 0; 824 CvVectors x0, u; 825 int iter = -1; 826 827 x0.data.ptr = u.data.ptr = 0; 828 829 CV_FUNCNAME( "CvANN_MLP::train" ); 830 831 __BEGIN__; 832 833 int max_iter; 834 double epsilon; 835 836 params = _params; 837 838 // initialize training data 839 CV_CALL( prepare_to_train( _inputs, _outputs, _sample_weights, 840 _sample_idx, &x0, &u, &sw, flags )); 841 842 // ... and link weights 843 if( !(flags & UPDATE_WEIGHTS) ) 844 init_weights(); 845 846 max_iter = params.term_crit.type & CV_TERMCRIT_ITER ? params.term_crit.max_iter : MAX_ITER; 847 max_iter = MIN( max_iter, MAX_ITER ); 848 max_iter = MAX( max_iter, 1 ); 849 850 epsilon = params.term_crit.type & CV_TERMCRIT_EPS ? params.term_crit.epsilon : DEFAULT_EPSILON; 851 epsilon = MAX(epsilon, DBL_EPSILON); 852 853 params.term_crit.type = CV_TERMCRIT_ITER + CV_TERMCRIT_EPS; 854 params.term_crit.max_iter = max_iter; 855 params.term_crit.epsilon = epsilon; 856 857 if( params.train_method == CvANN_MLP_TrainParams::BACKPROP ) 858 { 859 CV_CALL( iter = train_backprop( x0, u, sw )); 860 } 861 else 862 { 863 CV_CALL( iter = train_rprop( x0, u, sw )); 864 } 865 866 __END__; 867 868 cvFree( &x0.data.ptr ); 869 cvFree( &u.data.ptr ); 870 cvFree( &sw ); 871 872 return iter; 873} 874 875 876int CvANN_MLP::train_backprop( CvVectors x0, CvVectors u, const double* sw ) 877{ 878 CvMat* dw = 0; 879 CvMat* buf = 0; 880 double **x = 0, **df = 0; 881 CvMat* _idx = 0; 882 int iter = -1, count = x0.count; 883 884 CV_FUNCNAME( "CvANN_MLP::train_backprop" ); 885 886 __BEGIN__; 887 888 int i, j, k, ivcount, ovcount, l_count, total = 0, max_iter; 889 double *buf_ptr; 890 double prev_E = DBL_MAX*0.5, E = 0, epsilon; 891 892 max_iter = params.term_crit.max_iter*count; 893 epsilon = params.term_crit.epsilon*count; 894 895 l_count = layer_sizes->cols; 896 ivcount = layer_sizes->data.i[0]; 897 ovcount = layer_sizes->data.i[l_count-1]; 898 899 // allocate buffers 900 for( i = 0; i < l_count; i++ ) 901 total += layer_sizes->data.i[i] + 1; 902 903 CV_CALL( dw = cvCreateMat( wbuf->rows, wbuf->cols, wbuf->type )); 904 cvZero( dw ); 905 CV_CALL( buf = cvCreateMat( 1, (total + max_count)*2, CV_64F )); 906 CV_CALL( _idx = cvCreateMat( 1, count, CV_32SC1 )); 907 for( i = 0; i < count; i++ ) 908 _idx->data.i[i] = i; 909 910 CV_CALL( x = (double**)cvAlloc( total*2*sizeof(x[0]) )); 911 df = x + total; 912 buf_ptr = buf->data.db; 913 914 for( j = 0; j < l_count; j++ ) 915 { 916 x[j] = buf_ptr; 917 df[j] = x[j] + layer_sizes->data.i[j]; 918 buf_ptr += (df[j] - x[j])*2; 919 } 920 921 // run back-propagation loop 922 /* 923 y_i = w_i*x_{i-1} 924 x_i = f(y_i) 925 E = 1/2*||u - x_N||^2 926 grad_N = (x_N - u)*f'(y_i) 927 dw_i(t) = momentum*dw_i(t-1) + dw_scale*x_{i-1}*grad_i 928 w_i(t+1) = w_i(t) + dw_i(t) 929 grad_{i-1} = w_i^t*grad_i 930 */ 931 for( iter = 0; iter < max_iter; iter++ ) 932 { 933 int idx = iter % count; 934 double* w = weights[0]; 935 double sweight = sw ? count*sw[idx] : 1.; 936 CvMat _w, _dw, hdr1, hdr2, ghdr1, ghdr2, _df; 937 CvMat *x1 = &hdr1, *x2 = &hdr2, *grad1 = &ghdr1, *grad2 = &ghdr2, *temp; 938 939 if( idx == 0 ) 940 { 941 if( fabs(prev_E - E) < epsilon ) 942 break; 943 prev_E = E; 944 E = 0; 945 946 // shuffle indices 947 for( i = 0; i < count; i++ ) 948 { 949 int tt; 950 j = (unsigned)cvRandInt(&rng) % count; 951 k = (unsigned)cvRandInt(&rng) % count; 952 CV_SWAP( _idx->data.i[j], _idx->data.i[k], tt ); 953 } 954 } 955 956 idx = _idx->data.i[idx]; 957 958 if( x0.type == CV_32F ) 959 { 960 const float* x0data = x0.data.fl[idx]; 961 for( j = 0; j < ivcount; j++ ) 962 x[0][j] = x0data[j]*w[j*2] + w[j*2 + 1]; 963 } 964 else 965 { 966 const double* x0data = x0.data.db[idx]; 967 for( j = 0; j < ivcount; j++ ) 968 x[0][j] = x0data[j]*w[j*2] + w[j*2 + 1]; 969 } 970 971 cvInitMatHeader( x1, 1, ivcount, CV_64F, x[0] ); 972 973 // forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i]) 974 for( i = 1; i < l_count; i++ ) 975 { 976 cvInitMatHeader( x2, 1, layer_sizes->data.i[i], CV_64F, x[i] ); 977 cvInitMatHeader( &_w, x1->cols, x2->cols, CV_64F, weights[i] ); 978 cvGEMM( x1, &_w, 1, 0, 0, x2 ); 979 _df = *x2; 980 _df.data.db = df[i]; 981 calc_activ_func_deriv( x2, &_df, _w.data.db + _w.rows*_w.cols ); 982 CV_SWAP( x1, x2, temp ); 983 } 984 985 cvInitMatHeader( grad1, 1, ovcount, CV_64F, buf_ptr ); 986 *grad2 = *grad1; 987 grad2->data.db = buf_ptr + max_count; 988 989 w = weights[l_count+1]; 990 991 // calculate error 992 if( u.type == CV_32F ) 993 { 994 const float* udata = u.data.fl[idx]; 995 for( k = 0; k < ovcount; k++ ) 996 { 997 double t = udata[k]*w[k*2] + w[k*2+1] - x[l_count-1][k]; 998 grad1->data.db[k] = t*sweight; 999 E += t*t; 1000 } 1001 } 1002 else 1003 { 1004 const double* udata = u.data.db[idx]; 1005 for( k = 0; k < ovcount; k++ ) 1006 { 1007 double t = udata[k]*w[k*2] + w[k*2+1] - x[l_count-1][k]; 1008 grad1->data.db[k] = t*sweight; 1009 E += t*t; 1010 } 1011 } 1012 E *= sweight; 1013 1014 // backward pass, update weights 1015 for( i = l_count-1; i > 0; i-- ) 1016 { 1017 int n1 = layer_sizes->data.i[i-1], n2 = layer_sizes->data.i[i]; 1018 cvInitMatHeader( &_df, 1, n2, CV_64F, df[i] ); 1019 cvMul( grad1, &_df, grad1 ); 1020 cvInitMatHeader( &_w, n1+1, n2, CV_64F, weights[i] ); 1021 cvInitMatHeader( &_dw, n1+1, n2, CV_64F, dw->data.db + (weights[i] - weights[0]) ); 1022 cvInitMatHeader( x1, n1+1, 1, CV_64F, x[i-1] ); 1023 x[i-1][n1] = 1.; 1024 cvGEMM( x1, grad1, params.bp_dw_scale, &_dw, params.bp_moment_scale, &_dw ); 1025 cvAdd( &_w, &_dw, &_w ); 1026 if( i > 1 ) 1027 { 1028 grad2->cols = n1; 1029 _w.rows = n1; 1030 cvGEMM( grad1, &_w, 1, 0, 0, grad2, CV_GEMM_B_T ); 1031 } 1032 CV_SWAP( grad1, grad2, temp ); 1033 } 1034 } 1035 1036 iter /= count; 1037 1038 __END__; 1039 1040 cvReleaseMat( &dw ); 1041 cvReleaseMat( &buf ); 1042 cvReleaseMat( &_idx ); 1043 cvFree( &x ); 1044 1045 return iter; 1046} 1047 1048 1049int CvANN_MLP::train_rprop( CvVectors x0, CvVectors u, const double* sw ) 1050{ 1051 const int max_buf_sz = 1 << 16; 1052 CvMat* dw = 0; 1053 CvMat* dEdw = 0; 1054 CvMat* prev_dEdw_sign = 0; 1055 CvMat* buf = 0; 1056 double **x = 0, **df = 0; 1057 int iter = -1, count = x0.count; 1058 1059 CV_FUNCNAME( "CvANN_MLP::train" ); 1060 1061 __BEGIN__; 1062 1063 int i, ivcount, ovcount, l_count, total = 0, max_iter, buf_sz, dcount0, dcount=0; 1064 double *buf_ptr; 1065 double prev_E = DBL_MAX*0.5, epsilon; 1066 double dw_plus, dw_minus, dw_min, dw_max; 1067 double inv_count; 1068 1069 max_iter = params.term_crit.max_iter; 1070 epsilon = params.term_crit.epsilon; 1071 dw_plus = params.rp_dw_plus; 1072 dw_minus = params.rp_dw_minus; 1073 dw_min = params.rp_dw_min; 1074 dw_max = params.rp_dw_max; 1075 1076 l_count = layer_sizes->cols; 1077 ivcount = layer_sizes->data.i[0]; 1078 ovcount = layer_sizes->data.i[l_count-1]; 1079 1080 // allocate buffers 1081 for( i = 0; i < l_count; i++ ) 1082 total += layer_sizes->data.i[i]; 1083 1084 CV_CALL( dw = cvCreateMat( wbuf->rows, wbuf->cols, wbuf->type )); 1085 cvSet( dw, cvScalarAll(params.rp_dw0) ); 1086 CV_CALL( dEdw = cvCreateMat( wbuf->rows, wbuf->cols, wbuf->type )); 1087 cvZero( dEdw ); 1088 CV_CALL( prev_dEdw_sign = cvCreateMat( wbuf->rows, wbuf->cols, CV_8SC1 )); 1089 cvZero( prev_dEdw_sign ); 1090 1091 inv_count = 1./count; 1092 dcount0 = max_buf_sz/(2*total); 1093 dcount0 = MAX( dcount0, 1 ); 1094 dcount0 = MIN( dcount0, count ); 1095 buf_sz = dcount0*(total + max_count)*2; 1096 1097 CV_CALL( buf = cvCreateMat( 1, buf_sz, CV_64F )); 1098 1099 CV_CALL( x = (double**)cvAlloc( total*2*sizeof(x[0]) )); 1100 df = x + total; 1101 buf_ptr = buf->data.db; 1102 1103 for( i = 0; i < l_count; i++ ) 1104 { 1105 x[i] = buf_ptr; 1106 df[i] = x[i] + layer_sizes->data.i[i]*dcount0; 1107 buf_ptr += (df[i] - x[i])*2; 1108 } 1109 1110 // run rprop loop 1111 /* 1112 y_i(t) = w_i(t)*x_{i-1}(t) 1113 x_i(t) = f(y_i(t)) 1114 E = sum_over_all_samples(1/2*||u - x_N||^2) 1115 grad_N = (x_N - u)*f'(y_i) 1116 1117 MIN(dw_i{jk}(t)*dw_plus, dw_max), if dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) > 0 1118 dw_i{jk}(t) = MAX(dw_i{jk}(t)*dw_minus, dw_min), if dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) < 0 1119 dw_i{jk}(t-1) else 1120 1121 if (dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) < 0) 1122 dE/dw_i{jk}(t)<-0 1123 else 1124 w_i{jk}(t+1) = w_i{jk}(t) + dw_i{jk}(t) 1125 grad_{i-1}(t) = w_i^t(t)*grad_i(t) 1126 */ 1127 for( iter = 0; iter < max_iter; iter++ ) 1128 { 1129 int n1, n2, si, j, k; 1130 double* w; 1131 CvMat _w, _dEdw, hdr1, hdr2, ghdr1, ghdr2, _df; 1132 CvMat *x1, *x2, *grad1, *grad2, *temp; 1133 double E = 0; 1134 1135 // first, iterate through all the samples and compute dEdw 1136 for( si = 0; si < count; si += dcount ) 1137 { 1138 dcount = MIN( count - si, dcount0 ); 1139 w = weights[0]; 1140 grad1 = &ghdr1; grad2 = &ghdr2; 1141 x1 = &hdr1; x2 = &hdr2; 1142 1143 // grab and preprocess input data 1144 if( x0.type == CV_32F ) 1145 for( i = 0; i < dcount; i++ ) 1146 { 1147 const float* x0data = x0.data.fl[si+i]; 1148 double* xdata = x[0]+i*ivcount; 1149 for( j = 0; j < ivcount; j++ ) 1150 xdata[j] = x0data[j]*w[j*2] + w[j*2+1]; 1151 } 1152 else 1153 for( i = 0; i < dcount; i++ ) 1154 { 1155 const double* x0data = x0.data.db[si+i]; 1156 double* xdata = x[0]+i*ivcount; 1157 for( j = 0; j < ivcount; j++ ) 1158 xdata[j] = x0data[j]*w[j*2] + w[j*2+1]; 1159 } 1160 1161 cvInitMatHeader( x1, dcount, ivcount, CV_64F, x[0] ); 1162 1163 // forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i]) 1164 for( i = 1; i < l_count; i++ ) 1165 { 1166 cvInitMatHeader( x2, dcount, layer_sizes->data.i[i], CV_64F, x[i] ); 1167 cvInitMatHeader( &_w, x1->cols, x2->cols, CV_64F, weights[i] ); 1168 cvGEMM( x1, &_w, 1, 0, 0, x2 ); 1169 _df = *x2; 1170 _df.data.db = df[i]; 1171 calc_activ_func_deriv( x2, &_df, _w.data.db + _w.rows*_w.cols ); 1172 CV_SWAP( x1, x2, temp ); 1173 } 1174 1175 cvInitMatHeader( grad1, dcount, ovcount, CV_64F, buf_ptr ); 1176 w = weights[l_count+1]; 1177 grad2->data.db = buf_ptr + max_count*dcount; 1178 1179 // calculate error 1180 if( u.type == CV_32F ) 1181 for( i = 0; i < dcount; i++ ) 1182 { 1183 const float* udata = u.data.fl[si+i]; 1184 const double* xdata = x[l_count-1] + i*ovcount; 1185 double* gdata = grad1->data.db + i*ovcount; 1186 double sweight = sw ? sw[si+i] : inv_count, E1 = 0; 1187 1188 for( j = 0; j < ovcount; j++ ) 1189 { 1190 double t = udata[j]*w[j*2] + w[j*2+1] - xdata[j]; 1191 gdata[j] = t*sweight; 1192 E1 += t*t; 1193 } 1194 E += sweight*E1; 1195 } 1196 else 1197 for( i = 0; i < dcount; i++ ) 1198 { 1199 const double* udata = u.data.db[si+i]; 1200 const double* xdata = x[l_count-1] + i*ovcount; 1201 double* gdata = grad1->data.db + i*ovcount; 1202 double sweight = sw ? sw[si+i] : inv_count, E1 = 0; 1203 1204 for( j = 0; j < ovcount; j++ ) 1205 { 1206 double t = udata[j]*w[j*2] + w[j*2+1] - xdata[j]; 1207 gdata[j] = t*sweight; 1208 E1 += t*t; 1209 } 1210 E += sweight*E1; 1211 } 1212 1213 // backward pass, update dEdw 1214 for( i = l_count-1; i > 0; i-- ) 1215 { 1216 n1 = layer_sizes->data.i[i-1]; n2 = layer_sizes->data.i[i]; 1217 cvInitMatHeader( &_df, dcount, n2, CV_64F, df[i] ); 1218 cvMul( grad1, &_df, grad1 ); 1219 cvInitMatHeader( &_dEdw, n1, n2, CV_64F, dEdw->data.db+(weights[i]-weights[0]) ); 1220 cvInitMatHeader( x1, dcount, n1, CV_64F, x[i-1] ); 1221 cvGEMM( x1, grad1, 1, &_dEdw, 1, &_dEdw, CV_GEMM_A_T ); 1222 // update bias part of dEdw 1223 for( k = 0; k < dcount; k++ ) 1224 { 1225 double* dst = _dEdw.data.db + n1*n2; 1226 const double* src = grad1->data.db + k*n2; 1227 for( j = 0; j < n2; j++ ) 1228 dst[j] += src[j]; 1229 } 1230 cvInitMatHeader( &_w, n1, n2, CV_64F, weights[i] ); 1231 cvInitMatHeader( grad2, dcount, n1, CV_64F, grad2->data.db ); 1232 1233 if( i > 1 ) 1234 cvGEMM( grad1, &_w, 1, 0, 0, grad2, CV_GEMM_B_T ); 1235 CV_SWAP( grad1, grad2, temp ); 1236 } 1237 } 1238 1239 // now update weights 1240 for( i = 1; i < l_count; i++ ) 1241 { 1242 n1 = layer_sizes->data.i[i-1]; n2 = layer_sizes->data.i[i]; 1243 for( k = 0; k <= n1; k++ ) 1244 { 1245 double* wk = weights[i]+k*n2; 1246 size_t delta = wk - weights[0]; 1247 double* dwk = dw->data.db + delta; 1248 double* dEdwk = dEdw->data.db + delta; 1249 char* prevEk = (char*)(prev_dEdw_sign->data.ptr + delta); 1250 1251 for( j = 0; j < n2; j++ ) 1252 { 1253 double Eval = dEdwk[j]; 1254 double dval = dwk[j]; 1255 double wval = wk[j]; 1256 int s = CV_SIGN(Eval); 1257 int ss = prevEk[j]*s; 1258 if( ss > 0 ) 1259 { 1260 dval *= dw_plus; 1261 dval = MIN( dval, dw_max ); 1262 dwk[j] = dval; 1263 wk[j] = wval + dval*s; 1264 } 1265 else if( ss < 0 ) 1266 { 1267 dval *= dw_minus; 1268 dval = MAX( dval, dw_min ); 1269 prevEk[j] = 0; 1270 dwk[j] = dval; 1271 wk[j] = wval + dval*s; 1272 } 1273 else 1274 { 1275 prevEk[j] = (char)s; 1276 wk[j] = wval + dval*s; 1277 } 1278 dEdwk[j] = 0.; 1279 } 1280 } 1281 } 1282 1283 if( fabs(prev_E - E) < epsilon ) 1284 break; 1285 prev_E = E; 1286 E = 0; 1287 } 1288 1289 __END__; 1290 1291 cvReleaseMat( &dw ); 1292 cvReleaseMat( &dEdw ); 1293 cvReleaseMat( &prev_dEdw_sign ); 1294 cvReleaseMat( &buf ); 1295 cvFree( &x ); 1296 1297 return iter; 1298} 1299 1300 1301void CvANN_MLP::write_params( CvFileStorage* fs ) 1302{ 1303 //CV_FUNCNAME( "CvANN_MLP::write_params" ); 1304 1305 __BEGIN__; 1306 1307 const char* activ_func_name = activ_func == IDENTITY ? "IDENTITY" : 1308 activ_func == SIGMOID_SYM ? "SIGMOID_SYM" : 1309 activ_func == GAUSSIAN ? "GAUSSIAN" : 0; 1310 1311 if( activ_func_name ) 1312 cvWriteString( fs, "activation_function", activ_func_name ); 1313 else 1314 cvWriteInt( fs, "activation_function", activ_func ); 1315 1316 if( activ_func != IDENTITY ) 1317 { 1318 cvWriteReal( fs, "f_param1", f_param1 ); 1319 cvWriteReal( fs, "f_param2", f_param2 ); 1320 } 1321 1322 cvWriteReal( fs, "min_val", min_val ); 1323 cvWriteReal( fs, "max_val", max_val ); 1324 cvWriteReal( fs, "min_val1", min_val1 ); 1325 cvWriteReal( fs, "max_val1", max_val1 ); 1326 1327 cvStartWriteStruct( fs, "training_params", CV_NODE_MAP ); 1328 if( params.train_method == CvANN_MLP_TrainParams::BACKPROP ) 1329 { 1330 cvWriteString( fs, "train_method", "BACKPROP" ); 1331 cvWriteReal( fs, "dw_scale", params.bp_dw_scale ); 1332 cvWriteReal( fs, "moment_scale", params.bp_moment_scale ); 1333 } 1334 else if( params.train_method == CvANN_MLP_TrainParams::RPROP ) 1335 { 1336 cvWriteString( fs, "train_method", "RPROP" ); 1337 cvWriteReal( fs, "dw0", params.rp_dw0 ); 1338 cvWriteReal( fs, "dw_plus", params.rp_dw_plus ); 1339 cvWriteReal( fs, "dw_minus", params.rp_dw_minus ); 1340 cvWriteReal( fs, "dw_min", params.rp_dw_min ); 1341 cvWriteReal( fs, "dw_max", params.rp_dw_max ); 1342 } 1343 1344 cvStartWriteStruct( fs, "term_criteria", CV_NODE_MAP + CV_NODE_FLOW ); 1345 if( params.term_crit.type & CV_TERMCRIT_EPS ) 1346 cvWriteReal( fs, "epsilon", params.term_crit.epsilon ); 1347 if( params.term_crit.type & CV_TERMCRIT_ITER ) 1348 cvWriteInt( fs, "iterations", params.term_crit.max_iter ); 1349 cvEndWriteStruct( fs ); 1350 1351 cvEndWriteStruct( fs ); 1352 1353 __END__; 1354} 1355 1356 1357void CvANN_MLP::write( CvFileStorage* fs, const char* name ) 1358{ 1359 CV_FUNCNAME( "CvANN_MLP::write" ); 1360 1361 __BEGIN__; 1362 1363 int i, l_count = layer_sizes->cols; 1364 1365 if( !layer_sizes ) 1366 CV_ERROR( CV_StsError, "The network has not been initialized" ); 1367 1368 cvStartWriteStruct( fs, name, CV_NODE_MAP, CV_TYPE_NAME_ML_ANN_MLP ); 1369 1370 cvWrite( fs, "layer_sizes", layer_sizes ); 1371 1372 write_params( fs ); 1373 1374 cvStartWriteStruct( fs, "input_scale", CV_NODE_SEQ + CV_NODE_FLOW ); 1375 cvWriteRawData( fs, weights[0], layer_sizes->data.i[0]*2, "d" ); 1376 cvEndWriteStruct( fs ); 1377 1378 cvStartWriteStruct( fs, "output_scale", CV_NODE_SEQ + CV_NODE_FLOW ); 1379 cvWriteRawData( fs, weights[l_count], layer_sizes->data.i[l_count-1]*2, "d" ); 1380 cvEndWriteStruct( fs ); 1381 1382 cvStartWriteStruct( fs, "inv_output_scale", CV_NODE_SEQ + CV_NODE_FLOW ); 1383 cvWriteRawData( fs, weights[l_count+1], layer_sizes->data.i[l_count-1]*2, "d" ); 1384 cvEndWriteStruct( fs ); 1385 1386 cvStartWriteStruct( fs, "weights", CV_NODE_SEQ ); 1387 for( i = 1; i < l_count; i++ ) 1388 { 1389 cvStartWriteStruct( fs, 0, CV_NODE_SEQ + CV_NODE_FLOW ); 1390 cvWriteRawData( fs, weights[i], (layer_sizes->data.i[i-1]+1)*layer_sizes->data.i[i], "d" ); 1391 cvEndWriteStruct( fs ); 1392 } 1393 1394 cvEndWriteStruct( fs ); 1395 1396 __END__; 1397} 1398 1399 1400void CvANN_MLP::read_params( CvFileStorage* fs, CvFileNode* node ) 1401{ 1402 //CV_FUNCNAME( "CvANN_MLP::read_params" ); 1403 1404 __BEGIN__; 1405 1406 const char* activ_func_name = cvReadStringByName( fs, node, "activation_function", 0 ); 1407 CvFileNode* tparams_node; 1408 1409 if( activ_func_name ) 1410 activ_func = strcmp( activ_func_name, "SIGMOID_SYM" ) == 0 ? SIGMOID_SYM : 1411 strcmp( activ_func_name, "IDENTITY" ) == 0 ? IDENTITY : 1412 strcmp( activ_func_name, "GAUSSIAN" ) == 0 ? GAUSSIAN : 0; 1413 else 1414 activ_func = cvReadIntByName( fs, node, "activation_function" ); 1415 1416 f_param1 = cvReadRealByName( fs, node, "f_param1", 0 ); 1417 f_param2 = cvReadRealByName( fs, node, "f_param2", 0 ); 1418 1419 set_activ_func( activ_func, f_param1, f_param2 ); 1420 1421 min_val = cvReadRealByName( fs, node, "min_val", 0. ); 1422 max_val = cvReadRealByName( fs, node, "max_val", 1. ); 1423 min_val1 = cvReadRealByName( fs, node, "min_val1", 0. ); 1424 max_val1 = cvReadRealByName( fs, node, "max_val1", 1. ); 1425 1426 tparams_node = cvGetFileNodeByName( fs, node, "training_params" ); 1427 params = CvANN_MLP_TrainParams(); 1428 1429 if( tparams_node ) 1430 { 1431 const char* tmethod_name = cvReadStringByName( fs, tparams_node, "train_method", "" ); 1432 CvFileNode* tcrit_node; 1433 1434 if( strcmp( tmethod_name, "BACKPROP" ) == 0 ) 1435 { 1436 params.train_method = CvANN_MLP_TrainParams::BACKPROP; 1437 params.bp_dw_scale = cvReadRealByName( fs, tparams_node, "dw_scale", 0 ); 1438 params.bp_moment_scale = cvReadRealByName( fs, tparams_node, "moment_scale", 0 ); 1439 } 1440 else if( strcmp( tmethod_name, "RPROP" ) == 0 ) 1441 { 1442 params.train_method = CvANN_MLP_TrainParams::RPROP; 1443 params.rp_dw0 = cvReadRealByName( fs, tparams_node, "dw0", 0 ); 1444 params.rp_dw_plus = cvReadRealByName( fs, tparams_node, "dw_plus", 0 ); 1445 params.rp_dw_minus = cvReadRealByName( fs, tparams_node, "dw_minus", 0 ); 1446 params.rp_dw_min = cvReadRealByName( fs, tparams_node, "dw_min", 0 ); 1447 params.rp_dw_max = cvReadRealByName( fs, tparams_node, "dw_max", 0 ); 1448 } 1449 1450 tcrit_node = cvGetFileNodeByName( fs, tparams_node, "term_criteria" ); 1451 if( tcrit_node ) 1452 { 1453 params.term_crit.epsilon = cvReadRealByName( fs, tcrit_node, "epsilon", -1 ); 1454 params.term_crit.max_iter = cvReadIntByName( fs, tcrit_node, "iterations", -1 ); 1455 params.term_crit.type = (params.term_crit.epsilon >= 0 ? CV_TERMCRIT_EPS : 0) + 1456 (params.term_crit.max_iter >= 0 ? CV_TERMCRIT_ITER : 0); 1457 } 1458 } 1459 1460 __END__; 1461} 1462 1463 1464void CvANN_MLP::read( CvFileStorage* fs, CvFileNode* node ) 1465{ 1466 CvMat* _layer_sizes = 0; 1467 1468 CV_FUNCNAME( "CvANN_MLP::read" ); 1469 1470 __BEGIN__; 1471 1472 CvFileNode* w; 1473 CvSeqReader reader; 1474 int i, l_count; 1475 1476 _layer_sizes = (CvMat*)cvReadByName( fs, node, "layer_sizes" ); 1477 CV_CALL( create( _layer_sizes, SIGMOID_SYM, 0, 0 )); 1478 l_count = layer_sizes->cols; 1479 1480 CV_CALL( read_params( fs, node )); 1481 1482 w = cvGetFileNodeByName( fs, node, "input_scale" ); 1483 if( !w || CV_NODE_TYPE(w->tag) != CV_NODE_SEQ || 1484 w->data.seq->total != layer_sizes->data.i[0]*2 ) 1485 CV_ERROR( CV_StsParseError, "input_scale tag is not found or is invalid" ); 1486 1487 CV_CALL( cvReadRawData( fs, w, weights[0], "d" )); 1488 1489 w = cvGetFileNodeByName( fs, node, "output_scale" ); 1490 if( !w || CV_NODE_TYPE(w->tag) != CV_NODE_SEQ || 1491 w->data.seq->total != layer_sizes->data.i[l_count-1]*2 ) 1492 CV_ERROR( CV_StsParseError, "output_scale tag is not found or is invalid" ); 1493 1494 CV_CALL( cvReadRawData( fs, w, weights[l_count], "d" )); 1495 1496 w = cvGetFileNodeByName( fs, node, "inv_output_scale" ); 1497 if( !w || CV_NODE_TYPE(w->tag) != CV_NODE_SEQ || 1498 w->data.seq->total != layer_sizes->data.i[l_count-1]*2 ) 1499 CV_ERROR( CV_StsParseError, "inv_output_scale tag is not found or is invalid" ); 1500 1501 CV_CALL( cvReadRawData( fs, w, weights[l_count+1], "d" )); 1502 1503 w = cvGetFileNodeByName( fs, node, "weights" ); 1504 if( !w || CV_NODE_TYPE(w->tag) != CV_NODE_SEQ || 1505 w->data.seq->total != l_count - 1 ) 1506 CV_ERROR( CV_StsParseError, "weights tag is not found or is invalid" ); 1507 1508 cvStartReadSeq( w->data.seq, &reader ); 1509 1510 for( i = 1; i < l_count; i++ ) 1511 { 1512 w = (CvFileNode*)reader.ptr; 1513 CV_CALL( cvReadRawData( fs, w, weights[i], "d" )); 1514 CV_NEXT_SEQ_ELEM( reader.seq->elem_size, reader ); 1515 } 1516 1517 __END__; 1518} 1519 1520/* End of file. */ 1521