1/*M///////////////////////////////////////////////////////////////////////////////////////
2//
3//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4//
5//  By downloading, copying, installing or using the software you agree to this license.
6//  If you do not agree to this license, do not download, install,
7//  copy or use the software.
8//
9//
10//                        Intel License Agreement
11//
12// Copyright (C) 2000, Intel Corporation, all rights reserved.
13// Third party copyrights are property of their respective owners.
14//
15// Redistribution and use in source and binary forms, with or without modification,
16// are permitted provided that the following conditions are met:
17//
18//   * Redistribution's of source code must retain the above copyright notice,
19//     this list of conditions and the following disclaimer.
20//
21//   * Redistribution's in binary form must reproduce the above copyright notice,
22//     this list of conditions and the following disclaimer in the documentation
23//     and/or other materials provided with the distribution.
24//
25//   * The name of Intel Corporation may not be used to endorse or promote products
26//     derived from this software without specific prior written permission.
27//
28// This software is provided by the copyright holders and contributors "as is" and
29// any express or implied warranties, including, but not limited to, the implied
30// warranties of merchantability and fitness for a particular purpose are disclaimed.
31// In no event shall the Intel Corporation or contributors be liable for any direct,
32// indirect, incidental, special, exemplary, or consequential damages
33// (including, but not limited to, procurement of substitute goods or services;
34// loss of use, data, or profits; or business interruption) however caused
35// and on any theory of liability, whether in contract, strict liability,
36// or tort (including negligence or otherwise) arising in any way out of
37// the use of this software, even if advised of the possibility of such damage.
38//
39//M*/
40
41#include "_ml.h"
42
43CvANN_MLP_TrainParams::CvANN_MLP_TrainParams()
44{
45    term_crit = cvTermCriteria( CV_TERMCRIT_ITER + CV_TERMCRIT_EPS, 1000, 0.01 );
46    train_method = RPROP;
47    bp_dw_scale = bp_moment_scale = 0.1;
48    rp_dw0 = 0.1; rp_dw_plus = 1.2; rp_dw_minus = 0.5;
49    rp_dw_min = FLT_EPSILON; rp_dw_max = 50.;
50}
51
52
53CvANN_MLP_TrainParams::CvANN_MLP_TrainParams( CvTermCriteria _term_crit,
54                                              int _train_method,
55                                              double _param1, double _param2 )
56{
57    term_crit = _term_crit;
58    train_method = _train_method;
59    bp_dw_scale = bp_moment_scale = 0.1;
60    rp_dw0 = 1.; rp_dw_plus = 1.2; rp_dw_minus = 0.5;
61    rp_dw_min = FLT_EPSILON; rp_dw_max = 50.;
62
63    if( train_method == RPROP )
64    {
65        rp_dw0 = _param1;
66        if( rp_dw0 < FLT_EPSILON )
67            rp_dw0 = 1.;
68        rp_dw_min = _param2;
69        rp_dw_min = MAX( rp_dw_min, 0 );
70    }
71    else if( train_method == BACKPROP )
72    {
73        bp_dw_scale = _param1;
74        if( bp_dw_scale <= 0 )
75            bp_dw_scale = 0.1;
76        bp_dw_scale = MAX( bp_dw_scale, 1e-3 );
77        bp_dw_scale = MIN( bp_dw_scale, 1 );
78        bp_moment_scale = _param2;
79        if( bp_moment_scale < 0 )
80            bp_moment_scale = 0.1;
81        bp_moment_scale = MIN( bp_moment_scale, 1 );
82    }
83    else
84        train_method = RPROP;
85}
86
87
88CvANN_MLP_TrainParams::~CvANN_MLP_TrainParams()
89{
90}
91
92
93CvANN_MLP::CvANN_MLP()
94{
95    layer_sizes = wbuf = 0;
96    min_val = max_val = min_val1 = max_val1 = 0.;
97    weights = 0;
98    rng = cvRNG(-1);
99    default_model_name = "my_nn";
100    clear();
101}
102
103
104CvANN_MLP::CvANN_MLP( const CvMat* _layer_sizes,
105                      int _activ_func,
106                      double _f_param1, double _f_param2 )
107{
108    layer_sizes = wbuf = 0;
109    min_val = max_val = min_val1 = max_val1 = 0.;
110    weights = 0;
111    rng = cvRNG(-1);
112    default_model_name = "my_nn";
113    create( _layer_sizes, _activ_func, _f_param1, _f_param2 );
114}
115
116
117CvANN_MLP::~CvANN_MLP()
118{
119    clear();
120}
121
122
123void CvANN_MLP::clear()
124{
125    cvReleaseMat( &layer_sizes );
126    cvReleaseMat( &wbuf );
127    cvFree( &weights );
128    activ_func = SIGMOID_SYM;
129    f_param1 = f_param2 = 1;
130    max_buf_sz = 1 << 12;
131}
132
133
134void CvANN_MLP::set_activ_func( int _activ_func, double _f_param1, double _f_param2 )
135{
136    CV_FUNCNAME( "CvANN_MLP::set_activ_func" );
137
138    __BEGIN__;
139
140    if( _activ_func < 0 || _activ_func > GAUSSIAN )
141        CV_ERROR( CV_StsOutOfRange, "Unknown activation function" );
142
143    activ_func = _activ_func;
144
145    switch( activ_func )
146    {
147    case SIGMOID_SYM:
148        max_val = 0.95; min_val = -max_val;
149        max_val1 = 0.98; min_val1 = -max_val1;
150        if( fabs(_f_param1) < FLT_EPSILON )
151            _f_param1 = 2./3;
152        if( fabs(_f_param2) < FLT_EPSILON )
153            _f_param2 = 1.7159;
154        break;
155    case GAUSSIAN:
156        max_val = 1.; min_val = 0.05;
157        max_val1 = 1.; min_val1 = 0.02;
158        if( fabs(_f_param1) < FLT_EPSILON )
159            _f_param1 = 1.;
160        if( fabs(_f_param2) < FLT_EPSILON )
161            _f_param2 = 1.;
162        break;
163    default:
164        min_val = max_val = min_val1 = max_val1 = 0.;
165        _f_param1 = 1.;
166        _f_param2 = 0.;
167    }
168
169    f_param1 = _f_param1;
170    f_param2 = _f_param2;
171
172    __END__;
173}
174
175
176void CvANN_MLP::init_weights()
177{
178    int i, j, k;
179
180    for( i = 1; i < layer_sizes->cols; i++ )
181    {
182        int n1 = layer_sizes->data.i[i-1];
183        int n2 = layer_sizes->data.i[i];
184        double val = 0, G = n2 > 2 ? 0.7*pow((double)n1,1./(n2-1)) : 1.;
185        double* w = weights[i];
186
187        // initialize weights using Nguyen-Widrow algorithm
188        for( j = 0; j < n2; j++ )
189        {
190            double s = 0;
191            for( k = 0; k <= n1; k++ )
192            {
193                val = cvRandReal(&rng)*2-1.;
194                w[k*n2 + j] = val;
195                s += val;
196            }
197
198            if( i < layer_sizes->cols - 1 )
199            {
200                s = 1./(s - val);
201                for( k = 0; k <= n1; k++ )
202                    w[k*n2 + j] *= s;
203                w[n1*n2 + j] *= G*(-1+j*2./n2);
204            }
205        }
206    }
207}
208
209
210void CvANN_MLP::create( const CvMat* _layer_sizes, int _activ_func,
211                        double _f_param1, double _f_param2 )
212{
213    CV_FUNCNAME( "CvANN_MLP::create" );
214
215    __BEGIN__;
216
217    int i, l_step, l_count, buf_sz = 0;
218    int *l_src, *l_dst;
219
220    clear();
221
222    if( !CV_IS_MAT(_layer_sizes) ||
223        _layer_sizes->cols != 1 && _layer_sizes->rows != 1 ||
224        CV_MAT_TYPE(_layer_sizes->type) != CV_32SC1 )
225        CV_ERROR( CV_StsBadArg,
226        "The array of layer neuron counters must be an integer vector" );
227
228    CV_CALL( set_activ_func( _activ_func, _f_param1, _f_param2 ));
229
230    l_count = _layer_sizes->rows + _layer_sizes->cols - 1;
231    l_src = _layer_sizes->data.i;
232    l_step = CV_IS_MAT_CONT(_layer_sizes->type) ? 1 :
233                _layer_sizes->step / sizeof(l_src[0]);
234    CV_CALL( layer_sizes = cvCreateMat( 1, l_count, CV_32SC1 ));
235    l_dst = layer_sizes->data.i;
236
237    max_count = 0;
238    for( i = 0; i < l_count; i++ )
239    {
240        int n = l_src[i*l_step];
241        if( n < 1 + (0 < i && i < l_count-1))
242            CV_ERROR( CV_StsOutOfRange,
243            "there should be at least one input and one output "
244            "and every hidden layer must have more than 1 neuron" );
245        l_dst[i] = n;
246        max_count = MAX( max_count, n );
247        if( i > 0 )
248            buf_sz += (l_dst[i-1]+1)*n;
249    }
250
251    buf_sz += (l_dst[0] + l_dst[l_count-1]*2)*2;
252
253    CV_CALL( wbuf = cvCreateMat( 1, buf_sz, CV_64F ));
254    CV_CALL( weights = (double**)cvAlloc( (l_count+1)*sizeof(weights[0]) ));
255
256    weights[0] = wbuf->data.db;
257    weights[1] = weights[0] + l_dst[0]*2;
258    for( i = 1; i < l_count; i++ )
259        weights[i+1] = weights[i] + (l_dst[i-1] + 1)*l_dst[i];
260    weights[l_count+1] = weights[l_count] + l_dst[l_count-1]*2;
261
262    __END__;
263}
264
265
266float CvANN_MLP::predict( const CvMat* _inputs, CvMat* _outputs ) const
267{
268    CV_FUNCNAME( "CvANN_MLP::predict" );
269
270    __BEGIN__;
271
272    double* buf;
273    int i, j, n, dn = 0, l_count, dn0, buf_sz, min_buf_sz;
274
275    if( !layer_sizes )
276        CV_ERROR( CV_StsError, "The network has not been initialized" );
277
278    if( !CV_IS_MAT(_inputs) || !CV_IS_MAT(_outputs) ||
279        !CV_ARE_TYPES_EQ(_inputs,_outputs) ||
280        CV_MAT_TYPE(_inputs->type) != CV_32FC1 &&
281        CV_MAT_TYPE(_inputs->type) != CV_64FC1 ||
282        _inputs->rows != _outputs->rows )
283        CV_ERROR( CV_StsBadArg, "Both input and output must be floating-point matrices "
284                                "of the same type and have the same number of rows" );
285
286    if( _inputs->cols != layer_sizes->data.i[0] )
287        CV_ERROR( CV_StsBadSize, "input matrix must have the same number of columns as "
288                                 "the number of neurons in the input layer" );
289
290    if( _outputs->cols != layer_sizes->data.i[layer_sizes->cols - 1] )
291        CV_ERROR( CV_StsBadSize, "output matrix must have the same number of columns as "
292                                 "the number of neurons in the output layer" );
293    n = dn0 = _inputs->rows;
294    min_buf_sz = 2*max_count;
295    buf_sz = n*min_buf_sz;
296
297    if( buf_sz > max_buf_sz )
298    {
299        dn0 = max_buf_sz/min_buf_sz;
300        dn0 = MAX( dn0, 1 );
301        buf_sz = dn0*min_buf_sz;
302    }
303
304    buf = (double*)cvStackAlloc( buf_sz*sizeof(buf[0]) );
305    l_count = layer_sizes->cols;
306
307    for( i = 0; i < n; i += dn )
308    {
309        CvMat hdr[2], _w, *layer_in = &hdr[0], *layer_out = &hdr[1], *temp;
310        dn = MIN( dn0, n - i );
311
312        cvGetRows( _inputs, layer_in, i, i + dn );
313        cvInitMatHeader( layer_out, dn, layer_in->cols, CV_64F, buf );
314
315        scale_input( layer_in, layer_out );
316        CV_SWAP( layer_in, layer_out, temp );
317
318        for( j = 1; j < l_count; j++ )
319        {
320            double* data = buf + (j&1 ? max_count*dn0 : 0);
321            int cols = layer_sizes->data.i[j];
322
323            cvInitMatHeader( layer_out, dn, cols, CV_64F, data );
324            cvInitMatHeader( &_w, layer_in->cols, layer_out->cols, CV_64F, weights[j] );
325            cvGEMM( layer_in, &_w, 1, 0, 0, layer_out );
326            calc_activ_func( layer_out, _w.data.db + _w.rows*_w.cols );
327
328            CV_SWAP( layer_in, layer_out, temp );
329        }
330
331        cvGetRows( _outputs, layer_out, i, i + dn );
332        scale_output( layer_in, layer_out );
333    }
334
335    __END__;
336
337    return 0.f;
338}
339
340
341void CvANN_MLP::scale_input( const CvMat* _src, CvMat* _dst ) const
342{
343    int i, j, cols = _src->cols;
344    double* dst = _dst->data.db;
345    const double* w = weights[0];
346    int step = _src->step;
347
348    if( CV_MAT_TYPE( _src->type ) == CV_32F )
349    {
350        const float* src = _src->data.fl;
351        step /= sizeof(src[0]);
352
353        for( i = 0; i < _src->rows; i++, src += step, dst += cols )
354            for( j = 0; j < cols; j++ )
355                dst[j] = src[j]*w[j*2] + w[j*2+1];
356    }
357    else
358    {
359        const double* src = _src->data.db;
360        step /= sizeof(src[0]);
361
362        for( i = 0; i < _src->rows; i++, src += step, dst += cols )
363            for( j = 0; j < cols; j++ )
364                dst[j] = src[j]*w[j*2] + w[j*2+1];
365    }
366}
367
368
369void CvANN_MLP::scale_output( const CvMat* _src, CvMat* _dst ) const
370{
371    int i, j, cols = _src->cols;
372    const double* src = _src->data.db;
373    const double* w = weights[layer_sizes->cols];
374    int step = _dst->step;
375
376    if( CV_MAT_TYPE( _dst->type ) == CV_32F )
377    {
378        float* dst = _dst->data.fl;
379        step /= sizeof(dst[0]);
380
381        for( i = 0; i < _src->rows; i++, src += cols, dst += step )
382            for( j = 0; j < cols; j++ )
383                dst[j] = (float)(src[j]*w[j*2] + w[j*2+1]);
384    }
385    else
386    {
387        double* dst = _dst->data.db;
388        step /= sizeof(dst[0]);
389
390        for( i = 0; i < _src->rows; i++, src += cols, dst += step )
391            for( j = 0; j < cols; j++ )
392                dst[j] = src[j]*w[j*2] + w[j*2+1];
393    }
394}
395
396
397void CvANN_MLP::calc_activ_func( CvMat* sums, const double* bias ) const
398{
399    int i, j, n = sums->rows, cols = sums->cols;
400    double* data = sums->data.db;
401    double scale = 0, scale2 = f_param2;
402
403    switch( activ_func )
404    {
405    case IDENTITY:
406        scale = 1.;
407        break;
408    case SIGMOID_SYM:
409        scale = -f_param1;
410        break;
411    case GAUSSIAN:
412        scale = -f_param1*f_param1;
413        break;
414    default:
415        ;
416    }
417
418    assert( CV_IS_MAT_CONT(sums->type) );
419
420    if( activ_func != GAUSSIAN )
421    {
422        for( i = 0; i < n; i++, data += cols )
423            for( j = 0; j < cols; j++ )
424                data[j] = (data[j] + bias[j])*scale;
425
426        if( activ_func == IDENTITY )
427            return;
428    }
429    else
430    {
431        for( i = 0; i < n; i++, data += cols )
432            for( j = 0; j < cols; j++ )
433            {
434                double t = data[j] + bias[j];
435                data[j] = t*t*scale;
436            }
437    }
438
439    cvExp( sums, sums );
440
441    n *= cols;
442    data -= n;
443
444    switch( activ_func )
445    {
446    case SIGMOID_SYM:
447        for( i = 0; i <= n - 4; i += 4 )
448        {
449            double x0 = 1.+data[i], x1 = 1.+data[i+1], x2 = 1.+data[i+2], x3 = 1.+data[i+3];
450            double a = x0*x1, b = x2*x3, d = scale2/(a*b), t0, t1;
451            a *= d; b *= d;
452            t0 = (2 - x0)*b*x1; t1 = (2 - x1)*b*x0;
453            data[i] = t0; data[i+1] = t1;
454            t0 = (2 - x2)*a*x3; t1 = (2 - x3)*a*x2;
455            data[i+2] = t0; data[i+3] = t1;
456        }
457
458        for( ; i < n; i++ )
459        {
460            double t = scale2*(1. - data[i])/(1. + data[i]);
461            data[i] = t;
462        }
463        break;
464
465    case GAUSSIAN:
466        for( i = 0; i < n; i++ )
467            data[i] = scale2*data[i];
468        break;
469
470    default:
471        ;
472    }
473}
474
475
476void CvANN_MLP::calc_activ_func_deriv( CvMat* _xf, CvMat* _df,
477                                       const double* bias ) const
478{
479    int i, j, n = _xf->rows, cols = _xf->cols;
480    double* xf = _xf->data.db;
481    double* df = _df->data.db;
482    double scale, scale2 = f_param2;
483    assert( CV_IS_MAT_CONT( _xf->type & _df->type ) );
484
485    if( activ_func == IDENTITY )
486    {
487        for( i = 0; i < n; i++, xf += cols, df += cols )
488            for( j = 0; j < cols; j++ )
489            {
490                xf[j] += bias[j];
491                df[j] = 1;
492            }
493        return;
494    }
495    else if( activ_func == GAUSSIAN )
496    {
497        scale = -f_param1*f_param1;
498        scale2 *= scale;
499        for( i = 0; i < n; i++, xf += cols, df += cols )
500            for( j = 0; j < cols; j++ )
501            {
502                double t = xf[j] + bias[j];
503                df[j] = t*2*scale2;
504                xf[j] = t*t*scale;
505            }
506    }
507    else
508    {
509        scale = -f_param1;
510        for( i = 0; i < n; i++, xf += cols, df += cols )
511            for( j = 0; j < cols; j++ )
512                xf[j] = (xf[j] + bias[j])*scale;
513    }
514
515    cvExp( _xf, _xf );
516
517    n *= cols;
518    xf -= n; df -= n;
519
520    // ((1+exp(-ax))^-1)'=a*((1+exp(-ax))^-2)*exp(-ax);
521    // ((1-exp(-ax))/(1+exp(-ax)))'=(a*exp(-ax)*(1+exp(-ax)) + a*exp(-ax)*(1-exp(-ax)))/(1+exp(-ax))^2=
522    // 2*a*exp(-ax)/(1+exp(-ax))^2
523    switch( activ_func )
524    {
525    case SIGMOID_SYM:
526        scale *= -2*f_param2;
527        for( i = 0; i <= n - 4; i += 4 )
528        {
529            double x0 = 1.+xf[i], x1 = 1.+xf[i+1], x2 = 1.+xf[i+2], x3 = 1.+xf[i+3];
530            double a = x0*x1, b = x2*x3, d = 1./(a*b), t0, t1;
531            a *= d; b *= d;
532
533            t0 = b*x1; t1 = b*x0;
534            df[i] = scale*xf[i]*t0*t0;
535            df[i+1] = scale*xf[i+1]*t1*t1;
536            t0 *= scale2*(2 - x0); t1 *= scale2*(2 - x1);
537            xf[i] = t0; xf[i+1] = t1;
538
539            t0 = a*x3; t1 = a*x2;
540            df[i+2] = scale*xf[i+2]*t0*t0;
541            df[i+3] = scale*xf[i+3]*t1*t1;
542            t0 *= scale2*(2 - x2); t1 *= scale2*(2 - x3);
543            xf[i+2] = t0; xf[i+3] = t1;
544        }
545
546        for( ; i < n; i++ )
547        {
548            double t0 = 1./(1. + xf[i]);
549            double t1 = scale*xf[i]*t0*t0;
550            t0 *= scale2*(1. - xf[i]);
551            df[i] = t1;
552            xf[i] = t0;
553        }
554        break;
555
556    case GAUSSIAN:
557        for( i = 0; i < n; i++ )
558            df[i] *= xf[i];
559        break;
560    default:
561        ;
562    }
563}
564
565
566void CvANN_MLP::calc_input_scale( const CvVectors* vecs, int flags )
567{
568    bool reset_weights = (flags & UPDATE_WEIGHTS) == 0;
569    bool no_scale = (flags & NO_INPUT_SCALE) != 0;
570    double* scale = weights[0];
571    int count = vecs->count;
572
573    if( reset_weights )
574    {
575        int i, j, vcount = layer_sizes->data.i[0];
576        int type = vecs->type;
577        double a = no_scale ? 1. : 0.;
578
579        for( j = 0; j < vcount; j++ )
580            scale[2*j] = a, scale[j*2+1] = 0.;
581
582        if( no_scale )
583            return;
584
585        for( i = 0; i < count; i++ )
586        {
587            const float* f = vecs->data.fl[i];
588            const double* d = vecs->data.db[i];
589            for( j = 0; j < vcount; j++ )
590            {
591                double t = type == CV_32F ? (double)f[j] : d[j];
592                scale[j*2] += t;
593                scale[j*2+1] += t*t;
594            }
595        }
596
597        for( j = 0; j < vcount; j++ )
598        {
599            double s = scale[j*2], s2 = scale[j*2+1];
600            double m = s/count, sigma2 = s2/count - m*m;
601            scale[j*2] = sigma2 < DBL_EPSILON ? 1 : 1./sqrt(sigma2);
602            scale[j*2+1] = -m*scale[j*2];
603        }
604    }
605}
606
607
608void CvANN_MLP::calc_output_scale( const CvVectors* vecs, int flags )
609{
610    int i, j, vcount = layer_sizes->data.i[layer_sizes->cols-1];
611    int type = vecs->type;
612    double m = min_val, M = max_val, m1 = min_val1, M1 = max_val1;
613    bool reset_weights = (flags & UPDATE_WEIGHTS) == 0;
614    bool no_scale = (flags & NO_OUTPUT_SCALE) != 0;
615    int l_count = layer_sizes->cols;
616    double* scale = weights[l_count];
617    double* inv_scale = weights[l_count+1];
618    int count = vecs->count;
619
620    CV_FUNCNAME( "CvANN_MLP::calc_output_scale" );
621
622    __BEGIN__;
623
624    if( reset_weights )
625    {
626        double a0 = no_scale ? 1 : DBL_MAX, b0 = no_scale ? 0 : -DBL_MAX;
627
628        for( j = 0; j < vcount; j++ )
629        {
630            scale[2*j] = inv_scale[2*j] = a0;
631            scale[j*2+1] = inv_scale[2*j+1] = b0;
632        }
633
634        if( no_scale )
635            EXIT;
636    }
637
638    for( i = 0; i < count; i++ )
639    {
640        const float* f = vecs->data.fl[i];
641        const double* d = vecs->data.db[i];
642
643        for( j = 0; j < vcount; j++ )
644        {
645            double t = type == CV_32F ? (double)f[j] : d[j];
646
647            if( reset_weights )
648            {
649                double mj = scale[j*2], Mj = scale[j*2+1];
650                if( mj > t ) mj = t;
651                if( Mj < t ) Mj = t;
652
653                scale[j*2] = mj;
654                scale[j*2+1] = Mj;
655            }
656            else
657            {
658                t = t*scale[j*2] + scale[2*j+1];
659                if( t < m1 || t > M1 )
660                    CV_ERROR( CV_StsOutOfRange,
661                    "Some of new output training vector components run exceed the original range too much" );
662            }
663        }
664    }
665
666    if( reset_weights )
667        for( j = 0; j < vcount; j++ )
668        {
669            // map mj..Mj to m..M
670            double mj = scale[j*2], Mj = scale[j*2+1];
671            double a, b;
672            double delta = Mj - mj;
673            if( delta < DBL_EPSILON )
674                a = 1, b = (M + m - Mj - mj)*0.5;
675            else
676                a = (M - m)/delta, b = m - mj*a;
677            inv_scale[j*2] = a; inv_scale[j*2+1] = b;
678            a = 1./a; b = -b*a;
679            scale[j*2] = a; scale[j*2+1] = b;
680        }
681
682    __END__;
683}
684
685
686bool CvANN_MLP::prepare_to_train( const CvMat* _inputs, const CvMat* _outputs,
687            const CvMat* _sample_weights, const CvMat* _sample_idx,
688            CvVectors* _ivecs, CvVectors* _ovecs, double** _sw, int _flags )
689{
690    bool ok = false;
691    CvMat* sample_idx = 0;
692    CvVectors ivecs, ovecs;
693    double* sw = 0;
694    int count = 0;
695
696    CV_FUNCNAME( "CvANN_MLP::prepare_to_train" );
697
698    ivecs.data.ptr = ovecs.data.ptr = 0;
699    assert( _ivecs && _ovecs );
700
701    __BEGIN__;
702
703    const int* sidx = 0;
704    int i, sw_type = 0, sw_count = 0;
705    int sw_step = 0;
706    double sw_sum = 0;
707
708    if( !layer_sizes )
709        CV_ERROR( CV_StsError,
710        "The network has not been created. Use method create or the appropriate constructor" );
711
712    if( !CV_IS_MAT(_inputs) || CV_MAT_TYPE(_inputs->type) != CV_32FC1 &&
713        CV_MAT_TYPE(_inputs->type) != CV_64FC1 || _inputs->cols != layer_sizes->data.i[0] )
714        CV_ERROR( CV_StsBadArg,
715        "input training data should be a floating-point matrix with"
716        "the number of rows equal to the number of training samples and "
717        "the number of columns equal to the size of 0-th (input) layer" );
718
719    if( !CV_IS_MAT(_outputs) || CV_MAT_TYPE(_outputs->type) != CV_32FC1 &&
720        CV_MAT_TYPE(_outputs->type) != CV_64FC1 ||
721        _outputs->cols != layer_sizes->data.i[layer_sizes->cols - 1] )
722        CV_ERROR( CV_StsBadArg,
723        "output training data should be a floating-point matrix with"
724        "the number of rows equal to the number of training samples and "
725        "the number of columns equal to the size of last (output) layer" );
726
727    if( _inputs->rows != _outputs->rows )
728        CV_ERROR( CV_StsUnmatchedSizes, "The numbers of input and output samples do not match" );
729
730    if( _sample_idx )
731    {
732        CV_CALL( sample_idx = cvPreprocessIndexArray( _sample_idx, _inputs->rows ));
733        sidx = sample_idx->data.i;
734        count = sample_idx->cols + sample_idx->rows - 1;
735    }
736    else
737        count = _inputs->rows;
738
739    if( _sample_weights )
740    {
741        if( !CV_IS_MAT(_sample_weights) )
742            CV_ERROR( CV_StsBadArg, "sample_weights (if passed) must be a valid matrix" );
743
744        sw_type = CV_MAT_TYPE(_sample_weights->type);
745        sw_count = _sample_weights->cols + _sample_weights->rows - 1;
746
747        if( sw_type != CV_32FC1 && sw_type != CV_64FC1 ||
748            _sample_weights->cols != 1 && _sample_weights->rows != 1 ||
749            sw_count != count && sw_count != _inputs->rows )
750            CV_ERROR( CV_StsBadArg,
751            "sample_weights must be 1d floating-point vector containing weights "
752            "of all or selected training samples" );
753
754        sw_step = CV_IS_MAT_CONT(_sample_weights->type) ? 1 :
755            _sample_weights->step/CV_ELEM_SIZE(sw_type);
756
757        CV_CALL( sw = (double*)cvAlloc( count*sizeof(sw[0]) ));
758    }
759
760    CV_CALL( ivecs.data.ptr = (uchar**)cvAlloc( count*sizeof(ivecs.data.ptr[0]) ));
761    CV_CALL( ovecs.data.ptr = (uchar**)cvAlloc( count*sizeof(ovecs.data.ptr[0]) ));
762
763    ivecs.type = CV_MAT_TYPE(_inputs->type);
764    ovecs.type = CV_MAT_TYPE(_outputs->type);
765    ivecs.count = ovecs.count = count;
766
767    for( i = 0; i < count; i++ )
768    {
769        int idx = sidx ? sidx[i] : i;
770        ivecs.data.ptr[i] = _inputs->data.ptr + idx*_inputs->step;
771        ovecs.data.ptr[i] = _outputs->data.ptr + idx*_outputs->step;
772        if( sw )
773        {
774            int si = sw_count == count ? i : idx;
775            double w = sw_type == CV_32FC1 ?
776                (double)_sample_weights->data.fl[si*sw_step] :
777                _sample_weights->data.db[si*sw_step];
778            sw[i] = w;
779            if( w < 0 )
780                CV_ERROR( CV_StsOutOfRange, "some of sample weights are negative" );
781            sw_sum += w;
782        }
783    }
784
785    // normalize weights
786    if( sw )
787    {
788        sw_sum = sw_sum > DBL_EPSILON ? 1./sw_sum : 0;
789        for( i = 0; i < count; i++ )
790            sw[i] *= sw_sum;
791    }
792
793    calc_input_scale( &ivecs, _flags );
794    CV_CALL( calc_output_scale( &ovecs, _flags ));
795
796    ok = true;
797
798    __END__;
799
800    if( !ok )
801    {
802        cvFree( &ivecs.data.ptr );
803        cvFree( &ovecs.data.ptr );
804        cvFree( &sw );
805    }
806
807    cvReleaseMat( &sample_idx );
808    *_ivecs = ivecs;
809    *_ovecs = ovecs;
810    *_sw = sw;
811
812    return ok;
813}
814
815
816int CvANN_MLP::train( const CvMat* _inputs, const CvMat* _outputs,
817                      const CvMat* _sample_weights, const CvMat* _sample_idx,
818                      CvANN_MLP_TrainParams _params, int flags )
819{
820    const int MAX_ITER = 1000;
821    const double DEFAULT_EPSILON = FLT_EPSILON;
822
823    double* sw = 0;
824    CvVectors x0, u;
825    int iter = -1;
826
827    x0.data.ptr = u.data.ptr = 0;
828
829    CV_FUNCNAME( "CvANN_MLP::train" );
830
831    __BEGIN__;
832
833    int max_iter;
834    double epsilon;
835
836    params = _params;
837
838    // initialize training data
839    CV_CALL( prepare_to_train( _inputs, _outputs, _sample_weights,
840                               _sample_idx, &x0, &u, &sw, flags ));
841
842    // ... and link weights
843    if( !(flags & UPDATE_WEIGHTS) )
844        init_weights();
845
846    max_iter = params.term_crit.type & CV_TERMCRIT_ITER ? params.term_crit.max_iter : MAX_ITER;
847    max_iter = MIN( max_iter, MAX_ITER );
848    max_iter = MAX( max_iter, 1 );
849
850    epsilon = params.term_crit.type & CV_TERMCRIT_EPS ? params.term_crit.epsilon : DEFAULT_EPSILON;
851    epsilon = MAX(epsilon, DBL_EPSILON);
852
853    params.term_crit.type = CV_TERMCRIT_ITER + CV_TERMCRIT_EPS;
854    params.term_crit.max_iter = max_iter;
855    params.term_crit.epsilon = epsilon;
856
857    if( params.train_method == CvANN_MLP_TrainParams::BACKPROP )
858    {
859        CV_CALL( iter = train_backprop( x0, u, sw ));
860    }
861    else
862    {
863        CV_CALL( iter = train_rprop( x0, u, sw ));
864    }
865
866    __END__;
867
868    cvFree( &x0.data.ptr );
869    cvFree( &u.data.ptr );
870    cvFree( &sw );
871
872    return iter;
873}
874
875
876int CvANN_MLP::train_backprop( CvVectors x0, CvVectors u, const double* sw )
877{
878    CvMat* dw = 0;
879    CvMat* buf = 0;
880    double **x = 0, **df = 0;
881    CvMat* _idx = 0;
882    int iter = -1, count = x0.count;
883
884    CV_FUNCNAME( "CvANN_MLP::train_backprop" );
885
886    __BEGIN__;
887
888    int i, j, k, ivcount, ovcount, l_count, total = 0, max_iter;
889    double *buf_ptr;
890    double prev_E = DBL_MAX*0.5, E = 0, epsilon;
891
892    max_iter = params.term_crit.max_iter*count;
893    epsilon = params.term_crit.epsilon*count;
894
895    l_count = layer_sizes->cols;
896    ivcount = layer_sizes->data.i[0];
897    ovcount = layer_sizes->data.i[l_count-1];
898
899    // allocate buffers
900    for( i = 0; i < l_count; i++ )
901        total += layer_sizes->data.i[i] + 1;
902
903    CV_CALL( dw = cvCreateMat( wbuf->rows, wbuf->cols, wbuf->type ));
904    cvZero( dw );
905    CV_CALL( buf = cvCreateMat( 1, (total + max_count)*2, CV_64F ));
906    CV_CALL( _idx = cvCreateMat( 1, count, CV_32SC1 ));
907    for( i = 0; i < count; i++ )
908        _idx->data.i[i] = i;
909
910    CV_CALL( x = (double**)cvAlloc( total*2*sizeof(x[0]) ));
911    df = x + total;
912    buf_ptr = buf->data.db;
913
914    for( j = 0; j < l_count; j++ )
915    {
916        x[j] = buf_ptr;
917        df[j] = x[j] + layer_sizes->data.i[j];
918        buf_ptr += (df[j] - x[j])*2;
919    }
920
921    // run back-propagation loop
922    /*
923        y_i = w_i*x_{i-1}
924        x_i = f(y_i)
925        E = 1/2*||u - x_N||^2
926        grad_N = (x_N - u)*f'(y_i)
927        dw_i(t) = momentum*dw_i(t-1) + dw_scale*x_{i-1}*grad_i
928        w_i(t+1) = w_i(t) + dw_i(t)
929        grad_{i-1} = w_i^t*grad_i
930    */
931    for( iter = 0; iter < max_iter; iter++ )
932    {
933        int idx = iter % count;
934        double* w = weights[0];
935        double sweight = sw ? count*sw[idx] : 1.;
936        CvMat _w, _dw, hdr1, hdr2, ghdr1, ghdr2, _df;
937        CvMat *x1 = &hdr1, *x2 = &hdr2, *grad1 = &ghdr1, *grad2 = &ghdr2, *temp;
938
939        if( idx == 0 )
940        {
941            if( fabs(prev_E - E) < epsilon )
942                break;
943            prev_E = E;
944            E = 0;
945
946            // shuffle indices
947            for( i = 0; i < count; i++ )
948            {
949                int tt;
950                j = (unsigned)cvRandInt(&rng) % count;
951                k = (unsigned)cvRandInt(&rng) % count;
952                CV_SWAP( _idx->data.i[j], _idx->data.i[k], tt );
953            }
954        }
955
956        idx = _idx->data.i[idx];
957
958        if( x0.type == CV_32F )
959        {
960            const float* x0data = x0.data.fl[idx];
961            for( j = 0; j < ivcount; j++ )
962                x[0][j] = x0data[j]*w[j*2] + w[j*2 + 1];
963        }
964        else
965        {
966            const double* x0data = x0.data.db[idx];
967            for( j = 0; j < ivcount; j++ )
968                x[0][j] = x0data[j]*w[j*2] + w[j*2 + 1];
969        }
970
971        cvInitMatHeader( x1, 1, ivcount, CV_64F, x[0] );
972
973        // forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i])
974        for( i = 1; i < l_count; i++ )
975        {
976            cvInitMatHeader( x2, 1, layer_sizes->data.i[i], CV_64F, x[i] );
977            cvInitMatHeader( &_w, x1->cols, x2->cols, CV_64F, weights[i] );
978            cvGEMM( x1, &_w, 1, 0, 0, x2 );
979            _df = *x2;
980            _df.data.db = df[i];
981            calc_activ_func_deriv( x2, &_df, _w.data.db + _w.rows*_w.cols );
982            CV_SWAP( x1, x2, temp );
983        }
984
985        cvInitMatHeader( grad1, 1, ovcount, CV_64F, buf_ptr );
986        *grad2 = *grad1;
987        grad2->data.db = buf_ptr + max_count;
988
989        w = weights[l_count+1];
990
991        // calculate error
992        if( u.type == CV_32F )
993        {
994            const float* udata = u.data.fl[idx];
995            for( k = 0; k < ovcount; k++ )
996            {
997                double t = udata[k]*w[k*2] + w[k*2+1] - x[l_count-1][k];
998                grad1->data.db[k] = t*sweight;
999                E += t*t;
1000            }
1001        }
1002        else
1003        {
1004            const double* udata = u.data.db[idx];
1005            for( k = 0; k < ovcount; k++ )
1006            {
1007                double t = udata[k]*w[k*2] + w[k*2+1] - x[l_count-1][k];
1008                grad1->data.db[k] = t*sweight;
1009                E += t*t;
1010            }
1011        }
1012        E *= sweight;
1013
1014        // backward pass, update weights
1015        for( i = l_count-1; i > 0; i-- )
1016        {
1017            int n1 = layer_sizes->data.i[i-1], n2 = layer_sizes->data.i[i];
1018            cvInitMatHeader( &_df, 1, n2, CV_64F, df[i] );
1019            cvMul( grad1, &_df, grad1 );
1020            cvInitMatHeader( &_w, n1+1, n2, CV_64F, weights[i] );
1021            cvInitMatHeader( &_dw, n1+1, n2, CV_64F, dw->data.db + (weights[i] - weights[0]) );
1022            cvInitMatHeader( x1, n1+1, 1, CV_64F, x[i-1] );
1023            x[i-1][n1] = 1.;
1024            cvGEMM( x1, grad1, params.bp_dw_scale, &_dw, params.bp_moment_scale, &_dw );
1025            cvAdd( &_w, &_dw, &_w );
1026            if( i > 1 )
1027            {
1028                grad2->cols = n1;
1029                _w.rows = n1;
1030                cvGEMM( grad1, &_w, 1, 0, 0, grad2, CV_GEMM_B_T );
1031            }
1032            CV_SWAP( grad1, grad2, temp );
1033        }
1034    }
1035
1036    iter /= count;
1037
1038    __END__;
1039
1040    cvReleaseMat( &dw );
1041    cvReleaseMat( &buf );
1042    cvReleaseMat( &_idx );
1043    cvFree( &x );
1044
1045    return iter;
1046}
1047
1048
1049int CvANN_MLP::train_rprop( CvVectors x0, CvVectors u, const double* sw )
1050{
1051    const int max_buf_sz = 1 << 16;
1052    CvMat* dw = 0;
1053    CvMat* dEdw = 0;
1054    CvMat* prev_dEdw_sign = 0;
1055    CvMat* buf = 0;
1056    double **x = 0, **df = 0;
1057    int iter = -1, count = x0.count;
1058
1059    CV_FUNCNAME( "CvANN_MLP::train" );
1060
1061    __BEGIN__;
1062
1063    int i, ivcount, ovcount, l_count, total = 0, max_iter, buf_sz, dcount0, dcount=0;
1064    double *buf_ptr;
1065    double prev_E = DBL_MAX*0.5, epsilon;
1066    double dw_plus, dw_minus, dw_min, dw_max;
1067    double inv_count;
1068
1069    max_iter = params.term_crit.max_iter;
1070    epsilon = params.term_crit.epsilon;
1071    dw_plus = params.rp_dw_plus;
1072    dw_minus = params.rp_dw_minus;
1073    dw_min = params.rp_dw_min;
1074    dw_max = params.rp_dw_max;
1075
1076    l_count = layer_sizes->cols;
1077    ivcount = layer_sizes->data.i[0];
1078    ovcount = layer_sizes->data.i[l_count-1];
1079
1080    // allocate buffers
1081    for( i = 0; i < l_count; i++ )
1082        total += layer_sizes->data.i[i];
1083
1084    CV_CALL( dw = cvCreateMat( wbuf->rows, wbuf->cols, wbuf->type ));
1085    cvSet( dw, cvScalarAll(params.rp_dw0) );
1086    CV_CALL( dEdw = cvCreateMat( wbuf->rows, wbuf->cols, wbuf->type ));
1087    cvZero( dEdw );
1088    CV_CALL( prev_dEdw_sign = cvCreateMat( wbuf->rows, wbuf->cols, CV_8SC1 ));
1089    cvZero( prev_dEdw_sign );
1090
1091    inv_count = 1./count;
1092    dcount0 = max_buf_sz/(2*total);
1093    dcount0 = MAX( dcount0, 1 );
1094    dcount0 = MIN( dcount0, count );
1095    buf_sz = dcount0*(total + max_count)*2;
1096
1097    CV_CALL( buf = cvCreateMat( 1, buf_sz, CV_64F ));
1098
1099    CV_CALL( x = (double**)cvAlloc( total*2*sizeof(x[0]) ));
1100    df = x + total;
1101    buf_ptr = buf->data.db;
1102
1103    for( i = 0; i < l_count; i++ )
1104    {
1105        x[i] = buf_ptr;
1106        df[i] = x[i] + layer_sizes->data.i[i]*dcount0;
1107        buf_ptr += (df[i] - x[i])*2;
1108    }
1109
1110    // run rprop loop
1111    /*
1112        y_i(t) = w_i(t)*x_{i-1}(t)
1113        x_i(t) = f(y_i(t))
1114        E = sum_over_all_samples(1/2*||u - x_N||^2)
1115        grad_N = (x_N - u)*f'(y_i)
1116
1117                      MIN(dw_i{jk}(t)*dw_plus, dw_max), if dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) > 0
1118        dw_i{jk}(t) = MAX(dw_i{jk}(t)*dw_minus, dw_min), if dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) < 0
1119                      dw_i{jk}(t-1) else
1120
1121        if (dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) < 0)
1122           dE/dw_i{jk}(t)<-0
1123        else
1124           w_i{jk}(t+1) = w_i{jk}(t) + dw_i{jk}(t)
1125        grad_{i-1}(t) = w_i^t(t)*grad_i(t)
1126    */
1127    for( iter = 0; iter < max_iter; iter++ )
1128    {
1129        int n1, n2, si, j, k;
1130        double* w;
1131        CvMat _w, _dEdw, hdr1, hdr2, ghdr1, ghdr2, _df;
1132        CvMat *x1, *x2, *grad1, *grad2, *temp;
1133        double E = 0;
1134
1135        // first, iterate through all the samples and compute dEdw
1136        for( si = 0; si < count; si += dcount )
1137        {
1138            dcount = MIN( count - si, dcount0 );
1139            w = weights[0];
1140            grad1 = &ghdr1; grad2 = &ghdr2;
1141            x1 = &hdr1; x2 = &hdr2;
1142
1143            // grab and preprocess input data
1144            if( x0.type == CV_32F )
1145                for( i = 0; i < dcount; i++ )
1146                {
1147                    const float* x0data = x0.data.fl[si+i];
1148                    double* xdata = x[0]+i*ivcount;
1149                    for( j = 0; j < ivcount; j++ )
1150                        xdata[j] = x0data[j]*w[j*2] + w[j*2+1];
1151                }
1152            else
1153                for( i = 0; i < dcount; i++ )
1154                {
1155                    const double* x0data = x0.data.db[si+i];
1156                    double* xdata = x[0]+i*ivcount;
1157                    for( j = 0; j < ivcount; j++ )
1158                        xdata[j] = x0data[j]*w[j*2] + w[j*2+1];
1159                }
1160
1161            cvInitMatHeader( x1, dcount, ivcount, CV_64F, x[0] );
1162
1163            // forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i])
1164            for( i = 1; i < l_count; i++ )
1165            {
1166                cvInitMatHeader( x2, dcount, layer_sizes->data.i[i], CV_64F, x[i] );
1167                cvInitMatHeader( &_w, x1->cols, x2->cols, CV_64F, weights[i] );
1168                cvGEMM( x1, &_w, 1, 0, 0, x2 );
1169                _df = *x2;
1170                _df.data.db = df[i];
1171                calc_activ_func_deriv( x2, &_df, _w.data.db + _w.rows*_w.cols );
1172                CV_SWAP( x1, x2, temp );
1173            }
1174
1175            cvInitMatHeader( grad1, dcount, ovcount, CV_64F, buf_ptr );
1176            w = weights[l_count+1];
1177            grad2->data.db = buf_ptr + max_count*dcount;
1178
1179            // calculate error
1180            if( u.type == CV_32F )
1181                for( i = 0; i < dcount; i++ )
1182                {
1183                    const float* udata = u.data.fl[si+i];
1184                    const double* xdata = x[l_count-1] + i*ovcount;
1185                    double* gdata = grad1->data.db + i*ovcount;
1186                    double sweight = sw ? sw[si+i] : inv_count, E1 = 0;
1187
1188                    for( j = 0; j < ovcount; j++ )
1189                    {
1190                        double t = udata[j]*w[j*2] + w[j*2+1] - xdata[j];
1191                        gdata[j] = t*sweight;
1192                        E1 += t*t;
1193                    }
1194                    E += sweight*E1;
1195                }
1196            else
1197                for( i = 0; i < dcount; i++ )
1198                {
1199                    const double* udata = u.data.db[si+i];
1200                    const double* xdata = x[l_count-1] + i*ovcount;
1201                    double* gdata = grad1->data.db + i*ovcount;
1202                    double sweight = sw ? sw[si+i] : inv_count, E1 = 0;
1203
1204                    for( j = 0; j < ovcount; j++ )
1205                    {
1206                        double t = udata[j]*w[j*2] + w[j*2+1] - xdata[j];
1207                        gdata[j] = t*sweight;
1208                        E1 += t*t;
1209                    }
1210                    E += sweight*E1;
1211                }
1212
1213            // backward pass, update dEdw
1214            for( i = l_count-1; i > 0; i-- )
1215            {
1216                n1 = layer_sizes->data.i[i-1]; n2 = layer_sizes->data.i[i];
1217                cvInitMatHeader( &_df, dcount, n2, CV_64F, df[i] );
1218                cvMul( grad1, &_df, grad1 );
1219                cvInitMatHeader( &_dEdw, n1, n2, CV_64F, dEdw->data.db+(weights[i]-weights[0]) );
1220                cvInitMatHeader( x1, dcount, n1, CV_64F, x[i-1] );
1221                cvGEMM( x1, grad1, 1, &_dEdw, 1, &_dEdw, CV_GEMM_A_T );
1222                // update bias part of dEdw
1223                for( k = 0; k < dcount; k++ )
1224                {
1225                    double* dst = _dEdw.data.db + n1*n2;
1226                    const double* src = grad1->data.db + k*n2;
1227                    for( j = 0; j < n2; j++ )
1228                        dst[j] += src[j];
1229                }
1230                cvInitMatHeader( &_w, n1, n2, CV_64F, weights[i] );
1231                cvInitMatHeader( grad2, dcount, n1, CV_64F, grad2->data.db );
1232
1233                if( i > 1 )
1234                    cvGEMM( grad1, &_w, 1, 0, 0, grad2, CV_GEMM_B_T );
1235                CV_SWAP( grad1, grad2, temp );
1236            }
1237        }
1238
1239        // now update weights
1240        for( i = 1; i < l_count; i++ )
1241        {
1242            n1 = layer_sizes->data.i[i-1]; n2 = layer_sizes->data.i[i];
1243            for( k = 0; k <= n1; k++ )
1244            {
1245                double* wk = weights[i]+k*n2;
1246                size_t delta = wk - weights[0];
1247                double* dwk = dw->data.db + delta;
1248                double* dEdwk = dEdw->data.db + delta;
1249                char* prevEk = (char*)(prev_dEdw_sign->data.ptr + delta);
1250
1251                for( j = 0; j < n2; j++ )
1252                {
1253                    double Eval = dEdwk[j];
1254                    double dval = dwk[j];
1255                    double wval = wk[j];
1256                    int s = CV_SIGN(Eval);
1257                    int ss = prevEk[j]*s;
1258                    if( ss > 0 )
1259                    {
1260                        dval *= dw_plus;
1261                        dval = MIN( dval, dw_max );
1262                        dwk[j] = dval;
1263                        wk[j] = wval + dval*s;
1264                    }
1265                    else if( ss < 0 )
1266                    {
1267                        dval *= dw_minus;
1268                        dval = MAX( dval, dw_min );
1269                        prevEk[j] = 0;
1270                        dwk[j] = dval;
1271                        wk[j] = wval + dval*s;
1272                    }
1273                    else
1274                    {
1275                        prevEk[j] = (char)s;
1276                        wk[j] = wval + dval*s;
1277                    }
1278                    dEdwk[j] = 0.;
1279                }
1280            }
1281        }
1282
1283        if( fabs(prev_E - E) < epsilon )
1284            break;
1285        prev_E = E;
1286        E = 0;
1287    }
1288
1289    __END__;
1290
1291    cvReleaseMat( &dw );
1292    cvReleaseMat( &dEdw );
1293    cvReleaseMat( &prev_dEdw_sign );
1294    cvReleaseMat( &buf );
1295    cvFree( &x );
1296
1297    return iter;
1298}
1299
1300
1301void CvANN_MLP::write_params( CvFileStorage* fs )
1302{
1303    //CV_FUNCNAME( "CvANN_MLP::write_params" );
1304
1305    __BEGIN__;
1306
1307    const char* activ_func_name = activ_func == IDENTITY ? "IDENTITY" :
1308                            activ_func == SIGMOID_SYM ? "SIGMOID_SYM" :
1309                            activ_func == GAUSSIAN ? "GAUSSIAN" : 0;
1310
1311    if( activ_func_name )
1312        cvWriteString( fs, "activation_function", activ_func_name );
1313    else
1314        cvWriteInt( fs, "activation_function", activ_func );
1315
1316    if( activ_func != IDENTITY )
1317    {
1318        cvWriteReal( fs, "f_param1", f_param1 );
1319        cvWriteReal( fs, "f_param2", f_param2 );
1320    }
1321
1322    cvWriteReal( fs, "min_val", min_val );
1323    cvWriteReal( fs, "max_val", max_val );
1324    cvWriteReal( fs, "min_val1", min_val1 );
1325    cvWriteReal( fs, "max_val1", max_val1 );
1326
1327    cvStartWriteStruct( fs, "training_params", CV_NODE_MAP );
1328    if( params.train_method == CvANN_MLP_TrainParams::BACKPROP )
1329    {
1330        cvWriteString( fs, "train_method", "BACKPROP" );
1331        cvWriteReal( fs, "dw_scale", params.bp_dw_scale );
1332        cvWriteReal( fs, "moment_scale", params.bp_moment_scale );
1333    }
1334    else if( params.train_method == CvANN_MLP_TrainParams::RPROP )
1335    {
1336        cvWriteString( fs, "train_method", "RPROP" );
1337        cvWriteReal( fs, "dw0", params.rp_dw0 );
1338        cvWriteReal( fs, "dw_plus", params.rp_dw_plus );
1339        cvWriteReal( fs, "dw_minus", params.rp_dw_minus );
1340        cvWriteReal( fs, "dw_min", params.rp_dw_min );
1341        cvWriteReal( fs, "dw_max", params.rp_dw_max );
1342    }
1343
1344    cvStartWriteStruct( fs, "term_criteria", CV_NODE_MAP + CV_NODE_FLOW );
1345    if( params.term_crit.type & CV_TERMCRIT_EPS )
1346        cvWriteReal( fs, "epsilon", params.term_crit.epsilon );
1347    if( params.term_crit.type & CV_TERMCRIT_ITER )
1348        cvWriteInt( fs, "iterations", params.term_crit.max_iter );
1349    cvEndWriteStruct( fs );
1350
1351    cvEndWriteStruct( fs );
1352
1353    __END__;
1354}
1355
1356
1357void CvANN_MLP::write( CvFileStorage* fs, const char* name )
1358{
1359    CV_FUNCNAME( "CvANN_MLP::write" );
1360
1361    __BEGIN__;
1362
1363    int i, l_count = layer_sizes->cols;
1364
1365    if( !layer_sizes )
1366        CV_ERROR( CV_StsError, "The network has not been initialized" );
1367
1368    cvStartWriteStruct( fs, name, CV_NODE_MAP, CV_TYPE_NAME_ML_ANN_MLP );
1369
1370    cvWrite( fs, "layer_sizes", layer_sizes );
1371
1372    write_params( fs );
1373
1374    cvStartWriteStruct( fs, "input_scale", CV_NODE_SEQ + CV_NODE_FLOW );
1375    cvWriteRawData( fs, weights[0], layer_sizes->data.i[0]*2, "d" );
1376    cvEndWriteStruct( fs );
1377
1378    cvStartWriteStruct( fs, "output_scale", CV_NODE_SEQ + CV_NODE_FLOW );
1379    cvWriteRawData( fs, weights[l_count], layer_sizes->data.i[l_count-1]*2, "d" );
1380    cvEndWriteStruct( fs );
1381
1382    cvStartWriteStruct( fs, "inv_output_scale", CV_NODE_SEQ + CV_NODE_FLOW );
1383    cvWriteRawData( fs, weights[l_count+1], layer_sizes->data.i[l_count-1]*2, "d" );
1384    cvEndWriteStruct( fs );
1385
1386    cvStartWriteStruct( fs, "weights", CV_NODE_SEQ );
1387    for( i = 1; i < l_count; i++ )
1388    {
1389        cvStartWriteStruct( fs, 0, CV_NODE_SEQ + CV_NODE_FLOW );
1390        cvWriteRawData( fs, weights[i], (layer_sizes->data.i[i-1]+1)*layer_sizes->data.i[i], "d" );
1391        cvEndWriteStruct( fs );
1392    }
1393
1394    cvEndWriteStruct( fs );
1395
1396    __END__;
1397}
1398
1399
1400void CvANN_MLP::read_params( CvFileStorage* fs, CvFileNode* node )
1401{
1402    //CV_FUNCNAME( "CvANN_MLP::read_params" );
1403
1404    __BEGIN__;
1405
1406    const char* activ_func_name = cvReadStringByName( fs, node, "activation_function", 0 );
1407    CvFileNode* tparams_node;
1408
1409    if( activ_func_name )
1410        activ_func = strcmp( activ_func_name, "SIGMOID_SYM" ) == 0 ? SIGMOID_SYM :
1411                     strcmp( activ_func_name, "IDENTITY" ) == 0 ? IDENTITY :
1412                     strcmp( activ_func_name, "GAUSSIAN" ) == 0 ? GAUSSIAN : 0;
1413    else
1414        activ_func = cvReadIntByName( fs, node, "activation_function" );
1415
1416    f_param1 = cvReadRealByName( fs, node, "f_param1", 0 );
1417    f_param2 = cvReadRealByName( fs, node, "f_param2", 0 );
1418
1419    set_activ_func( activ_func, f_param1, f_param2 );
1420
1421    min_val = cvReadRealByName( fs, node, "min_val", 0. );
1422    max_val = cvReadRealByName( fs, node, "max_val", 1. );
1423    min_val1 = cvReadRealByName( fs, node, "min_val1", 0. );
1424    max_val1 = cvReadRealByName( fs, node, "max_val1", 1. );
1425
1426    tparams_node = cvGetFileNodeByName( fs, node, "training_params" );
1427    params = CvANN_MLP_TrainParams();
1428
1429    if( tparams_node )
1430    {
1431        const char* tmethod_name = cvReadStringByName( fs, tparams_node, "train_method", "" );
1432        CvFileNode* tcrit_node;
1433
1434        if( strcmp( tmethod_name, "BACKPROP" ) == 0 )
1435        {
1436            params.train_method = CvANN_MLP_TrainParams::BACKPROP;
1437            params.bp_dw_scale = cvReadRealByName( fs, tparams_node, "dw_scale", 0 );
1438            params.bp_moment_scale = cvReadRealByName( fs, tparams_node, "moment_scale", 0 );
1439        }
1440        else if( strcmp( tmethod_name, "RPROP" ) == 0 )
1441        {
1442            params.train_method = CvANN_MLP_TrainParams::RPROP;
1443            params.rp_dw0 = cvReadRealByName( fs, tparams_node, "dw0", 0 );
1444            params.rp_dw_plus = cvReadRealByName( fs, tparams_node, "dw_plus", 0 );
1445            params.rp_dw_minus = cvReadRealByName( fs, tparams_node, "dw_minus", 0 );
1446            params.rp_dw_min = cvReadRealByName( fs, tparams_node, "dw_min", 0 );
1447            params.rp_dw_max = cvReadRealByName( fs, tparams_node, "dw_max", 0 );
1448        }
1449
1450        tcrit_node = cvGetFileNodeByName( fs, tparams_node, "term_criteria" );
1451        if( tcrit_node )
1452        {
1453            params.term_crit.epsilon = cvReadRealByName( fs, tcrit_node, "epsilon", -1 );
1454            params.term_crit.max_iter = cvReadIntByName( fs, tcrit_node, "iterations", -1 );
1455            params.term_crit.type = (params.term_crit.epsilon >= 0 ? CV_TERMCRIT_EPS : 0) +
1456                                   (params.term_crit.max_iter >= 0 ? CV_TERMCRIT_ITER : 0);
1457        }
1458    }
1459
1460    __END__;
1461}
1462
1463
1464void CvANN_MLP::read( CvFileStorage* fs, CvFileNode* node )
1465{
1466    CvMat* _layer_sizes = 0;
1467
1468    CV_FUNCNAME( "CvANN_MLP::read" );
1469
1470    __BEGIN__;
1471
1472    CvFileNode* w;
1473    CvSeqReader reader;
1474    int i, l_count;
1475
1476    _layer_sizes = (CvMat*)cvReadByName( fs, node, "layer_sizes" );
1477    CV_CALL( create( _layer_sizes, SIGMOID_SYM, 0, 0 ));
1478    l_count = layer_sizes->cols;
1479
1480    CV_CALL( read_params( fs, node ));
1481
1482    w = cvGetFileNodeByName( fs, node, "input_scale" );
1483    if( !w || CV_NODE_TYPE(w->tag) != CV_NODE_SEQ ||
1484        w->data.seq->total != layer_sizes->data.i[0]*2 )
1485        CV_ERROR( CV_StsParseError, "input_scale tag is not found or is invalid" );
1486
1487    CV_CALL( cvReadRawData( fs, w, weights[0], "d" ));
1488
1489    w = cvGetFileNodeByName( fs, node, "output_scale" );
1490    if( !w || CV_NODE_TYPE(w->tag) != CV_NODE_SEQ ||
1491        w->data.seq->total != layer_sizes->data.i[l_count-1]*2 )
1492        CV_ERROR( CV_StsParseError, "output_scale tag is not found or is invalid" );
1493
1494    CV_CALL( cvReadRawData( fs, w, weights[l_count], "d" ));
1495
1496    w = cvGetFileNodeByName( fs, node, "inv_output_scale" );
1497    if( !w || CV_NODE_TYPE(w->tag) != CV_NODE_SEQ ||
1498        w->data.seq->total != layer_sizes->data.i[l_count-1]*2 )
1499        CV_ERROR( CV_StsParseError, "inv_output_scale tag is not found or is invalid" );
1500
1501    CV_CALL( cvReadRawData( fs, w, weights[l_count+1], "d" ));
1502
1503    w = cvGetFileNodeByName( fs, node, "weights" );
1504    if( !w || CV_NODE_TYPE(w->tag) != CV_NODE_SEQ ||
1505        w->data.seq->total != l_count - 1 )
1506        CV_ERROR( CV_StsParseError, "weights tag is not found or is invalid" );
1507
1508    cvStartReadSeq( w->data.seq, &reader );
1509
1510    for( i = 1; i < l_count; i++ )
1511    {
1512        w = (CvFileNode*)reader.ptr;
1513        CV_CALL( cvReadRawData( fs, w, weights[i], "d" ));
1514        CV_NEXT_SEQ_ELEM( reader.seq->elem_size, reader );
1515    }
1516
1517    __END__;
1518}
1519
1520/* End of file. */
1521