/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                        Intel License Agreement
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of Intel Corporation may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"

namespace cv { namespace ml {

struct AnnParams
{
    AnnParams()
    {
        termCrit = TermCriteria( TermCriteria::COUNT + TermCriteria::EPS, 1000, 0.01 );
        trainMethod = ANN_MLP::RPROP;
        bpDWScale = bpMomentScale = 0.1;
        rpDW0 = 0.1; rpDWPlus = 1.2; rpDWMinus = 0.5;
        rpDWMin = FLT_EPSILON; rpDWMax = 50.;
    }

    TermCriteria termCrit;
    int trainMethod;

    double bpDWScale;
    double bpMomentScale;

    double rpDW0;
    double rpDWPlus;
    double rpDWMinus;
    double rpDWMin;
    double rpDWMax;
};

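// clamp val to the closed range [min_val, max_val]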
template <typename T>
inline T inBounds(T val, T min_val, T max_val)
{
    return std::min(std::max(val, min_val), max_val);
}

class ANN_MLPImpl : public ANN_MLP
{
public:
    ANN_MLPImpl()
    {
        clear();
        setActivationFunction( SIGMOID_SYM, 0, 0 );
        setLayerSizes(Mat());
        setTrainMethod(ANN_MLP::RPROP, 0.1, FLT_EPSILON);
    }

    virtual ~ANN_MLPImpl() {}

    CV_IMPL_PROPERTY(TermCriteria, TermCriteria, params.termCrit)
    CV_IMPL_PROPERTY(double, BackpropWeightScale, params.bpDWScale)
    CV_IMPL_PROPERTY(double, BackpropMomentumScale, params.bpMomentScale)
    CV_IMPL_PROPERTY(double, RpropDW0, params.rpDW0)
    CV_IMPL_PROPERTY(double, RpropDWPlus, params.rpDWPlus)
    CV_IMPL_PROPERTY(double, RpropDWMinus, params.rpDWMinus)
    CV_IMPL_PROPERTY(double, RpropDWMin, params.rpDWMin)
    CV_IMPL_PROPERTY(double, RpropDWMax, params.rpDWMax)

    void clear()
    {
        min_val = max_val = min_val1 = max_val1 = 0.;
        rng = RNG((uint64)-1);
        weights.clear();
        trained = false;
        max_buf_sz = 1 << 12;
    }

    int layer_count() const { return (int)layer_sizes.size(); }

    void setTrainMethod(int method, double param1, double param2)
    {
        if (method != ANN_MLP::RPROP && method != ANN_MLP::BACKPROP)
            method = ANN_MLP::RPROP;
        params.trainMethod = method;
        if(method == ANN_MLP::RPROP )
        {
            if( param1 < FLT_EPSILON )
                param1 = 1.;
            params.rpDW0 = param1;
            params.rpDWMin = std::max( param2, 0. );
        }
        else if(method == ANN_MLP::BACKPROP )
        {
            if( param1 <= 0 )
                param1 = 0.1;
            params.bpDWScale = inBounds<double>(param1, 1e-3, 1.);
            if( param2 < 0 )
                param2 = 0.1;
            params.bpMomentScale = std::min( param2, 1. );
        }
    }

    int getTrainMethod() const
    {
        return params.trainMethod;
    }

    void setActivationFunction(int _activ_func, double _f_param1, double _f_param2 )
    {
        if( _activ_func < 0 || _activ_func > GAUSSIAN )
            CV_Error( CV_StsOutOfRange, "Unknown activation function" );

        activ_func = _activ_func;

        switch( activ_func )
        {
        case SIGMOID_SYM:
            max_val = 0.95; min_val = -max_val;
            max_val1 = 0.98; min_val1 = -max_val1;
            if( fabs(_f_param1) < FLT_EPSILON )
                _f_param1 = 2./3;
            if( fabs(_f_param2) < FLT_EPSILON )
                _f_param2 = 1.7159;
            break;
        case GAUSSIAN:
            max_val = 1.; min_val = 0.05;
            max_val1 = 1.; min_val1 = 0.02;
            if( fabs(_f_param1) < FLT_EPSILON )
                _f_param1 = 1.;
            if( fabs(_f_param2) < FLT_EPSILON )
                _f_param2 = 1.;
            break;
        default:
            min_val = max_val = min_val1 = max_val1 = 0.;
            _f_param1 = 1.;
            _f_param2 = 0.;
        }

        f_param1 = _f_param1;
        f_param2 = _f_param2;
    }


    void init_weights()
    {
        int i, j, k, l_count = layer_count();

        for( i = 1; i < l_count; i++ )
        {
            int n1 = layer_sizes[i-1];
            int n2 = layer_sizes[i];
            double val = 0, G = n2 > 2 ? 0.7*pow((double)n1,1./(n2-1)) : 1.;
            double* w = weights[i].ptr<double>();

            // initialize weights using Nguyen-Widrow algorithm
            for( j = 0; j < n2; j++ )
            {
                double s = 0;
                for( k = 0; k <= n1; k++ )
                {
                    val = rng.uniform(0., 1.)*2-1.;
                    w[k*n2 + j] = val;
                    s += fabs(val);
                }

                if( i < l_count - 1 )
                {
                    s = 1./(s - fabs(val));
                    for( k = 0; k <= n1; k++ )
                        w[k*n2 + j] *= s;
                    w[n1*n2 + j] *= G*(-1+j*2./n2);
                }
            }
        }
    }

    Mat getLayerSizes() const
    {
        return Mat_<int>(layer_sizes, true);
    }

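    // (re)create the network: weights[1..l_count-1] are the per-layer weight matrices
    // (with one extra row for the bias), while weights[0], weights[l_count] and
    // weights[l_count+1] hold the input scale, output scale and inverse output scale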
    void setLayerSizes( InputArray _layer_sizes )
    {
        clear();

        _layer_sizes.copyTo(layer_sizes);
        int l_count = layer_count();

        weights.resize(l_count + 2);
        max_lsize = 0;

        if( l_count > 0 )
        {
            for( int i = 0; i < l_count; i++ )
            {
                int n = layer_sizes[i];
                if( n < 1 + (0 < i && i < l_count-1))
                    CV_Error( CV_StsOutOfRange,
                             "there should be at least one input and one output "
                             "and every hidden layer must have more than 1 neuron" );
                max_lsize = std::max( max_lsize, n );
                if( i > 0 )
                    weights[i].create(layer_sizes[i-1]+1, n, CV_64F);
            }

            int ninputs = layer_sizes.front();
            int noutputs = layer_sizes.back();
            weights[0].create(1, ninputs*2, CV_64F);
            weights[l_count].create(1, noutputs*2, CV_64F);
            weights[l_count+1].create(1, noutputs*2, CV_64F);
        }
    }

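    // forward pass over the input samples, processed in chunks to bound the buffer size;
    // when a single sample is given, the return value is the index of the maximal output element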
    float predict( InputArray _inputs, OutputArray _outputs, int ) const
    {
        if( !trained )
            CV_Error( CV_StsError, "The network has not been trained or loaded" );

        Mat inputs = _inputs.getMat();
        int type = inputs.type(), l_count = layer_count();
        int n = inputs.rows, dn0 = n;

        CV_Assert( (type == CV_32F || type == CV_64F) && inputs.cols == layer_sizes[0] );
        int noutputs = layer_sizes[l_count-1];
        Mat outputs;

        int min_buf_sz = 2*max_lsize;
        int buf_sz = n*min_buf_sz;

        if( buf_sz > max_buf_sz )
        {
            dn0 = max_buf_sz/min_buf_sz;
            dn0 = std::max( dn0, 1 );
            buf_sz = dn0*min_buf_sz;
        }

        cv::AutoBuffer<double> _buf(buf_sz+noutputs);
        double* buf = _buf;

        if( !_outputs.needed() )
        {
            CV_Assert( n == 1 );
            outputs = Mat(n, noutputs, type, buf + buf_sz);
        }
        else
        {
            _outputs.create(n, noutputs, type);
            outputs = _outputs.getMat();
        }

        int dn = 0;
        for( int i = 0; i < n; i += dn )
        {
            dn = std::min( dn0, n - i );

            Mat layer_in = inputs.rowRange(i, i + dn);
            Mat layer_out( dn, layer_in.cols, CV_64F, buf);

            scale_input( layer_in, layer_out );
            layer_in = layer_out;

            for( int j = 1; j < l_count; j++ )
            {
                double* data = buf + ((j&1) ? max_lsize*dn0 : 0);
                int cols = layer_sizes[j];

                layer_out = Mat(dn, cols, CV_64F, data);
                Mat w = weights[j].rowRange(0, layer_in.cols);
                gemm(layer_in, w, 1, noArray(), 0, layer_out);
                calc_activ_func( layer_out, weights[j] );

                layer_in = layer_out;
            }

            layer_out = outputs.rowRange(i, i + dn);
            scale_output( layer_in, layer_out );
        }

        if( n == 1 )
        {
            int maxIdx[] = {0, 0};
            minMaxIdx(outputs, 0, 0, 0, maxIdx);
            return (float)(maxIdx[0] + maxIdx[1]);
        }

        return 0.f;
    }

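    // apply the stored per-column input scaling (x*scale + shift); the destination is always CV_64F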
    void scale_input( const Mat& _src, Mat& _dst ) const
    {
        int cols = _src.cols;
        const double* w = weights[0].ptr<double>();

        if( _src.type() == CV_32F )
        {
            for( int i = 0; i < _src.rows; i++ )
            {
                const float* src = _src.ptr<float>(i);
                double* dst = _dst.ptr<double>(i);
                for( int j = 0; j < cols; j++ )
                    dst[j] = src[j]*w[j*2] + w[j*2+1];
            }
        }
        else
        {
            for( int i = 0; i < _src.rows; i++ )
            {
                const double* src = _src.ptr<double>(i);
                double* dst = _dst.ptr<double>(i);
                for( int j = 0; j < cols; j++ )
                    dst[j] = src[j]*w[j*2] + w[j*2+1];
            }
        }
    }

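    // map the last layer's activations back to the original response range using the stored output scaling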
    void scale_output( const Mat& _src, Mat& _dst ) const
    {
        int cols = _src.cols;
        const double* w = weights[layer_count()].ptr<double>();

        if( _dst.type() == CV_32F )
        {
            for( int i = 0; i < _src.rows; i++ )
            {
                const double* src = _src.ptr<double>(i);
                float* dst = _dst.ptr<float>(i);
                for( int j = 0; j < cols; j++ )
                    dst[j] = (float)(src[j]*w[j*2] + w[j*2+1]);
            }
        }
        else
        {
            for( int i = 0; i < _src.rows; i++ )
            {
                const double* src = _src.ptr<double>(i);
                double* dst = _dst.ptr<double>(i);
                for( int j = 0; j < cols; j++ )
                    dst[j] = src[j]*w[j*2] + w[j*2+1];
            }
        }
    }

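    // apply the activation function in-place to the weighted sums;
    // the last row of w holds the per-neuron bias terms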
    void calc_activ_func( Mat& sums, const Mat& w ) const
    {
        const double* bias = w.ptr<double>(w.rows-1);
        int i, j, n = sums.rows, cols = sums.cols;
        double scale = 0, scale2 = f_param2;

        switch( activ_func )
        {
            case IDENTITY:
                scale = 1.;
                break;
            case SIGMOID_SYM:
                scale = -f_param1;
                break;
            case GAUSSIAN:
                scale = -f_param1*f_param1;
                break;
            default:
                ;
        }

        CV_Assert( sums.isContinuous() );

        if( activ_func != GAUSSIAN )
        {
            for( i = 0; i < n; i++ )
            {
                double* data = sums.ptr<double>(i);
                for( j = 0; j < cols; j++ )
                    data[j] = (data[j] + bias[j])*scale;
            }

            if( activ_func == IDENTITY )
                return;
        }
        else
        {
            for( i = 0; i < n; i++ )
            {
                double* data = sums.ptr<double>(i);
                for( j = 0; j < cols; j++ )
                {
                    double t = data[j] + bias[j];
                    data[j] = t*t*scale;
                }
            }
        }

        exp( sums, sums );

        if( sums.isContinuous() )
        {
            cols *= n;
            n = 1;
        }

        switch( activ_func )
        {
            case SIGMOID_SYM:
                for( i = 0; i < n; i++ )
                {
                    double* data = sums.ptr<double>(i);
                    for( j = 0; j < cols; j++ )
                    {
                        double t = scale2*(1. - data[j])/(1. + data[j]);
                        data[j] = t;
                    }
                }
                break;

            case GAUSSIAN:
                for( i = 0; i < n; i++ )
                {
                    double* data = sums.ptr<double>(i);
                    for( j = 0; j < cols; j++ )
                        data[j] = scale2*data[j];
                }
                break;

            default:
                ;
        }
    }

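    // compute the activations (in-place in _xf) together with their derivatives (in _df);
    // the last row of w holds the per-neuron bias terms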
    void calc_activ_func_deriv( Mat& _xf, Mat& _df, const Mat& w ) const
    {
        const double* bias = w.ptr<double>(w.rows-1);
        int i, j, n = _xf.rows, cols = _xf.cols;

        if( activ_func == IDENTITY )
        {
            for( i = 0; i < n; i++ )
            {
                double* xf = _xf.ptr<double>(i);
                double* df = _df.ptr<double>(i);

                for( j = 0; j < cols; j++ )
                {
                    xf[j] += bias[j];
                    df[j] = 1;
                }
            }
        }
        else if( activ_func == GAUSSIAN )
        {
            double scale = -f_param1*f_param1;
            double scale2 = scale*f_param2;
            for( i = 0; i < n; i++ )
            {
                double* xf = _xf.ptr<double>(i);
                double* df = _df.ptr<double>(i);

                for( j = 0; j < cols; j++ )
                {
                    double t = xf[j] + bias[j];
                    df[j] = t*2*scale2;
                    xf[j] = t*t*scale;
                }
            }
            exp( _xf, _xf );

            for( i = 0; i < n; i++ )
            {
                double* xf = _xf.ptr<double>(i);
                double* df = _df.ptr<double>(i);

                for( j = 0; j < cols; j++ )
                    df[j] *= xf[j];
            }
        }
        else
        {
            double scale = f_param1;
            double scale2 = f_param2;

            for( i = 0; i < n; i++ )
            {
                double* xf = _xf.ptr<double>(i);
                double* df = _df.ptr<double>(i);

                for( j = 0; j < cols; j++ )
                {
                    xf[j] = (xf[j] + bias[j])*scale;
                    df[j] = -fabs(xf[j]);
                }
            }

            exp( _df, _df );

            // ((1+exp(-ax))^-1)'=a*((1+exp(-ax))^-2)*exp(-ax);
            // ((1-exp(-ax))/(1+exp(-ax)))'=(a*exp(-ax)*(1+exp(-ax)) + a*exp(-ax)*(1-exp(-ax)))/(1+exp(-ax))^2=
            // 2*a*exp(-ax)/(1+exp(-ax))^2
            scale *= 2*f_param2;
            for( i = 0; i < n; i++ )
            {
                double* xf = _xf.ptr<double>(i);
                double* df = _df.ptr<double>(i);

                for( j = 0; j < cols; j++ )
                {
                    int s0 = xf[j] > 0 ? 1 : -1;
                    double t0 = 1./(1. + df[j]);
                    double t1 = scale*df[j]*t0*t0;
                    t0 *= scale2*(1. - df[j])*s0;
                    df[j] = t1;
                    xf[j] = t0;
                }
            }
        }
    }

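    // compute per-feature standardization coefficients (1/sigma, -mean/sigma) and store them
    // in weights[0]; with NO_INPUT_SCALE an identity mapping is stored, and nothing is
    // recomputed when UPDATE_WEIGHTS is set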
    void calc_input_scale( const Mat& inputs, int flags )
    {
        bool reset_weights = (flags & UPDATE_WEIGHTS) == 0;
        bool no_scale = (flags & NO_INPUT_SCALE) != 0;
        double* scale = weights[0].ptr<double>();
        int count = inputs.rows;

        if( reset_weights )
        {
            int i, j, vcount = layer_sizes[0];
            int type = inputs.type();
            double a = no_scale ? 1. : 0.;

            for( j = 0; j < vcount; j++ )
                scale[2*j] = a, scale[j*2+1] = 0.;

            if( no_scale )
                return;

            for( i = 0; i < count; i++ )
            {
                const uchar* p = inputs.ptr(i);
                const float* f = (const float*)p;
                const double* d = (const double*)p;
                for( j = 0; j < vcount; j++ )
                {
                    double t = type == CV_32F ? (double)f[j] : d[j];
                    scale[j*2] += t;
                    scale[j*2+1] += t*t;
                }
            }

            for( j = 0; j < vcount; j++ )
            {
                double s = scale[j*2], s2 = scale[j*2+1];
                double m = s/count, sigma2 = s2/count - m*m;
                scale[j*2] = sigma2 < DBL_EPSILON ? 1 : 1./sqrt(sigma2);
                scale[j*2+1] = -m*scale[j*2];
            }
        }
    }

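    // compute the linear mapping between the training responses and the activation output range
    // [min_val, max_val], stored in weights[l_count] and weights[l_count+1]; when updating an
    // existing network, only verify that the new responses stay within the original range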
    void calc_output_scale( const Mat& outputs, int flags )
    {
        int i, j, vcount = layer_sizes.back();
        int type = outputs.type();
        double m = min_val, M = max_val, m1 = min_val1, M1 = max_val1;
        bool reset_weights = (flags & UPDATE_WEIGHTS) == 0;
        bool no_scale = (flags & NO_OUTPUT_SCALE) != 0;
        int l_count = layer_count();
        double* scale = weights[l_count].ptr<double>();
        double* inv_scale = weights[l_count+1].ptr<double>();
        int count = outputs.rows;

        if( reset_weights )
        {
            double a0 = no_scale ? 1 : DBL_MAX, b0 = no_scale ? 0 : -DBL_MAX;

            for( j = 0; j < vcount; j++ )
            {
                scale[2*j] = inv_scale[2*j] = a0;
                scale[j*2+1] = inv_scale[2*j+1] = b0;
            }

            if( no_scale )
                return;
        }

        for( i = 0; i < count; i++ )
        {
            const uchar* p = outputs.ptr(i);
            const float* f = (const float*)p;
            const double* d = (const double*)p;

            for( j = 0; j < vcount; j++ )
            {
                double t = type == CV_32F ? (double)f[j] : d[j];

                if( reset_weights )
                {
                    double mj = scale[j*2], Mj = scale[j*2+1];
                    if( mj > t ) mj = t;
                    if( Mj < t ) Mj = t;

                    scale[j*2] = mj;
                    scale[j*2+1] = Mj;
                }
                else if( !no_scale )
                {
                    t = t*inv_scale[j*2] + inv_scale[2*j+1];
                    if( t < m1 || t > M1 )
                        CV_Error( CV_StsOutOfRange,
                                 "Some of the new output training vector components exceed the original range too much" );
                }
            }
        }

        if( reset_weights )
            for( j = 0; j < vcount; j++ )
            {
                // map mj..Mj to m..M
                double mj = scale[j*2], Mj = scale[j*2+1];
                double a, b;
                double delta = Mj - mj;
                if( delta < DBL_EPSILON )
                    a = 1, b = (M + m - Mj - mj)*0.5;
                else
                    a = (M - m)/delta, b = m - mj*a;
                inv_scale[j*2] = a; inv_scale[j*2+1] = b;
                a = 1./a; b = -b*a;
                scale[j*2] = a; scale[j*2+1] = b;
            }
    }

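    // validate the training data, normalize the sample weights to sum to 1
    // and (re)compute the input/output scaling coefficients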
    void prepare_to_train( const Mat& inputs, const Mat& outputs,
                           Mat& sample_weights, int flags )
    {
        if( layer_sizes.empty() )
            CV_Error( CV_StsError,
                     "The network has not been created. Use method create or the appropriate constructor" );

        if( (inputs.type() != CV_32F && inputs.type() != CV_64F) ||
            inputs.cols != layer_sizes[0] )
            CV_Error( CV_StsBadArg,
                     "input training data should be a floating-point matrix with "
                     "the number of rows equal to the number of training samples and "
                     "the number of columns equal to the size of 0-th (input) layer" );

        if( (outputs.type() != CV_32F && outputs.type() != CV_64F) ||
            outputs.cols != layer_sizes.back() )
            CV_Error( CV_StsBadArg,
                     "output training data should be a floating-point matrix with "
                     "the number of rows equal to the number of training samples and "
                     "the number of columns equal to the size of last (output) layer" );

        if( inputs.rows != outputs.rows )
            CV_Error( CV_StsUnmatchedSizes, "The numbers of input and output samples do not match" );

        Mat temp;
        double s = sum(sample_weights)[0];
        sample_weights.convertTo(temp, CV_64F, 1./s);
        sample_weights = temp;

        calc_input_scale( inputs, flags );
        calc_output_scale( outputs, flags );
    }

    bool train( const Ptr<TrainData>& trainData, int flags )
    {
        const int MAX_ITER = 1000;
        const double DEFAULT_EPSILON = FLT_EPSILON;

        // initialize training data
        Mat inputs = trainData->getTrainSamples();
        Mat outputs = trainData->getTrainResponses();
        Mat sw = trainData->getTrainSampleWeights();
        prepare_to_train( inputs, outputs, sw, flags );

        // ... and initialize the weights, unless we are updating an existing network
        if( !(flags & UPDATE_WEIGHTS) )
            init_weights();

        TermCriteria termcrit;
        termcrit.type = TermCriteria::COUNT + TermCriteria::EPS;
        termcrit.maxCount = std::max((params.termCrit.type & CV_TERMCRIT_ITER ? params.termCrit.maxCount : MAX_ITER), 1);
        termcrit.epsilon = std::max((params.termCrit.type & CV_TERMCRIT_EPS ? params.termCrit.epsilon : DEFAULT_EPSILON), DBL_EPSILON);

        int iter = params.trainMethod == ANN_MLP::BACKPROP ?
            train_backprop( inputs, outputs, sw, termcrit ) :
            train_rprop( inputs, outputs, sw, termcrit );

        trained = iter > 0;
        return trained;
    }

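    // online (per-sample) back-propagation with momentum; samples are visited in a
    // reshuffled order every epoch; returns the number of completed epochs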
    int train_backprop( const Mat& inputs, const Mat& outputs, const Mat& _sw, TermCriteria termCrit )
    {
        int i, j, k;
        double prev_E = DBL_MAX*0.5, E = 0;
        int itype = inputs.type(), otype = outputs.type();

        int count = inputs.rows;

        int iter = -1, max_iter = termCrit.maxCount*count;
        double epsilon = termCrit.epsilon*count;

        int l_count = layer_count();
        int ivcount = layer_sizes[0];
        int ovcount = layer_sizes.back();

        // allocate buffers
        vector<vector<double> > x(l_count);
        vector<vector<double> > df(l_count);
        vector<Mat> dw(l_count);

        for( i = 0; i < l_count; i++ )
        {
            int n = layer_sizes[i];
            x[i].resize(n+1);
            df[i].resize(n);
            dw[i] = Mat::zeros(weights[i].size(), CV_64F);
        }

        Mat _idx_m(1, count, CV_32S);
        int* _idx = _idx_m.ptr<int>();
        for( i = 0; i < count; i++ )
            _idx[i] = i;

        AutoBuffer<double> _buf(max_lsize*2);
        double* buf[] = { _buf, (double*)_buf + max_lsize };

        const double* sw = _sw.empty() ? 0 : _sw.ptr<double>();

        // run back-propagation loop
        /*
         y_i = w_i*x_{i-1}
         x_i = f(y_i)
         E = 1/2*||u - x_N||^2
         grad_N = (x_N - u)*f'(y_N)
         dw_i(t) = momentum*dw_i(t-1) + dw_scale*x_{i-1}*grad_i
         w_i(t+1) = w_i(t) + dw_i(t)
         grad_{i-1} = w_i^t*grad_i
        */
        for( iter = 0; iter < max_iter; iter++ )
        {
            int idx = iter % count;
            double sweight = sw ? count*sw[idx] : 1.;

            if( idx == 0 )
            {
                //printf("%d. E = %g\n", iter/count, E);
                if( fabs(prev_E - E) < epsilon )
                    break;
                prev_E = E;
                E = 0;

                // shuffle indices
                for( i = 0; i < count; i++ )
                {
                    j = rng.uniform(0, count);
                    k = rng.uniform(0, count);
                    std::swap(_idx[j], _idx[k]);
                }
            }

            idx = _idx[idx];

            const uchar* x0data_p = inputs.ptr(idx);
            const float* x0data_f = (const float*)x0data_p;
            const double* x0data_d = (const double*)x0data_p;

            double* w = weights[0].ptr<double>();
            for( j = 0; j < ivcount; j++ )
                x[0][j] = (itype == CV_32F ? (double)x0data_f[j] : x0data_d[j])*w[j*2] + w[j*2 + 1];

            Mat x1( 1, ivcount, CV_64F, &x[0][0] );

            // forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i])
            for( i = 1; i < l_count; i++ )
            {
                int n = layer_sizes[i];
                Mat x2(1, n, CV_64F, &x[i][0] );
                Mat _w = weights[i].rowRange(0, x1.cols);
                gemm(x1, _w, 1, noArray(), 0, x2);
                Mat _df(1, n, CV_64F, &df[i][0] );
                calc_activ_func_deriv( x2, _df, weights[i] );
                x1 = x2;
            }

            Mat grad1( 1, ovcount, CV_64F, buf[l_count&1] );
            w = weights[l_count+1].ptr<double>();

            // calculate error
            const uchar* udata_p = outputs.ptr(idx);
            const float* udata_f = (const float*)udata_p;
            const double* udata_d = (const double*)udata_p;

            double* gdata = grad1.ptr<double>();
            for( k = 0; k < ovcount; k++ )
            {
                double t = (otype == CV_32F ? (double)udata_f[k] : udata_d[k])*w[k*2] + w[k*2+1] - x[l_count-1][k];
                gdata[k] = t*sweight;
                E += t*t;
            }
            E *= sweight;

            // backward pass, update weights
            for( i = l_count-1; i > 0; i-- )
            {
                int n1 = layer_sizes[i-1], n2 = layer_sizes[i];
                Mat _df(1, n2, CV_64F, &df[i][0]);
                multiply( grad1, _df, grad1 );
                Mat _x(n1+1, 1, CV_64F, &x[i-1][0]);
                x[i-1][n1] = 1.;
                gemm( _x, grad1, params.bpDWScale, dw[i], params.bpMomentScale, dw[i] );
                add( weights[i], dw[i], weights[i] );
                if( i > 1 )
                {
                    Mat grad2(1, n1, CV_64F, buf[i&1]);
                    Mat _w = weights[i].rowRange(0, n1);
                    gemm( grad1, _w, 1, noArray(), 0, grad2, GEMM_2_T );
                    grad1 = grad2;
                }
            }
        }

        iter /= count;
        return iter;
    }

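    // parallel loop body for RPROP: each invocation processes a range of sample chunks,
    // accumulating dE/dw for every layer and the total squared error under ann->mtx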
    struct RPropLoop : public ParallelLoopBody
    {
        RPropLoop(ANN_MLPImpl* _ann,
                  const Mat& _inputs, const Mat& _outputs, const Mat& _sw,
                  int _dcount0, vector<Mat>& _dEdw, double* _E)
        {
            ann = _ann;
            inputs = _inputs;
            outputs = _outputs;
            sw = _sw.ptr<double>();
            dcount0 = _dcount0;
            dEdw = &_dEdw;
            pE = _E;
        }

        ANN_MLPImpl* ann;
        vector<Mat>* dEdw;
        Mat inputs, outputs;
        const double* sw;
        int dcount0;
        double* pE;

        void operator()( const Range& range ) const
        {
            double inv_count = 1./inputs.rows;
            int ivcount = ann->layer_sizes.front();
            int ovcount = ann->layer_sizes.back();
            int itype = inputs.type(), otype = outputs.type();
            int count = inputs.rows;
            int i, j, k, l_count = ann->layer_count();
            vector<vector<double> > x(l_count);
            vector<vector<double> > df(l_count);
            vector<double> _buf(ann->max_lsize*dcount0*2);
            double* buf[] = { &_buf[0], &_buf[ann->max_lsize*dcount0] };
            double E = 0;

            for( i = 0; i < l_count; i++ )
            {
                x[i].resize(ann->layer_sizes[i]*dcount0);
                df[i].resize(ann->layer_sizes[i]*dcount0);
            }

            for( int si = range.start; si < range.end; si++ )
            {
                int i0 = si*dcount0, i1 = std::min((si + 1)*dcount0, count);
                int dcount = i1 - i0;
                const double* w = ann->weights[0].ptr<double>();

                // grab and preprocess input data
                for( i = 0; i < dcount; i++ )
                {
                    const uchar* x0data_p = inputs.ptr(i0 + i);
                    const float* x0data_f = (const float*)x0data_p;
                    const double* x0data_d = (const double*)x0data_p;

                    double* xdata = &x[0][i*ivcount];
                    for( j = 0; j < ivcount; j++ )
                        xdata[j] = (itype == CV_32F ? (double)x0data_f[j] : x0data_d[j])*w[j*2] + w[j*2+1];
                }
                Mat x1(dcount, ivcount, CV_64F, &x[0][0]);

                // forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i])
                for( i = 1; i < l_count; i++ )
                {
                    Mat x2( dcount, ann->layer_sizes[i], CV_64F, &x[i][0] );
                    Mat _w = ann->weights[i].rowRange(0, x1.cols);
                    gemm( x1, _w, 1, noArray(), 0, x2 );
                    Mat _df( x2.size(), CV_64F, &df[i][0] );
                    ann->calc_activ_func_deriv( x2, _df, ann->weights[i] );
                    x1 = x2;
                }

                Mat grad1(dcount, ovcount, CV_64F, buf[l_count & 1]);

                w = ann->weights[l_count+1].ptr<double>();

                // calculate error
                for( i = 0; i < dcount; i++ )
                {
                    const uchar* udata_p = outputs.ptr(i0+i);
                    const float* udata_f = (const float*)udata_p;
                    const double* udata_d = (const double*)udata_p;

                    const double* xdata = &x[l_count-1][i*ovcount];
                    double* gdata = grad1.ptr<double>(i);
                    double sweight = sw ? sw[i0+i] : inv_count, E1 = 0;

                    for( j = 0; j < ovcount; j++ )
                    {
                        double t = (otype == CV_32F ? (double)udata_f[j] : udata_d[j])*w[j*2] + w[j*2+1] - xdata[j];
                        gdata[j] = t*sweight;
                        E1 += t*t;
                    }
                    E += sweight*E1;
                }

                for( i = l_count-1; i > 0; i-- )
                {
                    int n1 = ann->layer_sizes[i-1], n2 = ann->layer_sizes[i];
                    Mat _df(dcount, n2, CV_64F, &df[i][0]);
                    multiply(grad1, _df, grad1);

                    {
                        AutoLock lock(ann->mtx);
                        Mat _dEdw = dEdw->at(i).rowRange(0, n1);
                        x1 = Mat(dcount, n1, CV_64F, &x[i-1][0]);
                        gemm(x1, grad1, 1, _dEdw, 1, _dEdw, GEMM_1_T);

                        // update bias part of dEdw
                        double* dst = dEdw->at(i).ptr<double>(n1);
                        for( k = 0; k < dcount; k++ )
                        {
                            const double* src = grad1.ptr<double>(k);
                            for( j = 0; j < n2; j++ )
                                dst[j] += src[j];
                        }
                    }

                    Mat grad2( dcount, n1, CV_64F, buf[i&1] );
                    if( i > 1 )
                    {
                        Mat _w = ann->weights[i].rowRange(0, n1);
                        gemm(grad1, _w, 1, noArray(), 0, grad2, GEMM_2_T);
                    }
                    grad1 = grad2;
                }
            }
            {
                AutoLock lock(ann->mtx);
                *pE += E;
            }
        }
    };

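    // batch RPROP training: in each iteration dE/dw is accumulated over all samples in parallel,
    // then every step size dw is grown or shrunk depending on the sign of the gradient;
    // returns the number of performed iterations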
    int train_rprop( const Mat& inputs, const Mat& outputs, const Mat& _sw, TermCriteria termCrit )
    {
        const int max_buf_size = 1 << 16;
        int i, iter = -1, count = inputs.rows;

        double prev_E = DBL_MAX*0.5;

        int max_iter = termCrit.maxCount;
        double epsilon = termCrit.epsilon;
        double dw_plus = params.rpDWPlus;
        double dw_minus = params.rpDWMinus;
        double dw_min = params.rpDWMin;
        double dw_max = params.rpDWMax;

        int l_count = layer_count();

        // allocate buffers
        vector<Mat> dw(l_count), dEdw(l_count), prev_dEdw_sign(l_count);

        int total = 0;
        for( i = 0; i < l_count; i++ )
        {
            total += layer_sizes[i];
            dw[i].create(weights[i].size(), CV_64F);
            dw[i].setTo(Scalar::all(params.rpDW0));
            prev_dEdw_sign[i] = Mat::zeros(weights[i].size(), CV_8S);
            dEdw[i] = Mat::zeros(weights[i].size(), CV_64F);
        }

        int dcount0 = max_buf_size/(2*total);
        dcount0 = std::max( dcount0, 1 );
        dcount0 = std::min( dcount0, count );
        int chunk_count = (count + dcount0 - 1)/dcount0;

        // run rprop loop
        /*
         y_i(t) = w_i(t)*x_{i-1}(t)
         x_i(t) = f(y_i(t))
         E = sum_over_all_samples(1/2*||u - x_N||^2)
         grad_N = (x_N - u)*f'(y_N)

                       { std::min(dw_i{jk}(t-1)*dw_plus, dw_max),  if dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) > 0
         dw_i{jk}(t) = { std::max(dw_i{jk}(t-1)*dw_minus, dw_min), if dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) < 0
                       { dw_i{jk}(t-1),                            otherwise

         if (dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) < 0)
            dE/dw_i{jk}(t) <- 0
         else
            w_i{jk}(t+1) = w_i{jk}(t) + dw_i{jk}(t)
         grad_{i-1}(t) = w_i^t(t)*grad_i(t)
         */
        for( iter = 0; iter < max_iter; iter++ )
        {
            double E = 0;

            for( i = 0; i < l_count; i++ )
                dEdw[i].setTo(Scalar::all(0));

            // first, iterate through all the samples and compute dEdw
            RPropLoop invoker(this, inputs, outputs, _sw, dcount0, dEdw, &E);
            parallel_for_(Range(0, chunk_count), invoker);
            //invoker(Range(0, chunk_count));

            // now update weights
            for( i = 1; i < l_count; i++ )
            {
                int n1 = layer_sizes[i-1], n2 = layer_sizes[i];
                for( int k = 0; k <= n1; k++ )
                {
                    CV_Assert(weights[i].size() == Size(n2, n1+1));
                    double* wk = weights[i].ptr<double>(k);
                    double* dwk = dw[i].ptr<double>(k);
                    double* dEdwk = dEdw[i].ptr<double>(k);
                    schar* prevEk = prev_dEdw_sign[i].ptr<schar>(k);

                    for( int j = 0; j < n2; j++ )
                    {
                        double Eval = dEdwk[j];
                        double dval = dwk[j];
                        double wval = wk[j];
                        int s = CV_SIGN(Eval);
                        int ss = prevEk[j]*s;
                        if( ss > 0 )
                        {
                            dval *= dw_plus;
                            dval = std::min( dval, dw_max );
                            dwk[j] = dval;
                            wk[j] = wval + dval*s;
                        }
                        else if( ss < 0 )
                        {
                            dval *= dw_minus;
                            dval = std::max( dval, dw_min );
                            prevEk[j] = 0;
                            dwk[j] = dval;
                            wk[j] = wval + dval*s;
                        }
                        else
                        {
                            prevEk[j] = (schar)s;
                            wk[j] = wval + dval*s;
                        }
                        dEdwk[j] = 0.;
                    }
                }
            }

            //printf("%d. E = %g\n", iter, E);
            if( fabs(prev_E - E) < epsilon )
                break;
            prev_E = E;
        }

        return iter;
    }

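    // save the activation function, scaling ranges and training parameters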
    void write_params( FileStorage& fs ) const
    {
        const char* activ_func_name = activ_func == IDENTITY ? "IDENTITY" :
                                      activ_func == SIGMOID_SYM ? "SIGMOID_SYM" :
                                      activ_func == GAUSSIAN ? "GAUSSIAN" : 0;

        if( activ_func_name )
            fs << "activation_function" << activ_func_name;
        else
            fs << "activation_function_id" << activ_func;

        if( activ_func != IDENTITY )
        {
            fs << "f_param1" << f_param1;
            fs << "f_param2" << f_param2;
        }

        fs << "min_val" << min_val << "max_val" << max_val << "min_val1" << min_val1 << "max_val1" << max_val1;

        fs << "training_params" << "{";
        if( params.trainMethod == ANN_MLP::BACKPROP )
        {
            fs << "train_method" << "BACKPROP";
            fs << "dw_scale" << params.bpDWScale;
            fs << "moment_scale" << params.bpMomentScale;
        }
        else if( params.trainMethod == ANN_MLP::RPROP )
        {
            fs << "train_method" << "RPROP";
            fs << "dw0" << params.rpDW0;
            fs << "dw_plus" << params.rpDWPlus;
            fs << "dw_minus" << params.rpDWMinus;
            fs << "dw_min" << params.rpDWMin;
            fs << "dw_max" << params.rpDWMax;
        }
        else
            CV_Error(CV_StsError, "Unknown training method");

        fs << "term_criteria" << "{";
        if( params.termCrit.type & TermCriteria::EPS )
            fs << "epsilon" << params.termCrit.epsilon;
        if( params.termCrit.type & TermCriteria::COUNT )
            fs << "iterations" << params.termCrit.maxCount;
        fs << "}" << "}";
    }

    void write( FileStorage& fs ) const
    {
        if( layer_sizes.empty() )
            return;
        int i, l_count = layer_count();

        fs << "layer_sizes" << layer_sizes;

        write_params( fs );

        size_t esz = weights[0].elemSize();

        fs << "input_scale" << "[";
        fs.writeRaw("d", weights[0].ptr(), weights[0].total()*esz);

        fs << "]" << "output_scale" << "[";
        fs.writeRaw("d", weights[l_count].ptr(), weights[l_count].total()*esz);

        fs << "]" << "inv_output_scale" << "[";
        fs.writeRaw("d", weights[l_count+1].ptr(), weights[l_count+1].total()*esz);

        fs << "]" << "weights" << "[";
        for( i = 1; i < l_count; i++ )
        {
            fs << "[";
            fs.writeRaw("d", weights[i].ptr(), weights[i].total()*esz);
            fs << "]";
        }
        fs << "]";
    }

    void read_params( const FileNode& fn )
    {
        String activ_func_name = (String)fn["activation_function"];
        if( !activ_func_name.empty() )
        {
            activ_func = activ_func_name == "SIGMOID_SYM" ? SIGMOID_SYM :
                         activ_func_name == "IDENTITY" ? IDENTITY :
                         activ_func_name == "GAUSSIAN" ? GAUSSIAN : -1;
            CV_Assert( activ_func >= 0 );
        }
        else
            activ_func = (int)fn["activation_function_id"];

        f_param1 = (double)fn["f_param1"];
        f_param2 = (double)fn["f_param2"];

        setActivationFunction( activ_func, f_param1, f_param2 );

        min_val = (double)fn["min_val"];
        max_val = (double)fn["max_val"];
        min_val1 = (double)fn["min_val1"];
        max_val1 = (double)fn["max_val1"];

        FileNode tpn = fn["training_params"];
        params = AnnParams();

        if( !tpn.empty() )
        {
            String tmethod_name = (String)tpn["train_method"];

            if( tmethod_name == "BACKPROP" )
            {
                params.trainMethod = ANN_MLP::BACKPROP;
                params.bpDWScale = (double)tpn["dw_scale"];
                params.bpMomentScale = (double)tpn["moment_scale"];
            }
            else if( tmethod_name == "RPROP" )
            {
                params.trainMethod = ANN_MLP::RPROP;
                params.rpDW0 = (double)tpn["dw0"];
                params.rpDWPlus = (double)tpn["dw_plus"];
                params.rpDWMinus = (double)tpn["dw_minus"];
                params.rpDWMin = (double)tpn["dw_min"];
                params.rpDWMax = (double)tpn["dw_max"];
            }
            else
                CV_Error(CV_StsParseError, "Unknown training method (should be BACKPROP or RPROP)");

            FileNode tcn = tpn["term_criteria"];
            if( !tcn.empty() )
            {
                FileNode tcn_e = tcn["epsilon"];
                FileNode tcn_i = tcn["iterations"];
                params.termCrit.type = 0;
                if( !tcn_e.empty() )
                {
                    params.termCrit.type |= TermCriteria::EPS;
                    params.termCrit.epsilon = (double)tcn_e;
                }
                if( !tcn_i.empty() )
                {
                    params.termCrit.type |= TermCriteria::COUNT;
                    params.termCrit.maxCount = (int)tcn_i;
                }
            }
        }
    }

    void read( const FileNode& fn )
    {
        clear();

        vector<int> _layer_sizes;
        readVectorOrMat(fn["layer_sizes"], _layer_sizes);
        setLayerSizes( _layer_sizes );

        int i, l_count = layer_count();
        read_params(fn);

        size_t esz = weights[0].elemSize();

        FileNode w = fn["input_scale"];
        w.readRaw("d", weights[0].ptr(), weights[0].total()*esz);

        w = fn["output_scale"];
        w.readRaw("d", weights[l_count].ptr(), weights[l_count].total()*esz);

        w = fn["inv_output_scale"];
        w.readRaw("d", weights[l_count+1].ptr(), weights[l_count+1].total()*esz);

        FileNodeIterator w_it = fn["weights"].begin();

        for( i = 1; i < l_count; i++, ++w_it )
            (*w_it).readRaw("d", weights[i].ptr(), weights[i].total()*esz);
        trained = true;
    }

    Mat getWeights(int layerIdx) const
    {
        CV_Assert( 0 <= layerIdx && layerIdx < (int)weights.size() );
        return weights[layerIdx];
    }

    bool isTrained() const
    {
        return trained;
    }

    bool isClassifier() const
    {
        return false;
    }

    int getVarCount() const
    {
        return layer_sizes.empty() ? 0 : layer_sizes[0];
    }

    String getDefaultName() const
    {
        return "opencv_ml_ann_mlp";
    }

    vector<int> layer_sizes;
    vector<Mat> weights;
    double f_param1, f_param2;
    double min_val, max_val, min_val1, max_val1;
    int activ_func;
    int max_lsize, max_buf_sz;
    AnnParams params;
    RNG rng;
    Mutex mtx;
    bool trained;
};


Ptr<ANN_MLP> ANN_MLP::create()
{
    return makePtr<ANN_MLPImpl>();
}

}}

/* End of file. */