1/*M///////////////////////////////////////////////////////////////////////////////////////
2//
3//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4//
5//  By downloading, copying, installing or using the software you agree to this license.
6//  If you do not agree to this license, do not download, install,
7//  copy or use the software.
8//
9//
10//                        Intel License Agreement
11//
12// Copyright (C) 2000, Intel Corporation, all rights reserved.
13// Third party copyrights are property of their respective owners.
14//
15// Redistribution and use in source and binary forms, with or without modification,
16// are permitted provided that the following conditions are met:
17//
18//   * Redistribution's of source code must retain the above copyright notice,
19//     this list of conditions and the following disclaimer.
20//
21//   * Redistribution's in binary form must reproduce the above copyright notice,
22//     this list of conditions and the following disclaimer in the documentation
23//     and/or other materials provided with the distribution.
24//
25//   * The name of Intel Corporation may not be used to endorse or promote products
26//     derived from this software without specific prior written permission.
27//
28// This software is provided by the copyright holders and contributors "as is" and
29// any express or implied warranties, including, but not limited to, the implied
30// warranties of merchantability and fitness for a particular purpose are disclaimed.
31// In no event shall the Intel Corporation or contributors be liable for any direct,
32// indirect, incidental, special, exemplary, or consequential damages
33// (including, but not limited to, procurement of substitute goods or services;
34// loss of use, data, or profits; or business interruption) however caused
35// and on any theory of liability, whether in contract, strict liability,
36// or tort (including negligence or otherwise) arising in any way out of
37// the use of this software, even if advised of the possibility of such damage.
38//
39//M*/
40
41#include "old_ml_precomp.hpp"
42
43
44CvStatModel::CvStatModel()
45{
46    default_model_name = "my_stat_model";
47}
48
49
50CvStatModel::~CvStatModel()
51{
52    clear();
53}
54
55
56void CvStatModel::clear()
57{
58}
59
60
61void CvStatModel::save( const char* filename, const char* name ) const
62{
63    CvFileStorage* fs = 0;
64
65    CV_FUNCNAME( "CvStatModel::save" );
66
67    __BEGIN__;
68
69    CV_CALL( fs = cvOpenFileStorage( filename, 0, CV_STORAGE_WRITE ));
70    if( !fs )
71        CV_ERROR( CV_StsError, "Could not open the file storage. Check the path and permissions" );
72
73    write( fs, name ? name : default_model_name );
74
75    __END__;
76
77    cvReleaseFileStorage( &fs );
78}
79
80
81void CvStatModel::load( const char* filename, const char* name )
82{
83    CvFileStorage* fs = 0;
84
85    CV_FUNCNAME( "CvAlgorithm::load" );
86
87    __BEGIN__;
88
89    CvFileNode* model_node = 0;
90
91    CV_CALL( fs = cvOpenFileStorage( filename, 0, CV_STORAGE_READ ));
92    if( !fs )
93        EXIT;
94
95    if( name )
96        model_node = cvGetFileNodeByName( fs, 0, name );
97    else
98    {
99        CvFileNode* root = cvGetRootFileNode( fs );
100        if( root->data.seq->total > 0 )
101            model_node = (CvFileNode*)cvGetSeqElem( root->data.seq, 0 );
102    }
103
104    read( fs, model_node );
105
106    __END__;
107
108    cvReleaseFileStorage( &fs );
109}
110
111
112void CvStatModel::write( CvFileStorage*, const char* ) const
113{
114    OPENCV_ERROR( CV_StsNotImplemented, "CvStatModel::write", "" );
115}
116
117
118void CvStatModel::read( CvFileStorage*, CvFileNode* )
119{
120    OPENCV_ERROR( CV_StsNotImplemented, "CvStatModel::read", "" );
121}
122
123
124/* Calculates upper triangular matrix S, where A is a symmetrical matrix A=S'*S */
125static void cvChol( CvMat* A, CvMat* S )
126{
127    int dim = A->rows;
128
129    int i, j, k;
130    float sum;
131
132    for( i = 0; i < dim; i++ )
133    {
134        for( j = 0; j < i; j++ )
135            CV_MAT_ELEM(*S, float, i, j) = 0;
136
137        sum = 0;
138        for( k = 0; k < i; k++ )
139            sum += CV_MAT_ELEM(*S, float, k, i) * CV_MAT_ELEM(*S, float, k, i);
140
141        CV_MAT_ELEM(*S, float, i, i) = (float)sqrt(CV_MAT_ELEM(*A, float, i, i) - sum);
142
143        for( j = i + 1; j < dim; j++ )
144        {
145            sum = 0;
146            for( k = 0; k < i; k++ )
147                sum += CV_MAT_ELEM(*S, float, k, i) * CV_MAT_ELEM(*S, float, k, j);
148
149            CV_MAT_ELEM(*S, float, i, j) =
150                (CV_MAT_ELEM(*A, float, i, j) - sum) / CV_MAT_ELEM(*S, float, i, i);
151
152        }
153    }
154}
155
156/* Generates <sample> from multivariate normal distribution, where <mean> - is an
157   average row vector, <cov> - symmetric covariation matrix */
158CV_IMPL void cvRandMVNormal( CvMat* mean, CvMat* cov, CvMat* sample, CvRNG* rng )
159{
160    int dim = sample->cols;
161    int amount = sample->rows;
162
163    CvRNG state = rng ? *rng : cvRNG( cvGetTickCount() );
164    cvRandArr(&state, sample, CV_RAND_NORMAL, cvScalarAll(0), cvScalarAll(1) );
165
166    CvMat* utmat = cvCreateMat(dim, dim, sample->type);
167    CvMat* vect = cvCreateMatHeader(1, dim, sample->type);
168
169    cvChol(cov, utmat);
170
171    int i;
172    for( i = 0; i < amount; i++ )
173    {
174        cvGetRow(sample, vect, i);
175        cvMatMulAdd(vect, utmat, mean, vect);
176    }
177
178    cvReleaseMat(&vect);
179    cvReleaseMat(&utmat);
180}
181
182
183/* Generates <sample> of <amount> points from a discrete variate xi,
184   where Pr{xi = k} == probs[k], 0 < k < len - 1. */
185static void cvRandSeries( float probs[], int len, int sample[], int amount )
186{
187    CvMat* univals = cvCreateMat(1, amount, CV_32FC1);
188    float* knots = (float*)cvAlloc( len * sizeof(float) );
189
190    int i, j;
191
192    CvRNG state = cvRNG(-1);
193    cvRandArr(&state, univals, CV_RAND_UNI, cvScalarAll(0), cvScalarAll(1) );
194
195    knots[0] = probs[0];
196    for( i = 1; i < len; i++ )
197        knots[i] = knots[i - 1] + probs[i];
198
199    for( i = 0; i < amount; i++ )
200        for( j = 0; j < len; j++ )
201        {
202            if ( CV_MAT_ELEM(*univals, float, 0, i) <= knots[j] )
203            {
204                sample[i] = j;
205                break;
206            }
207        }
208
209    cvFree(&knots);
210}
211
212/* Generates <sample> from gaussian mixture distribution */
213CV_IMPL void cvRandGaussMixture( CvMat* means[],
214                                 CvMat* covs[],
215                                 float weights[],
216                                 int clsnum,
217                                 CvMat* sample,
218                                 CvMat* sampClasses )
219{
220    int dim = sample->cols;
221    int amount = sample->rows;
222
223    int i, clss;
224
225    int* sample_clsnum = (int*)cvAlloc( amount * sizeof(int) );
226    CvMat** utmats = (CvMat**)cvAlloc( clsnum * sizeof(CvMat*) );
227    CvMat* vect = cvCreateMatHeader(1, dim, CV_32FC1);
228
229    CvMat* classes;
230    if( sampClasses )
231        classes = sampClasses;
232    else
233        classes = cvCreateMat(1, amount, CV_32FC1);
234
235    CvRNG state = cvRNG(-1);
236    cvRandArr(&state, sample, CV_RAND_NORMAL, cvScalarAll(0), cvScalarAll(1));
237
238    cvRandSeries(weights, clsnum, sample_clsnum, amount);
239
240    for( i = 0; i < clsnum; i++ )
241    {
242        utmats[i] = cvCreateMat(dim, dim, CV_32FC1);
243        cvChol(covs[i], utmats[i]);
244    }
245
246    for( i = 0; i < amount; i++ )
247    {
248        CV_MAT_ELEM(*classes, float, 0, i) = (float)sample_clsnum[i];
249        cvGetRow(sample, vect, i);
250        clss = sample_clsnum[i];
251        cvMatMulAdd(vect, utmats[clss], means[clss], vect);
252    }
253
254    if( !sampClasses )
255        cvReleaseMat(&classes);
256    for( i = 0; i < clsnum; i++ )
257        cvReleaseMat(&utmats[i]);
258    cvFree(&utmats);
259    cvFree(&sample_clsnum);
260    cvReleaseMat(&vect);
261}
262
263
264CvMat* icvGenerateRandomClusterCenters ( int seed, const CvMat* data,
265                                         int num_of_clusters, CvMat* _centers )
266{
267    CvMat* centers = _centers;
268
269    CV_FUNCNAME("icvGenerateRandomClusterCenters");
270    __BEGIN__;
271
272    CvRNG rng;
273    CvMat data_comp, centers_comp;
274    CvPoint minLoc, maxLoc; // Not used, just for function "cvMinMaxLoc"
275    double minVal, maxVal;
276    int i;
277    int dim = data ? data->cols : 0;
278
279    if( ICV_IS_MAT_OF_TYPE(data, CV_32FC1) )
280    {
281        if( _centers && !ICV_IS_MAT_OF_TYPE (_centers, CV_32FC1) )
282        {
283            CV_ERROR(CV_StsBadArg,"");
284        }
285        else if( !_centers )
286            CV_CALL(centers = cvCreateMat (num_of_clusters, dim, CV_32FC1));
287    }
288    else if( ICV_IS_MAT_OF_TYPE(data, CV_64FC1) )
289    {
290        if( _centers && !ICV_IS_MAT_OF_TYPE (_centers, CV_64FC1) )
291        {
292            CV_ERROR(CV_StsBadArg,"");
293        }
294        else if( !_centers )
295            CV_CALL(centers = cvCreateMat (num_of_clusters, dim, CV_64FC1));
296    }
297    else
298        CV_ERROR (CV_StsBadArg,"");
299
300    if( num_of_clusters < 1 )
301        CV_ERROR (CV_StsBadArg,"");
302
303    rng = cvRNG(seed);
304    for (i = 0; i < dim; i++)
305    {
306        CV_CALL(cvGetCol (data, &data_comp, i));
307        CV_CALL(cvMinMaxLoc (&data_comp, &minVal, &maxVal, &minLoc, &maxLoc));
308        CV_CALL(cvGetCol (centers, &centers_comp, i));
309        CV_CALL(cvRandArr (&rng, &centers_comp, CV_RAND_UNI, cvScalarAll(minVal), cvScalarAll(maxVal)));
310    }
311
312    __END__;
313
314    if( (cvGetErrStatus () < 0) || (centers != _centers) )
315        cvReleaseMat (&centers);
316
317    return _centers ? _centers : centers;
318} // end of icvGenerateRandomClusterCenters
319
320// By S. Dilman - begin -
321
322#define ICV_RAND_MAX    4294967296 // == 2^32
323
324// static void cvRandRoundUni (CvMat* center,
325//                              float radius_small,
326//                              float radius_large,
327//                              CvMat* desired_matrix,
328//                              CvRNG* rng_state_ptr)
329// {
330//     float rad, norm, coefficient;
331//     int dim, size, i, j;
332//     CvMat *cov, sample;
333//     CvRNG rng_local;
334
335//     CV_FUNCNAME("cvRandRoundUni");
336//     __BEGIN__
337
338//     rng_local = *rng_state_ptr;
339
340//     CV_ASSERT ((radius_small >= 0) &&
341//                (radius_large > 0) &&
342//                (radius_small <= radius_large));
343//     CV_ASSERT (center && desired_matrix && rng_state_ptr);
344//     CV_ASSERT (center->rows == 1);
345//     CV_ASSERT (center->cols == desired_matrix->cols);
346
347//     dim = desired_matrix->cols;
348//     size = desired_matrix->rows;
349//     cov = cvCreateMat (dim, dim, CV_32FC1);
350//     cvSetIdentity (cov);
351//     cvRandMVNormal (center, cov, desired_matrix, &rng_local);
352
353//     for (i = 0; i < size; i++)
354//     {
355//         rad = (float)(cvRandReal(&rng_local)*(radius_large - radius_small) + radius_small);
356//         cvGetRow (desired_matrix, &sample, i);
357//         norm = (float) cvNorm (&sample, 0, CV_L2);
358//         coefficient = rad / norm;
359//         for (j = 0; j < dim; j++)
360//              CV_MAT_ELEM (sample, float, 0, j) *= coefficient;
361//     }
362
363//     __END__
364
365// }
366
367// By S. Dilman - end -
368
369static int CV_CDECL
370icvCmpIntegers( const void* a, const void* b )
371{
372    return *(const int*)a - *(const int*)b;
373}
374
375
376static int CV_CDECL
377icvCmpIntegersPtr( const void* _a, const void* _b )
378{
379    int a = **(const int**)_a;
380    int b = **(const int**)_b;
381    return (a < b ? -1 : 0)|(a > b);
382}
383
384
385static int icvCmpSparseVecElems( const void* a, const void* b )
386{
387    return ((CvSparseVecElem32f*)a)->idx - ((CvSparseVecElem32f*)b)->idx;
388}
389
390
391CvMat*
392cvPreprocessIndexArray( const CvMat* idx_arr, int data_arr_size, bool check_for_duplicates )
393{
394    CvMat* idx = 0;
395
396    CV_FUNCNAME( "cvPreprocessIndexArray" );
397
398    __BEGIN__;
399
400    int i, idx_total, idx_selected = 0, step, type, prev = INT_MIN, is_sorted = 1;
401    uchar* srcb = 0;
402    int* srci = 0;
403    int* dsti;
404
405    if( !CV_IS_MAT(idx_arr) )
406        CV_ERROR( CV_StsBadArg, "Invalid index array" );
407
408    if( idx_arr->rows != 1 && idx_arr->cols != 1 )
409        CV_ERROR( CV_StsBadSize, "the index array must be 1-dimensional" );
410
411    idx_total = idx_arr->rows + idx_arr->cols - 1;
412    srcb = idx_arr->data.ptr;
413    srci = idx_arr->data.i;
414
415    type = CV_MAT_TYPE(idx_arr->type);
416    step = CV_IS_MAT_CONT(idx_arr->type) ? 1 : idx_arr->step/CV_ELEM_SIZE(type);
417
418    switch( type )
419    {
420    case CV_8UC1:
421    case CV_8SC1:
422        // idx_arr is array of 1's and 0's -
423        // i.e. it is a mask of the selected components
424        if( idx_total != data_arr_size )
425            CV_ERROR( CV_StsUnmatchedSizes,
426            "Component mask should contain as many elements as the total number of input variables" );
427
428        for( i = 0; i < idx_total; i++ )
429            idx_selected += srcb[i*step] != 0;
430
431        if( idx_selected == 0 )
432            CV_ERROR( CV_StsOutOfRange, "No components/input_variables is selected!" );
433
434        break;
435    case CV_32SC1:
436        // idx_arr is array of integer indices of selected components
437        if( idx_total > data_arr_size )
438            CV_ERROR( CV_StsOutOfRange,
439            "index array may not contain more elements than the total number of input variables" );
440        idx_selected = idx_total;
441        // check if sorted already
442        for( i = 0; i < idx_total; i++ )
443        {
444            int val = srci[i*step];
445            if( val >= prev )
446            {
447                is_sorted = 0;
448                break;
449            }
450            prev = val;
451        }
452        break;
453    default:
454        CV_ERROR( CV_StsUnsupportedFormat, "Unsupported index array data type "
455                                           "(it should be 8uC1, 8sC1 or 32sC1)" );
456    }
457
458    CV_CALL( idx = cvCreateMat( 1, idx_selected, CV_32SC1 ));
459    dsti = idx->data.i;
460
461    if( type < CV_32SC1 )
462    {
463        for( i = 0; i < idx_total; i++ )
464            if( srcb[i*step] )
465                *dsti++ = i;
466    }
467    else
468    {
469        for( i = 0; i < idx_total; i++ )
470            dsti[i] = srci[i*step];
471
472        if( !is_sorted )
473            qsort( dsti, idx_total, sizeof(dsti[0]), icvCmpIntegers );
474
475        if( dsti[0] < 0 || dsti[idx_total-1] >= data_arr_size )
476            CV_ERROR( CV_StsOutOfRange, "the index array elements are out of range" );
477
478        if( check_for_duplicates )
479        {
480            for( i = 1; i < idx_total; i++ )
481                if( dsti[i] <= dsti[i-1] )
482                    CV_ERROR( CV_StsBadArg, "There are duplicated index array elements" );
483        }
484    }
485
486    __END__;
487
488    if( cvGetErrStatus() < 0 )
489        cvReleaseMat( &idx );
490
491    return idx;
492}
493
494
495CvMat*
496cvPreprocessVarType( const CvMat* var_type, const CvMat* var_idx,
497                     int var_count, int* response_type )
498{
499    CvMat* out_var_type = 0;
500    CV_FUNCNAME( "cvPreprocessVarType" );
501
502    if( response_type )
503        *response_type = -1;
504
505    __BEGIN__;
506
507    int i, tm_size, tm_step;
508    //int* map = 0;
509    const uchar* src;
510    uchar* dst;
511
512    if( !CV_IS_MAT(var_type) )
513        CV_ERROR( var_type ? CV_StsBadArg : CV_StsNullPtr, "Invalid or absent var_type array" );
514
515    if( var_type->rows != 1 && var_type->cols != 1 )
516        CV_ERROR( CV_StsBadSize, "var_type array must be 1-dimensional" );
517
518    if( !CV_IS_MASK_ARR(var_type))
519        CV_ERROR( CV_StsUnsupportedFormat, "type mask must be 8uC1 or 8sC1 array" );
520
521    tm_size = var_type->rows + var_type->cols - 1;
522    tm_step = var_type->rows == 1 ? 1 : var_type->step/CV_ELEM_SIZE(var_type->type);
523
524    if( /*tm_size != var_count &&*/ tm_size != var_count + 1 )
525        CV_ERROR( CV_StsBadArg,
526        "type mask must be of <input var count> + 1 size" );
527
528    if( response_type && tm_size > var_count )
529        *response_type = var_type->data.ptr[var_count*tm_step] != 0;
530
531    if( var_idx )
532    {
533        if( !CV_IS_MAT(var_idx) || CV_MAT_TYPE(var_idx->type) != CV_32SC1 ||
534            (var_idx->rows != 1 && var_idx->cols != 1) || !CV_IS_MAT_CONT(var_idx->type) )
535            CV_ERROR( CV_StsBadArg, "var index array should be continuous 1-dimensional integer vector" );
536        if( var_idx->rows + var_idx->cols - 1 > var_count )
537            CV_ERROR( CV_StsBadSize, "var index array is too large" );
538        //map = var_idx->data.i;
539        var_count = var_idx->rows + var_idx->cols - 1;
540    }
541
542    CV_CALL( out_var_type = cvCreateMat( 1, var_count, CV_8UC1 ));
543    src = var_type->data.ptr;
544    dst = out_var_type->data.ptr;
545
546    for( i = 0; i < var_count; i++ )
547    {
548        //int idx = map ? map[i] : i;
549        assert( (unsigned)/*idx*/i < (unsigned)tm_size );
550        dst[i] = (uchar)(src[/*idx*/i*tm_step] != 0);
551    }
552
553    __END__;
554
555    return out_var_type;
556}
557
558
559CvMat*
560cvPreprocessOrderedResponses( const CvMat* responses, const CvMat* sample_idx, int sample_all )
561{
562    CvMat* out_responses = 0;
563
564    CV_FUNCNAME( "cvPreprocessOrderedResponses" );
565
566    __BEGIN__;
567
568    int i, r_type, r_step;
569    const int* map = 0;
570    float* dst;
571    int sample_count = sample_all;
572
573    if( !CV_IS_MAT(responses) )
574        CV_ERROR( CV_StsBadArg, "Invalid response array" );
575
576    if( responses->rows != 1 && responses->cols != 1 )
577        CV_ERROR( CV_StsBadSize, "Response array must be 1-dimensional" );
578
579    if( responses->rows + responses->cols - 1 != sample_count )
580        CV_ERROR( CV_StsUnmatchedSizes,
581        "Response array must contain as many elements as the total number of samples" );
582
583    r_type = CV_MAT_TYPE(responses->type);
584    if( r_type != CV_32FC1 && r_type != CV_32SC1 )
585        CV_ERROR( CV_StsUnsupportedFormat, "Unsupported response type" );
586
587    r_step = responses->step ? responses->step / CV_ELEM_SIZE(responses->type) : 1;
588
589    if( r_type == CV_32FC1 && CV_IS_MAT_CONT(responses->type) && !sample_idx )
590    {
591        out_responses = cvCloneMat( responses );
592        EXIT;
593    }
594
595    if( sample_idx )
596    {
597        if( !CV_IS_MAT(sample_idx) || CV_MAT_TYPE(sample_idx->type) != CV_32SC1 ||
598            (sample_idx->rows != 1 && sample_idx->cols != 1) || !CV_IS_MAT_CONT(sample_idx->type) )
599            CV_ERROR( CV_StsBadArg, "sample index array should be continuous 1-dimensional integer vector" );
600        if( sample_idx->rows + sample_idx->cols - 1 > sample_count )
601            CV_ERROR( CV_StsBadSize, "sample index array is too large" );
602        map = sample_idx->data.i;
603        sample_count = sample_idx->rows + sample_idx->cols - 1;
604    }
605
606    CV_CALL( out_responses = cvCreateMat( 1, sample_count, CV_32FC1 ));
607
608    dst = out_responses->data.fl;
609    if( r_type == CV_32FC1 )
610    {
611        const float* src = responses->data.fl;
612        for( i = 0; i < sample_count; i++ )
613        {
614            int idx = map ? map[i] : i;
615            assert( (unsigned)idx < (unsigned)sample_all );
616            dst[i] = src[idx*r_step];
617        }
618    }
619    else
620    {
621        const int* src = responses->data.i;
622        for( i = 0; i < sample_count; i++ )
623        {
624            int idx = map ? map[i] : i;
625            assert( (unsigned)idx < (unsigned)sample_all );
626            dst[i] = (float)src[idx*r_step];
627        }
628    }
629
630    __END__;
631
632    return out_responses;
633}
634
635CvMat*
636cvPreprocessCategoricalResponses( const CvMat* responses,
637    const CvMat* sample_idx, int sample_all,
638    CvMat** out_response_map, CvMat** class_counts )
639{
640    CvMat* out_responses = 0;
641    int** response_ptr = 0;
642
643    CV_FUNCNAME( "cvPreprocessCategoricalResponses" );
644
645    if( out_response_map )
646        *out_response_map = 0;
647
648    if( class_counts )
649        *class_counts = 0;
650
651    __BEGIN__;
652
653    int i, r_type, r_step;
654    int cls_count = 1, prev_cls, prev_i;
655    const int* map = 0;
656    const int* srci;
657    const float* srcfl;
658    int* dst;
659    int* cls_map;
660    int* cls_counts = 0;
661    int sample_count = sample_all;
662
663    if( !CV_IS_MAT(responses) )
664        CV_ERROR( CV_StsBadArg, "Invalid response array" );
665
666    if( responses->rows != 1 && responses->cols != 1 )
667        CV_ERROR( CV_StsBadSize, "Response array must be 1-dimensional" );
668
669    if( responses->rows + responses->cols - 1 != sample_count )
670        CV_ERROR( CV_StsUnmatchedSizes,
671        "Response array must contain as many elements as the total number of samples" );
672
673    r_type = CV_MAT_TYPE(responses->type);
674    if( r_type != CV_32FC1 && r_type != CV_32SC1 )
675        CV_ERROR( CV_StsUnsupportedFormat, "Unsupported response type" );
676
677    r_step = responses->rows == 1 ? 1 : responses->step / CV_ELEM_SIZE(responses->type);
678
679    if( sample_idx )
680    {
681        if( !CV_IS_MAT(sample_idx) || CV_MAT_TYPE(sample_idx->type) != CV_32SC1 ||
682            (sample_idx->rows != 1 && sample_idx->cols != 1) || !CV_IS_MAT_CONT(sample_idx->type) )
683            CV_ERROR( CV_StsBadArg, "sample index array should be continuous 1-dimensional integer vector" );
684        if( sample_idx->rows + sample_idx->cols - 1 > sample_count )
685            CV_ERROR( CV_StsBadSize, "sample index array is too large" );
686        map = sample_idx->data.i;
687        sample_count = sample_idx->rows + sample_idx->cols - 1;
688    }
689
690    CV_CALL( out_responses = cvCreateMat( 1, sample_count, CV_32SC1 ));
691
692    if( !out_response_map )
693        CV_ERROR( CV_StsNullPtr, "out_response_map pointer is NULL" );
694
695    CV_CALL( response_ptr = (int**)cvAlloc( sample_count*sizeof(response_ptr[0])));
696
697    srci = responses->data.i;
698    srcfl = responses->data.fl;
699    dst = out_responses->data.i;
700
701    for( i = 0; i < sample_count; i++ )
702    {
703        int idx = map ? map[i] : i;
704        assert( (unsigned)idx < (unsigned)sample_all );
705        if( r_type == CV_32SC1 )
706            dst[i] = srci[idx*r_step];
707        else
708        {
709            float rf = srcfl[idx*r_step];
710            int ri = cvRound(rf);
711            if( ri != rf )
712            {
713                char buf[100];
714                sprintf( buf, "response #%d is not integral", idx );
715                CV_ERROR( CV_StsBadArg, buf );
716            }
717            dst[i] = ri;
718        }
719        response_ptr[i] = dst + i;
720    }
721
722    qsort( response_ptr, sample_count, sizeof(int*), icvCmpIntegersPtr );
723
724    // count the classes
725    for( i = 1; i < sample_count; i++ )
726        cls_count += *response_ptr[i] != *response_ptr[i-1];
727
728    if( cls_count < 2 )
729        CV_ERROR( CV_StsBadArg, "There is only a single class" );
730
731    CV_CALL( *out_response_map = cvCreateMat( 1, cls_count, CV_32SC1 ));
732
733    if( class_counts )
734    {
735        CV_CALL( *class_counts = cvCreateMat( 1, cls_count, CV_32SC1 ));
736        cls_counts = (*class_counts)->data.i;
737    }
738
739    // compact the class indices and build the map
740    prev_cls = ~*response_ptr[0];
741    cls_count = -1;
742    cls_map = (*out_response_map)->data.i;
743
744    for( i = 0, prev_i = -1; i < sample_count; i++ )
745    {
746        int cur_cls = *response_ptr[i];
747        if( cur_cls != prev_cls )
748        {
749            if( cls_counts && cls_count >= 0 )
750                cls_counts[cls_count] = i - prev_i;
751            cls_map[++cls_count] = prev_cls = cur_cls;
752            prev_i = i;
753        }
754        *response_ptr[i] = cls_count;
755    }
756
757    if( cls_counts )
758        cls_counts[cls_count] = i - prev_i;
759
760    __END__;
761
762    cvFree( &response_ptr );
763
764    return out_responses;
765}
766
767
768const float**
769cvGetTrainSamples( const CvMat* train_data, int tflag,
770                   const CvMat* var_idx, const CvMat* sample_idx,
771                   int* _var_count, int* _sample_count,
772                   bool always_copy_data )
773{
774    float** samples = 0;
775
776    CV_FUNCNAME( "cvGetTrainSamples" );
777
778    __BEGIN__;
779
780    int i, j, var_count, sample_count, s_step, v_step;
781    bool copy_data;
782    const float* data;
783    const int *s_idx, *v_idx;
784
785    if( !CV_IS_MAT(train_data) )
786        CV_ERROR( CV_StsBadArg, "Invalid or NULL training data matrix" );
787
788    var_count = var_idx ? var_idx->cols + var_idx->rows - 1 :
789                tflag == CV_ROW_SAMPLE ? train_data->cols : train_data->rows;
790    sample_count = sample_idx ? sample_idx->cols + sample_idx->rows - 1 :
791                   tflag == CV_ROW_SAMPLE ? train_data->rows : train_data->cols;
792
793    if( _var_count )
794        *_var_count = var_count;
795
796    if( _sample_count )
797        *_sample_count = sample_count;
798
799    copy_data = tflag != CV_ROW_SAMPLE || var_idx || always_copy_data;
800
801    CV_CALL( samples = (float**)cvAlloc(sample_count*sizeof(samples[0]) +
802                (copy_data ? 1 : 0)*var_count*sample_count*sizeof(samples[0][0])) );
803    data = train_data->data.fl;
804    s_step = train_data->step / sizeof(samples[0][0]);
805    v_step = 1;
806    s_idx = sample_idx ? sample_idx->data.i : 0;
807    v_idx = var_idx ? var_idx->data.i : 0;
808
809    if( !copy_data )
810    {
811        for( i = 0; i < sample_count; i++ )
812            samples[i] = (float*)(data + (s_idx ? s_idx[i] : i)*s_step);
813    }
814    else
815    {
816        samples[0] = (float*)(samples + sample_count);
817        if( tflag != CV_ROW_SAMPLE )
818            CV_SWAP( s_step, v_step, i );
819
820        for( i = 0; i < sample_count; i++ )
821        {
822            float* dst = samples[i] = samples[0] + i*var_count;
823            const float* src = data + (s_idx ? s_idx[i] : i)*s_step;
824
825            if( !v_idx )
826                for( j = 0; j < var_count; j++ )
827                    dst[j] = src[j*v_step];
828            else
829                for( j = 0; j < var_count; j++ )
830                    dst[j] = src[v_idx[j]*v_step];
831        }
832    }
833
834    __END__;
835
836    return (const float**)samples;
837}
838
839
840void
841cvCheckTrainData( const CvMat* train_data, int tflag,
842                  const CvMat* missing_mask,
843                  int* var_all, int* sample_all )
844{
845    CV_FUNCNAME( "cvCheckTrainData" );
846
847    if( var_all )
848        *var_all = 0;
849
850    if( sample_all )
851        *sample_all = 0;
852
853    __BEGIN__;
854
855    // check parameter types and sizes
856    if( !CV_IS_MAT(train_data) || CV_MAT_TYPE(train_data->type) != CV_32FC1 )
857        CV_ERROR( CV_StsBadArg, "train data must be floating-point matrix" );
858
859    if( missing_mask )
860    {
861        if( !CV_IS_MAT(missing_mask) || !CV_IS_MASK_ARR(missing_mask) ||
862            !CV_ARE_SIZES_EQ(train_data, missing_mask) )
863            CV_ERROR( CV_StsBadArg,
864            "missing value mask must be 8-bit matrix of the same size as training data" );
865    }
866
867    if( tflag != CV_ROW_SAMPLE && tflag != CV_COL_SAMPLE )
868        CV_ERROR( CV_StsBadArg,
869        "Unknown training data layout (must be CV_ROW_SAMPLE or CV_COL_SAMPLE)" );
870
871    if( var_all )
872        *var_all = tflag == CV_ROW_SAMPLE ? train_data->cols : train_data->rows;
873
874    if( sample_all )
875        *sample_all = tflag == CV_ROW_SAMPLE ? train_data->rows : train_data->cols;
876
877    __END__;
878}
879
880
881int
882cvPrepareTrainData( const char* /*funcname*/,
883                    const CvMat* train_data, int tflag,
884                    const CvMat* responses, int response_type,
885                    const CvMat* var_idx,
886                    const CvMat* sample_idx,
887                    bool always_copy_data,
888                    const float*** out_train_samples,
889                    int* _sample_count,
890                    int* _var_count,
891                    int* _var_all,
892                    CvMat** out_responses,
893                    CvMat** out_response_map,
894                    CvMat** out_var_idx,
895                    CvMat** out_sample_idx )
896{
897    int ok = 0;
898    CvMat* _var_idx = 0;
899    CvMat* _sample_idx = 0;
900    CvMat* _responses = 0;
901    int sample_all = 0, sample_count = 0, var_all = 0, var_count = 0;
902
903    CV_FUNCNAME( "cvPrepareTrainData" );
904
905    // step 0. clear all the output pointers to ensure we do not try
906    // to call free() with uninitialized pointers
907    if( out_responses )
908        *out_responses = 0;
909
910    if( out_response_map )
911        *out_response_map = 0;
912
913    if( out_var_idx )
914        *out_var_idx = 0;
915
916    if( out_sample_idx )
917        *out_sample_idx = 0;
918
919    if( out_train_samples )
920        *out_train_samples = 0;
921
922    if( _sample_count )
923        *_sample_count = 0;
924
925    if( _var_count )
926        *_var_count = 0;
927
928    if( _var_all )
929        *_var_all = 0;
930
931    __BEGIN__;
932
933    if( !out_train_samples )
934        CV_ERROR( CV_StsBadArg, "output pointer to train samples is NULL" );
935
936    CV_CALL( cvCheckTrainData( train_data, tflag, 0, &var_all, &sample_all ));
937
938    if( sample_idx )
939        CV_CALL( _sample_idx = cvPreprocessIndexArray( sample_idx, sample_all ));
940    if( var_idx )
941        CV_CALL( _var_idx = cvPreprocessIndexArray( var_idx, var_all ));
942
943    if( responses )
944    {
945        if( !out_responses )
946            CV_ERROR( CV_StsNullPtr, "output response pointer is NULL" );
947
948        if( response_type == CV_VAR_NUMERICAL )
949        {
950            CV_CALL( _responses = cvPreprocessOrderedResponses( responses,
951                                                _sample_idx, sample_all ));
952        }
953        else
954        {
955            CV_CALL( _responses = cvPreprocessCategoricalResponses( responses,
956                                _sample_idx, sample_all, out_response_map, 0 ));
957        }
958    }
959
960    CV_CALL( *out_train_samples =
961                cvGetTrainSamples( train_data, tflag, _var_idx, _sample_idx,
962                                   &var_count, &sample_count, always_copy_data ));
963
964    ok = 1;
965
966    __END__;
967
968    if( ok )
969    {
970        if( out_responses )
971            *out_responses = _responses, _responses = 0;
972
973        if( out_var_idx )
974            *out_var_idx = _var_idx, _var_idx = 0;
975
976        if( out_sample_idx )
977            *out_sample_idx = _sample_idx, _sample_idx = 0;
978
979        if( _sample_count )
980            *_sample_count = sample_count;
981
982        if( _var_count )
983            *_var_count = var_count;
984
985        if( _var_all )
986            *_var_all = var_all;
987    }
988    else
989    {
990        if( out_response_map )
991            cvReleaseMat( out_response_map );
992        cvFree( out_train_samples );
993    }
994
995    if( _responses != responses )
996        cvReleaseMat( &_responses );
997    cvReleaseMat( &_var_idx );
998    cvReleaseMat( &_sample_idx );
999
1000    return ok;
1001}
1002
1003
1004typedef struct CvSampleResponsePair
1005{
1006    const float* sample;
1007    const uchar* mask;
1008    int response;
1009    int index;
1010}
1011CvSampleResponsePair;
1012
1013
1014static int
1015CV_CDECL icvCmpSampleResponsePairs( const void* a, const void* b )
1016{
1017    int ra = ((const CvSampleResponsePair*)a)->response;
1018    int rb = ((const CvSampleResponsePair*)b)->response;
1019    int ia = ((const CvSampleResponsePair*)a)->index;
1020    int ib = ((const CvSampleResponsePair*)b)->index;
1021
1022    return ra < rb ? -1 : ra > rb ? 1 : ia - ib;
1023    //return (ra > rb ? -1 : 0)|(ra < rb);
1024}
1025
1026
1027void
1028cvSortSamplesByClasses( const float** samples, const CvMat* classes,
1029                        int* class_ranges, const uchar** mask )
1030{
1031    CvSampleResponsePair* pairs = 0;
1032    CV_FUNCNAME( "cvSortSamplesByClasses" );
1033
1034    __BEGIN__;
1035
1036    int i, k = 0, sample_count;
1037
1038    if( !samples || !classes || !class_ranges )
1039        CV_ERROR( CV_StsNullPtr, "INTERNAL ERROR: some of the args are NULL pointers" );
1040
1041    if( classes->rows != 1 || CV_MAT_TYPE(classes->type) != CV_32SC1 )
1042        CV_ERROR( CV_StsBadArg, "classes array must be a single row of integers" );
1043
1044    sample_count = classes->cols;
1045    CV_CALL( pairs = (CvSampleResponsePair*)cvAlloc( (sample_count+1)*sizeof(pairs[0])));
1046
1047    for( i = 0; i < sample_count; i++ )
1048    {
1049        pairs[i].sample = samples[i];
1050        pairs[i].mask = (mask) ? (mask[i]) : 0;
1051        pairs[i].response = classes->data.i[i];
1052        pairs[i].index = i;
1053        assert( classes->data.i[i] >= 0 );
1054    }
1055
1056    qsort( pairs, sample_count, sizeof(pairs[0]), icvCmpSampleResponsePairs );
1057    pairs[sample_count].response = -1;
1058    class_ranges[0] = 0;
1059
1060    for( i = 0; i < sample_count; i++ )
1061    {
1062        samples[i] = pairs[i].sample;
1063        if (mask)
1064            mask[i] = pairs[i].mask;
1065        classes->data.i[i] = pairs[i].response;
1066
1067        if( pairs[i].response != pairs[i+1].response )
1068            class_ranges[++k] = i+1;
1069    }
1070
1071    __END__;
1072
1073    cvFree( &pairs );
1074}
1075
1076
1077void
1078cvPreparePredictData( const CvArr* _sample, int dims_all,
1079                      const CvMat* comp_idx, int class_count,
1080                      const CvMat* prob, float** _row_sample,
1081                      int as_sparse )
1082{
1083    float* row_sample = 0;
1084    int* inverse_comp_idx = 0;
1085
1086    CV_FUNCNAME( "cvPreparePredictData" );
1087
1088    __BEGIN__;
1089
1090    const CvMat* sample = (const CvMat*)_sample;
1091    float* sample_data;
1092    int sample_step;
1093    int is_sparse = CV_IS_SPARSE_MAT(sample);
1094    int d, sizes[CV_MAX_DIM];
1095    int i, dims_selected;
1096    int vec_size;
1097
1098    if( !is_sparse && !CV_IS_MAT(sample) )
1099        CV_ERROR( !sample ? CV_StsNullPtr : CV_StsBadArg, "The sample is not a valid vector" );
1100
1101    if( cvGetElemType( sample ) != CV_32FC1 )
1102        CV_ERROR( CV_StsUnsupportedFormat, "Input sample must have 32fC1 type" );
1103
1104    CV_CALL( d = cvGetDims( sample, sizes ));
1105
1106    if( !((is_sparse && d == 1) || (!is_sparse && d == 2 && (sample->rows == 1 || sample->cols == 1))) )
1107        CV_ERROR( CV_StsBadSize, "Input sample must be 1-dimensional vector" );
1108
1109    if( d == 1 )
1110        sizes[1] = 1;
1111
1112    if( sizes[0] + sizes[1] - 1 != dims_all )
1113        CV_ERROR( CV_StsUnmatchedSizes,
1114        "The sample size is different from what has been used for training" );
1115
1116    if( !_row_sample )
1117        CV_ERROR( CV_StsNullPtr, "INTERNAL ERROR: The row_sample pointer is NULL" );
1118
1119    if( comp_idx && (!CV_IS_MAT(comp_idx) || comp_idx->rows != 1 ||
1120        CV_MAT_TYPE(comp_idx->type) != CV_32SC1) )
1121        CV_ERROR( CV_StsBadArg, "INTERNAL ERROR: invalid comp_idx" );
1122
1123    dims_selected = comp_idx ? comp_idx->cols : dims_all;
1124
1125    if( prob )
1126    {
1127        if( !CV_IS_MAT(prob) )
1128            CV_ERROR( CV_StsBadArg, "The output matrix of probabilities is invalid" );
1129
1130        if( (prob->rows != 1 && prob->cols != 1) ||
1131            (CV_MAT_TYPE(prob->type) != CV_32FC1 &&
1132            CV_MAT_TYPE(prob->type) != CV_64FC1) )
1133            CV_ERROR( CV_StsBadSize,
1134            "The matrix of probabilities must be 1-dimensional vector of 32fC1 type" );
1135
1136        if( prob->rows + prob->cols - 1 != class_count )
1137            CV_ERROR( CV_StsUnmatchedSizes,
1138            "The vector of probabilities must contain as many elements as "
1139            "the number of classes in the training set" );
1140    }
1141
1142    vec_size = !as_sparse ? dims_selected*sizeof(row_sample[0]) :
1143                (dims_selected + 1)*sizeof(CvSparseVecElem32f);
1144
1145    if( CV_IS_MAT(sample) )
1146    {
1147        sample_data = sample->data.fl;
1148        sample_step = CV_IS_MAT_CONT(sample->type) ? 1 : sample->step/sizeof(row_sample[0]);
1149
1150        if( !comp_idx && CV_IS_MAT_CONT(sample->type) && !as_sparse )
1151            *_row_sample = sample_data;
1152        else
1153        {
1154            CV_CALL( row_sample = (float*)cvAlloc( vec_size ));
1155
1156            if( !comp_idx )
1157                for( i = 0; i < dims_selected; i++ )
1158                    row_sample[i] = sample_data[sample_step*i];
1159            else
1160            {
1161                int* comp = comp_idx->data.i;
1162                for( i = 0; i < dims_selected; i++ )
1163                    row_sample[i] = sample_data[sample_step*comp[i]];
1164            }
1165
1166            *_row_sample = row_sample;
1167        }
1168
1169        if( as_sparse )
1170        {
1171            const float* src = (const float*)row_sample;
1172            CvSparseVecElem32f* dst = (CvSparseVecElem32f*)row_sample;
1173
1174            dst[dims_selected].idx = -1;
1175            for( i = dims_selected - 1; i >= 0; i-- )
1176            {
1177                dst[i].idx = i;
1178                dst[i].val = src[i];
1179            }
1180        }
1181    }
1182    else
1183    {
1184        CvSparseNode* node;
1185        CvSparseMatIterator mat_iterator;
1186        const CvSparseMat* sparse = (const CvSparseMat*)sample;
1187        assert( is_sparse );
1188
1189        node = cvInitSparseMatIterator( sparse, &mat_iterator );
1190        CV_CALL( row_sample = (float*)cvAlloc( vec_size ));
1191
1192        if( comp_idx )
1193        {
1194            CV_CALL( inverse_comp_idx = (int*)cvAlloc( dims_all*sizeof(int) ));
1195            memset( inverse_comp_idx, -1, dims_all*sizeof(int) );
1196            for( i = 0; i < dims_selected; i++ )
1197                inverse_comp_idx[comp_idx->data.i[i]] = i;
1198        }
1199
1200        if( !as_sparse )
1201        {
1202            memset( row_sample, 0, vec_size );
1203
1204            for( ; node != 0; node = cvGetNextSparseNode(&mat_iterator) )
1205            {
1206                int idx = *CV_NODE_IDX( sparse, node );
1207                if( inverse_comp_idx )
1208                {
1209                    idx = inverse_comp_idx[idx];
1210                    if( idx < 0 )
1211                        continue;
1212                }
1213                row_sample[idx] = *(float*)CV_NODE_VAL( sparse, node );
1214            }
1215        }
1216        else
1217        {
1218            CvSparseVecElem32f* ptr = (CvSparseVecElem32f*)row_sample;
1219
1220            for( ; node != 0; node = cvGetNextSparseNode(&mat_iterator) )
1221            {
1222                int idx = *CV_NODE_IDX( sparse, node );
1223                if( inverse_comp_idx )
1224                {
1225                    idx = inverse_comp_idx[idx];
1226                    if( idx < 0 )
1227                        continue;
1228                }
1229                ptr->idx = idx;
1230                ptr->val = *(float*)CV_NODE_VAL( sparse, node );
1231                ptr++;
1232            }
1233
1234            qsort( row_sample, ptr - (CvSparseVecElem32f*)row_sample,
1235                   sizeof(ptr[0]), icvCmpSparseVecElems );
1236            ptr->idx = -1;
1237        }
1238
1239        *_row_sample = row_sample;
1240    }
1241
1242    __END__;
1243
1244    if( inverse_comp_idx )
1245        cvFree( &inverse_comp_idx );
1246
1247    if( cvGetErrStatus() < 0 && _row_sample )
1248    {
1249        cvFree( &row_sample );
1250        *_row_sample = 0;
1251    }
1252}
1253
1254
1255static void
1256icvConvertDataToSparse( const uchar* src, int src_step, int src_type,
1257                        uchar* dst, int dst_step, int dst_type,
1258                        CvSize size, int* idx )
1259{
1260    CV_FUNCNAME( "icvConvertDataToSparse" );
1261
1262    __BEGIN__;
1263
1264    int i, j;
1265    src_type = CV_MAT_TYPE(src_type);
1266    dst_type = CV_MAT_TYPE(dst_type);
1267
1268    if( CV_MAT_CN(src_type) != 1 || CV_MAT_CN(dst_type) != 1 )
1269        CV_ERROR( CV_StsUnsupportedFormat, "The function supports only single-channel arrays" );
1270
1271    if( src_step == 0 )
1272        src_step = CV_ELEM_SIZE(src_type);
1273
1274    if( dst_step == 0 )
1275        dst_step = CV_ELEM_SIZE(dst_type);
1276
1277    // if there is no "idx" and if both arrays are continuous,
1278    // do the whole processing (copying or conversion) in a single loop
1279    if( !idx && CV_ELEM_SIZE(src_type)*size.width == src_step &&
1280        CV_ELEM_SIZE(dst_type)*size.width == dst_step )
1281    {
1282        size.width *= size.height;
1283        size.height = 1;
1284    }
1285
1286    if( src_type == dst_type )
1287    {
1288        int full_width = CV_ELEM_SIZE(dst_type)*size.width;
1289
1290        if( full_width == sizeof(int) ) // another common case: copy int's or float's
1291            for( i = 0; i < size.height; i++, src += src_step )
1292                *(int*)(dst + dst_step*(idx ? idx[i] : i)) = *(int*)src;
1293        else
1294            for( i = 0; i < size.height; i++, src += src_step )
1295                memcpy( dst + dst_step*(idx ? idx[i] : i), src, full_width );
1296    }
1297    else if( src_type == CV_32SC1 && (dst_type == CV_32FC1 || dst_type == CV_64FC1) )
1298        for( i = 0; i < size.height; i++, src += src_step )
1299        {
1300            uchar* _dst = dst + dst_step*(idx ? idx[i] : i);
1301            if( dst_type == CV_32FC1 )
1302                for( j = 0; j < size.width; j++ )
1303                    ((float*)_dst)[j] = (float)((int*)src)[j];
1304            else
1305                for( j = 0; j < size.width; j++ )
1306                    ((double*)_dst)[j] = ((int*)src)[j];
1307        }
1308    else if( (src_type == CV_32FC1 || src_type == CV_64FC1) && dst_type == CV_32SC1 )
1309        for( i = 0; i < size.height; i++, src += src_step )
1310        {
1311            uchar* _dst = dst + dst_step*(idx ? idx[i] : i);
1312            if( src_type == CV_32FC1 )
1313                for( j = 0; j < size.width; j++ )
1314                    ((int*)_dst)[j] = cvRound(((float*)src)[j]);
1315            else
1316                for( j = 0; j < size.width; j++ )
1317                    ((int*)_dst)[j] = cvRound(((double*)src)[j]);
1318        }
1319    else if( (src_type == CV_32FC1 && dst_type == CV_64FC1) ||
1320             (src_type == CV_64FC1 && dst_type == CV_32FC1) )
1321        for( i = 0; i < size.height; i++, src += src_step )
1322        {
1323            uchar* _dst = dst + dst_step*(idx ? idx[i] : i);
1324            if( src_type == CV_32FC1 )
1325                for( j = 0; j < size.width; j++ )
1326                    ((double*)_dst)[j] = ((float*)src)[j];
1327            else
1328                for( j = 0; j < size.width; j++ )
1329                    ((float*)_dst)[j] = (float)((double*)src)[j];
1330        }
1331    else
1332        CV_ERROR( CV_StsUnsupportedFormat, "Unsupported combination of input and output vectors" );
1333
1334    __END__;
1335}
1336
1337
1338void
1339cvWritebackLabels( const CvMat* labels, CvMat* dst_labels,
1340                   const CvMat* centers, CvMat* dst_centers,
1341                   const CvMat* probs, CvMat* dst_probs,
1342                   const CvMat* sample_idx, int samples_all,
1343                   const CvMat* comp_idx, int dims_all )
1344{
1345    CV_FUNCNAME( "cvWritebackLabels" );
1346
1347    __BEGIN__;
1348
1349    int samples_selected = samples_all, dims_selected = dims_all;
1350
1351    if( dst_labels && !CV_IS_MAT(dst_labels) )
1352        CV_ERROR( CV_StsBadArg, "Array of output labels is not a valid matrix" );
1353
1354    if( dst_centers )
1355        if( !ICV_IS_MAT_OF_TYPE(dst_centers, CV_32FC1) &&
1356            !ICV_IS_MAT_OF_TYPE(dst_centers, CV_64FC1) )
1357            CV_ERROR( CV_StsBadArg, "Array of cluster centers is not a valid matrix" );
1358
1359    if( dst_probs && !CV_IS_MAT(dst_probs) )
1360        CV_ERROR( CV_StsBadArg, "Probability matrix is not valid" );
1361
1362    if( sample_idx )
1363    {
1364        CV_ASSERT( sample_idx->rows == 1 && CV_MAT_TYPE(sample_idx->type) == CV_32SC1 );
1365        samples_selected = sample_idx->cols;
1366    }
1367
1368    if( comp_idx )
1369    {
1370        CV_ASSERT( comp_idx->rows == 1 && CV_MAT_TYPE(comp_idx->type) == CV_32SC1 );
1371        dims_selected = comp_idx->cols;
1372    }
1373
1374    if( dst_labels && (!labels || labels->data.ptr != dst_labels->data.ptr) )
1375    {
1376        if( !labels )
1377            CV_ERROR( CV_StsNullPtr, "NULL labels" );
1378
1379        CV_ASSERT( labels->rows == 1 );
1380
1381        if( dst_labels->rows != 1 && dst_labels->cols != 1 )
1382            CV_ERROR( CV_StsBadSize, "Array of output labels should be 1d vector" );
1383
1384        if( dst_labels->rows + dst_labels->cols - 1 != samples_all )
1385            CV_ERROR( CV_StsUnmatchedSizes,
1386            "Size of vector of output labels is not equal to the total number of input samples" );
1387
1388        CV_ASSERT( labels->cols == samples_selected );
1389
1390        CV_CALL( icvConvertDataToSparse( labels->data.ptr, labels->step, labels->type,
1391                        dst_labels->data.ptr, dst_labels->step, dst_labels->type,
1392                        cvSize( 1, samples_selected ), sample_idx ? sample_idx->data.i : 0 ));
1393    }
1394
1395    if( dst_centers && (!centers || centers->data.ptr != dst_centers->data.ptr) )
1396    {
1397        int i;
1398
1399        if( !centers )
1400            CV_ERROR( CV_StsNullPtr, "NULL centers" );
1401
1402        if( centers->rows != dst_centers->rows )
1403            CV_ERROR( CV_StsUnmatchedSizes, "Invalid number of rows in matrix of output centers" );
1404
1405        if( dst_centers->cols != dims_all )
1406            CV_ERROR( CV_StsUnmatchedSizes,
1407            "Number of columns in matrix of output centers is "
1408            "not equal to the total number of components in the input samples" );
1409
1410        CV_ASSERT( centers->cols == dims_selected );
1411
1412        for( i = 0; i < centers->rows; i++ )
1413            CV_CALL( icvConvertDataToSparse( centers->data.ptr + i*centers->step, 0, centers->type,
1414                        dst_centers->data.ptr + i*dst_centers->step, 0, dst_centers->type,
1415                        cvSize( 1, dims_selected ), comp_idx ? comp_idx->data.i : 0 ));
1416    }
1417
1418    if( dst_probs && (!probs || probs->data.ptr != dst_probs->data.ptr) )
1419    {
1420        if( !probs )
1421            CV_ERROR( CV_StsNullPtr, "NULL probs" );
1422
1423        if( probs->cols != dst_probs->cols )
1424            CV_ERROR( CV_StsUnmatchedSizes, "Invalid number of columns in output probability matrix" );
1425
1426        if( dst_probs->rows != samples_all )
1427            CV_ERROR( CV_StsUnmatchedSizes,
1428            "Number of rows in output probability matrix is "
1429            "not equal to the total number of input samples" );
1430
1431        CV_ASSERT( probs->rows == samples_selected );
1432
1433        CV_CALL( icvConvertDataToSparse( probs->data.ptr, probs->step, probs->type,
1434                        dst_probs->data.ptr, dst_probs->step, dst_probs->type,
1435                        cvSize( probs->cols, samples_selected ),
1436                        sample_idx ? sample_idx->data.i : 0 ));
1437    }
1438
1439    __END__;
1440}
1441
1442#if 0
1443CV_IMPL void
1444cvStatModelMultiPredict( const CvStatModel* stat_model,
1445                         const CvArr* predict_input,
1446                         int flags, CvMat* predict_output,
1447                         CvMat* probs, const CvMat* sample_idx )
1448{
1449    CvMemStorage* storage = 0;
1450    CvMat* sample_idx_buffer = 0;
1451    CvSparseMat** sparse_rows = 0;
1452    int samples_selected = 0;
1453
1454    CV_FUNCNAME( "cvStatModelMultiPredict" );
1455
1456    __BEGIN__;
1457
1458    int i;
1459    int predict_output_step = 1, sample_idx_step = 1;
1460    int type;
1461    int d, sizes[CV_MAX_DIM];
1462    int tflag = flags == CV_COL_SAMPLE;
1463    int samples_all, dims_all;
1464    int is_sparse = CV_IS_SPARSE_MAT(predict_input);
1465    CvMat predict_input_part;
1466    CvArr* sample = &predict_input_part;
1467    CvMat probs_part;
1468    CvMat* probs1 = probs ? &probs_part : 0;
1469
1470    if( !CV_IS_STAT_MODEL(stat_model) )
1471        CV_ERROR( !stat_model ? CV_StsNullPtr : CV_StsBadArg, "Invalid statistical model" );
1472
1473    if( !stat_model->predict )
1474        CV_ERROR( CV_StsNotImplemented, "There is no \"predict\" method" );
1475
1476    if( !predict_input || !predict_output )
1477        CV_ERROR( CV_StsNullPtr, "NULL input or output matrices" );
1478
1479    if( !is_sparse && !CV_IS_MAT(predict_input) )
1480        CV_ERROR( CV_StsBadArg, "predict_input should be a matrix or a sparse matrix" );
1481
1482    if( !CV_IS_MAT(predict_output) )
1483        CV_ERROR( CV_StsBadArg, "predict_output should be a matrix" );
1484
1485    type = cvGetElemType( predict_input );
1486    if( type != CV_32FC1 ||
1487        (CV_MAT_TYPE(predict_output->type) != CV_32FC1 &&
1488         CV_MAT_TYPE(predict_output->type) != CV_32SC1 ))
1489         CV_ERROR( CV_StsUnsupportedFormat, "The input or output matrix has unsupported format" );
1490
1491    CV_CALL( d = cvGetDims( predict_input, sizes ));
1492    if( d > 2 )
1493        CV_ERROR( CV_StsBadSize, "The input matrix should be 1- or 2-dimensional" );
1494
1495    if( !tflag )
1496    {
1497        samples_all = samples_selected = sizes[0];
1498        dims_all = sizes[1];
1499    }
1500    else
1501    {
1502        samples_all = samples_selected = sizes[1];
1503        dims_all = sizes[0];
1504    }
1505
1506    if( sample_idx )
1507    {
1508        if( !CV_IS_MAT(sample_idx) )
1509            CV_ERROR( CV_StsBadArg, "Invalid sample_idx matrix" );
1510
1511        if( sample_idx->cols != 1 && sample_idx->rows != 1 )
1512            CV_ERROR( CV_StsBadSize, "sample_idx must be 1-dimensional matrix" );
1513
1514        samples_selected = sample_idx->rows + sample_idx->cols - 1;
1515
1516        if( CV_MAT_TYPE(sample_idx->type) == CV_32SC1 )
1517        {
1518            if( samples_selected > samples_all )
1519                CV_ERROR( CV_StsBadSize, "sample_idx is too large vector" );
1520        }
1521        else if( samples_selected != samples_all )
1522            CV_ERROR( CV_StsUnmatchedSizes, "sample_idx has incorrect size" );
1523
1524        sample_idx_step = sample_idx->step ?
1525            sample_idx->step / CV_ELEM_SIZE(sample_idx->type) : 1;
1526    }
1527
1528    if( predict_output->rows != 1 && predict_output->cols != 1 )
1529        CV_ERROR( CV_StsBadSize, "predict_output should be a 1-dimensional matrix" );
1530
1531    if( predict_output->rows + predict_output->cols - 1 != samples_all )
1532        CV_ERROR( CV_StsUnmatchedSizes, "predict_output and predict_input have uncoordinated sizes" );
1533
1534    predict_output_step = predict_output->step ?
1535        predict_output->step / CV_ELEM_SIZE(predict_output->type) : 1;
1536
1537    if( probs )
1538    {
1539        if( !CV_IS_MAT(probs) )
1540            CV_ERROR( CV_StsBadArg, "Invalid matrix of probabilities" );
1541
1542        if( probs->rows != samples_all )
1543            CV_ERROR( CV_StsUnmatchedSizes,
1544            "matrix of probabilities must have as many rows as the total number of samples" );
1545
1546        if( CV_MAT_TYPE(probs->type) != CV_32FC1 )
1547            CV_ERROR( CV_StsUnsupportedFormat, "matrix of probabilities must have 32fC1 type" );
1548    }
1549
1550    if( is_sparse )
1551    {
1552        CvSparseNode* node;
1553        CvSparseMatIterator mat_iterator;
1554        CvSparseMat* sparse = (CvSparseMat*)predict_input;
1555
1556        if( sample_idx && CV_MAT_TYPE(sample_idx->type) == CV_32SC1 )
1557        {
1558            CV_CALL( sample_idx_buffer = cvCreateMat( 1, samples_all, CV_8UC1 ));
1559            cvZero( sample_idx_buffer );
1560            for( i = 0; i < samples_selected; i++ )
1561                sample_idx_buffer->data.ptr[sample_idx->data.i[i*sample_idx_step]] = 1;
1562            samples_selected = samples_all;
1563            sample_idx = sample_idx_buffer;
1564            sample_idx_step = 1;
1565        }
1566
1567        CV_CALL( sparse_rows = (CvSparseMat**)cvAlloc( samples_selected*sizeof(sparse_rows[0])));
1568        for( i = 0; i < samples_selected; i++ )
1569        {
1570            if( sample_idx && sample_idx->data.ptr[i*sample_idx_step] == 0 )
1571                continue;
1572            CV_CALL( sparse_rows[i] = cvCreateSparseMat( 1, &dims_all, type ));
1573            if( !storage )
1574                storage = sparse_rows[i]->heap->storage;
1575            else
1576            {
1577                // hack: to decrease memory footprint, make all the sparse matrices
1578                // reside in the same storage
1579                int elem_size = sparse_rows[i]->heap->elem_size;
1580                cvReleaseMemStorage( &sparse_rows[i]->heap->storage );
1581                sparse_rows[i]->heap = cvCreateSet( 0, sizeof(CvSet), elem_size, storage );
1582            }
1583        }
1584
1585        // put each row (or column) of predict_input into separate sparse matrix.
1586        node = cvInitSparseMatIterator( sparse, &mat_iterator );
1587        for( ; node != 0; node = cvGetNextSparseNode( &mat_iterator ))
1588        {
1589            int* idx = CV_NODE_IDX( sparse, node );
1590            int idx0 = idx[tflag ^ 1];
1591            int idx1 = idx[tflag];
1592
1593            if( sample_idx && sample_idx->data.ptr[idx0*sample_idx_step] == 0 )
1594                continue;
1595
1596            assert( sparse_rows[idx0] != 0 );
1597            *(float*)cvPtrND( sparse, &idx1, 0, 1, 0 ) = *(float*)CV_NODE_VAL( sparse, node );
1598        }
1599    }
1600
1601    for( i = 0; i < samples_selected; i++ )
1602    {
1603        int idx = i;
1604        float response;
1605
1606        if( sample_idx )
1607        {
1608            if( CV_MAT_TYPE(sample_idx->type) == CV_32SC1 )
1609            {
1610                idx = sample_idx->data.i[i*sample_idx_step];
1611                if( (unsigned)idx >= (unsigned)samples_all )
1612                    CV_ERROR( CV_StsOutOfRange, "Some of sample_idx elements are out of range" );
1613            }
1614            else if( CV_MAT_TYPE(sample_idx->type) == CV_8UC1 &&
1615                     sample_idx->data.ptr[i*sample_idx_step] == 0 )
1616                continue;
1617        }
1618
1619        if( !is_sparse )
1620        {
1621            if( !tflag )
1622                cvGetRow( predict_input, &predict_input_part, idx );
1623            else
1624            {
1625                cvGetCol( predict_input, &predict_input_part, idx );
1626            }
1627        }
1628        else
1629            sample = sparse_rows[idx];
1630
1631        if( probs )
1632            cvGetRow( probs, probs1, idx );
1633
1634        CV_CALL( response = stat_model->predict( stat_model, (CvMat*)sample, probs1 ));
1635
1636        if( CV_MAT_TYPE(predict_output->type) == CV_32FC1 )
1637            predict_output->data.fl[idx*predict_output_step] = response;
1638        else
1639        {
1640            CV_ASSERT( cvRound(response) == response );
1641            predict_output->data.i[idx*predict_output_step] = cvRound(response);
1642        }
1643    }
1644
1645    __END__;
1646
1647    if( sparse_rows )
1648    {
1649        int i;
1650        for( i = 0; i < samples_selected; i++ )
1651            if( sparse_rows[i] )
1652            {
1653                sparse_rows[i]->heap->storage = 0;
1654                cvReleaseSparseMat( &sparse_rows[i] );
1655            }
1656        cvFree( &sparse_rows );
1657    }
1658
1659    cvReleaseMat( &sample_idx_buffer );
1660    cvReleaseMemStorage( &storage );
1661}
1662#endif
1663
1664// By P. Yarykin - begin -
1665
1666void cvCombineResponseMaps (CvMat*  _responses,
1667                      const CvMat*  old_response_map,
1668                            CvMat*  new_response_map,
1669                            CvMat** out_response_map)
1670{
1671    int** old_data = NULL;
1672    int** new_data = NULL;
1673
1674        CV_FUNCNAME ("cvCombineResponseMaps");
1675        __BEGIN__
1676
1677    int i,j;
1678    int old_n, new_n, out_n;
1679    int samples, free_response;
1680    int* first;
1681    int* responses;
1682    int* out_data;
1683
1684    if( out_response_map )
1685        *out_response_map = 0;
1686
1687// Check input data.
1688    if ((!ICV_IS_MAT_OF_TYPE (_responses, CV_32SC1)) ||
1689        (!ICV_IS_MAT_OF_TYPE (old_response_map, CV_32SC1)) ||
1690        (!ICV_IS_MAT_OF_TYPE (new_response_map, CV_32SC1)))
1691    {
1692        CV_ERROR (CV_StsBadArg, "Some of input arguments is not the CvMat")
1693    }
1694
1695// Prepare sorted responses.
1696    first = new_response_map->data.i;
1697    new_n = new_response_map->cols;
1698    CV_CALL (new_data = (int**)cvAlloc (new_n * sizeof (new_data[0])));
1699    for (i = 0; i < new_n; i++)
1700        new_data[i] = first + i;
1701    qsort (new_data, new_n, sizeof(int*), icvCmpIntegersPtr);
1702
1703    first = old_response_map->data.i;
1704    old_n = old_response_map->cols;
1705    CV_CALL (old_data = (int**)cvAlloc (old_n * sizeof (old_data[0])));
1706    for (i = 0; i < old_n; i++)
1707        old_data[i] = first + i;
1708    qsort (old_data, old_n, sizeof(int*), icvCmpIntegersPtr);
1709
1710// Count the number of different responses.
1711    for (i = 0, j = 0, out_n = 0; i < old_n && j < new_n; out_n++)
1712    {
1713        if (*old_data[i] == *new_data[j])
1714        {
1715            i++;
1716            j++;
1717        }
1718        else if (*old_data[i] < *new_data[j])
1719            i++;
1720        else
1721            j++;
1722    }
1723    out_n += old_n - i + new_n - j;
1724
1725// Create and fill the result response maps.
1726    CV_CALL (*out_response_map = cvCreateMat (1, out_n, CV_32SC1));
1727    out_data = (*out_response_map)->data.i;
1728    memcpy (out_data, first, old_n * sizeof (int));
1729
1730    free_response = old_n;
1731    for (i = 0, j = 0; i < old_n && j < new_n; )
1732    {
1733        if (*old_data[i] == *new_data[j])
1734        {
1735            *new_data[j] = (int)(old_data[i] - first);
1736            i++;
1737            j++;
1738        }
1739        else if (*old_data[i] < *new_data[j])
1740            i++;
1741        else
1742        {
1743            out_data[free_response] = *new_data[j];
1744            *new_data[j] = free_response++;
1745            j++;
1746        }
1747    }
1748    for (; j < new_n; j++)
1749    {
1750        out_data[free_response] = *new_data[j];
1751        *new_data[j] = free_response++;
1752    }
1753    CV_ASSERT (free_response == out_n);
1754
1755// Change <responses> according to out response map.
1756    samples = _responses->cols + _responses->rows - 1;
1757    responses = _responses->data.i;
1758    first = new_response_map->data.i;
1759    for (i = 0; i < samples; i++)
1760    {
1761        responses[i] = first[responses[i]];
1762    }
1763
1764        __END__
1765
1766    cvFree(&old_data);
1767    cvFree(&new_data);
1768
1769}
1770
1771
1772static int icvGetNumberOfCluster( double* prob_vector, int num_of_clusters, float r,
1773                           float outlier_thresh, int normalize_probs )
1774{
1775    int max_prob_loc = 0;
1776
1777    CV_FUNCNAME("icvGetNumberOfCluster");
1778    __BEGIN__;
1779
1780    double prob, maxprob, sum;
1781    int i;
1782
1783    CV_ASSERT(prob_vector);
1784    CV_ASSERT(num_of_clusters >= 0);
1785
1786    maxprob = prob_vector[0];
1787    max_prob_loc = 0;
1788    sum = maxprob;
1789    for( i = 1; i < num_of_clusters; i++ )
1790    {
1791        prob = prob_vector[i];
1792        sum += prob;
1793        if( prob > maxprob )
1794        {
1795            max_prob_loc = i;
1796            maxprob = prob;
1797        }
1798    }
1799    if( normalize_probs && fabs(sum - 1.) > FLT_EPSILON )
1800    {
1801        for( i = 0; i < num_of_clusters; i++ )
1802            prob_vector[i] /= sum;
1803    }
1804    if( fabs(r - 1.) > FLT_EPSILON && fabs(sum - 1.) < outlier_thresh )
1805        max_prob_loc = -1;
1806
1807    __END__;
1808
1809    return max_prob_loc;
1810
1811} // End of icvGetNumberOfCluster
1812
1813
1814void icvFindClusterLabels( const CvMat* probs, float outlier_thresh, float r,
1815                          const CvMat* labels )
1816{
1817    CvMat* counts = 0;
1818
1819    CV_FUNCNAME("icvFindClusterLabels");
1820    __BEGIN__;
1821
1822    int nclusters, nsamples;
1823    int i, j;
1824    double* probs_data;
1825
1826    CV_ASSERT( ICV_IS_MAT_OF_TYPE(probs, CV_64FC1) );
1827    CV_ASSERT( ICV_IS_MAT_OF_TYPE(labels, CV_32SC1) );
1828
1829    nclusters = probs->cols;
1830    nsamples  = probs->rows;
1831    CV_ASSERT( nsamples == labels->cols );
1832
1833    CV_CALL( counts = cvCreateMat( 1, nclusters + 1, CV_32SC1 ) );
1834    CV_CALL( cvSetZero( counts ));
1835    for( i = 0; i < nsamples; i++ )
1836    {
1837        labels->data.i[i] = icvGetNumberOfCluster( probs->data.db + i*probs->cols,
1838            nclusters, r, outlier_thresh, 1 );
1839        counts->data.i[labels->data.i[i] + 1]++;
1840    }
1841    CV_ASSERT((int)cvSum(counts).val[0] == nsamples);
1842    // Filling empty clusters with the vector, that has the maximal probability
1843    for( j = 0; j < nclusters; j++ ) // outliers are ignored
1844    {
1845        int maxprob_loc = -1;
1846        double maxprob = 0;
1847
1848        if( counts->data.i[j+1] ) // j-th class is not empty
1849            continue;
1850        // look for the presentative, which is not lonely in it's cluster
1851        // and that has a maximal probability among all these vectors
1852        probs_data = probs->data.db;
1853        for( i = 0; i < nsamples; i++, probs_data++ )
1854        {
1855            int label = labels->data.i[i];
1856            double prob;
1857            if( counts->data.i[label+1] == 0 ||
1858                (counts->data.i[label+1] <= 1 && label != -1) )
1859                continue;
1860            prob = *probs_data;
1861            if( prob >= maxprob )
1862            {
1863                maxprob = prob;
1864                maxprob_loc = i;
1865            }
1866        }
1867        // maxprob_loc == 0 <=> number of vectors less then number of clusters
1868        CV_ASSERT( maxprob_loc >= 0 );
1869        counts->data.i[labels->data.i[maxprob_loc] + 1]--;
1870        labels->data.i[maxprob_loc] = j;
1871        counts->data.i[j + 1]++;
1872    }
1873
1874    __END__;
1875
1876    cvReleaseMat( &counts );
1877} // End of icvFindClusterLabels
1878
1879/* End of file */
1880