1/*M///////////////////////////////////////////////////////////////////////////////////////
2//
3//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4//
5//  By downloading, copying, installing or using the software you agree to this license.
6//  If you do not agree to this license, do not download, install,
7//  copy or use the software.
8//
9//
10//                        Intel License Agreement
11//
12// Copyright (C) 2000, Intel Corporation, all rights reserved.
13// Third party copyrights are property of their respective owners.
14//
15// Redistribution and use in source and binary forms, with or without modification,
16// are permitted provided that the following conditions are met:
17//
18//   * Redistribution's of source code must retain the above copyright notice,
19//     this list of conditions and the following disclaimer.
20//
21//   * Redistribution's in binary form must reproduce the above copyright notice,
22//     this list of conditions and the following disclaimer in the documentation
23//     and/or other materials provided with the distribution.
24//
25//   * The name of Intel Corporation may not be used to endorse or promote products
26//     derived from this software without specific prior written permission.
27//
28// This software is provided by the copyright holders and contributors "as is" and
29// any express or implied warranties, including, but not limited to, the implied
30// warranties of merchantability and fitness for a particular purpose are disclaimed.
31// In no event shall the Intel Corporation or contributors be liable for any direct,
32// indirect, incidental, special, exemplary, or consequential damages
33// (including, but not limited to, procurement of substitute goods or services;
34// loss of use, data, or profits; or business interruption) however caused
35// and on any theory of liability, whether in contract, strict liability,
36// or tort (including negligence or otherwise) arising in any way out of
37// the use of this software, even if advised of the possibility of such damage.
38//
39//M*/
40
41#include "_ml.h"
42
43
44CvStatModel::CvStatModel()
45{
46    default_model_name = "my_stat_model";
47}
48
49
50CvStatModel::~CvStatModel()
51{
52    clear();
53}
54
55
56void CvStatModel::clear()
57{
58}
59
60
61void CvStatModel::save( const char* filename, const char* name )
62{
63    CvFileStorage* fs = 0;
64
65    CV_FUNCNAME( "CvStatModel::save" );
66
67    __BEGIN__;
68
69    CV_CALL( fs = cvOpenFileStorage( filename, 0, CV_STORAGE_WRITE ));
70    if( !fs )
71        CV_ERROR( CV_StsError, "Could not open the file storage. Check the path and permissions" );
72
73    write( fs, name ? name : default_model_name );
74
75    __END__;
76
77    cvReleaseFileStorage( &fs );
78}
79
80
81void CvStatModel::load( const char* filename, const char* name )
82{
83    CvFileStorage* fs = 0;
84
85    CV_FUNCNAME( "CvStatModel::load" );
86
87    __BEGIN__;
88
89    CvFileNode* model_node = 0;
90
91    CV_CALL( fs = cvOpenFileStorage( filename, 0, CV_STORAGE_READ ));
92    if( !fs )
93        EXIT;
94
95    if( name )
96        model_node = cvGetFileNodeByName( fs, 0, name );
97    else
98    {
99        CvFileNode* root = cvGetRootFileNode( fs );
100        if( root->data.seq->total > 0 )
101            model_node = (CvFileNode*)cvGetSeqElem( root->data.seq, 0 );
102    }
103
104    read( fs, model_node );
105
106    __END__;
107
108    cvReleaseFileStorage( &fs );
109}
110
111
112void CvStatModel::write( CvFileStorage*, const char* )
113{
114    OPENCV_ERROR( CV_StsNotImplemented, "CvStatModel::write", "" );
115}
116
117
118void CvStatModel::read( CvFileStorage*, CvFileNode* )
119{
120    OPENCV_ERROR( CV_StsNotImplemented, "CvStatModel::read", "" );
121}
122
123
124/* Calculates upper triangular matrix S, where A is a symmetrical matrix A=S'*S */
125CV_IMPL void cvChol( CvMat* A, CvMat* S )
126{
127    int dim = A->rows;
128
129    int i, j, k;
130    float sum;
131
132    for( i = 0; i < dim; i++ )
133    {
134        for( j = 0; j < i; j++ )
135            CV_MAT_ELEM(*S, float, i, j) = 0;
136
137        sum = 0;
138        for( k = 0; k < i; k++ )
139            sum += CV_MAT_ELEM(*S, float, k, i) * CV_MAT_ELEM(*S, float, k, i);
140
141        CV_MAT_ELEM(*S, float, i, i) = (float)sqrt(CV_MAT_ELEM(*A, float, i, i) - sum);
142
143        for( j = i + 1; j < dim; j++ )
144        {
145            sum = 0;
146            for( k = 0; k < i; k++ )
147                sum += CV_MAT_ELEM(*S, float, k, i) * CV_MAT_ELEM(*S, float, k, j);
148
149            CV_MAT_ELEM(*S, float, i, j) =
150                (CV_MAT_ELEM(*A, float, i, j) - sum) / CV_MAT_ELEM(*S, float, i, i);
151
152        }
153    }
154}
155
156/* Generates <sample> from multivariate normal distribution, where <mean> - is an
157   average row vector, <cov> - symmetric covariation matrix */
158CV_IMPL void cvRandMVNormal( CvMat* mean, CvMat* cov, CvMat* sample, CvRNG* rng )
159{
160    int dim = sample->cols;
161    int amount = sample->rows;
162
163    CvRNG state = rng ? *rng : cvRNG(time(0));
164    cvRandArr(&state, sample, CV_RAND_NORMAL, cvScalarAll(0), cvScalarAll(1) );
165
166    CvMat* utmat = cvCreateMat(dim, dim, sample->type);
167    CvMat* vect = cvCreateMatHeader(1, dim, sample->type);
168
169    cvChol(cov, utmat);
170
171    int i;
172    for( i = 0; i < amount; i++ )
173    {
174        cvGetRow(sample, vect, i);
175        cvMatMulAdd(vect, utmat, mean, vect);
176    }
177
178    cvReleaseMat(&vect);
179    cvReleaseMat(&utmat);
180}
181
182
183/* Generates <sample> of <amount> points from a discrete variate xi,
184   where Pr{xi = k} == probs[k], 0 < k < len - 1. */
185CV_IMPL void cvRandSeries( float probs[], int len, int sample[], int amount )
186{
187    CvMat* univals = cvCreateMat(1, amount, CV_32FC1);
188    float* knots = (float*)cvAlloc( len * sizeof(float) );
189
190    int i, j;
191
192    CvRNG state = cvRNG(-1);
193    cvRandArr(&state, univals, CV_RAND_UNI, cvScalarAll(0), cvScalarAll(1) );
194
195    knots[0] = probs[0];
196    for( i = 1; i < len; i++ )
197        knots[i] = knots[i - 1] + probs[i];
198
199    for( i = 0; i < amount; i++ )
200        for( j = 0; j < len; j++ )
201        {
202            if ( CV_MAT_ELEM(*univals, float, 0, i) <= knots[j] )
203            {
204                sample[i] = j;
205                break;
206            }
207        }
208
209    cvFree(&knots);
210}
211
212/* Generates <sample> from gaussian mixture distribution */
213CV_IMPL void cvRandGaussMixture( CvMat* means[],
214                                 CvMat* covs[],
215                                 float weights[],
216                                 int clsnum,
217                                 CvMat* sample,
218                                 CvMat* sampClasses )
219{
220    int dim = sample->cols;
221    int amount = sample->rows;
222
223    int i, clss;
224
225    int* sample_clsnum = (int*)cvAlloc( amount * sizeof(int) );
226    CvMat** utmats = (CvMat**)cvAlloc( clsnum * sizeof(CvMat*) );
227    CvMat* vect = cvCreateMatHeader(1, dim, CV_32FC1);
228
229    CvMat* classes;
230    if( sampClasses )
231        classes = sampClasses;
232    else
233        classes = cvCreateMat(1, amount, CV_32FC1);
234
235    CvRNG state = cvRNG(-1);
236    cvRandArr(&state, sample, CV_RAND_NORMAL, cvScalarAll(0), cvScalarAll(1));
237
238    cvRandSeries(weights, clsnum, sample_clsnum, amount);
239
240    for( i = 0; i < clsnum; i++ )
241    {
242        utmats[i] = cvCreateMat(dim, dim, CV_32FC1);
243        cvChol(covs[i], utmats[i]);
244    }
245
246    for( i = 0; i < amount; i++ )
247    {
248        CV_MAT_ELEM(*classes, float, 0, i) = (float)sample_clsnum[i];
249        cvGetRow(sample, vect, i);
250        clss = sample_clsnum[i];
251        cvMatMulAdd(vect, utmats[clss], means[clss], vect);
252    }
253
254    if( !sampClasses )
255        cvReleaseMat(&classes);
256    for( i = 0; i < clsnum; i++ )
257        cvReleaseMat(&utmats[i]);
258    cvFree(&utmats);
259    cvFree(&sample_clsnum);
260    cvReleaseMat(&vect);
261}
262
263
264CvMat* icvGenerateRandomClusterCenters ( int seed, const CvMat* data,
265                                         int num_of_clusters, CvMat* _centers )
266{
267    CvMat* centers = _centers;
268
269    CV_FUNCNAME("icvGenerateRandomClusterCenters");
270    __BEGIN__;
271
272    CvRNG rng;
273    CvMat data_comp, centers_comp;
274    CvPoint minLoc, maxLoc; // Not used, just for function "cvMinMaxLoc"
275    double minVal, maxVal;
276    int i;
277    int dim = data ? data->cols : 0;
278
279    if( ICV_IS_MAT_OF_TYPE(data, CV_32FC1) )
280    {
281        if( _centers && !ICV_IS_MAT_OF_TYPE (_centers, CV_32FC1) )
282        {
283            CV_ERROR(CV_StsBadArg,"");
284        }
285        else if( !_centers )
286            CV_CALL(centers = cvCreateMat (num_of_clusters, dim, CV_32FC1));
287    }
288    else if( ICV_IS_MAT_OF_TYPE(data, CV_64FC1) )
289    {
290        if( _centers && !ICV_IS_MAT_OF_TYPE (_centers, CV_64FC1) )
291        {
292            CV_ERROR(CV_StsBadArg,"");
293        }
294        else if( !_centers )
295            CV_CALL(centers = cvCreateMat (num_of_clusters, dim, CV_64FC1));
296    }
297    else
298        CV_ERROR (CV_StsBadArg,"");
299
300    if( num_of_clusters < 1 )
301        CV_ERROR (CV_StsBadArg,"");
302
303    rng = cvRNG(seed);
304    for (i = 0; i < dim; i++)
305    {
306        CV_CALL(cvGetCol (data, &data_comp, i));
307        CV_CALL(cvMinMaxLoc (&data_comp, &minVal, &maxVal, &minLoc, &maxLoc));
308        CV_CALL(cvGetCol (centers, &centers_comp, i));
309        CV_CALL(cvRandArr (&rng, &centers_comp, CV_RAND_UNI, cvScalarAll(minVal), cvScalarAll(maxVal)));
310    }
311
312    __END__;
313
314    if( (cvGetErrStatus () < 0) || (centers != _centers) )
315        cvReleaseMat (&centers);
316
317    return _centers ? _centers : centers;
318} // end of icvGenerateRandomClusterCenters
319
320// By S. Dilman - begin -
321
322#define ICV_RAND_MAX    4294967296 // == 2^32
323
324CV_IMPL void cvRandRoundUni (CvMat* center,
325                             float radius_small,
326                             float radius_large,
327                             CvMat* desired_matrix,
328                             CvRNG* rng_state_ptr)
329{
330    float rad, norm, coefficient;
331    int dim, size, i, j;
332    CvMat *cov, sample;
333    CvRNG rng_local;
334
335    CV_FUNCNAME("cvRandRoundUni");
336    __BEGIN__
337
338    rng_local = *rng_state_ptr;
339
340    CV_ASSERT ((radius_small >= 0) &&
341               (radius_large > 0) &&
342               (radius_small <= radius_large));
343    CV_ASSERT (center && desired_matrix && rng_state_ptr);
344    CV_ASSERT (center->rows == 1);
345    CV_ASSERT (center->cols == desired_matrix->cols);
346
347    dim = desired_matrix->cols;
348    size = desired_matrix->rows;
349    cov = cvCreateMat (dim, dim, CV_32FC1);
350    cvSetIdentity (cov);
351    cvRandMVNormal (center, cov, desired_matrix, &rng_local);
352
353    for (i = 0; i < size; i++)
354    {
355        rad = (float)(cvRandReal(&rng_local)*(radius_large - radius_small) + radius_small);
356        cvGetRow (desired_matrix, &sample, i);
357        norm = (float) cvNorm (&sample, 0, CV_L2);
358        coefficient = rad / norm;
359        for (j = 0; j < dim; j++)
360             CV_MAT_ELEM (sample, float, 0, j) *= coefficient;
361    }
362
363    __END__
364
365}
366
367// By S. Dilman - end -
368
369/* Aij <- Aji for i > j if lower_to_upper != 0
370              for i < j if lower_to_upper = 0 */
371void cvCompleteSymm( CvMat* matrix, int lower_to_upper )
372{
373    CV_FUNCNAME("cvCompleteSymm");
374
375    __BEGIN__;
376
377    int rows, cols;
378    int i, j;
379    int step;
380
381    if( !CV_IS_MAT(matrix))
382        CV_ERROR(CV_StsBadArg, "Invalid matrix argument");
383
384    rows = matrix->rows;
385    cols = matrix->cols;
386    step = matrix->step / CV_ELEM_SIZE(matrix->type);
387
388    switch(CV_MAT_TYPE(matrix->type))
389    {
390    case CV_32FC1:
391        {
392        float* dst = matrix->data.fl;
393        if( !lower_to_upper )
394            for( i = 1; i < rows; i++ )
395            {
396                const float* src = (const float*)(matrix->data.fl + i);
397                dst += step;
398                for( j = 0; j < i; j++, src += step )
399                    dst[j] = src[0];
400            }
401        else
402            for( i = 0; i < rows-1; i++, dst += step )
403            {
404                const float* src = (const float*)(matrix->data.fl + (i+1)*step + i);
405                for( j = i+1; j < cols; j++, src += step )
406                    dst[j] = src[0];
407            }
408        }
409        break;
410    case CV_64FC1:
411        {
412        double* dst = matrix->data.db;
413        if( !lower_to_upper )
414            for( i = 1; i < rows; i++ )
415            {
416                const double* src = (const double*)(matrix->data.db + i);
417                dst += step;
418                for( j = 0; j < i; j++, src += step )
419                    dst[j] = src[0];
420            }
421        else
422            for( i = 0; i < rows-1; i++, dst += step )
423            {
424                const double* src = (const double*)(matrix->data.db + (i+1)*step + i);
425                for( j = i+1; j < cols; j++, src += step )
426                    dst[j] = src[0];
427            }
428        }
429        break;
430    }
431
432    __END__;
433}
434
435
436static int CV_CDECL
437icvCmpIntegers( const void* a, const void* b )
438{
439    return *(const int*)a - *(const int*)b;
440}
441
442
443static int CV_CDECL
444icvCmpIntegersPtr( const void* _a, const void* _b )
445{
446    int a = **(const int**)_a;
447    int b = **(const int**)_b;
448    return (a < b ? -1 : 0)|(a > b);
449}
450
451
452static int icvCmpSparseVecElems( const void* a, const void* b )
453{
454    return ((CvSparseVecElem32f*)a)->idx - ((CvSparseVecElem32f*)b)->idx;
455}
456
457
458CvMat*
459cvPreprocessIndexArray( const CvMat* idx_arr, int data_arr_size, bool check_for_duplicates )
460{
461    CvMat* idx = 0;
462
463    CV_FUNCNAME( "cvPreprocessIndexArray" );
464
465    __BEGIN__;
466
467    int i, idx_total, idx_selected = 0, step, type, prev = INT_MIN, is_sorted = 1;
468    uchar* srcb = 0;
469    int* srci = 0;
470    int* dsti;
471
472    if( !CV_IS_MAT(idx_arr) )
473        CV_ERROR( CV_StsBadArg, "Invalid index array" );
474
475    if( idx_arr->rows != 1 && idx_arr->cols != 1 )
476        CV_ERROR( CV_StsBadSize, "the index array must be 1-dimensional" );
477
478    idx_total = idx_arr->rows + idx_arr->cols - 1;
479    srcb = idx_arr->data.ptr;
480    srci = idx_arr->data.i;
481
482    type = CV_MAT_TYPE(idx_arr->type);
483    step = CV_IS_MAT_CONT(idx_arr->type) ? 1 : idx_arr->step/CV_ELEM_SIZE(type);
484
485    switch( type )
486    {
487    case CV_8UC1:
488    case CV_8SC1:
489        // idx_arr is array of 1's and 0's -
490        // i.e. it is a mask of the selected components
491        if( idx_total != data_arr_size )
492            CV_ERROR( CV_StsUnmatchedSizes,
493            "Component mask should contain as many elements as the total number of input variables" );
494
495        for( i = 0; i < idx_total; i++ )
496            idx_selected += srcb[i*step] != 0;
497
498        if( idx_selected == 0 )
499            CV_ERROR( CV_StsOutOfRange, "No components/input_variables is selected!" );
500
501        if( idx_selected == idx_total )
502            EXIT;
503        break;
504    case CV_32SC1:
505        // idx_arr is array of integer indices of selected components
506        if( idx_total > data_arr_size )
507            CV_ERROR( CV_StsOutOfRange,
508            "index array may not contain more elements than the total number of input variables" );
509        idx_selected = idx_total;
510        // check if sorted already
511        for( i = 0; i < idx_total; i++ )
512        {
513            int val = srci[i*step];
514            if( val >= prev )
515            {
516                is_sorted = 0;
517                break;
518            }
519            prev = val;
520        }
521        break;
522    default:
523        CV_ERROR( CV_StsUnsupportedFormat, "Unsupported index array data type "
524                                           "(it should be 8uC1, 8sC1 or 32sC1)" );
525    }
526
527    CV_CALL( idx = cvCreateMat( 1, idx_selected, CV_32SC1 ));
528    dsti = idx->data.i;
529
530    if( type < CV_32SC1 )
531    {
532        for( i = 0; i < idx_total; i++ )
533            if( srcb[i*step] )
534                *dsti++ = i;
535    }
536    else
537    {
538        for( i = 0; i < idx_total; i++ )
539            dsti[i] = srci[i*step];
540
541        if( !is_sorted )
542            qsort( dsti, idx_total, sizeof(dsti[0]), icvCmpIntegers );
543
544        if( dsti[0] < 0 || dsti[idx_total-1] >= data_arr_size )
545            CV_ERROR( CV_StsOutOfRange, "the index array elements are out of range" );
546
547        if( check_for_duplicates )
548        {
549            for( i = 1; i < idx_total; i++ )
550                if( dsti[i] <= dsti[i-1] )
551                    CV_ERROR( CV_StsBadArg, "There are duplicated index array elements" );
552        }
553    }
554
555    __END__;
556
557    if( cvGetErrStatus() < 0 )
558        cvReleaseMat( &idx );
559
560    return idx;
561}
562
563
564CvMat*
565cvPreprocessVarType( const CvMat* var_type, const CvMat* var_idx,
566                     int var_all, int* response_type )
567{
568    CvMat* out_var_type = 0;
569    CV_FUNCNAME( "cvPreprocessVarType" );
570
571    if( response_type )
572        *response_type = -1;
573
574    __BEGIN__;
575
576    int i, tm_size, tm_step;
577    int* map = 0;
578    const uchar* src;
579    uchar* dst;
580    int var_count = var_all;
581
582    if( !CV_IS_MAT(var_type) )
583        CV_ERROR( var_type ? CV_StsBadArg : CV_StsNullPtr, "Invalid or absent var_type array" );
584
585    if( var_type->rows != 1 && var_type->cols != 1 )
586        CV_ERROR( CV_StsBadSize, "var_type array must be 1-dimensional" );
587
588    if( !CV_IS_MASK_ARR(var_type))
589        CV_ERROR( CV_StsUnsupportedFormat, "type mask must be 8uC1 or 8sC1 array" );
590
591    tm_size = var_type->rows + var_type->cols - 1;
592    tm_step = var_type->step ? var_type->step/CV_ELEM_SIZE(var_type->type) : 1;
593
594    if( /*tm_size != var_count &&*/ tm_size != var_count + 1 )
595        CV_ERROR( CV_StsBadArg,
596        "type mask must be of <input var count> + 1 size" );
597
598    if( response_type && tm_size > var_count )
599        *response_type = var_type->data.ptr[var_count*tm_step] != 0;
600
601    if( var_idx )
602    {
603        if( !CV_IS_MAT(var_idx) || CV_MAT_TYPE(var_idx->type) != CV_32SC1 ||
604            var_idx->rows != 1 && var_idx->cols != 1 || !CV_IS_MAT_CONT(var_idx->type) )
605            CV_ERROR( CV_StsBadArg, "var index array should be continuous 1-dimensional integer vector" );
606        if( var_idx->rows + var_idx->cols - 1 > var_count )
607            CV_ERROR( CV_StsBadSize, "var index array is too large" );
608        map = var_idx->data.i;
609        var_count = var_idx->rows + var_idx->cols - 1;
610    }
611
612    CV_CALL( out_var_type = cvCreateMat( 1, var_count, CV_8UC1 ));
613    src = var_type->data.ptr;
614    dst = out_var_type->data.ptr;
615
616    for( i = 0; i < var_count; i++ )
617    {
618        int idx = map ? map[i] : i;
619        assert( (unsigned)idx < (unsigned)tm_size );
620        dst[i] = (uchar)(src[idx*tm_step] != 0);
621    }
622
623    __END__;
624
625    return out_var_type;
626}
627
628
629CvMat*
630cvPreprocessOrderedResponses( const CvMat* responses, const CvMat* sample_idx, int sample_all )
631{
632    CvMat* out_responses = 0;
633
634    CV_FUNCNAME( "cvPreprocessOrderedResponses" );
635
636    __BEGIN__;
637
638    int i, r_type, r_step;
639    const int* map = 0;
640    float* dst;
641    int sample_count = sample_all;
642
643    if( !CV_IS_MAT(responses) )
644        CV_ERROR( CV_StsBadArg, "Invalid response array" );
645
646    if( responses->rows != 1 && responses->cols != 1 )
647        CV_ERROR( CV_StsBadSize, "Response array must be 1-dimensional" );
648
649    if( responses->rows + responses->cols - 1 != sample_count )
650        CV_ERROR( CV_StsUnmatchedSizes,
651        "Response array must contain as many elements as the total number of samples" );
652
653    r_type = CV_MAT_TYPE(responses->type);
654    if( r_type != CV_32FC1 && r_type != CV_32SC1 )
655        CV_ERROR( CV_StsUnsupportedFormat, "Unsupported response type" );
656
657    r_step = responses->step ? responses->step / CV_ELEM_SIZE(responses->type) : 1;
658
659    if( r_type == CV_32FC1 && CV_IS_MAT_CONT(responses->type) && !sample_idx )
660    {
661        out_responses = (CvMat*)responses;
662        EXIT;
663    }
664
665    if( sample_idx )
666    {
667        if( !CV_IS_MAT(sample_idx) || CV_MAT_TYPE(sample_idx->type) != CV_32SC1 ||
668            sample_idx->rows != 1 && sample_idx->cols != 1 || !CV_IS_MAT_CONT(sample_idx->type) )
669            CV_ERROR( CV_StsBadArg, "sample index array should be continuous 1-dimensional integer vector" );
670        if( sample_idx->rows + sample_idx->cols - 1 > sample_count )
671            CV_ERROR( CV_StsBadSize, "sample index array is too large" );
672        map = sample_idx->data.i;
673        sample_count = sample_idx->rows + sample_idx->cols - 1;
674    }
675
676    CV_CALL( out_responses = cvCreateMat( 1, sample_count, CV_32FC1 ));
677
678    dst = out_responses->data.fl;
679    if( r_type == CV_32FC1 )
680    {
681        const float* src = responses->data.fl;
682        for( i = 0; i < sample_count; i++ )
683        {
684            int idx = map ? map[i] : i;
685            assert( (unsigned)idx < (unsigned)sample_all );
686            dst[i] = src[idx*r_step];
687        }
688    }
689    else
690    {
691        const int* src = responses->data.i;
692        for( i = 0; i < sample_count; i++ )
693        {
694            int idx = map ? map[i] : i;
695            assert( (unsigned)idx < (unsigned)sample_all );
696            dst[i] = (float)src[idx*r_step];
697        }
698    }
699
700    __END__;
701
702    return out_responses;
703}
704
705CvMat*
706cvPreprocessCategoricalResponses( const CvMat* responses,
707    const CvMat* sample_idx, int sample_all,
708    CvMat** out_response_map, CvMat** class_counts )
709{
710    CvMat* out_responses = 0;
711    int** response_ptr = 0;
712
713    CV_FUNCNAME( "cvPreprocessCategoricalResponses" );
714
715    if( out_response_map )
716        *out_response_map = 0;
717
718    if( class_counts )
719        *class_counts = 0;
720
721    __BEGIN__;
722
723    int i, r_type, r_step;
724    int cls_count = 1, prev_cls, prev_i;
725    const int* map = 0;
726    const int* srci;
727    const float* srcfl;
728    int* dst;
729    int* cls_map;
730    int* cls_counts = 0;
731    int sample_count = sample_all;
732
733    if( !CV_IS_MAT(responses) )
734        CV_ERROR( CV_StsBadArg, "Invalid response array" );
735
736    if( responses->rows != 1 && responses->cols != 1 )
737        CV_ERROR( CV_StsBadSize, "Response array must be 1-dimensional" );
738
739    if( responses->rows + responses->cols - 1 != sample_count )
740        CV_ERROR( CV_StsUnmatchedSizes,
741        "Response array must contain as many elements as the total number of samples" );
742
743    r_type = CV_MAT_TYPE(responses->type);
744    if( r_type != CV_32FC1 && r_type != CV_32SC1 )
745        CV_ERROR( CV_StsUnsupportedFormat, "Unsupported response type" );
746
747    r_step = responses->step ? responses->step / CV_ELEM_SIZE(responses->type) : 1;
748
749    if( sample_idx )
750    {
751        if( !CV_IS_MAT(sample_idx) || CV_MAT_TYPE(sample_idx->type) != CV_32SC1 ||
752            sample_idx->rows != 1 && sample_idx->cols != 1 || !CV_IS_MAT_CONT(sample_idx->type) )
753            CV_ERROR( CV_StsBadArg, "sample index array should be continuous 1-dimensional integer vector" );
754        if( sample_idx->rows + sample_idx->cols - 1 > sample_count )
755            CV_ERROR( CV_StsBadSize, "sample index array is too large" );
756        map = sample_idx->data.i;
757        sample_count = sample_idx->rows + sample_idx->cols - 1;
758    }
759
760    CV_CALL( out_responses = cvCreateMat( 1, sample_count, CV_32SC1 ));
761
762    if( !out_response_map )
763        CV_ERROR( CV_StsNullPtr, "out_response_map pointer is NULL" );
764
765    CV_CALL( response_ptr = (int**)cvAlloc( sample_count*sizeof(response_ptr[0])));
766
767    srci = responses->data.i;
768    srcfl = responses->data.fl;
769    dst = out_responses->data.i;
770
771    for( i = 0; i < sample_count; i++ )
772    {
773        int idx = map ? map[i] : i;
774        assert( (unsigned)idx < (unsigned)sample_all );
775        if( r_type == CV_32SC1 )
776            dst[i] = srci[idx*r_step];
777        else
778        {
779            float rf = srcfl[idx*r_step];
780            int ri = cvRound(rf);
781            if( ri != rf )
782            {
783                char buf[100];
784                sprintf( buf, "response #%d is not integral", idx );
785                CV_ERROR( CV_StsBadArg, buf );
786            }
787            dst[i] = ri;
788        }
789        response_ptr[i] = dst + i;
790    }
791
792    qsort( response_ptr, sample_count, sizeof(int*), icvCmpIntegersPtr );
793
794    // count the classes
795    for( i = 1; i < sample_count; i++ )
796        cls_count += *response_ptr[i] != *response_ptr[i-1];
797
798    if( cls_count < 2 )
799        CV_ERROR( CV_StsBadArg, "There is only a single class" );
800
801    CV_CALL( *out_response_map = cvCreateMat( 1, cls_count, CV_32SC1 ));
802
803    if( class_counts )
804    {
805        CV_CALL( *class_counts = cvCreateMat( 1, cls_count, CV_32SC1 ));
806        cls_counts = (*class_counts)->data.i;
807    }
808
809    // compact the class indices and build the map
810    prev_cls = ~*response_ptr[0];
811    cls_count = -1;
812    cls_map = (*out_response_map)->data.i;
813
814    for( i = 0, prev_i = -1; i < sample_count; i++ )
815    {
816        int cur_cls = *response_ptr[i];
817        if( cur_cls != prev_cls )
818        {
819            if( cls_counts && cls_count >= 0 )
820                cls_counts[cls_count] = i - prev_i;
821            cls_map[++cls_count] = prev_cls = cur_cls;
822            prev_i = i;
823        }
824        *response_ptr[i] = cls_count;
825    }
826
827    if( cls_counts )
828        cls_counts[cls_count] = i - prev_i;
829
830    __END__;
831
832    cvFree( &response_ptr );
833
834    return out_responses;
835}
836
837
838const float**
839cvGetTrainSamples( const CvMat* train_data, int tflag,
840                   const CvMat* var_idx, const CvMat* sample_idx,
841                   int* _var_count, int* _sample_count,
842                   bool always_copy_data )
843{
844    float** samples = 0;
845
846    CV_FUNCNAME( "cvGetTrainSamples" );
847
848    __BEGIN__;
849
850    int i, j, var_count, sample_count, s_step, v_step;
851    bool copy_data;
852    const float* data;
853    const int *s_idx, *v_idx;
854
855    if( !CV_IS_MAT(train_data) )
856        CV_ERROR( CV_StsBadArg, "Invalid or NULL training data matrix" );
857
858    var_count = var_idx ? var_idx->cols + var_idx->rows - 1 :
859                tflag == CV_ROW_SAMPLE ? train_data->cols : train_data->rows;
860    sample_count = sample_idx ? sample_idx->cols + sample_idx->rows - 1 :
861                   tflag == CV_ROW_SAMPLE ? train_data->rows : train_data->cols;
862
863    if( _var_count )
864        *_var_count = var_count;
865
866    if( _sample_count )
867        *_sample_count = sample_count;
868
869    copy_data = tflag != CV_ROW_SAMPLE || var_idx || always_copy_data;
870
871    CV_CALL( samples = (float**)cvAlloc(sample_count*sizeof(samples[0]) +
872                (copy_data ? 1 : 0)*var_count*sample_count*sizeof(samples[0][0])) );
873    data = train_data->data.fl;
874    s_step = train_data->step / sizeof(samples[0][0]);
875    v_step = 1;
876    s_idx = sample_idx ? sample_idx->data.i : 0;
877    v_idx = var_idx ? var_idx->data.i : 0;
878
879    if( !copy_data )
880    {
881        for( i = 0; i < sample_count; i++ )
882            samples[i] = (float*)(data + (s_idx ? s_idx[i] : i)*s_step);
883    }
884    else
885    {
886        samples[0] = (float*)(samples + sample_count);
887        if( tflag != CV_ROW_SAMPLE )
888            CV_SWAP( s_step, v_step, i );
889
890        for( i = 0; i < sample_count; i++ )
891        {
892            float* dst = samples[i] = samples[0] + i*var_count;
893            const float* src = data + (s_idx ? s_idx[i] : i)*s_step;
894
895            if( !v_idx )
896                for( j = 0; j < var_count; j++ )
897                    dst[j] = src[j*v_step];
898            else
899                for( j = 0; j < var_count; j++ )
900                    dst[j] = src[v_idx[j]*v_step];
901        }
902    }
903
904    __END__;
905
906    return (const float**)samples;
907}
908
909
910void
911cvCheckTrainData( const CvMat* train_data, int tflag,
912                  const CvMat* missing_mask,
913                  int* var_all, int* sample_all )
914{
915    CV_FUNCNAME( "cvCheckTrainData" );
916
917    if( var_all )
918        *var_all = 0;
919
920    if( sample_all )
921        *sample_all = 0;
922
923    __BEGIN__;
924
925    // check parameter types and sizes
926    if( !CV_IS_MAT(train_data) || CV_MAT_TYPE(train_data->type) != CV_32FC1 )
927        CV_ERROR( CV_StsBadArg, "train data must be floating-point matrix" );
928
929    if( missing_mask )
930    {
931        if( !CV_IS_MAT(missing_mask) || !CV_IS_MASK_ARR(missing_mask) ||
932            !CV_ARE_SIZES_EQ(train_data, missing_mask) )
933            CV_ERROR( CV_StsBadArg,
934            "missing value mask must be 8-bit matrix of the same size as training data" );
935    }
936
937    if( tflag != CV_ROW_SAMPLE && tflag != CV_COL_SAMPLE )
938        CV_ERROR( CV_StsBadArg,
939        "Unknown training data layout (must be CV_ROW_SAMPLE or CV_COL_SAMPLE)" );
940
941    if( var_all )
942        *var_all = tflag == CV_ROW_SAMPLE ? train_data->cols : train_data->rows;
943
944    if( sample_all )
945        *sample_all = tflag == CV_ROW_SAMPLE ? train_data->rows : train_data->cols;
946
947    __END__;
948}
949
950
951int
952cvPrepareTrainData( const char* /*funcname*/,
953                    const CvMat* train_data, int tflag,
954                    const CvMat* responses, int response_type,
955                    const CvMat* var_idx,
956                    const CvMat* sample_idx,
957                    bool always_copy_data,
958                    const float*** out_train_samples,
959                    int* _sample_count,
960                    int* _var_count,
961                    int* _var_all,
962                    CvMat** out_responses,
963                    CvMat** out_response_map,
964                    CvMat** out_var_idx,
965                    CvMat** out_sample_idx )
966{
967    int ok = 0;
968    CvMat* _var_idx = 0;
969    CvMat* _sample_idx = 0;
970    CvMat* _responses = 0;
971    int sample_all = 0, sample_count = 0, var_all = 0, var_count = 0;
972
973    CV_FUNCNAME( "cvPrepareTrainData" );
974
975    // step 0. clear all the output pointers to ensure we do not try
976    // to call free() with uninitialized pointers
977    if( out_responses )
978        *out_responses = 0;
979
980    if( out_response_map )
981        *out_response_map = 0;
982
983    if( out_var_idx )
984        *out_var_idx = 0;
985
986    if( out_sample_idx )
987        *out_sample_idx = 0;
988
989    if( out_train_samples )
990        *out_train_samples = 0;
991
992    if( _sample_count )
993        *_sample_count = 0;
994
995    if( _var_count )
996        *_var_count = 0;
997
998    if( _var_all )
999        *_var_all = 0;
1000
1001    __BEGIN__;
1002
1003    if( !out_train_samples )
1004        CV_ERROR( CV_StsBadArg, "output pointer to train samples is NULL" );
1005
1006    CV_CALL( cvCheckTrainData( train_data, tflag, 0, &var_all, &sample_all ));
1007
1008    if( sample_idx )
1009        CV_CALL( _sample_idx = cvPreprocessIndexArray( sample_idx, sample_all ));
1010    if( var_idx )
1011        CV_CALL( _var_idx = cvPreprocessIndexArray( var_idx, var_all ));
1012
1013    if( responses )
1014    {
1015        if( !out_responses )
1016            CV_ERROR( CV_StsNullPtr, "output response pointer is NULL" );
1017
1018        if( response_type == CV_VAR_NUMERICAL )
1019        {
1020            CV_CALL( _responses = cvPreprocessOrderedResponses( responses,
1021                                                _sample_idx, sample_all ));
1022        }
1023        else
1024        {
1025            CV_CALL( _responses = cvPreprocessCategoricalResponses( responses,
1026                                _sample_idx, sample_all, out_response_map, 0 ));
1027        }
1028    }
1029
1030    CV_CALL( *out_train_samples =
1031                cvGetTrainSamples( train_data, tflag, _var_idx, _sample_idx,
1032                                   &var_count, &sample_count, always_copy_data ));
1033
1034    ok = 1;
1035
1036    __END__;
1037
1038    if( ok )
1039    {
1040        if( out_responses )
1041            *out_responses = _responses, _responses = 0;
1042
1043        if( out_var_idx )
1044            *out_var_idx = _var_idx, _var_idx = 0;
1045
1046        if( out_sample_idx )
1047            *out_sample_idx = _sample_idx, _sample_idx = 0;
1048
1049        if( _sample_count )
1050            *_sample_count = sample_count;
1051
1052        if( _var_count )
1053            *_var_count = var_count;
1054
1055        if( _var_all )
1056            *_var_all = var_all;
1057    }
1058    else
1059    {
1060        if( out_response_map )
1061            cvReleaseMat( out_response_map );
1062        cvFree( out_train_samples );
1063    }
1064
1065    if( _responses != responses )
1066        cvReleaseMat( &_responses );
1067    cvReleaseMat( &_var_idx );
1068    cvReleaseMat( &_sample_idx );
1069
1070    return ok;
1071}
1072
1073
1074typedef struct CvSampleResponsePair
1075{
1076    const float* sample;
1077    const uchar* mask;
1078    int response;
1079    int index;
1080}
1081CvSampleResponsePair;
1082
1083
1084static int
1085CV_CDECL icvCmpSampleResponsePairs( const void* a, const void* b )
1086{
1087    int ra = ((const CvSampleResponsePair*)a)->response;
1088    int rb = ((const CvSampleResponsePair*)b)->response;
1089    int ia = ((const CvSampleResponsePair*)a)->index;
1090    int ib = ((const CvSampleResponsePair*)b)->index;
1091
1092    return ra < rb ? -1 : ra > rb ? 1 : ia - ib;
1093    //return (ra > rb ? -1 : 0)|(ra < rb);
1094}
1095
1096
1097void
1098cvSortSamplesByClasses( const float** samples, const CvMat* classes,
1099                        int* class_ranges, const uchar** mask )
1100{
1101    CvSampleResponsePair* pairs = 0;
1102    CV_FUNCNAME( "cvSortSamplesByClasses" );
1103
1104    __BEGIN__;
1105
1106    int i, k = 0, sample_count;
1107
1108    if( !samples || !classes || !class_ranges )
1109        CV_ERROR( CV_StsNullPtr, "INTERNAL ERROR: some of the args are NULL pointers" );
1110
1111    if( classes->rows != 1 || CV_MAT_TYPE(classes->type) != CV_32SC1 )
1112        CV_ERROR( CV_StsBadArg, "classes array must be a single row of integers" );
1113
1114    sample_count = classes->cols;
1115    CV_CALL( pairs = (CvSampleResponsePair*)cvAlloc( (sample_count+1)*sizeof(pairs[0])));
1116
1117    for( i = 0; i < sample_count; i++ )
1118    {
1119        pairs[i].sample = samples[i];
1120        pairs[i].mask = (mask) ? (mask[i]) : 0;
1121        pairs[i].response = classes->data.i[i];
1122        pairs[i].index = i;
1123        assert( classes->data.i[i] >= 0 );
1124    }
1125
1126    qsort( pairs, sample_count, sizeof(pairs[0]), icvCmpSampleResponsePairs );
1127    pairs[sample_count].response = -1;
1128    class_ranges[0] = 0;
1129
1130    for( i = 0; i < sample_count; i++ )
1131    {
1132        samples[i] = pairs[i].sample;
1133        if (mask)
1134            mask[i] = pairs[i].mask;
1135        classes->data.i[i] = pairs[i].response;
1136
1137        if( pairs[i].response != pairs[i+1].response )
1138            class_ranges[++k] = i+1;
1139    }
1140
1141    __END__;
1142
1143    cvFree( &pairs );
1144}
1145
1146
1147void
1148cvPreparePredictData( const CvArr* _sample, int dims_all,
1149                      const CvMat* comp_idx, int class_count,
1150                      const CvMat* prob, float** _row_sample,
1151                      int as_sparse )
1152{
1153    float* row_sample = 0;
1154    int* inverse_comp_idx = 0;
1155
1156    CV_FUNCNAME( "cvPreparePredictData" );
1157
1158    __BEGIN__;
1159
1160    const CvMat* sample = (const CvMat*)_sample;
1161    float* sample_data;
1162    int sample_step;
1163    int is_sparse = CV_IS_SPARSE_MAT(sample);
1164    int d, sizes[CV_MAX_DIM];
1165    int i, dims_selected;
1166    int vec_size;
1167
1168    if( !is_sparse && !CV_IS_MAT(sample) )
1169        CV_ERROR( !sample ? CV_StsNullPtr : CV_StsBadArg, "The sample is not a valid vector" );
1170
1171    if( cvGetElemType( sample ) != CV_32FC1 )
1172        CV_ERROR( CV_StsUnsupportedFormat, "Input sample must have 32fC1 type" );
1173
1174    CV_CALL( d = cvGetDims( sample, sizes ));
1175
1176    if( !(is_sparse && d == 1 || !is_sparse && d == 2 && (sample->rows == 1 || sample->cols == 1)) )
1177        CV_ERROR( CV_StsBadSize, "Input sample must be 1-dimensional vector" );
1178
1179    if( d == 1 )
1180        sizes[1] = 1;
1181
1182    if( sizes[0] + sizes[1] - 1 != dims_all )
1183        CV_ERROR( CV_StsUnmatchedSizes,
1184        "The sample size is different from what has been used for training" );
1185
1186    if( !_row_sample )
1187        CV_ERROR( CV_StsNullPtr, "INTERNAL ERROR: The row_sample pointer is NULL" );
1188
1189    if( comp_idx && (!CV_IS_MAT(comp_idx) || comp_idx->rows != 1 ||
1190        CV_MAT_TYPE(comp_idx->type) != CV_32SC1) )
1191        CV_ERROR( CV_StsBadArg, "INTERNAL ERROR: invalid comp_idx" );
1192
1193    dims_selected = comp_idx ? comp_idx->cols : dims_all;
1194
1195    if( prob )
1196    {
1197        if( !CV_IS_MAT(prob) )
1198            CV_ERROR( CV_StsBadArg, "The output matrix of probabilities is invalid" );
1199
1200        if( (prob->rows != 1 && prob->cols != 1) ||
1201            CV_MAT_TYPE(prob->type) != CV_32FC1 &&
1202            CV_MAT_TYPE(prob->type) != CV_64FC1 )
1203            CV_ERROR( CV_StsBadSize,
1204            "The matrix of probabilities must be 1-dimensional vector of 32fC1 type" );
1205
1206        if( prob->rows + prob->cols - 1 != class_count )
1207            CV_ERROR( CV_StsUnmatchedSizes,
1208            "The vector of probabilities must contain as many elements as "
1209            "the number of classes in the training set" );
1210    }
1211
1212    vec_size = !as_sparse ? dims_selected*sizeof(row_sample[0]) :
1213                (dims_selected + 1)*sizeof(CvSparseVecElem32f);
1214
1215    if( CV_IS_MAT(sample) )
1216    {
1217        sample_data = sample->data.fl;
1218        sample_step = sample->step / sizeof(row_sample[0]);
1219
1220        if( !comp_idx && sample_step <= 1 && !as_sparse )
1221            *_row_sample = sample_data;
1222        else
1223        {
1224            CV_CALL( row_sample = (float*)cvAlloc( vec_size ));
1225
1226            if( !comp_idx )
1227                for( i = 0; i < dims_selected; i++ )
1228                    row_sample[i] = sample_data[sample_step*i];
1229            else
1230            {
1231                int* comp = comp_idx->data.i;
1232                if( !sample_step )
1233                    for( i = 0; i < dims_selected; i++ )
1234                        row_sample[i] = sample_data[comp[i]];
1235                else
1236                    for( i = 0; i < dims_selected; i++ )
1237                        row_sample[i] = sample_data[sample_step*comp[i]];
1238            }
1239
1240            *_row_sample = row_sample;
1241        }
1242
1243        if( as_sparse )
1244        {
1245            const float* src = (const float*)row_sample;
1246            CvSparseVecElem32f* dst = (CvSparseVecElem32f*)row_sample;
1247
1248            dst[dims_selected].idx = -1;
1249            for( i = dims_selected - 1; i >= 0; i-- )
1250            {
1251                dst[i].idx = i;
1252                dst[i].val = src[i];
1253            }
1254        }
1255    }
1256    else
1257    {
1258        CvSparseNode* node;
1259        CvSparseMatIterator mat_iterator;
1260        const CvSparseMat* sparse = (const CvSparseMat*)sample;
1261        assert( is_sparse );
1262
1263        node = cvInitSparseMatIterator( sparse, &mat_iterator );
1264        CV_CALL( row_sample = (float*)cvAlloc( vec_size ));
1265
1266        if( comp_idx )
1267        {
1268            CV_CALL( inverse_comp_idx = (int*)cvAlloc( dims_all*sizeof(int) ));
1269            memset( inverse_comp_idx, -1, dims_all*sizeof(int) );
1270            for( i = 0; i < dims_selected; i++ )
1271                inverse_comp_idx[comp_idx->data.i[i]] = i;
1272        }
1273
1274        if( !as_sparse )
1275        {
1276            memset( row_sample, 0, vec_size );
1277
1278            for( ; node != 0; node = cvGetNextSparseNode(&mat_iterator) )
1279            {
1280                int idx = *CV_NODE_IDX( sparse, node );
1281                if( inverse_comp_idx )
1282                {
1283                    idx = inverse_comp_idx[idx];
1284                    if( idx < 0 )
1285                        continue;
1286                }
1287                row_sample[idx] = *(float*)CV_NODE_VAL( sparse, node );
1288            }
1289        }
1290        else
1291        {
1292            CvSparseVecElem32f* ptr = (CvSparseVecElem32f*)row_sample;
1293
1294            for( ; node != 0; node = cvGetNextSparseNode(&mat_iterator) )
1295            {
1296                int idx = *CV_NODE_IDX( sparse, node );
1297                if( inverse_comp_idx )
1298                {
1299                    idx = inverse_comp_idx[idx];
1300                    if( idx < 0 )
1301                        continue;
1302                }
1303                ptr->idx = idx;
1304                ptr->val = *(float*)CV_NODE_VAL( sparse, node );
1305                ptr++;
1306            }
1307
1308            qsort( row_sample, ptr - (CvSparseVecElem32f*)row_sample,
1309                   sizeof(ptr[0]), icvCmpSparseVecElems );
1310            ptr->idx = -1;
1311        }
1312
1313        *_row_sample = row_sample;
1314    }
1315
1316    __END__;
1317
1318    if( inverse_comp_idx )
1319        cvFree( &inverse_comp_idx );
1320
1321    if( cvGetErrStatus() < 0 && _row_sample )
1322    {
1323        cvFree( &row_sample );
1324        *_row_sample = 0;
1325    }
1326}
1327
1328
1329static void
1330icvConvertDataToSparse( const uchar* src, int src_step, int src_type,
1331                        uchar* dst, int dst_step, int dst_type,
1332                        CvSize size, int* idx )
1333{
1334    CV_FUNCNAME( "icvConvertDataToSparse" );
1335
1336    __BEGIN__;
1337
1338    int i, j;
1339    src_type = CV_MAT_TYPE(src_type);
1340    dst_type = CV_MAT_TYPE(dst_type);
1341
1342    if( CV_MAT_CN(src_type) != 1 || CV_MAT_CN(dst_type) != 1 )
1343        CV_ERROR( CV_StsUnsupportedFormat, "The function supports only single-channel arrays" );
1344
1345    if( src_step == 0 )
1346        src_step = CV_ELEM_SIZE(src_type);
1347
1348    if( dst_step == 0 )
1349        dst_step = CV_ELEM_SIZE(dst_type);
1350
1351    // if there is no "idx" and if both arrays are continuous,
1352    // do the whole processing (copying or conversion) in a single loop
1353    if( !idx && CV_ELEM_SIZE(src_type)*size.width == src_step &&
1354        CV_ELEM_SIZE(dst_type)*size.width == dst_step )
1355    {
1356        size.width *= size.height;
1357        size.height = 1;
1358    }
1359
1360    if( src_type == dst_type )
1361    {
1362        int full_width = CV_ELEM_SIZE(dst_type)*size.width;
1363
1364        if( full_width == sizeof(int) ) // another common case: copy int's or float's
1365            for( i = 0; i < size.height; i++, src += src_step )
1366                *(int*)(dst + dst_step*(idx ? idx[i] : i)) = *(int*)src;
1367        else
1368            for( i = 0; i < size.height; i++, src += src_step )
1369                memcpy( dst + dst_step*(idx ? idx[i] : i), src, full_width );
1370    }
1371    else if( src_type == CV_32SC1 && (dst_type == CV_32FC1 || dst_type == CV_64FC1) )
1372        for( i = 0; i < size.height; i++, src += src_step )
1373        {
1374            uchar* _dst = dst + dst_step*(idx ? idx[i] : i);
1375            if( dst_type == CV_32FC1 )
1376                for( j = 0; j < size.width; j++ )
1377                    ((float*)_dst)[j] = (float)((int*)src)[j];
1378            else
1379                for( j = 0; j < size.width; j++ )
1380                    ((double*)_dst)[j] = ((int*)src)[j];
1381        }
1382    else if( (src_type == CV_32FC1 || src_type == CV_64FC1) && dst_type == CV_32SC1 )
1383        for( i = 0; i < size.height; i++, src += src_step )
1384        {
1385            uchar* _dst = dst + dst_step*(idx ? idx[i] : i);
1386            if( src_type == CV_32FC1 )
1387                for( j = 0; j < size.width; j++ )
1388                    ((int*)_dst)[j] = cvRound(((float*)src)[j]);
1389            else
1390                for( j = 0; j < size.width; j++ )
1391                    ((int*)_dst)[j] = cvRound(((double*)src)[j]);
1392        }
1393    else if( src_type == CV_32FC1 && dst_type == CV_64FC1 ||
1394             src_type == CV_64FC1 && dst_type == CV_32FC1 )
1395        for( i = 0; i < size.height; i++, src += src_step )
1396        {
1397            uchar* _dst = dst + dst_step*(idx ? idx[i] : i);
1398            if( src_type == CV_32FC1 )
1399                for( j = 0; j < size.width; j++ )
1400                    ((double*)_dst)[j] = ((float*)src)[j];
1401            else
1402                for( j = 0; j < size.width; j++ )
1403                    ((float*)_dst)[j] = (float)((double*)src)[j];
1404        }
1405    else
1406        CV_ERROR( CV_StsUnsupportedFormat, "Unsupported combination of input and output vectors" );
1407
1408    __END__;
1409}
1410
1411
1412void
1413cvWritebackLabels( const CvMat* labels, CvMat* dst_labels,
1414                   const CvMat* centers, CvMat* dst_centers,
1415                   const CvMat* probs, CvMat* dst_probs,
1416                   const CvMat* sample_idx, int samples_all,
1417                   const CvMat* comp_idx, int dims_all )
1418{
1419    CV_FUNCNAME( "cvWritebackLabels" );
1420
1421    __BEGIN__;
1422
1423    int samples_selected = samples_all, dims_selected = dims_all;
1424
1425    if( dst_labels && !CV_IS_MAT(dst_labels) )
1426        CV_ERROR( CV_StsBadArg, "Array of output labels is not a valid matrix" );
1427
1428    if( dst_centers )
1429        if( !ICV_IS_MAT_OF_TYPE(dst_centers, CV_32FC1) &&
1430            !ICV_IS_MAT_OF_TYPE(dst_centers, CV_64FC1) )
1431            CV_ERROR( CV_StsBadArg, "Array of cluster centers is not a valid matrix" );
1432
1433    if( dst_probs && !CV_IS_MAT(dst_probs) )
1434        CV_ERROR( CV_StsBadArg, "Probability matrix is not valid" );
1435
1436    if( sample_idx )
1437    {
1438        CV_ASSERT( sample_idx->rows == 1 && CV_MAT_TYPE(sample_idx->type) == CV_32SC1 );
1439        samples_selected = sample_idx->cols;
1440    }
1441
1442    if( comp_idx )
1443    {
1444        CV_ASSERT( comp_idx->rows == 1 && CV_MAT_TYPE(comp_idx->type) == CV_32SC1 );
1445        dims_selected = comp_idx->cols;
1446    }
1447
1448    if( dst_labels && (!labels || labels->data.ptr != dst_labels->data.ptr) )
1449    {
1450        if( !labels )
1451            CV_ERROR( CV_StsNullPtr, "NULL labels" );
1452
1453        CV_ASSERT( labels->rows == 1 );
1454
1455        if( dst_labels->rows != 1 && dst_labels->cols != 1 )
1456            CV_ERROR( CV_StsBadSize, "Array of output labels should be 1d vector" );
1457
1458        if( dst_labels->rows + dst_labels->cols - 1 != samples_all )
1459            CV_ERROR( CV_StsUnmatchedSizes,
1460            "Size of vector of output labels is not equal to the total number of input samples" );
1461
1462        CV_ASSERT( labels->cols == samples_selected );
1463
1464        CV_CALL( icvConvertDataToSparse( labels->data.ptr, labels->step, labels->type,
1465                        dst_labels->data.ptr, dst_labels->step, dst_labels->type,
1466                        cvSize( 1, samples_selected ), sample_idx ? sample_idx->data.i : 0 ));
1467    }
1468
1469    if( dst_centers && (!centers || centers->data.ptr != dst_centers->data.ptr) )
1470    {
1471        int i;
1472
1473        if( !centers )
1474            CV_ERROR( CV_StsNullPtr, "NULL centers" );
1475
1476        if( centers->rows != dst_centers->rows )
1477            CV_ERROR( CV_StsUnmatchedSizes, "Invalid number of rows in matrix of output centers" );
1478
1479        if( dst_centers->cols != dims_all )
1480            CV_ERROR( CV_StsUnmatchedSizes,
1481            "Number of columns in matrix of output centers is "
1482            "not equal to the total number of components in the input samples" );
1483
1484        CV_ASSERT( centers->cols == dims_selected );
1485
1486        for( i = 0; i < centers->rows; i++ )
1487            CV_CALL( icvConvertDataToSparse( centers->data.ptr + i*centers->step, 0, centers->type,
1488                        dst_centers->data.ptr + i*dst_centers->step, 0, dst_centers->type,
1489                        cvSize( 1, dims_selected ), comp_idx ? comp_idx->data.i : 0 ));
1490    }
1491
1492    if( dst_probs && (!probs || probs->data.ptr != dst_probs->data.ptr) )
1493    {
1494        if( !probs )
1495            CV_ERROR( CV_StsNullPtr, "NULL probs" );
1496
1497        if( probs->cols != dst_probs->cols )
1498            CV_ERROR( CV_StsUnmatchedSizes, "Invalid number of columns in output probability matrix" );
1499
1500        if( dst_probs->rows != samples_all )
1501            CV_ERROR( CV_StsUnmatchedSizes,
1502            "Number of rows in output probability matrix is "
1503            "not equal to the total number of input samples" );
1504
1505        CV_ASSERT( probs->rows == samples_selected );
1506
1507        CV_CALL( icvConvertDataToSparse( probs->data.ptr, probs->step, probs->type,
1508                        dst_probs->data.ptr, dst_probs->step, dst_probs->type,
1509                        cvSize( probs->cols, samples_selected ),
1510                        sample_idx ? sample_idx->data.i : 0 ));
1511    }
1512
1513    __END__;
1514}
1515
1516#if 0
1517CV_IMPL void
1518cvStatModelMultiPredict( const CvStatModel* stat_model,
1519                         const CvArr* predict_input,
1520                         int flags, CvMat* predict_output,
1521                         CvMat* probs, const CvMat* sample_idx )
1522{
1523    CvMemStorage* storage = 0;
1524    CvMat* sample_idx_buffer = 0;
1525    CvSparseMat** sparse_rows = 0;
1526    int samples_selected = 0;
1527
1528    CV_FUNCNAME( "cvStatModelMultiPredict" );
1529
1530    __BEGIN__;
1531
1532    int i;
1533    int predict_output_step = 1, sample_idx_step = 1;
1534    int type;
1535    int d, sizes[CV_MAX_DIM];
1536    int tflag = flags == CV_COL_SAMPLE;
1537    int samples_all, dims_all;
1538    int is_sparse = CV_IS_SPARSE_MAT(predict_input);
1539    CvMat predict_input_part;
1540    CvArr* sample = &predict_input_part;
1541    CvMat probs_part;
1542    CvMat* probs1 = probs ? &probs_part : 0;
1543
1544    if( !CV_IS_STAT_MODEL(stat_model) )
1545        CV_ERROR( !stat_model ? CV_StsNullPtr : CV_StsBadArg, "Invalid statistical model" );
1546
1547    if( !stat_model->predict )
1548        CV_ERROR( CV_StsNotImplemented, "There is no \"predict\" method" );
1549
1550    if( !predict_input || !predict_output )
1551        CV_ERROR( CV_StsNullPtr, "NULL input or output matrices" );
1552
1553    if( !is_sparse && !CV_IS_MAT(predict_input) )
1554        CV_ERROR( CV_StsBadArg, "predict_input should be a matrix or a sparse matrix" );
1555
1556    if( !CV_IS_MAT(predict_output) )
1557        CV_ERROR( CV_StsBadArg, "predict_output should be a matrix" );
1558
1559    type = cvGetElemType( predict_input );
1560    if( type != CV_32FC1 ||
1561        (CV_MAT_TYPE(predict_output->type) != CV_32FC1 &&
1562         CV_MAT_TYPE(predict_output->type) != CV_32SC1 ))
1563         CV_ERROR( CV_StsUnsupportedFormat, "The input or output matrix has unsupported format" );
1564
1565    CV_CALL( d = cvGetDims( predict_input, sizes ));
1566    if( d > 2 )
1567        CV_ERROR( CV_StsBadSize, "The input matrix should be 1- or 2-dimensional" );
1568
1569    if( !tflag )
1570    {
1571        samples_all = samples_selected = sizes[0];
1572        dims_all = sizes[1];
1573    }
1574    else
1575    {
1576        samples_all = samples_selected = sizes[1];
1577        dims_all = sizes[0];
1578    }
1579
1580    if( sample_idx )
1581    {
1582        if( !CV_IS_MAT(sample_idx) )
1583            CV_ERROR( CV_StsBadArg, "Invalid sample_idx matrix" );
1584
1585        if( sample_idx->cols != 1 && sample_idx->rows != 1 )
1586            CV_ERROR( CV_StsBadSize, "sample_idx must be 1-dimensional matrix" );
1587
1588        samples_selected = sample_idx->rows + sample_idx->cols - 1;
1589
1590        if( CV_MAT_TYPE(sample_idx->type) == CV_32SC1 )
1591        {
1592            if( samples_selected > samples_all )
1593                CV_ERROR( CV_StsBadSize, "sample_idx is too large vector" );
1594        }
1595        else if( samples_selected != samples_all )
1596            CV_ERROR( CV_StsUnmatchedSizes, "sample_idx has incorrect size" );
1597
1598        sample_idx_step = sample_idx->step ?
1599            sample_idx->step / CV_ELEM_SIZE(sample_idx->type) : 1;
1600    }
1601
1602    if( predict_output->rows != 1 && predict_output->cols != 1 )
1603        CV_ERROR( CV_StsBadSize, "predict_output should be a 1-dimensional matrix" );
1604
1605    if( predict_output->rows + predict_output->cols - 1 != samples_all )
1606        CV_ERROR( CV_StsUnmatchedSizes, "predict_output and predict_input have uncoordinated sizes" );
1607
1608    predict_output_step = predict_output->step ?
1609        predict_output->step / CV_ELEM_SIZE(predict_output->type) : 1;
1610
1611    if( probs )
1612    {
1613        if( !CV_IS_MAT(probs) )
1614            CV_ERROR( CV_StsBadArg, "Invalid matrix of probabilities" );
1615
1616        if( probs->rows != samples_all )
1617            CV_ERROR( CV_StsUnmatchedSizes,
1618            "matrix of probabilities must have as many rows as the total number of samples" );
1619
1620        if( CV_MAT_TYPE(probs->type) != CV_32FC1 )
1621            CV_ERROR( CV_StsUnsupportedFormat, "matrix of probabilities must have 32fC1 type" );
1622    }
1623
1624    if( is_sparse )
1625    {
1626        CvSparseNode* node;
1627        CvSparseMatIterator mat_iterator;
1628        CvSparseMat* sparse = (CvSparseMat*)predict_input;
1629
1630        if( sample_idx && CV_MAT_TYPE(sample_idx->type) == CV_32SC1 )
1631        {
1632            CV_CALL( sample_idx_buffer = cvCreateMat( 1, samples_all, CV_8UC1 ));
1633            cvZero( sample_idx_buffer );
1634            for( i = 0; i < samples_selected; i++ )
1635                sample_idx_buffer->data.ptr[sample_idx->data.i[i*sample_idx_step]] = 1;
1636            samples_selected = samples_all;
1637            sample_idx = sample_idx_buffer;
1638            sample_idx_step = 1;
1639        }
1640
1641        CV_CALL( sparse_rows = (CvSparseMat**)cvAlloc( samples_selected*sizeof(sparse_rows[0])));
1642        for( i = 0; i < samples_selected; i++ )
1643        {
1644            if( sample_idx && sample_idx->data.ptr[i*sample_idx_step] == 0 )
1645                continue;
1646            CV_CALL( sparse_rows[i] = cvCreateSparseMat( 1, &dims_all, type ));
1647            if( !storage )
1648                storage = sparse_rows[i]->heap->storage;
1649            else
1650            {
1651                // hack: to decrease memory footprint, make all the sparse matrices
1652                // reside in the same storage
1653                int elem_size = sparse_rows[i]->heap->elem_size;
1654                cvReleaseMemStorage( &sparse_rows[i]->heap->storage );
1655                sparse_rows[i]->heap = cvCreateSet( 0, sizeof(CvSet), elem_size, storage );
1656            }
1657        }
1658
1659        // put each row (or column) of predict_input into separate sparse matrix.
1660        node = cvInitSparseMatIterator( sparse, &mat_iterator );
1661        for( ; node != 0; node = cvGetNextSparseNode( &mat_iterator ))
1662        {
1663            int* idx = CV_NODE_IDX( sparse, node );
1664            int idx0 = idx[tflag ^ 1];
1665            int idx1 = idx[tflag];
1666
1667            if( sample_idx && sample_idx->data.ptr[idx0*sample_idx_step] == 0 )
1668                continue;
1669
1670            assert( sparse_rows[idx0] != 0 );
1671            *(float*)cvPtrND( sparse, &idx1, 0, 1, 0 ) = *(float*)CV_NODE_VAL( sparse, node );
1672        }
1673    }
1674
1675    for( i = 0; i < samples_selected; i++ )
1676    {
1677        int idx = i;
1678        float response;
1679
1680        if( sample_idx )
1681        {
1682            if( CV_MAT_TYPE(sample_idx->type) == CV_32SC1 )
1683            {
1684                idx = sample_idx->data.i[i*sample_idx_step];
1685                if( (unsigned)idx >= (unsigned)samples_all )
1686                    CV_ERROR( CV_StsOutOfRange, "Some of sample_idx elements are out of range" );
1687            }
1688            else if( CV_MAT_TYPE(sample_idx->type) == CV_8UC1 &&
1689                     sample_idx->data.ptr[i*sample_idx_step] == 0 )
1690                continue;
1691        }
1692
1693        if( !is_sparse )
1694        {
1695            if( !tflag )
1696                cvGetRow( predict_input, &predict_input_part, idx );
1697            else
1698            {
1699                cvGetCol( predict_input, &predict_input_part, idx );
1700            }
1701        }
1702        else
1703            sample = sparse_rows[idx];
1704
1705        if( probs )
1706            cvGetRow( probs, probs1, idx );
1707
1708        CV_CALL( response = stat_model->predict( stat_model, (CvMat*)sample, probs1 ));
1709
1710        if( CV_MAT_TYPE(predict_output->type) == CV_32FC1 )
1711            predict_output->data.fl[idx*predict_output_step] = response;
1712        else
1713        {
1714            CV_ASSERT( cvRound(response) == response );
1715            predict_output->data.i[idx*predict_output_step] = cvRound(response);
1716        }
1717    }
1718
1719    __END__;
1720
1721    if( sparse_rows )
1722    {
1723        int i;
1724        for( i = 0; i < samples_selected; i++ )
1725            if( sparse_rows[i] )
1726            {
1727                sparse_rows[i]->heap->storage = 0;
1728                cvReleaseSparseMat( &sparse_rows[i] );
1729            }
1730        cvFree( &sparse_rows );
1731    }
1732
1733    cvReleaseMat( &sample_idx_buffer );
1734    cvReleaseMemStorage( &storage );
1735}
1736#endif
1737
1738// By P. Yarykin - begin -
1739
1740void cvCombineResponseMaps (CvMat*  _responses,
1741                      const CvMat*  old_response_map,
1742                            CvMat*  new_response_map,
1743                            CvMat** out_response_map)
1744{
1745    int** old_data = NULL;
1746    int** new_data = NULL;
1747
1748        CV_FUNCNAME ("cvCombineResponseMaps");
1749        __BEGIN__
1750
1751    int i,j;
1752    int old_n, new_n, out_n;
1753    int samples, free_response;
1754    int* first;
1755    int* responses;
1756    int* out_data;
1757
1758    if( out_response_map )
1759        *out_response_map = 0;
1760
1761// Check input data.
1762    if ((!ICV_IS_MAT_OF_TYPE (_responses, CV_32SC1)) ||
1763        (!ICV_IS_MAT_OF_TYPE (old_response_map, CV_32SC1)) ||
1764        (!ICV_IS_MAT_OF_TYPE (new_response_map, CV_32SC1)))
1765    {
1766        CV_ERROR (CV_StsBadArg, "Some of input arguments is not the CvMat")
1767    }
1768
1769// Prepare sorted responses.
1770    first = new_response_map->data.i;
1771    new_n = new_response_map->cols;
1772    CV_CALL (new_data = (int**)cvAlloc (new_n * sizeof (new_data[0])));
1773    for (i = 0; i < new_n; i++)
1774        new_data[i] = first + i;
1775    qsort (new_data, new_n, sizeof(int*), icvCmpIntegersPtr);
1776
1777    first = old_response_map->data.i;
1778    old_n = old_response_map->cols;
1779    CV_CALL (old_data = (int**)cvAlloc (old_n * sizeof (old_data[0])));
1780    for (i = 0; i < old_n; i++)
1781        old_data[i] = first + i;
1782    qsort (old_data, old_n, sizeof(int*), icvCmpIntegersPtr);
1783
1784// Count the number of different responses.
1785    for (i = 0, j = 0, out_n = 0; i < old_n && j < new_n; out_n++)
1786    {
1787        if (*old_data[i] == *new_data[j])
1788        {
1789            i++;
1790            j++;
1791        }
1792        else if (*old_data[i] < *new_data[j])
1793            i++;
1794        else
1795            j++;
1796    }
1797    out_n += old_n - i + new_n - j;
1798
1799// Create and fill the result response maps.
1800    CV_CALL (*out_response_map = cvCreateMat (1, out_n, CV_32SC1));
1801    out_data = (*out_response_map)->data.i;
1802    memcpy (out_data, first, old_n * sizeof (int));
1803
1804    free_response = old_n;
1805    for (i = 0, j = 0; i < old_n && j < new_n; )
1806    {
1807        if (*old_data[i] == *new_data[j])
1808        {
1809            *new_data[j] = (int)(old_data[i] - first);
1810            i++;
1811            j++;
1812        }
1813        else if (*old_data[i] < *new_data[j])
1814            i++;
1815        else
1816        {
1817            out_data[free_response] = *new_data[j];
1818            *new_data[j] = free_response++;
1819            j++;
1820        }
1821    }
1822    for (; j < new_n; j++)
1823    {
1824        out_data[free_response] = *new_data[j];
1825        *new_data[j] = free_response++;
1826    }
1827    CV_ASSERT (free_response == out_n);
1828
1829// Change <responses> according to out response map.
1830    samples = _responses->cols + _responses->rows - 1;
1831    responses = _responses->data.i;
1832    first = new_response_map->data.i;
1833    for (i = 0; i < samples; i++)
1834    {
1835        responses[i] = first[responses[i]];
1836    }
1837
1838        __END__
1839
1840    cvFree(&old_data);
1841    cvFree(&new_data);
1842
1843}
1844
1845
1846int icvGetNumberOfCluster( double* prob_vector, int num_of_clusters, float r,
1847                           float outlier_thresh, int normalize_probs )
1848{
1849    int max_prob_loc = 0;
1850
1851    CV_FUNCNAME("icvGetNumberOfCluster");
1852    __BEGIN__;
1853
1854    double prob, maxprob, sum;
1855    int i;
1856
1857    CV_ASSERT(prob_vector);
1858    CV_ASSERT(num_of_clusters >= 0);
1859
1860    maxprob = prob_vector[0];
1861    max_prob_loc = 0;
1862    sum = maxprob;
1863    for( i = 1; i < num_of_clusters; i++ )
1864    {
1865        prob = prob_vector[i];
1866        sum += prob;
1867        if( prob > maxprob )
1868        {
1869            max_prob_loc = i;
1870            maxprob = prob;
1871        }
1872    }
1873    if( normalize_probs && fabs(sum - 1.) > FLT_EPSILON )
1874    {
1875        for( i = 0; i < num_of_clusters; i++ )
1876            prob_vector[i] /= sum;
1877    }
1878    if( fabs(r - 1.) > FLT_EPSILON && fabs(sum - 1.) < outlier_thresh )
1879        max_prob_loc = -1;
1880
1881    __END__;
1882
1883    return max_prob_loc;
1884
1885} // End of icvGetNumberOfCluster
1886
1887
1888void icvFindClusterLabels( const CvMat* probs, float outlier_thresh, float r,
1889                          const CvMat* labels )
1890{
1891    CvMat* counts = 0;
1892
1893    CV_FUNCNAME("icvFindClusterLabels");
1894    __BEGIN__;
1895
1896    int nclusters, nsamples;
1897    int i, j;
1898    double* probs_data;
1899
1900    CV_ASSERT( ICV_IS_MAT_OF_TYPE(probs, CV_64FC1) );
1901    CV_ASSERT( ICV_IS_MAT_OF_TYPE(labels, CV_32SC1) );
1902
1903    nclusters = probs->cols;
1904    nsamples  = probs->rows;
1905    CV_ASSERT( nsamples == labels->cols );
1906
1907    CV_CALL( counts = cvCreateMat( 1, nclusters + 1, CV_32SC1 ) );
1908    CV_CALL( cvSetZero( counts ));
1909    for( i = 0; i < nsamples; i++ )
1910    {
1911        labels->data.i[i] = icvGetNumberOfCluster( probs->data.db + i*probs->cols,
1912            nclusters, r, outlier_thresh, 1 );
1913        counts->data.i[labels->data.i[i] + 1]++;
1914    }
1915    CV_ASSERT((int)cvSum(counts).val[0] == nsamples);
1916    // Filling empty clusters with the vector, that has the maximal probability
1917    for( j = 0; j < nclusters; j++ ) // outliers are ignored
1918    {
1919        int maxprob_loc = -1;
1920        double maxprob = 0;
1921
1922        if( counts->data.i[j+1] ) // j-th class is not empty
1923            continue;
1924        // look for the presentative, which is not lonely in it's cluster
1925        // and that has a maximal probability among all these vectors
1926        probs_data = probs->data.db;
1927        for( i = 0; i < nsamples; i++, probs_data++ )
1928        {
1929            int label = labels->data.i[i];
1930            double prob;
1931            if( counts->data.i[label+1] == 0 ||
1932                (counts->data.i[label+1] <= 1 && label != -1) )
1933                continue;
1934            prob = *probs_data;
1935            if( prob >= maxprob )
1936            {
1937                maxprob = prob;
1938                maxprob_loc = i;
1939            }
1940        }
1941        // maxprob_loc == 0 <=> number of vectors less then number of clusters
1942        CV_ASSERT( maxprob_loc >= 0 );
1943        counts->data.i[labels->data.i[maxprob_loc] + 1]--;
1944        labels->data.i[maxprob_loc] = j;
1945        counts->data.i[j + 1]++;
1946    }
1947
1948    __END__;
1949
1950    cvReleaseMat( &counts );
1951} // End of icvFindClusterLabels
1952
1953/* End of file */
1954