1793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler/*M/////////////////////////////////////////////////////////////////////////////////////// 2793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 3793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 5793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// By downloading, copying, installing or using the software you agree to this license. 6793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// If you do not agree to this license, do not download, install, 7793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// copy or use the software. 8793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 9793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 10793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// License Agreement 11793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// For Open Source Computer Vision Library 12793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 13793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// Copyright (C) 2013, OpenCV Foundation, all rights reserved. 16793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// Third party copyrights are property of their respective owners. 17793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 18793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// Redistribution and use in source and binary forms, with or without modification, 19793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// are permitted provided that the following conditions are met: 20793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 21793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// * Redistribution's of source code must retain the above copyright notice, 22793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// this list of conditions and the following disclaimer. 23793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 24793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// * Redistribution's in binary form must reproduce the above copyright notice, 25793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// this list of conditions and the following disclaimer in the documentation 26793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// and/or other materials provided with the distribution. 27793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 28793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// * The name of the copyright holders may not be used to endorse or promote products 29793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// derived from this software without specific prior written permission. 30793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 31793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// This software is provided by the copyright holders and contributors "as is" and 32793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// any express or implied warranties, including, but not limited to, the implied 33793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// warranties of merchantability and fitness for a particular purpose are disclaimed. 34793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// In no event shall the Intel Corporation or contributors be liable for any direct, 35793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// indirect, incidental, special, exemplary, or consequential damages 36793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// (including, but not limited to, procurement of substitute goods or services; 37793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// loss of use, data, or profits; or business interruption) however caused 38793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// and on any theory of liability, whether in contract, strict liability, 39793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// or tort (including negligence or otherwise) arising in any way out of 40793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// the use of this software, even if advised of the possibility of such damage. 41793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 42793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//M*/ 43793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 44793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#include "precomp.hpp" 45793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 46793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler////////////////////////////////////////// kmeans //////////////////////////////////////////// 47793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 48793ee12c6df9cad3806238d32528c49a3ff9331dNoah Preslernamespace cv 49793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler{ 50793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 51793ee12c6df9cad3806238d32528c49a3ff9331dNoah Preslerstatic void generateRandomCenter(const std::vector<Vec2f>& box, float* center, RNG& rng) 52793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler{ 53793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler size_t j, dims = box.size(); 54793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler float margin = 1.f/dims; 55793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for( j = 0; j < dims; j++ ) 56793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler center[j] = ((float)rng*(1.f+margin*2.f)-margin)*(box[j][1] - box[j][0]) + box[j][0]; 57793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler} 58793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 59793ee12c6df9cad3806238d32528c49a3ff9331dNoah Preslerclass KMeansPPDistanceComputer : public ParallelLoopBody 60793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler{ 61793ee12c6df9cad3806238d32528c49a3ff9331dNoah Preslerpublic: 62793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler KMeansPPDistanceComputer( float *_tdist2, 63793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const float *_data, 64793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const float *_dist, 65793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int _dims, 66793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler size_t _step, 67793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler size_t _stepci ) 68793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler : tdist2(_tdist2), 69793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler data(_data), 70793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler dist(_dist), 71793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler dims(_dims), 72793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler step(_step), 73793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler stepci(_stepci) { } 74793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 75793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler void operator()( const cv::Range& range ) const 76793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 77793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const int begin = range.start; 78793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const int end = range.end; 79793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 80793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for ( int i = begin; i<end; i++ ) 81793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 82793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler tdist2[i] = std::min(normL2Sqr(data + step*i, data + stepci, dims), dist[i]); 83793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 84793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 85793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 86793ee12c6df9cad3806238d32528c49a3ff9331dNoah Preslerprivate: 87793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler KMeansPPDistanceComputer& operator=(const KMeansPPDistanceComputer&); // to quiet MSVC 88793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 89793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler float *tdist2; 90793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const float *data; 91793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const float *dist; 92793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const int dims; 93793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const size_t step; 94793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const size_t stepci; 95793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler}; 96793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 97793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler/* 98793ee12c6df9cad3806238d32528c49a3ff9331dNoah Preslerk-means center initialization using the following algorithm: 99793ee12c6df9cad3806238d32528c49a3ff9331dNoah PreslerArthur & Vassilvitskii (2007) k-means++: The Advantages of Careful Seeding 100793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler*/ 101793ee12c6df9cad3806238d32528c49a3ff9331dNoah Preslerstatic void generateCentersPP(const Mat& _data, Mat& _out_centers, 102793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int K, RNG& rng, int trials) 103793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler{ 104793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int i, j, k, dims = _data.cols, N = _data.rows; 105793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const float* data = _data.ptr<float>(0); 106793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler size_t step = _data.step/sizeof(data[0]); 107793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler std::vector<int> _centers(K); 108793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int* centers = &_centers[0]; 109793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler std::vector<float> _dist(N*3); 110793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler float* dist = &_dist[0], *tdist = dist + N, *tdist2 = tdist + N; 111793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler double sum0 = 0; 112793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 113793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler centers[0] = (unsigned)rng % N; 114793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 115793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for( i = 0; i < N; i++ ) 116793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 117793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler dist[i] = normL2Sqr(data + step*i, data + step*centers[0], dims); 118793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler sum0 += dist[i]; 119793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 120793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 121793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for( k = 1; k < K; k++ ) 122793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 123793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler double bestSum = DBL_MAX; 124793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int bestCenter = -1; 125793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 126793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for( j = 0; j < trials; j++ ) 127793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 128793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler double p = (double)rng*sum0, s = 0; 129793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for( i = 0; i < N-1; i++ ) 130793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if( (p -= dist[i]) <= 0 ) 131793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler break; 132793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int ci = i; 133793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 134793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler parallel_for_(Range(0, N), 135793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler KMeansPPDistanceComputer(tdist2, data, dist, dims, step, step*ci)); 136793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for( i = 0; i < N; i++ ) 137793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 138793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler s += tdist2[i]; 139793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 140793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 141793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if( s < bestSum ) 142793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 143793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler bestSum = s; 144793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler bestCenter = ci; 145793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler std::swap(tdist, tdist2); 146793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 147793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 148793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler centers[k] = bestCenter; 149793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler sum0 = bestSum; 150793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler std::swap(dist, tdist); 151793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 152793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 153793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for( k = 0; k < K; k++ ) 154793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 155793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const float* src = data + step*centers[k]; 156793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler float* dst = _out_centers.ptr<float>(k); 157793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for( j = 0; j < dims; j++ ) 158793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler dst[j] = src[j]; 159793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 160793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler} 161793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 162793ee12c6df9cad3806238d32528c49a3ff9331dNoah Preslerclass KMeansDistanceComputer : public ParallelLoopBody 163793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler{ 164793ee12c6df9cad3806238d32528c49a3ff9331dNoah Preslerpublic: 165793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler KMeansDistanceComputer( double *_distances, 166793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int *_labels, 167793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const Mat& _data, 168793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const Mat& _centers ) 169793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler : distances(_distances), 170793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler labels(_labels), 171793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler data(_data), 172793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler centers(_centers) 173793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 174793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 175793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 176793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler void operator()( const Range& range ) const 177793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 178793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const int begin = range.start; 179793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const int end = range.end; 180793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const int K = centers.rows; 181793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const int dims = centers.cols; 182793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 183793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for( int i = begin; i<end; ++i) 184793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 185793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const float *sample = data.ptr<float>(i); 186793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int k_best = 0; 187793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler double min_dist = DBL_MAX; 188793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 189793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for( int k = 0; k < K; k++ ) 190793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 191793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const float* center = centers.ptr<float>(k); 192793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const double dist = normL2Sqr(sample, center, dims); 193793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 194793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if( min_dist > dist ) 195793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 196793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler min_dist = dist; 197793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler k_best = k; 198793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 199793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 200793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 201793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler distances[i] = min_dist; 202793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler labels[i] = k_best; 203793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 204793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 205793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 206793ee12c6df9cad3806238d32528c49a3ff9331dNoah Preslerprivate: 207793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler KMeansDistanceComputer& operator=(const KMeansDistanceComputer&); // to quiet MSVC 208793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 209793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler double *distances; 210793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int *labels; 211793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const Mat& data; 212793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const Mat& centers; 213793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler}; 214793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 215793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler} 216793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 217793ee12c6df9cad3806238d32528c49a3ff9331dNoah Preslerdouble cv::kmeans( InputArray _data, int K, 218793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler InputOutputArray _bestLabels, 219793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler TermCriteria criteria, int attempts, 220793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int flags, OutputArray _centers ) 221793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler{ 222793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const int SPP_TRIALS = 3; 223793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler Mat data0 = _data.getMat(); 224793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler bool isrow = data0.rows == 1 && data0.channels() > 1; 225793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int N = !isrow ? data0.rows : data0.cols; 226793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int dims = (!isrow ? data0.cols : 1)*data0.channels(); 227793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int type = data0.depth(); 228793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 229793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler attempts = std::max(attempts, 1); 230793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler CV_Assert( data0.dims <= 2 && type == CV_32F && K > 0 ); 231793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler CV_Assert( N >= K ); 232793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 233793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler Mat data(N, dims, CV_32F, data0.ptr(), isrow ? dims * sizeof(float) : static_cast<size_t>(data0.step)); 234793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 235793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler _bestLabels.create(N, 1, CV_32S, -1, true); 236793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 237793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler Mat _labels, best_labels = _bestLabels.getMat(); 238793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if( flags & CV_KMEANS_USE_INITIAL_LABELS ) 239793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 240793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler CV_Assert( (best_labels.cols == 1 || best_labels.rows == 1) && 241793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler best_labels.cols*best_labels.rows == N && 242793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler best_labels.type() == CV_32S && 243793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler best_labels.isContinuous()); 244793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler best_labels.copyTo(_labels); 245793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 246793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler else 247793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 248793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if( !((best_labels.cols == 1 || best_labels.rows == 1) && 249793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler best_labels.cols*best_labels.rows == N && 250793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler best_labels.type() == CV_32S && 251793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler best_labels.isContinuous())) 252793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler best_labels.create(N, 1, CV_32S); 253793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler _labels.create(best_labels.size(), best_labels.type()); 254793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 255793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int* labels = _labels.ptr<int>(); 256793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 257793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler Mat centers(K, dims, type), old_centers(K, dims, type), temp(1, dims, type); 258793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler std::vector<int> counters(K); 259793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler std::vector<Vec2f> _box(dims); 260793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler Vec2f* box = &_box[0]; 261793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler double best_compactness = DBL_MAX, compactness = 0; 262793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler RNG& rng = theRNG(); 263793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int a, iter, i, j, k; 264793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 265793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if( criteria.type & TermCriteria::EPS ) 266793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler criteria.epsilon = std::max(criteria.epsilon, 0.); 267793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler else 268793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler criteria.epsilon = FLT_EPSILON; 269793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler criteria.epsilon *= criteria.epsilon; 270793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 271793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if( criteria.type & TermCriteria::COUNT ) 272793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler criteria.maxCount = std::min(std::max(criteria.maxCount, 2), 100); 273793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler else 274793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler criteria.maxCount = 100; 275793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 276793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if( K == 1 ) 277793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 278793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler attempts = 1; 279793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler criteria.maxCount = 2; 280793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 281793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 282793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const float* sample = data.ptr<float>(0); 283793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for( j = 0; j < dims; j++ ) 284793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler box[j] = Vec2f(sample[j], sample[j]); 285793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 286793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for( i = 1; i < N; i++ ) 287793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 288793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler sample = data.ptr<float>(i); 289793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for( j = 0; j < dims; j++ ) 290793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 291793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler float v = sample[j]; 292793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler box[j][0] = std::min(box[j][0], v); 293793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler box[j][1] = std::max(box[j][1], v); 294793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 295793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 296793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 297793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for( a = 0; a < attempts; a++ ) 298793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 299793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler double max_center_shift = DBL_MAX; 300793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for( iter = 0;; ) 301793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 302793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler swap(centers, old_centers); 303793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 304793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if( iter == 0 && (a > 0 || !(flags & KMEANS_USE_INITIAL_LABELS)) ) 305793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 306793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if( flags & KMEANS_PP_CENTERS ) 307793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler generateCentersPP(data, centers, K, rng, SPP_TRIALS); 308793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler else 309793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 310793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for( k = 0; k < K; k++ ) 311793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler generateRandomCenter(_box, centers.ptr<float>(k), rng); 312793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 313793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 314793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler else 315793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 316793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if( iter == 0 && a == 0 && (flags & KMEANS_USE_INITIAL_LABELS) ) 317793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 318793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for( i = 0; i < N; i++ ) 319793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler CV_Assert( (unsigned)labels[i] < (unsigned)K ); 320793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 321793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 322793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler // compute centers 323793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler centers = Scalar(0); 324793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for( k = 0; k < K; k++ ) 325793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler counters[k] = 0; 326793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 327793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for( i = 0; i < N; i++ ) 328793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 329793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler sample = data.ptr<float>(i); 330793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler k = labels[i]; 331793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler float* center = centers.ptr<float>(k); 332793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler j=0; 333793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler #if CV_ENABLE_UNROLLED 334793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for(; j <= dims - 4; j += 4 ) 335793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 336793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler float t0 = center[j] + sample[j]; 337793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler float t1 = center[j+1] + sample[j+1]; 338793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 339793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler center[j] = t0; 340793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler center[j+1] = t1; 341793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 342793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler t0 = center[j+2] + sample[j+2]; 343793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler t1 = center[j+3] + sample[j+3]; 344793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 345793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler center[j+2] = t0; 346793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler center[j+3] = t1; 347793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 348793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler #endif 349793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for( ; j < dims; j++ ) 350793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler center[j] += sample[j]; 351793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler counters[k]++; 352793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 353793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 354793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if( iter > 0 ) 355793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler max_center_shift = 0; 356793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 357793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for( k = 0; k < K; k++ ) 358793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 359793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if( counters[k] != 0 ) 360793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler continue; 361793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 362793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler // if some cluster appeared to be empty then: 363793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler // 1. find the biggest cluster 364793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler // 2. find the farthest from the center point in the biggest cluster 365793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler // 3. exclude the farthest point from the biggest cluster and form a new 1-point cluster. 366793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int max_k = 0; 367793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for( int k1 = 1; k1 < K; k1++ ) 368793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 369793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if( counters[max_k] < counters[k1] ) 370793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler max_k = k1; 371793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 372793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 373793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler double max_dist = 0; 374793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int farthest_i = -1; 375793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler float* new_center = centers.ptr<float>(k); 376793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler float* old_center = centers.ptr<float>(max_k); 377793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler float* _old_center = temp.ptr<float>(); // normalized 378793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler float scale = 1.f/counters[max_k]; 379793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for( j = 0; j < dims; j++ ) 380793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler _old_center[j] = old_center[j]*scale; 381793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 382793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for( i = 0; i < N; i++ ) 383793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 384793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if( labels[i] != max_k ) 385793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler continue; 386793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler sample = data.ptr<float>(i); 387793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler double dist = normL2Sqr(sample, _old_center, dims); 388793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 389793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if( max_dist <= dist ) 390793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 391793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler max_dist = dist; 392793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler farthest_i = i; 393793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 394793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 395793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 396793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler counters[max_k]--; 397793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler counters[k]++; 398793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler labels[farthest_i] = k; 399793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler sample = data.ptr<float>(farthest_i); 400793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 401793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for( j = 0; j < dims; j++ ) 402793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 403793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler old_center[j] -= sample[j]; 404793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler new_center[j] += sample[j]; 405793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 406793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 407793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 408793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for( k = 0; k < K; k++ ) 409793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 410793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler float* center = centers.ptr<float>(k); 411793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler CV_Assert( counters[k] != 0 ); 412793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 413793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler float scale = 1.f/counters[k]; 414793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for( j = 0; j < dims; j++ ) 415793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler center[j] *= scale; 416793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 417793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if( iter > 0 ) 418793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 419793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler double dist = 0; 420793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const float* old_center = old_centers.ptr<float>(k); 421793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for( j = 0; j < dims; j++ ) 422793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 423793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler double t = center[j] - old_center[j]; 424793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler dist += t*t; 425793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 426793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler max_center_shift = std::max(max_center_shift, dist); 427793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 428793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 429793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 430793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 431793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if( ++iter == MAX(criteria.maxCount, 2) || max_center_shift <= criteria.epsilon ) 432793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler break; 433793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 434793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler // assign labels 435793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler Mat dists(1, N, CV_64F); 436793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler double* dist = dists.ptr<double>(0); 437793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler parallel_for_(Range(0, N), 438793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler KMeansDistanceComputer(dist, labels, data, centers)); 439793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler compactness = 0; 440793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for( i = 0; i < N; i++ ) 441793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 442793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler compactness += dist[i]; 443793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 444793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 445793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 446793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if( compactness < best_compactness ) 447793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 448793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler best_compactness = compactness; 449793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if( _centers.needed() ) 450793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler centers.copyTo(_centers); 451793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler _labels.copyTo(best_labels); 452793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 453793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 454793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 455793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler return best_compactness; 456793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler} 457