1/*M///////////////////////////////////////////////////////////////////////////////////////
2//
3//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4//
5//  By downloading, copying, installing or using the software you agree to this license.
6//  If you do not agree to this license, do not download, install,
7//  copy or use the software.
8//
9//
10//                           License Agreement
11//                For Open Source Computer Vision Library
12//
13// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15// Third party copyrights are property of their respective owners.
16//
17// Redistribution and use in source and binary forms, with or without modification,
18// are permitted provided that the following conditions are met:
19//
20//   * Redistribution's of source code must retain the above copyright notice,
21//     this list of conditions and the following disclaimer.
22//
23//   * Redistribution's in binary form must reproduce the above copyright notice,
24//     this list of conditions and the following disclaimer in the documentation
25//     and/or other materials provided with the distribution.
26//
27//   * The name of the copyright holders may not be used to endorse or promote products
28//     derived from this software without specific prior written permission.
29//
30// This software is provided by the copyright holders and contributors "as is" and
31// any express or implied warranties, including, but not limited to, the implied
32// warranties of merchantability and fitness for a particular purpose are disclaimed.
33// In no event shall the Intel Corporation or contributors be liable for any direct,
34// indirect, incidental, special, exemplary, or consequential damages
35// (including, but not limited to, procurement of substitute goods or services;
36// loss of use, data, or profits; or business interruption) however caused
37// and on any theory of liability, whether in contract, strict liability,
38// or tort (including negligence or otherwise) arising in any way out of
39// the use of this software, even if advised of the possibility of such damage.
40//
41//M*/
42
43#if !defined CUDA_DISABLER
44
45#include "opencv2/core/cuda/common.hpp"
46#include "opencv2/core/cuda/vec_traits.hpp"
47#include "opencv2/core/cuda/vec_math.hpp"
48#include "opencv2/core/cuda/limits.hpp"
49
50namespace cv { namespace cuda { namespace device
51{
52    namespace mog2
53    {
54        ///////////////////////////////////////////////////////////////
55        // Utility
56
57        __device__ __forceinline__ float cvt(uchar val)
58        {
59            return val;
60        }
61        __device__ __forceinline__ float3 cvt(const uchar3& val)
62        {
63            return make_float3(val.x, val.y, val.z);
64        }
65        __device__ __forceinline__ float4 cvt(const uchar4& val)
66        {
67            return make_float4(val.x, val.y, val.z, val.w);
68        }
69
70        __device__ __forceinline__ float sqr(float val)
71        {
72            return val * val;
73        }
74        __device__ __forceinline__ float sqr(const float3& val)
75        {
76            return val.x * val.x + val.y * val.y + val.z * val.z;
77        }
78        __device__ __forceinline__ float sqr(const float4& val)
79        {
80            return val.x * val.x + val.y * val.y + val.z * val.z;
81        }
82
83        __device__ __forceinline__ float sum(float val)
84        {
85            return val;
86        }
87        __device__ __forceinline__ float sum(const float3& val)
88        {
89            return val.x + val.y + val.z;
90        }
91        __device__ __forceinline__ float sum(const float4& val)
92        {
93            return val.x + val.y + val.z;
94        }
95
96        template <class Ptr2D>
97        __device__ __forceinline__ void swap(Ptr2D& ptr, int x, int y, int k, int rows)
98        {
99            typename Ptr2D::elem_type val = ptr(k * rows + y, x);
100            ptr(k * rows + y, x) = ptr((k + 1) * rows + y, x);
101            ptr((k + 1) * rows + y, x) = val;
102        }
103
104        ///////////////////////////////////////////////////////////////
105        // MOG2
106
107        __constant__ int           c_nmixtures;
108        __constant__ float         c_Tb;
109        __constant__ float         c_TB;
110        __constant__ float         c_Tg;
111        __constant__ float         c_varInit;
112        __constant__ float         c_varMin;
113        __constant__ float         c_varMax;
114        __constant__ float         c_tau;
115        __constant__ unsigned char c_shadowVal;
116
117        void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal)
118        {
119            varMin = ::fminf(varMin, varMax);
120            varMax = ::fmaxf(varMin, varMax);
121
122            cudaSafeCall( cudaMemcpyToSymbol(c_nmixtures, &nmixtures, sizeof(int)) );
123            cudaSafeCall( cudaMemcpyToSymbol(c_Tb, &Tb, sizeof(float)) );
124            cudaSafeCall( cudaMemcpyToSymbol(c_TB, &TB, sizeof(float)) );
125            cudaSafeCall( cudaMemcpyToSymbol(c_Tg, &Tg, sizeof(float)) );
126            cudaSafeCall( cudaMemcpyToSymbol(c_varInit, &varInit, sizeof(float)) );
127            cudaSafeCall( cudaMemcpyToSymbol(c_varMin, &varMin, sizeof(float)) );
128            cudaSafeCall( cudaMemcpyToSymbol(c_varMax, &varMax, sizeof(float)) );
129            cudaSafeCall( cudaMemcpyToSymbol(c_tau, &tau, sizeof(float)) );
130            cudaSafeCall( cudaMemcpyToSymbol(c_shadowVal, &shadowVal, sizeof(unsigned char)) );
131        }
132
133        template <bool detectShadows, typename SrcT, typename WorkT>
134        __global__ void mog2(const PtrStepSz<SrcT> frame, PtrStepb fgmask, PtrStepb modesUsed,
135                             PtrStepf gmm_weight, PtrStepf gmm_variance, PtrStep<WorkT> gmm_mean,
136                             const float alphaT, const float alpha1, const float prune)
137        {
138            const int x = blockIdx.x * blockDim.x + threadIdx.x;
139            const int y = blockIdx.y * blockDim.y + threadIdx.y;
140
141            if (x >= frame.cols || y >= frame.rows)
142                return;
143
144            WorkT pix = cvt(frame(y, x));
145
146            //calculate distances to the modes (+ sort)
147            //here we need to go in descending order!!!
148
149            bool background = false; // true - the pixel classified as background
150
151            //internal:
152
153            bool fitsPDF = false; //if it remains zero a new GMM mode will be added
154
155            int nmodes = modesUsed(y, x);
156            int nNewModes = nmodes; //current number of modes in GMM
157
158            float totalWeight = 0.0f;
159
160            //go through all modes
161
162            for (int mode = 0; mode < nmodes; ++mode)
163            {
164                //need only weight if fit is found
165                float weight = alpha1 * gmm_weight(mode * frame.rows + y, x) + prune;
166                int swap_count = 0;
167                //fit not found yet
168                if (!fitsPDF)
169                {
170                    //check if it belongs to some of the remaining modes
171                    float var = gmm_variance(mode * frame.rows + y, x);
172
173                    WorkT mean = gmm_mean(mode * frame.rows + y, x);
174
175                    //calculate difference and distance
176                    WorkT diff = mean - pix;
177                    float dist2 = sqr(diff);
178
179                    //background? - Tb - usually larger than Tg
180                    if (totalWeight < c_TB && dist2 < c_Tb * var)
181                        background = true;
182
183                    //check fit
184                    if (dist2 < c_Tg * var)
185                    {
186                        //belongs to the mode
187                        fitsPDF = true;
188
189                        //update distribution
190
191                        //update weight
192                        weight += alphaT;
193                        float k = alphaT / weight;
194
195                        //update mean
196                        gmm_mean(mode * frame.rows + y, x) = mean - k * diff;
197
198                        //update variance
199                        float varnew = var + k * (dist2 - var);
200
201                        //limit the variance
202                        varnew = ::fmaxf(varnew, c_varMin);
203                        varnew = ::fminf(varnew, c_varMax);
204
205                        gmm_variance(mode * frame.rows + y, x) = varnew;
206
207                        //sort
208                        //all other weights are at the same place and
209                        //only the matched (iModes) is higher -> just find the new place for it
210
211                        for (int i = mode; i > 0; --i)
212                        {
213                            //check one up
214                            if (weight < gmm_weight((i - 1) * frame.rows + y, x))
215                                break;
216
217                            swap_count++;
218                            //swap one up
219                            swap(gmm_weight, x, y, i - 1, frame.rows);
220                            swap(gmm_variance, x, y, i - 1, frame.rows);
221                            swap(gmm_mean, x, y, i - 1, frame.rows);
222                        }
223
224                        //belongs to the mode - bFitsPDF becomes 1
225                    }
226                } // !fitsPDF
227
228                //check prune
229                if (weight < -prune)
230                {
231                    weight = 0.0f;
232                    nmodes--;
233                }
234
235                gmm_weight((mode - swap_count) * frame.rows + y, x) = weight; //update weight by the calculated value
236                totalWeight += weight;
237            }
238
239            //renormalize weights
240
241            totalWeight = 1.f / totalWeight;
242            for (int mode = 0; mode < nmodes; ++mode)
243                gmm_weight(mode * frame.rows + y, x) *= totalWeight;
244
245            nmodes = nNewModes;
246
247            //make new mode if needed and exit
248
249            if (!fitsPDF)
250            {
251                // replace the weakest or add a new one
252                int mode = nmodes == c_nmixtures ? c_nmixtures - 1 : nmodes++;
253
254                if (nmodes == 1)
255                    gmm_weight(mode * frame.rows + y, x) = 1.f;
256                else
257                {
258                    gmm_weight(mode * frame.rows + y, x) = alphaT;
259
260                    // renormalize all other weights
261
262                    for (int i = 0; i < nmodes - 1; ++i)
263                        gmm_weight(i * frame.rows + y, x) *= alpha1;
264                }
265
266                // init
267
268                gmm_mean(mode * frame.rows + y, x) = pix;
269                gmm_variance(mode * frame.rows + y, x) = c_varInit;
270
271                //sort
272                //find the new place for it
273
274                for (int i = nmodes - 1; i > 0; --i)
275                {
276                    // check one up
277                    if (alphaT < gmm_weight((i - 1) * frame.rows + y, x))
278                        break;
279
280                    //swap one up
281                    swap(gmm_weight, x, y, i - 1, frame.rows);
282                    swap(gmm_variance, x, y, i - 1, frame.rows);
283                    swap(gmm_mean, x, y, i - 1, frame.rows);
284                }
285            }
286
287            //set the number of modes
288            modesUsed(y, x) = nmodes;
289
290            bool isShadow = false;
291            if (detectShadows && !background)
292            {
293                float tWeight = 0.0f;
294
295                // check all the components  marked as background:
296                for (int mode = 0; mode < nmodes; ++mode)
297                {
298                    WorkT mean = gmm_mean(mode * frame.rows + y, x);
299
300                    WorkT pix_mean = pix * mean;
301
302                    float numerator = sum(pix_mean);
303                    float denominator = sqr(mean);
304
305                    // no division by zero allowed
306                    if (denominator == 0)
307                        break;
308
309                    // if tau < a < 1 then also check the color distortion
310                    if (numerator <= denominator && numerator >= c_tau * denominator)
311                    {
312                        float a = numerator / denominator;
313
314                        WorkT dD = a * mean - pix;
315
316                        if (sqr(dD) < c_Tb * gmm_variance(mode * frame.rows + y, x) * a * a)
317                        {
318                            isShadow = true;
319                            break;
320                        }
321                    };
322
323                    tWeight += gmm_weight(mode * frame.rows + y, x);
324                    if (tWeight > c_TB)
325                        break;
326                }
327            }
328
329            fgmask(y, x) = background ? 0 : isShadow ? c_shadowVal : 255;
330        }
331
332        template <typename SrcT, typename WorkT>
333        void mog2_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
334                         float alphaT, float prune, bool detectShadows, cudaStream_t stream)
335        {
336            dim3 block(32, 8);
337            dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
338
339            const float alpha1 = 1.0f - alphaT;
340
341            if (detectShadows)
342            {
343                cudaSafeCall( cudaFuncSetCacheConfig(mog2<true, SrcT, WorkT>, cudaFuncCachePreferL1) );
344
345                mog2<true, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, modesUsed,
346                                                                    weight, variance, (PtrStepSz<WorkT>) mean,
347                                                                    alphaT, alpha1, prune);
348            }
349            else
350            {
351                cudaSafeCall( cudaFuncSetCacheConfig(mog2<false, SrcT, WorkT>, cudaFuncCachePreferL1) );
352
353                mog2<false, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, modesUsed,
354                                                                    weight, variance, (PtrStepSz<WorkT>) mean,
355                                                                    alphaT, alpha1, prune);
356            }
357
358            cudaSafeCall( cudaGetLastError() );
359
360            if (stream == 0)
361                cudaSafeCall( cudaDeviceSynchronize() );
362        }
363
364        void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
365                      float alphaT, float prune, bool detectShadows, cudaStream_t stream)
366        {
367            typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream);
368
369            static const func_t funcs[] =
370            {
371                0, mog2_caller<uchar, float>, 0, mog2_caller<uchar3, float3>, mog2_caller<uchar4, float4>
372            };
373
374            funcs[cn](frame, fgmask, modesUsed, weight, variance, mean, alphaT, prune, detectShadows, stream);
375        }
376
377        template <typename WorkT, typename OutT>
378        __global__ void getBackgroundImage2(const PtrStepSzb modesUsed, const PtrStepf gmm_weight, const PtrStep<WorkT> gmm_mean, PtrStep<OutT> dst)
379        {
380            const int x = blockIdx.x * blockDim.x + threadIdx.x;
381            const int y = blockIdx.y * blockDim.y + threadIdx.y;
382
383            if (x >= modesUsed.cols || y >= modesUsed.rows)
384                return;
385
386            int nmodes = modesUsed(y, x);
387
388            WorkT meanVal = VecTraits<WorkT>::all(0.0f);
389            float totalWeight = 0.0f;
390
391            for (int mode = 0; mode < nmodes; ++mode)
392            {
393                float weight = gmm_weight(mode * modesUsed.rows + y, x);
394
395                WorkT mean = gmm_mean(mode * modesUsed.rows + y, x);
396                meanVal = meanVal + weight * mean;
397
398                totalWeight += weight;
399
400                if(totalWeight > c_TB)
401                    break;
402            }
403
404            meanVal = meanVal * (1.f / totalWeight);
405
406            dst(y, x) = saturate_cast<OutT>(meanVal);
407        }
408
409        template <typename WorkT, typename OutT>
410        void getBackgroundImage2_caller(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream)
411        {
412            dim3 block(32, 8);
413            dim3 grid(divUp(modesUsed.cols, block.x), divUp(modesUsed.rows, block.y));
414
415            cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage2<WorkT, OutT>, cudaFuncCachePreferL1) );
416
417            getBackgroundImage2<WorkT, OutT><<<grid, block, 0, stream>>>(modesUsed, weight, (PtrStepSz<WorkT>) mean, (PtrStepSz<OutT>) dst);
418            cudaSafeCall( cudaGetLastError() );
419
420            if (stream == 0)
421                cudaSafeCall( cudaDeviceSynchronize() );
422        }
423
424        void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream)
425        {
426            typedef void (*func_t)(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream);
427
428            static const func_t funcs[] =
429            {
430                0, getBackgroundImage2_caller<float, uchar>, 0, getBackgroundImage2_caller<float3, uchar3>, getBackgroundImage2_caller<float4, uchar4>
431            };
432
433            funcs[cn](modesUsed, weight, mean, dst, stream);
434        }
435    }
436}}}
437
438
439#endif /* CUDA_DISABLER */
440