1/*M///////////////////////////////////////////////////////////////////////////////////////
2//
3//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4//
5//  By downloading, copying, installing or using the software you agree to this license.
6//  If you do not agree to this license, do not download, install,
7//  copy or use the software.
8//
9//
10//                        Intel License Agreement
11//                For Open Source Computer Vision Library
12//
13// Copyright (C) 2000, Intel Corporation, all rights reserved.
14// Third party copyrights are property of their respective owners.
15//
16// Redistribution and use in source and binary forms, with or without modification,
17// are permitted provided that the following conditions are met:
18//
19//   * Redistribution's of source code must retain the above copyright notice,
20//     this list of conditions and the following disclaimer.
21//
22//   * Redistribution's in binary form must reproduce the above copyright notice,
23//     this list of conditions and the following disclaimer in the documentation
24//     and/or other materials provided with the distribution.
25//
26//   * The name of Intel Corporation may not be used to endorse or promote products
27//     derived from this software without specific prior written permission.
28//
29// This software is provided by the copyright holders and contributors "as is" and
30// any express or implied warranties, including, but not limited to, the implied
31// warranties of merchantability and fitness for a particular purpose are disclaimed.
32// In no event shall the Intel Corporation or contributors be liable for any direct,
33// indirect, incidental, special, exemplary, or consequential damages
34// (including, but not limited to, procurement of substitute goods or services;
35// loss of use, data, or profits; or business interruption) however caused
36// and on any theory of liability, whether in contract, strict liability,
37// or tort (including negligence or otherwise) arising in any way out of
38// the use of this software, even if advised of the possibility of such damage.
39//
40//M*/
41
42#include "_cv.h"
43
44/*
45 * This file includes the code, contributed by Simon Perreault
46 * (the function icvMedianBlur_8u_CnR_O1)
47 *
48 * Constant-time median filtering -- http://nomis80.org/ctmf.html
49 * Copyright (C) 2006 Simon Perreault
50 *
51 * Contact:
52 *  Laboratoire de vision et systemes numeriques
53 *  Pavillon Adrien-Pouliot
54 *  Universite Laval
55 *  Sainte-Foy, Quebec, Canada
56 *  G1K 7P4
57 *
58 *  perreaul@gel.ulaval.ca
59 */
60
61// uncomment the line below to force SSE2 mode
62//#define CV_SSE2 1
63
64/****************************************************************************************\
65                                         Box Filter
66\****************************************************************************************/
67
68static void icvSumRow_8u32s( const uchar* src0, int* dst, void* params );
69static void icvSumRow_32f64f( const float* src0, double* dst, void* params );
70static void icvSumCol_32s8u( const int** src, uchar* dst, int dst_step,
71                             int count, void* params );
72static void icvSumCol_32s16s( const int** src, short* dst, int dst_step,
73                             int count, void* params );
74static void icvSumCol_32s32s( const int** src, int* dst, int dst_step,
75                             int count, void* params );
76static void icvSumCol_64f32f( const double** src, float* dst, int dst_step,
77                              int count, void* params );
78
79CvBoxFilter::CvBoxFilter()
80{
81    min_depth = CV_32S;
82    sum = 0;
83    sum_count = 0;
84    normalized = false;
85}
86
87
88CvBoxFilter::CvBoxFilter( int _max_width, int _src_type, int _dst_type,
89                          bool _normalized, CvSize _ksize,
90                          CvPoint _anchor, int _border_mode,
91                          CvScalar _border_value )
92{
93    min_depth = CV_32S;
94    sum = 0;
95    sum_count = 0;
96    normalized = false;
97    init( _max_width, _src_type, _dst_type, _normalized,
98          _ksize, _anchor, _border_mode, _border_value );
99}
100
101
102CvBoxFilter::~CvBoxFilter()
103{
104    clear();
105}
106
107
108void CvBoxFilter::init( int _max_width, int _src_type, int _dst_type,
109                        bool _normalized, CvSize _ksize,
110                        CvPoint _anchor, int _border_mode,
111                        CvScalar _border_value )
112{
113    CV_FUNCNAME( "CvBoxFilter::init" );
114
115    __BEGIN__;
116
117    sum = 0;
118    normalized = _normalized;
119
120    if( (normalized && CV_MAT_TYPE(_src_type) != CV_MAT_TYPE(_dst_type)) ||
121        (!normalized && CV_MAT_CN(_src_type) != CV_MAT_CN(_dst_type)))
122        CV_ERROR( CV_StsUnmatchedFormats,
123        "In case of normalized box filter input and output must have the same type.\n"
124        "In case of unnormalized box filter the number of input and output channels must be the same" );
125
126    min_depth = CV_MAT_DEPTH(_src_type) == CV_8U ? CV_32S : CV_64F;
127
128    CvBaseImageFilter::init( _max_width, _src_type, _dst_type, 1, _ksize,
129                             _anchor, _border_mode, _border_value );
130
131    scale = normalized ? 1./(ksize.width*ksize.height) : 1;
132
133    if( CV_MAT_DEPTH(src_type) == CV_8U )
134        x_func = (CvRowFilterFunc)icvSumRow_8u32s;
135    else if( CV_MAT_DEPTH(src_type) == CV_32F )
136        x_func = (CvRowFilterFunc)icvSumRow_32f64f;
137    else
138        CV_ERROR( CV_StsUnsupportedFormat, "Unknown/unsupported input image format" );
139
140    if( CV_MAT_DEPTH(dst_type) == CV_8U )
141    {
142        if( !normalized )
143            CV_ERROR( CV_StsBadArg, "Only normalized box filter can be used for 8u->8u transformation" );
144        y_func = (CvColumnFilterFunc)icvSumCol_32s8u;
145    }
146    else if( CV_MAT_DEPTH(dst_type) == CV_16S )
147    {
148        if( normalized || CV_MAT_DEPTH(src_type) != CV_8U )
149            CV_ERROR( CV_StsBadArg, "Only 8u->16s unnormalized box filter is supported in case of 16s output" );
150        y_func = (CvColumnFilterFunc)icvSumCol_32s16s;
151    }
152	else if( CV_MAT_DEPTH(dst_type) == CV_32S )
153	{
154		if( normalized || CV_MAT_DEPTH(src_type) != CV_8U )
155			CV_ERROR( CV_StsBadArg, "Only 8u->32s unnormalized box filter is supported in case of 32s output");
156
157		y_func = (CvColumnFilterFunc)icvSumCol_32s32s;
158	}
159    else if( CV_MAT_DEPTH(dst_type) == CV_32F )
160    {
161        if( CV_MAT_DEPTH(src_type) != CV_32F )
162            CV_ERROR( CV_StsBadArg, "Only 32f->32f box filter (normalized or not) is supported in case of 32f output" );
163        y_func = (CvColumnFilterFunc)icvSumCol_64f32f;
164    }
165	else{
166		CV_ERROR( CV_StsBadArg, "Unknown/unsupported destination image format" );
167	}
168
169    __END__;
170}
171
172
173void CvBoxFilter::start_process( CvSlice x_range, int width )
174{
175    CvBaseImageFilter::start_process( x_range, width );
176    int i, psz = CV_ELEM_SIZE(work_type);
177    uchar* s;
178    buf_end -= buf_step;
179    buf_max_count--;
180    assert( buf_max_count >= max_ky*2 + 1 );
181    s = sum = buf_end + cvAlign((width + ksize.width - 1)*CV_ELEM_SIZE(src_type), ALIGN);
182    sum_count = 0;
183
184    width *= psz;
185    for( i = 0; i < width; i++ )
186        s[i] = (uchar)0;
187}
188
189
190static void
191icvSumRow_8u32s( const uchar* src, int* dst, void* params )
192{
193    const CvBoxFilter* state = (const CvBoxFilter*)params;
194    int ksize = state->get_kernel_size().width;
195    int width = state->get_width();
196    int cn = CV_MAT_CN(state->get_src_type());
197    int i, k;
198
199    width = (width - 1)*cn; ksize *= cn;
200
201    for( k = 0; k < cn; k++, src++, dst++ )
202    {
203        int s = 0;
204        for( i = 0; i < ksize; i += cn )
205            s += src[i];
206        dst[0] = s;
207        for( i = 0; i < width; i += cn )
208        {
209            s += src[i+ksize] - src[i];
210            dst[i+cn] = s;
211        }
212    }
213}
214
215
216static void
217icvSumRow_32f64f( const float* src, double* dst, void* params )
218{
219    const CvBoxFilter* state = (const CvBoxFilter*)params;
220    int ksize = state->get_kernel_size().width;
221    int width = state->get_width();
222    int cn = CV_MAT_CN(state->get_src_type());
223    int i, k;
224
225    width = (width - 1)*cn; ksize *= cn;
226
227    for( k = 0; k < cn; k++, src++, dst++ )
228    {
229        double s = 0;
230        for( i = 0; i < ksize; i += cn )
231            s += src[i];
232        dst[0] = s;
233        for( i = 0; i < width; i += cn )
234        {
235            s += (double)src[i+ksize] - src[i];
236            dst[i+cn] = s;
237        }
238    }
239}
240
241
242static void
243icvSumCol_32s8u( const int** src, uchar* dst,
244                 int dst_step, int count, void* params )
245{
246#define BLUR_SHIFT 24
247    CvBoxFilter* state = (CvBoxFilter*)params;
248    int ksize = state->get_kernel_size().height;
249    int i, width = state->get_width();
250    int cn = CV_MAT_CN(state->get_src_type());
251    double scale = state->get_scale();
252    int iscale = cvFloor(scale*(1 << BLUR_SHIFT));
253    int* sum = (int*)state->get_sum_buf();
254    int* _sum_count = state->get_sum_count_ptr();
255    int sum_count = *_sum_count;
256
257    width *= cn;
258    src += sum_count;
259    count += ksize - 1 - sum_count;
260
261    for( ; count--; src++ )
262    {
263        const int* sp = src[0];
264        if( sum_count+1 < ksize )
265        {
266            for( i = 0; i <= width - 2; i += 2 )
267            {
268                int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
269                sum[i] = s0; sum[i+1] = s1;
270            }
271
272            for( ; i < width; i++ )
273                sum[i] += sp[i];
274
275            sum_count++;
276        }
277        else
278        {
279            const int* sm = src[-ksize+1];
280            for( i = 0; i <= width - 2; i += 2 )
281            {
282                int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
283                int t0 = CV_DESCALE(s0*iscale, BLUR_SHIFT), t1 = CV_DESCALE(s1*iscale, BLUR_SHIFT);
284                s0 -= sm[i]; s1 -= sm[i+1];
285                sum[i] = s0; sum[i+1] = s1;
286                dst[i] = (uchar)t0; dst[i+1] = (uchar)t1;
287            }
288
289            for( ; i < width; i++ )
290            {
291                int s0 = sum[i] + sp[i], t0 = CV_DESCALE(s0*iscale, BLUR_SHIFT);
292                sum[i] = s0 - sm[i]; dst[i] = (uchar)t0;
293            }
294            dst += dst_step;
295        }
296    }
297
298    *_sum_count = sum_count;
299#undef BLUR_SHIFT
300}
301
302
303static void
304icvSumCol_32s16s( const int** src, short* dst,
305                  int dst_step, int count, void* params )
306{
307    CvBoxFilter* state = (CvBoxFilter*)params;
308    int ksize = state->get_kernel_size().height;
309    int ktotal = ksize*state->get_kernel_size().width;
310    int i, width = state->get_width();
311    int cn = CV_MAT_CN(state->get_src_type());
312    int* sum = (int*)state->get_sum_buf();
313    int* _sum_count = state->get_sum_count_ptr();
314    int sum_count = *_sum_count;
315
316    dst_step /= sizeof(dst[0]);
317    width *= cn;
318    src += sum_count;
319    count += ksize - 1 - sum_count;
320
321    for( ; count--; src++ )
322    {
323        const int* sp = src[0];
324        if( sum_count+1 < ksize )
325        {
326            for( i = 0; i <= width - 2; i += 2 )
327            {
328                int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
329                sum[i] = s0; sum[i+1] = s1;
330            }
331
332            for( ; i < width; i++ )
333                sum[i] += sp[i];
334
335            sum_count++;
336        }
337        else if( ktotal < 128 )
338        {
339            const int* sm = src[-ksize+1];
340            for( i = 0; i <= width - 2; i += 2 )
341            {
342                int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
343                dst[i] = (short)s0; dst[i+1] = (short)s1;
344                s0 -= sm[i]; s1 -= sm[i+1];
345                sum[i] = s0; sum[i+1] = s1;
346            }
347
348            for( ; i < width; i++ )
349            {
350                int s0 = sum[i] + sp[i];
351                dst[i] = (short)s0;
352                sum[i] = s0 - sm[i];
353            }
354            dst += dst_step;
355        }
356        else
357        {
358            const int* sm = src[-ksize+1];
359            for( i = 0; i <= width - 2; i += 2 )
360            {
361                int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
362                dst[i] = CV_CAST_16S(s0); dst[i+1] = CV_CAST_16S(s1);
363                s0 -= sm[i]; s1 -= sm[i+1];
364                sum[i] = s0; sum[i+1] = s1;
365            }
366
367            for( ; i < width; i++ )
368            {
369                int s0 = sum[i] + sp[i];
370                dst[i] = CV_CAST_16S(s0);
371                sum[i] = s0 - sm[i];
372            }
373            dst += dst_step;
374        }
375    }
376
377    *_sum_count = sum_count;
378}
379
380static void
381icvSumCol_32s32s( const int** src, int * dst,
382                  int dst_step, int count, void* params )
383{
384    CvBoxFilter* state = (CvBoxFilter*)params;
385    int ksize = state->get_kernel_size().height;
386    int i, width = state->get_width();
387    int cn = CV_MAT_CN(state->get_src_type());
388    int* sum = (int*)state->get_sum_buf();
389    int* _sum_count = state->get_sum_count_ptr();
390    int sum_count = *_sum_count;
391
392    dst_step /= sizeof(dst[0]);
393    width *= cn;
394    src += sum_count;
395    count += ksize - 1 - sum_count;
396
397    for( ; count--; src++ )
398    {
399        const int* sp = src[0];
400        if( sum_count+1 < ksize )
401        {
402            for( i = 0; i <= width - 2; i += 2 )
403            {
404                int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
405                sum[i] = s0; sum[i+1] = s1;
406            }
407
408            for( ; i < width; i++ )
409                sum[i] += sp[i];
410
411            sum_count++;
412        }
413        else
414        {
415            const int* sm = src[-ksize+1];
416            for( i = 0; i <= width - 2; i += 2 )
417            {
418                int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
419                dst[i] = s0; dst[i+1] = s1;
420                s0 -= sm[i]; s1 -= sm[i+1];
421                sum[i] = s0; sum[i+1] = s1;
422            }
423
424            for( ; i < width; i++ )
425            {
426                int s0 = sum[i] + sp[i];
427                dst[i] = s0;
428                sum[i] = s0 - sm[i];
429            }
430            dst += dst_step;
431        }
432    }
433
434    *_sum_count = sum_count;
435}
436
437
438static void
439icvSumCol_64f32f( const double** src, float* dst,
440                  int dst_step, int count, void* params )
441{
442    CvBoxFilter* state = (CvBoxFilter*)params;
443    int ksize = state->get_kernel_size().height;
444    int i, width = state->get_width();
445    int cn = CV_MAT_CN(state->get_src_type());
446    double scale = state->get_scale();
447    bool normalized = state->is_normalized();
448    double* sum = (double*)state->get_sum_buf();
449    int* _sum_count = state->get_sum_count_ptr();
450    int sum_count = *_sum_count;
451
452    dst_step /= sizeof(dst[0]);
453    width *= cn;
454    src += sum_count;
455    count += ksize - 1 - sum_count;
456
457    for( ; count--; src++ )
458    {
459        const double* sp = src[0];
460        if( sum_count+1 < ksize )
461        {
462            for( i = 0; i <= width - 2; i += 2 )
463            {
464                double s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
465                sum[i] = s0; sum[i+1] = s1;
466            }
467
468            for( ; i < width; i++ )
469                sum[i] += sp[i];
470
471            sum_count++;
472        }
473        else
474        {
475            const double* sm = src[-ksize+1];
476            if( normalized )
477                for( i = 0; i <= width - 2; i += 2 )
478                {
479                    double s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
480                    double t0 = s0*scale, t1 = s1*scale;
481                    s0 -= sm[i]; s1 -= sm[i+1];
482                    dst[i] = (float)t0; dst[i+1] = (float)t1;
483                    sum[i] = s0; sum[i+1] = s1;
484                }
485            else
486                for( i = 0; i <= width - 2; i += 2 )
487                {
488                    double s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
489                    dst[i] = (float)s0; dst[i+1] = (float)s1;
490                    s0 -= sm[i]; s1 -= sm[i+1];
491                    sum[i] = s0; sum[i+1] = s1;
492                }
493
494            for( ; i < width; i++ )
495            {
496                double s0 = sum[i] + sp[i], t0 = s0*scale;
497                sum[i] = s0 - sm[i]; dst[i] = (float)t0;
498            }
499            dst += dst_step;
500        }
501    }
502
503    *_sum_count = sum_count;
504}
505
506
507/****************************************************************************************\
508                                      Median Filter
509\****************************************************************************************/
510
511#define CV_MINMAX_8U(a,b) \
512    (t = CV_FAST_CAST_8U((a) - (b)), (b) += t, a -= t)
513
514#if CV_SSE2 && !defined __SSE2__
515#define __SSE2__ 1
516#include "emmintrin.h"
517#endif
518
519#if defined(__VEC__) || defined(__ALTIVEC__)
520#include <altivec.h>
521#undef bool
522#endif
523
524#if defined(__GNUC__)
525#define align(x) __attribute__ ((aligned (x)))
526#elif CV_SSE2 && (defined(__ICL) || (_MSC_VER >= 1300))
527#define align(x) __declspec(align(x))
528#else
529#define align(x)
530#endif
531
532#if _MSC_VER >= 1200
533#pragma warning( disable: 4244 )
534#endif
535
536/**
537 * This structure represents a two-tier histogram. The first tier (known as the
538 * "coarse" level) is 4 bit wide and the second tier (known as the "fine" level)
539 * is 8 bit wide. Pixels inserted in the fine level also get inserted into the
540 * coarse bucket designated by the 4 MSBs of the fine bucket value.
541 *
542 * The structure is aligned on 16 bits, which is a prerequisite for SIMD
543 * instructions. Each bucket is 16 bit wide, which means that extra care must be
544 * taken to prevent overflow.
545 */
546typedef struct align(16)
547{
548    ushort coarse[16];
549    ushort fine[16][16];
550} Histogram;
551
552/**
553 * HOP is short for Histogram OPeration. This macro makes an operation \a op on
554 * histogram \a h for pixel value \a x. It takes care of handling both levels.
555 */
556#define HOP(h,x,op) \
557    h.coarse[x>>4] op; \
558    *((ushort*) h.fine + x) op;
559
560#define COP(c,j,x,op) \
561    h_coarse[ 16*(n*c+j) + (x>>4) ] op; \
562    h_fine[ 16 * (n*(16*c+(x>>4)) + j) + (x & 0xF) ] op;
563
564#if defined __SSE2__ || defined __MMX__ || defined __ALTIVEC__
565#define MEDIAN_HAVE_SIMD 1
566#else
567#define MEDIAN_HAVE_SIMD 0
568#endif
569
570/**
571 * Adds histograms \a x and \a y and stores the result in \a y. Makes use of
572 * SSE2, MMX or Altivec, if available.
573 */
574#if defined(__SSE2__)
575static inline void histogram_add( const ushort x[16], ushort y[16] )
576{
577    _mm_store_si128( (__m128i*) &y[0], _mm_add_epi16(
578        _mm_load_si128((__m128i*) &y[0]), _mm_load_si128((__m128i*) &x[0] )));
579    _mm_store_si128( (__m128i*) &y[8], _mm_add_epi16(
580        _mm_load_si128((__m128i*) &y[8]), _mm_load_si128((__m128i*) &x[8] )));
581}
582#elif defined(__MMX__)
583static inline void histogram_add( const ushort x[16], ushort y[16] )
584{
585    *(__m64*) &y[0]  = _mm_add_pi16( *(__m64*) &y[0],  *(__m64*) &x[0]  );
586    *(__m64*) &y[4]  = _mm_add_pi16( *(__m64*) &y[4],  *(__m64*) &x[4]  );
587    *(__m64*) &y[8]  = _mm_add_pi16( *(__m64*) &y[8],  *(__m64*) &x[8]  );
588    *(__m64*) &y[12] = _mm_add_pi16( *(__m64*) &y[12], *(__m64*) &x[12] );
589}
590#elif defined(__ALTIVEC__)
591static inline void histogram_add( const ushort x[16], ushort y[16] )
592{
593    *(vector ushort*) &y[0] = vec_add( *(vector ushort*) &y[0], *(vector ushort*) &x[0] );
594    *(vector ushort*) &y[8] = vec_add( *(vector ushort*) &y[8], *(vector ushort*) &x[8] );
595}
596#else
597static inline void histogram_add( const ushort x[16], ushort y[16] )
598{
599    int i;
600    for( i = 0; i < 16; ++i )
601        y[i] = (ushort)(y[i] + x[i]);
602}
603#endif
604
605/**
606 * Subtracts histogram \a x from \a y and stores the result in \a y. Makes use
607 * of SSE2, MMX or Altivec, if available.
608 */
609#if defined(__SSE2__)
610static inline void histogram_sub( const ushort x[16], ushort y[16] )
611{
612    _mm_store_si128( (__m128i*) &y[0], _mm_sub_epi16(
613        _mm_load_si128((__m128i*) &y[0]), _mm_load_si128((__m128i*) &x[0] )));
614    _mm_store_si128( (__m128i*) &y[8], _mm_sub_epi16(
615        _mm_load_si128((__m128i*) &y[8]), _mm_load_si128((__m128i*) &x[8] )));
616}
617#elif defined(__MMX__)
618static inline void histogram_sub( const ushort x[16], ushort y[16] )
619{
620    *(__m64*) &y[0]  = _mm_sub_pi16( *(__m64*) &y[0],  *(__m64*) &x[0]  );
621    *(__m64*) &y[4]  = _mm_sub_pi16( *(__m64*) &y[4],  *(__m64*) &x[4]  );
622    *(__m64*) &y[8]  = _mm_sub_pi16( *(__m64*) &y[8],  *(__m64*) &x[8]  );
623    *(__m64*) &y[12] = _mm_sub_pi16( *(__m64*) &y[12], *(__m64*) &x[12] );
624}
625#elif defined(__ALTIVEC__)
626static inline void histogram_sub( const ushort x[16], ushort y[16] )
627{
628    *(vector ushort*) &y[0] = vec_sub( *(vector ushort*) &y[0], *(vector ushort*) &x[0] );
629    *(vector ushort*) &y[8] = vec_sub( *(vector ushort*) &y[8], *(vector ushort*) &x[8] );
630}
631#else
632static inline void histogram_sub( const ushort x[16], ushort y[16] )
633{
634    int i;
635    for( i = 0; i < 16; ++i )
636        y[i] = (ushort)(y[i] - x[i]);
637}
638#endif
639
640static inline void histogram_muladd( int a, const ushort x[16],
641        ushort y[16] )
642{
643    int i;
644    for ( i = 0; i < 16; ++i )
645        y[i] = (ushort)(y[i] + a * x[i]);
646}
647
648static CvStatus CV_STDCALL
649icvMedianBlur_8u_CnR_O1( uchar* src, int src_step, uchar* dst, int dst_step,
650                         CvSize size, int kernel_size, int cn, int pad_left, int pad_right )
651{
652    int r = (kernel_size-1)/2;
653    const int m = size.height, n = size.width;
654    int i, j, k, c;
655    const unsigned char *p, *q;
656    Histogram H[4];
657    ushort *h_coarse, *h_fine, luc[4][16];
658
659    if( size.height < r || size.width < r )
660        return CV_BADSIZE_ERR;
661
662    assert( src );
663    assert( dst );
664    assert( r >= 0 );
665    assert( size.width >= 2*r+1 );
666    assert( size.height >= 2*r+1 );
667    assert( src_step != 0 );
668    assert( dst_step != 0 );
669
670    h_coarse = (ushort*) cvAlloc(  1 * 16 * n * cn * sizeof(ushort) );
671    h_fine   = (ushort*) cvAlloc( 16 * 16 * n * cn * sizeof(ushort) );
672    memset( h_coarse, 0,  1 * 16 * n * cn * sizeof(ushort) );
673    memset( h_fine,   0, 16 * 16 * n * cn * sizeof(ushort) );
674
675    /* First row initialization */
676    for ( j = 0; j < n; ++j ) {
677        for ( c = 0; c < cn; ++c ) {
678            COP( c, j, src[cn*j+c], += r+1 );
679        }
680    }
681    for ( i = 0; i < r; ++i ) {
682        for ( j = 0; j < n; ++j ) {
683            for ( c = 0; c < cn; ++c ) {
684                COP( c, j, src[src_step*i+cn*j+c], ++ );
685            }
686        }
687    }
688
689    for ( i = 0; i < m; ++i ) {
690
691        /* Update column histograms for entire row. */
692        p = src + src_step * MAX( 0, i-r-1 );
693        q = p + cn * n;
694        for ( j = 0; p != q; ++j ) {
695            for ( c = 0; c < cn; ++c, ++p ) {
696                COP( c, j, *p, -- );
697            }
698        }
699
700        p = src + src_step * MIN( m-1, i+r );
701        q = p + cn * n;
702        for ( j = 0; p != q; ++j ) {
703            for ( c = 0; c < cn; ++c, ++p ) {
704                COP( c, j, *p, ++ );
705            }
706        }
707
708        /* First column initialization */
709        memset( H, 0, cn*sizeof(H[0]) );
710        memset( luc, 0, cn*sizeof(luc[0]) );
711        if ( pad_left ) {
712            for ( c = 0; c < cn; ++c ) {
713                histogram_muladd( r, &h_coarse[16*n*c], H[c].coarse );
714            }
715        }
716        for ( j = 0; j < (pad_left ? r : 2*r); ++j ) {
717            for ( c = 0; c < cn; ++c ) {
718                histogram_add( &h_coarse[16*(n*c+j)], H[c].coarse );
719            }
720        }
721        for ( c = 0; c < cn; ++c ) {
722            for ( k = 0; k < 16; ++k ) {
723                histogram_muladd( 2*r+1, &h_fine[16*n*(16*c+k)], &H[c].fine[k][0] );
724            }
725        }
726
727        for ( j = pad_left ? 0 : r; j < (pad_right ? n : n-r); ++j ) {
728            for ( c = 0; c < cn; ++c ) {
729                int t = 2*r*r + 2*r, b, sum = 0;
730                ushort* segment;
731
732                histogram_add( &h_coarse[16*(n*c + MIN(j+r,n-1))], H[c].coarse );
733
734                /* Find median at coarse level */
735                for ( k = 0; k < 16 ; ++k ) {
736                    sum += H[c].coarse[k];
737                    if ( sum > t ) {
738                        sum -= H[c].coarse[k];
739                        break;
740                    }
741                }
742                assert( k < 16 );
743
744                /* Update corresponding histogram segment */
745                if ( luc[c][k] <= j-r ) {
746                    memset( &H[c].fine[k], 0, 16 * sizeof(ushort) );
747                    for ( luc[c][k] = j-r; luc[c][k] < MIN(j+r+1,n); ++luc[c][k] ) {
748                        histogram_add( &h_fine[16*(n*(16*c+k)+luc[c][k])], H[c].fine[k] );
749                    }
750                    if ( luc[c][k] < j+r+1 ) {
751                        histogram_muladd( j+r+1 - n, &h_fine[16*(n*(16*c+k)+(n-1))], &H[c].fine[k][0] );
752                        luc[c][k] = (ushort)(j+r+1);
753                    }
754                }
755                else {
756                    for ( ; luc[c][k] < j+r+1; ++luc[c][k] ) {
757                        histogram_sub( &h_fine[16*(n*(16*c+k)+MAX(luc[c][k]-2*r-1,0))], H[c].fine[k] );
758                        histogram_add( &h_fine[16*(n*(16*c+k)+MIN(luc[c][k],n-1))], H[c].fine[k] );
759                    }
760                }
761
762                histogram_sub( &h_coarse[16*(n*c+MAX(j-r,0))], H[c].coarse );
763
764                /* Find median in segment */
765                segment = H[c].fine[k];
766                for ( b = 0; b < 16 ; ++b ) {
767                    sum += segment[b];
768                    if ( sum > t ) {
769                        dst[dst_step*i+cn*j+c] = (uchar)(16*k + b);
770                        break;
771                    }
772                }
773                assert( b < 16 );
774            }
775        }
776    }
777
778#if defined(__MMX__)
779    _mm_empty();
780#endif
781
782    cvFree(&h_coarse);
783    cvFree(&h_fine);
784
785#undef HOP
786#undef COP
787    return CV_OK;
788}
789
790
791#if _MSC_VER >= 1200
792#pragma warning( default: 4244 )
793#endif
794
795
796static CvStatus CV_STDCALL
797icvMedianBlur_8u_CnR_Om( uchar* src, int src_step, uchar* dst, int dst_step,
798                         CvSize size, int m, int cn )
799{
800    #define N  16
801    int     zone0[4][N];
802    int     zone1[4][N*N];
803    int     x, y;
804    int     n2 = m*m/2;
805    int     nx = (m + 1)/2 - 1;
806    uchar*  src_max = src + size.height*src_step;
807    uchar*  src_right = src + size.width*cn;
808
809    #define UPDATE_ACC01( pix, cn, op ) \
810    {                                   \
811        int p = (pix);                  \
812        zone1[cn][p] op;                \
813        zone0[cn][p >> 4] op;           \
814    }
815
816    if( size.height < nx || size.width < nx )
817        return CV_BADSIZE_ERR;
818
819    if( m == 3 )
820    {
821        size.width *= cn;
822
823        for( y = 0; y < size.height; y++, dst += dst_step )
824        {
825            const uchar* src0 = src + src_step*(y-1);
826            const uchar* src1 = src0 + src_step;
827            const uchar* src2 = src1 + src_step;
828            if( y == 0 )
829                src0 = src1;
830            else if( y == size.height - 1 )
831                src2 = src1;
832
833            for( x = 0; x < 2*cn; x++ )
834            {
835                int x0 = x < cn ? x : size.width - 3*cn + x;
836                int x2 = x < cn ? x + cn : size.width - 2*cn + x;
837                int x1 = x < cn ? x0 : x2, t;
838
839                int p0 = src0[x0], p1 = src0[x1], p2 = src0[x2];
840                int p3 = src1[x0], p4 = src1[x1], p5 = src1[x2];
841                int p6 = src2[x0], p7 = src2[x1], p8 = src2[x2];
842
843                CV_MINMAX_8U(p1, p2); CV_MINMAX_8U(p4, p5);
844                CV_MINMAX_8U(p7, p8); CV_MINMAX_8U(p0, p1);
845                CV_MINMAX_8U(p3, p4); CV_MINMAX_8U(p6, p7);
846                CV_MINMAX_8U(p1, p2); CV_MINMAX_8U(p4, p5);
847                CV_MINMAX_8U(p7, p8); CV_MINMAX_8U(p0, p3);
848                CV_MINMAX_8U(p5, p8); CV_MINMAX_8U(p4, p7);
849                CV_MINMAX_8U(p3, p6); CV_MINMAX_8U(p1, p4);
850                CV_MINMAX_8U(p2, p5); CV_MINMAX_8U(p4, p7);
851                CV_MINMAX_8U(p4, p2); CV_MINMAX_8U(p6, p4);
852                CV_MINMAX_8U(p4, p2);
853                dst[x1] = (uchar)p4;
854            }
855
856            for( x = cn; x < size.width - cn; x++ )
857            {
858                int p0 = src0[x-cn], p1 = src0[x], p2 = src0[x+cn];
859                int p3 = src1[x-cn], p4 = src1[x], p5 = src1[x+cn];
860                int p6 = src2[x-cn], p7 = src2[x], p8 = src2[x+cn];
861                int t;
862
863                CV_MINMAX_8U(p1, p2); CV_MINMAX_8U(p4, p5);
864                CV_MINMAX_8U(p7, p8); CV_MINMAX_8U(p0, p1);
865                CV_MINMAX_8U(p3, p4); CV_MINMAX_8U(p6, p7);
866                CV_MINMAX_8U(p1, p2); CV_MINMAX_8U(p4, p5);
867                CV_MINMAX_8U(p7, p8); CV_MINMAX_8U(p0, p3);
868                CV_MINMAX_8U(p5, p8); CV_MINMAX_8U(p4, p7);
869                CV_MINMAX_8U(p3, p6); CV_MINMAX_8U(p1, p4);
870                CV_MINMAX_8U(p2, p5); CV_MINMAX_8U(p4, p7);
871                CV_MINMAX_8U(p4, p2); CV_MINMAX_8U(p6, p4);
872                CV_MINMAX_8U(p4, p2);
873
874                dst[x] = (uchar)p4;
875            }
876        }
877
878        return CV_OK;
879    }
880
881    for( x = 0; x < size.width; x++, dst += cn )
882    {
883        uchar* dst_cur = dst;
884        uchar* src_top = src;
885        uchar* src_bottom = src;
886        int    k, c;
887        int    x0 = -1;
888        int    src_step1 = src_step, dst_step1 = dst_step;
889
890        if( x % 2 != 0 )
891        {
892            src_bottom = src_top += src_step*(size.height-1);
893            dst_cur += dst_step*(size.height-1);
894            src_step1 = -src_step1;
895            dst_step1 = -dst_step1;
896        }
897
898        if( x <= m/2 )
899            nx++;
900
901        if( nx < m )
902            x0 = x < m/2 ? 0 : (nx-1)*cn;
903
904        // init accumulator
905        memset( zone0, 0, sizeof(zone0[0])*cn );
906        memset( zone1, 0, sizeof(zone1[0])*cn );
907
908        for( y = 0; y <= m/2; y++ )
909        {
910            for( c = 0; c < cn; c++ )
911            {
912                if( y > 0 )
913                {
914                    if( x0 >= 0 )
915                        UPDATE_ACC01( src_bottom[x0+c], c, += (m - nx) );
916                    for( k = 0; k < nx*cn; k += cn )
917                        UPDATE_ACC01( src_bottom[k+c], c, ++ );
918                }
919                else
920                {
921                    if( x0 >= 0 )
922                        UPDATE_ACC01( src_bottom[x0+c], c, += (m - nx)*(m/2+1) );
923                    for( k = 0; k < nx*cn; k += cn )
924                        UPDATE_ACC01( src_bottom[k+c], c, += m/2+1 );
925                }
926            }
927
928            if( (src_step1 > 0 && y < size.height-1) ||
929                (src_step1 < 0 && size.height-y-1 > 0) )
930                src_bottom += src_step1;
931        }
932
933        for( y = 0; y < size.height; y++, dst_cur += dst_step1 )
934        {
935            // find median
936            for( c = 0; c < cn; c++ )
937            {
938                int s = 0;
939                for( k = 0; ; k++ )
940                {
941                    int t = s + zone0[c][k];
942                    if( t > n2 ) break;
943                    s = t;
944                }
945
946                for( k *= N; ;k++ )
947                {
948                    s += zone1[c][k];
949                    if( s > n2 ) break;
950                }
951
952                dst_cur[c] = (uchar)k;
953            }
954
955            if( y+1 == size.height )
956                break;
957
958            if( cn == 1 )
959            {
960                for( k = 0; k < nx; k++ )
961                {
962                    int p = src_top[k];
963                    int q = src_bottom[k];
964                    zone1[0][p]--;
965                    zone0[0][p>>4]--;
966                    zone1[0][q]++;
967                    zone0[0][q>>4]++;
968                }
969            }
970            else if( cn == 3 )
971            {
972                for( k = 0; k < nx*3; k += 3 )
973                {
974                    UPDATE_ACC01( src_top[k], 0, -- );
975                    UPDATE_ACC01( src_top[k+1], 1, -- );
976                    UPDATE_ACC01( src_top[k+2], 2, -- );
977
978                    UPDATE_ACC01( src_bottom[k], 0, ++ );
979                    UPDATE_ACC01( src_bottom[k+1], 1, ++ );
980                    UPDATE_ACC01( src_bottom[k+2], 2, ++ );
981                }
982            }
983            else
984            {
985                assert( cn == 4 );
986                for( k = 0; k < nx*4; k += 4 )
987                {
988                    UPDATE_ACC01( src_top[k], 0, -- );
989                    UPDATE_ACC01( src_top[k+1], 1, -- );
990                    UPDATE_ACC01( src_top[k+2], 2, -- );
991                    UPDATE_ACC01( src_top[k+3], 3, -- );
992
993                    UPDATE_ACC01( src_bottom[k], 0, ++ );
994                    UPDATE_ACC01( src_bottom[k+1], 1, ++ );
995                    UPDATE_ACC01( src_bottom[k+2], 2, ++ );
996                    UPDATE_ACC01( src_bottom[k+3], 3, ++ );
997                }
998            }
999
1000            if( x0 >= 0 )
1001            {
1002                for( c = 0; c < cn; c++ )
1003                {
1004                    UPDATE_ACC01( src_top[x0+c], c, -= (m - nx) );
1005                    UPDATE_ACC01( src_bottom[x0+c], c, += (m - nx) );
1006                }
1007            }
1008
1009            if( (src_step1 > 0 && src_bottom + src_step1 < src_max) ||
1010                (src_step1 < 0 && src_bottom + src_step1 >= src) )
1011                src_bottom += src_step1;
1012
1013            if( y >= m/2 )
1014                src_top += src_step1;
1015        }
1016
1017        if( x >= m/2 )
1018            src += cn;
1019        if( src + nx*cn > src_right ) nx--;
1020    }
1021#undef N
1022#undef UPDATE_ACC
1023    return CV_OK;
1024}
1025
1026
1027/****************************************************************************************\
1028                                   Bilateral Filtering
1029\****************************************************************************************/
1030
1031static void
1032icvBilateralFiltering_8u( const CvMat* src, CvMat* dst, int d,
1033                          double sigma_color, double sigma_space )
1034{
1035    CvMat* temp = 0;
1036    float* color_weight = 0;
1037    float* space_weight = 0;
1038    int* space_ofs = 0;
1039
1040    CV_FUNCNAME( "icvBilateralFiltering_8u" );
1041
1042    __BEGIN__;
1043
1044    double gauss_color_coeff = -0.5/(sigma_color*sigma_color);
1045    double gauss_space_coeff = -0.5/(sigma_space*sigma_space);
1046    int cn = CV_MAT_CN(src->type);
1047    int i, j, k, maxk, radius;
1048    CvSize size = cvGetMatSize(src);
1049
1050    if( (CV_MAT_TYPE(src->type) != CV_8UC1 &&
1051        CV_MAT_TYPE(src->type) != CV_8UC3) ||
1052        !CV_ARE_TYPES_EQ(src, dst) )
1053        CV_ERROR( CV_StsUnsupportedFormat,
1054        "Both source and destination must be 8-bit, single-channel or 3-channel images" );
1055
1056    if( sigma_color <= 0 )
1057        sigma_color = 1;
1058    if( sigma_space <= 0 )
1059        sigma_space = 1;
1060
1061    if( d == 0 )
1062        radius = cvRound(sigma_space*1.5);
1063    else
1064        radius = d/2;
1065    radius = MAX(radius, 1);
1066    d = radius*2 + 1;
1067
1068    CV_CALL( temp = cvCreateMat( src->rows + radius*2,
1069        src->cols + radius*2, src->type ));
1070    CV_CALL( cvCopyMakeBorder( src, temp, cvPoint(radius,radius), IPL_BORDER_REPLICATE ));
1071    CV_CALL( color_weight = (float*)cvAlloc(cn*256*sizeof(color_weight[0])));
1072    CV_CALL( space_weight = (float*)cvAlloc(d*d*sizeof(space_weight[0])));
1073    CV_CALL( space_ofs = (int*)cvAlloc(d*d*sizeof(space_ofs[0])));
1074
1075    // initialize color-related bilateral filter coefficients
1076    for( i = 0; i < 256*cn; i++ )
1077        color_weight[i] = (float)exp(i*i*gauss_color_coeff);
1078
1079    // initialize space-related bilateral filter coefficients
1080    for( i = -radius, maxk = 0; i <= radius; i++ )
1081        for( j = -radius; j <= radius; j++ )
1082        {
1083            double r = sqrt((double)i*i + (double)j*j);
1084            if( r > radius )
1085                continue;
1086            space_weight[maxk] = (float)exp(r*r*gauss_space_coeff);
1087            space_ofs[maxk++] = i*temp->step + j*cn;
1088        }
1089
1090    for( i = 0; i < size.height; i++ )
1091    {
1092        const uchar* sptr = temp->data.ptr + (i+radius)*temp->step + radius*cn;
1093        uchar* dptr = dst->data.ptr + i*dst->step;
1094
1095        if( cn == 1 )
1096        {
1097            for( j = 0; j < size.width; j++ )
1098            {
1099                float sum = 0, wsum = 0;
1100                int val0 = sptr[j];
1101                for( k = 0; k < maxk; k++ )
1102                {
1103                    int val = sptr[j + space_ofs[k]];
1104                    float w = space_weight[k]*color_weight[abs(val - val0)];
1105                    sum += val*w;
1106                    wsum += w;
1107                }
1108                // overflow is not possible here => there is no need to use CV_CAST_8U
1109                dptr[j] = (uchar)cvRound(sum/wsum);
1110            }
1111        }
1112        else
1113        {
1114            assert( cn == 3 );
1115            for( j = 0; j < size.width*3; j += 3 )
1116            {
1117                float sum_b = 0, sum_g = 0, sum_r = 0, wsum = 0;
1118                int b0 = sptr[j], g0 = sptr[j+1], r0 = sptr[j+2];
1119                for( k = 0; k < maxk; k++ )
1120                {
1121                    const uchar* sptr_k = sptr + j + space_ofs[k];
1122                    int b = sptr_k[0], g = sptr_k[1], r = sptr_k[2];
1123                    float w = space_weight[k]*color_weight[abs(b - b0) +
1124                        abs(g - g0) + abs(r - r0)];
1125                    sum_b += b*w; sum_g += g*w; sum_r += r*w;
1126                    wsum += w;
1127                }
1128                wsum = 1.f/wsum;
1129                b0 = cvRound(sum_b*wsum);
1130                g0 = cvRound(sum_g*wsum);
1131                r0 = cvRound(sum_r*wsum);
1132                dptr[j] = (uchar)b0; dptr[j+1] = (uchar)g0; dptr[j+2] = (uchar)r0;
1133            }
1134        }
1135    }
1136
1137    __END__;
1138
1139    cvReleaseMat( &temp );
1140    cvFree( &color_weight );
1141    cvFree( &space_weight );
1142    cvFree( &space_ofs );
1143}
1144
1145
1146static void icvBilateralFiltering_32f( const CvMat* src, CvMat* dst, int d,
1147                                       double sigma_color, double sigma_space )
1148{
1149  	CvMat* temp = 0;
1150    float* space_weight = 0;
1151    int* space_ofs = 0;
1152    float *expLUT = 0;
1153
1154    CV_FUNCNAME( "icvBilateralFiltering_32f" );
1155
1156    __BEGIN__;
1157
1158    double gauss_color_coeff = -0.5/(sigma_color*sigma_color);
1159    double gauss_space_coeff = -0.5/(sigma_space*sigma_space);
1160    int cn = CV_MAT_CN(src->type);
1161    int i, j, k, maxk, radius;
1162    double minValSrc=-1, maxValSrc=1;
1163    const int kExpNumBinsPerChannel = 1 << 12;
1164    int kExpNumBins = 0;
1165    float lastExpVal = 1.f;
1166    int temp_step;
1167    float len, scale_index;
1168    CvMat src_reshaped;
1169
1170    CvSize size = cvGetMatSize(src);
1171
1172    if( (CV_MAT_TYPE(src->type) != CV_32FC1 &&
1173        CV_MAT_TYPE(src->type) != CV_32FC3) ||
1174        !CV_ARE_TYPES_EQ(src, dst) )
1175        CV_ERROR( CV_StsUnsupportedFormat,
1176        "Both source and destination must be 32-bit float, single-channel or 3-channel images" );
1177
1178    if( sigma_color <= 0 )
1179        sigma_color = 1;
1180    if( sigma_space <= 0 )
1181        sigma_space = 1;
1182
1183    if( d == 0 )
1184        radius = cvRound(sigma_space*1.5);
1185    else
1186        radius = d/2;
1187    radius = MAX(radius, 1);
1188    d = radius*2 + 1;
1189    // compute the min/max range for the input image (even if multichannel)
1190
1191    CV_CALL( cvReshape( src, &src_reshaped, 1 ) );
1192    CV_CALL( cvMinMaxLoc(&src_reshaped, &minValSrc, &maxValSrc) );
1193
1194    // temporary copy of the image with borders for easy processing
1195    CV_CALL( temp = cvCreateMat( src->rows + radius*2,
1196        src->cols + radius*2, src->type ));
1197    temp_step = temp->step/sizeof(float);
1198    CV_CALL( cvCopyMakeBorder( src, temp, cvPoint(radius,radius), IPL_BORDER_REPLICATE ));
1199    // allocate lookup tables
1200    CV_CALL( space_weight = (float*)cvAlloc(d*d*sizeof(space_weight[0])));
1201    CV_CALL( space_ofs = (int*)cvAlloc(d*d*sizeof(space_ofs[0])));
1202
1203    // assign a length which is slightly more than needed
1204    len = (float)(maxValSrc - minValSrc) * cn;
1205    kExpNumBins = kExpNumBinsPerChannel * cn;
1206    CV_CALL( expLUT = (float*)cvAlloc((kExpNumBins+2) * sizeof(expLUT[0])));
1207    scale_index = kExpNumBins/len;
1208
1209    // initialize the exp LUT
1210    for( i = 0; i < kExpNumBins+2; i++ )
1211    {
1212        if( lastExpVal > 0.f )
1213        {
1214            double val =  i / scale_index;
1215            expLUT[i] = (float)exp(val * val * gauss_color_coeff);
1216            lastExpVal = expLUT[i];
1217        }
1218        else
1219            expLUT[i] = 0.f;
1220    }
1221
1222    // initialize space-related bilateral filter coefficients
1223    for( i = -radius, maxk = 0; i <= radius; i++ )
1224        for( j = -radius; j <= radius; j++ )
1225        {
1226            double r = sqrt((double)i*i + (double)j*j);
1227            if( r > radius )
1228                continue;
1229            space_weight[maxk] = (float)exp(r*r*gauss_space_coeff);
1230            space_ofs[maxk++] = i*temp_step + j*cn;
1231        }
1232
1233    for( i = 0; i < size.height; i++ )
1234    {
1235	    const float* sptr = temp->data.fl + (i+radius)*temp_step + radius*cn;
1236        float* dptr = (float*)(dst->data.ptr + i*dst->step);
1237
1238        if( cn == 1 )
1239        {
1240            for( j = 0; j < size.width; j++ )
1241            {
1242                float sum = 0, wsum = 0;
1243                float val0 = sptr[j];
1244                for( k = 0; k < maxk; k++ )
1245                {
1246                    float val = sptr[j + space_ofs[k]];
1247					float alpha = (float)(fabs(val - val0)*scale_index);
1248                    int idx = cvFloor(alpha);
1249                    alpha -= idx;
1250                    float w = space_weight[k]*(expLUT[idx] + alpha*(expLUT[idx+1] - expLUT[idx]));
1251	                sum += val*w;
1252                    wsum += w;
1253                }
1254                dptr[j] = (float)(sum/wsum);
1255            }
1256        }
1257        else
1258        {
1259            assert( cn == 3 );
1260            for( j = 0; j < size.width*3; j += 3 )
1261            {
1262                float sum_b = 0, sum_g = 0, sum_r = 0, wsum = 0;
1263                float b0 = sptr[j], g0 = sptr[j+1], r0 = sptr[j+2];
1264                for( k = 0; k < maxk; k++ )
1265                {
1266                    const float* sptr_k = sptr + j + space_ofs[k];
1267                    float b = sptr_k[0], g = sptr_k[1], r = sptr_k[2];
1268					float alpha = (float)((fabs(b - b0) + fabs(g - g0) + fabs(r - r0))*scale_index);
1269                    int idx = cvFloor(alpha);
1270                    alpha -= idx;
1271                    float w = space_weight[k]*(expLUT[idx] + alpha*(expLUT[idx+1] - expLUT[idx]));
1272                    sum_b += b*w; sum_g += g*w; sum_r += r*w;
1273                    wsum += w;
1274                }
1275                wsum = 1.f/wsum;
1276                b0 = sum_b*wsum;
1277                g0 = sum_g*wsum;
1278                r0 = sum_r*wsum;
1279                dptr[j] = b0; dptr[j+1] = g0; dptr[j+2] = r0;
1280            }
1281        }
1282    }
1283
1284    __END__;
1285
1286    cvReleaseMat( &temp );
1287    cvFree( &space_weight );
1288    cvFree( &space_ofs );
1289    cvFree( &expLUT );
1290}
1291
1292//////////////////////////////// IPP smoothing functions /////////////////////////////////
1293
1294icvFilterMedian_8u_C1R_t icvFilterMedian_8u_C1R_p = 0;
1295icvFilterMedian_8u_C3R_t icvFilterMedian_8u_C3R_p = 0;
1296icvFilterMedian_8u_C4R_t icvFilterMedian_8u_C4R_p = 0;
1297
1298icvFilterBox_8u_C1R_t icvFilterBox_8u_C1R_p = 0;
1299icvFilterBox_8u_C3R_t icvFilterBox_8u_C3R_p = 0;
1300icvFilterBox_8u_C4R_t icvFilterBox_8u_C4R_p = 0;
1301icvFilterBox_32f_C1R_t icvFilterBox_32f_C1R_p = 0;
1302icvFilterBox_32f_C3R_t icvFilterBox_32f_C3R_p = 0;
1303icvFilterBox_32f_C4R_t icvFilterBox_32f_C4R_p = 0;
1304
1305typedef CvStatus (CV_STDCALL * CvSmoothFixedIPPFunc)
1306( const void* src, int srcstep, void* dst, int dststep,
1307  CvSize size, CvSize ksize, CvPoint anchor );
1308
1309//////////////////////////////////////////////////////////////////////////////////////////
1310
1311CV_IMPL void
1312cvSmooth( const void* srcarr, void* dstarr, int smooth_type,
1313          int param1, int param2, double param3, double param4 )
1314{
1315    CvBoxFilter box_filter;
1316    CvSepFilter gaussian_filter;
1317
1318    CvMat* temp = 0;
1319
1320    CV_FUNCNAME( "cvSmooth" );
1321
1322    __BEGIN__;
1323
1324    int coi1 = 0, coi2 = 0;
1325    CvMat srcstub, *src = (CvMat*)srcarr;
1326    CvMat dststub, *dst = (CvMat*)dstarr;
1327    CvSize size;
1328    int src_type, dst_type, depth, cn;
1329    double sigma1 = 0, sigma2 = 0;
1330    bool have_ipp = icvFilterMedian_8u_C1R_p != 0;
1331
1332    CV_CALL( src = cvGetMat( src, &srcstub, &coi1 ));
1333    CV_CALL( dst = cvGetMat( dst, &dststub, &coi2 ));
1334
1335    if( coi1 != 0 || coi2 != 0 )
1336        CV_ERROR( CV_BadCOI, "" );
1337
1338    src_type = CV_MAT_TYPE( src->type );
1339    dst_type = CV_MAT_TYPE( dst->type );
1340    depth = CV_MAT_DEPTH(src_type);
1341    cn = CV_MAT_CN(src_type);
1342    size = cvGetMatSize(src);
1343
1344    if( !CV_ARE_SIZES_EQ( src, dst ))
1345        CV_ERROR( CV_StsUnmatchedSizes, "" );
1346
1347    if( smooth_type != CV_BLUR_NO_SCALE && !CV_ARE_TYPES_EQ( src, dst ))
1348        CV_ERROR( CV_StsUnmatchedFormats,
1349        "The specified smoothing algorithm requires input and ouput arrays be of the same type" );
1350
1351    if( smooth_type == CV_BLUR || smooth_type == CV_BLUR_NO_SCALE ||
1352        smooth_type == CV_GAUSSIAN || smooth_type == CV_MEDIAN )
1353    {
1354        // automatic detection of kernel size from sigma
1355        if( smooth_type == CV_GAUSSIAN )
1356        {
1357            sigma1 = param3;
1358            sigma2 = param4 ? param4 : param3;
1359
1360            if( param1 == 0 && sigma1 > 0 )
1361                param1 = cvRound(sigma1*(depth == CV_8U ? 3 : 4)*2 + 1)|1;
1362            if( param2 == 0 && sigma2 > 0 )
1363                param2 = cvRound(sigma2*(depth == CV_8U ? 3 : 4)*2 + 1)|1;
1364        }
1365
1366        if( param2 == 0 )
1367            param2 = size.height == 1 ? 1 : param1;
1368        if( param1 < 1 || (param1 & 1) == 0 || param2 < 1 || (param2 & 1) == 0 )
1369            CV_ERROR( CV_StsOutOfRange,
1370                "Both mask width and height must be >=1 and odd" );
1371
1372        if( param1 == 1 && param2 == 1 )
1373        {
1374            cvConvert( src, dst );
1375            EXIT;
1376        }
1377    }
1378
1379    if( have_ipp && (smooth_type == CV_BLUR || (smooth_type == CV_MEDIAN && param1 <= 15)) &&
1380        size.width >= param1 && size.height >= param2 && param1 > 1 && param2 > 1 )
1381    {
1382        CvSmoothFixedIPPFunc ipp_median_box_func = 0;
1383
1384        if( smooth_type == CV_BLUR )
1385        {
1386            ipp_median_box_func =
1387                src_type == CV_8UC1 ? icvFilterBox_8u_C1R_p :
1388                src_type == CV_8UC3 ? icvFilterBox_8u_C3R_p :
1389                src_type == CV_8UC4 ? icvFilterBox_8u_C4R_p :
1390                src_type == CV_32FC1 ? icvFilterBox_32f_C1R_p :
1391                src_type == CV_32FC3 ? icvFilterBox_32f_C3R_p :
1392                src_type == CV_32FC4 ? icvFilterBox_32f_C4R_p : 0;
1393        }
1394        else if( smooth_type == CV_MEDIAN )
1395        {
1396            ipp_median_box_func =
1397                src_type == CV_8UC1 ? icvFilterMedian_8u_C1R_p :
1398                src_type == CV_8UC3 ? icvFilterMedian_8u_C3R_p :
1399                src_type == CV_8UC4 ? icvFilterMedian_8u_C4R_p : 0;
1400        }
1401
1402        if( ipp_median_box_func )
1403        {
1404            CvSize el_size = { param1, param2 };
1405            CvPoint el_anchor = { param1/2, param2/2 };
1406            int stripe_size = 1 << 14; // the optimal value may depend on CPU cache,
1407                                       // overhead of the current IPP code etc.
1408            const uchar* shifted_ptr;
1409            int y, dy = 0;
1410            int temp_step, dst_step = dst->step;
1411
1412            CV_CALL( temp = icvIPPFilterInit( src, stripe_size, el_size ));
1413
1414            shifted_ptr = temp->data.ptr +
1415                el_anchor.y*temp->step + el_anchor.x*CV_ELEM_SIZE(src_type);
1416            temp_step = temp->step ? temp->step : CV_STUB_STEP;
1417
1418            for( y = 0; y < src->rows; y += dy )
1419            {
1420                dy = icvIPPFilterNextStripe( src, temp, y, el_size, el_anchor );
1421                IPPI_CALL( ipp_median_box_func( shifted_ptr, temp_step,
1422                    dst->data.ptr + y*dst_step, dst_step, cvSize(src->cols, dy),
1423                    el_size, el_anchor ));
1424            }
1425            EXIT;
1426        }
1427    }
1428
1429    if( smooth_type == CV_BLUR || smooth_type == CV_BLUR_NO_SCALE )
1430    {
1431        CV_CALL( box_filter.init( src->cols, src_type, dst_type,
1432            smooth_type == CV_BLUR, cvSize(param1, param2) ));
1433        CV_CALL( box_filter.process( src, dst ));
1434    }
1435    else if( smooth_type == CV_MEDIAN )
1436    {
1437        int img_size_mp = size.width*size.height;
1438        img_size_mp = (img_size_mp + (1<<19)) >> 20;
1439
1440        if( depth != CV_8U || (cn != 1 && cn != 3 && cn != 4) )
1441            CV_ERROR( CV_StsUnsupportedFormat,
1442            "Median filter only supports 8uC1, 8uC3 and 8uC4 images" );
1443
1444        if( size.width < param1*2 || size.height < param1*2 ||
1445            param1 <= 3 + (img_size_mp < 1 ? 12 : img_size_mp < 4 ? 6 : 2)*(MEDIAN_HAVE_SIMD ? 1 : 3))
1446        {
1447            // Special case optimized for 3x3
1448            IPPI_CALL( icvMedianBlur_8u_CnR_Om( src->data.ptr, src->step,
1449                dst->data.ptr, dst->step, size, param1, cn ));
1450        }
1451        else
1452        {
1453            const int r = (param1 - 1) / 2;
1454            const int CACHE_SIZE = (int) ( 0.95 * 256 * 1024 / cn );  // assume a 256 kB cache size
1455            const int STRIPES = (int) cvCeil( (double) (size.width - 2*r) /
1456                    (CACHE_SIZE / sizeof(Histogram) - 2*r) );
1457            const int STRIPE_SIZE = (int) cvCeil(
1458                    (double) ( size.width + STRIPES*2*r - 2*r ) / STRIPES );
1459
1460            for( int i = 0; i < size.width; i += STRIPE_SIZE - 2*r )
1461            {
1462                int stripe = STRIPE_SIZE;
1463                // Make sure that the filter kernel fits into one stripe.
1464                if( i + STRIPE_SIZE - 2*r >= size.width ||
1465                    size.width - (i + STRIPE_SIZE - 2*r) < 2*r+1 )
1466                    stripe = size.width - i;
1467
1468                IPPI_CALL( icvMedianBlur_8u_CnR_O1( src->data.ptr + cn*i, src->step,
1469                    dst->data.ptr + cn*i, dst->step, cvSize(stripe, size.height),
1470                    param1, cn, i == 0, stripe == size.width - i ));
1471
1472                if( stripe == size.width - i )
1473                    break;
1474            }
1475        }
1476    }
1477    else if( smooth_type == CV_GAUSSIAN )
1478    {
1479        CvSize ksize = { param1, param2 };
1480        float* kx = (float*)cvStackAlloc( ksize.width*sizeof(kx[0]) );
1481        float* ky = (float*)cvStackAlloc( ksize.height*sizeof(ky[0]) );
1482        CvMat KX = cvMat( 1, ksize.width, CV_32F, kx );
1483        CvMat KY = cvMat( 1, ksize.height, CV_32F, ky );
1484
1485        CvSepFilter::init_gaussian_kernel( &KX, sigma1 );
1486        if( ksize.width != ksize.height || fabs(sigma1 - sigma2) > FLT_EPSILON )
1487            CvSepFilter::init_gaussian_kernel( &KY, sigma2 );
1488        else
1489            KY.data.fl = kx;
1490
1491        if( have_ipp && size.width >= param1*3 &&
1492            size.height >= param2 && param1 > 1 && param2 > 1 )
1493        {
1494            int done;
1495            CV_CALL( done = icvIPPSepFilter( src, dst, &KX, &KY,
1496                        cvPoint(ksize.width/2,ksize.height/2)));
1497            if( done )
1498                EXIT;
1499        }
1500
1501        CV_CALL( gaussian_filter.init( src->cols, src_type, dst_type, &KX, &KY ));
1502        CV_CALL( gaussian_filter.process( src, dst ));
1503    }
1504    else if( smooth_type == CV_BILATERAL )
1505    {
1506        if( param2 != 0 && (param2 != param1 || param1 % 2 == 0) )
1507            CV_ERROR( CV_StsBadSize, "Bilateral filter only supports square windows of odd size" );
1508
1509        switch( src_type )
1510        {
1511        case CV_32FC1:
1512        case CV_32FC3:
1513            CV_CALL( icvBilateralFiltering_32f( src, dst, param1, param3, param4 ));
1514            break;
1515        case CV_8UC1:
1516        case CV_8UC3:
1517            CV_CALL( icvBilateralFiltering_8u( src, dst, param1, param3, param4 ));
1518            break;
1519        default:
1520            CV_ERROR( CV_StsUnsupportedFormat,
1521                "Unknown/unsupported format: bilateral filter only supports 8uC1, 8uC3, 32fC1 and 32fC3 formats" );
1522        }
1523    }
1524
1525    __END__;
1526
1527    cvReleaseMat( &temp );
1528}
1529
1530/* End of file. */
1531