1/*M///////////////////////////////////////////////////////////////////////////////////////
2//
3//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4//
5//  By downloading, copying, installing or using the software you agree to this license.
6//  If you do not agree to this license, do not download, install,
7//  copy or use the software.
8//
9//
10//                           License Agreement
11//                For Open Source Computer Vision Library
12//
13// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
14// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
15// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
16// Third party copyrights are property of their respective owners.
17//
18// Redistribution and use in source and binary forms, with or without modification,
19// are permitted provided that the following conditions are met:
20//
21//   * Redistribution's of source code must retain the above copyright notice,
22//     this list of conditions and the following disclaimer.
23//
24//   * Redistribution's in binary form must reproduce the above copyright notice,
25//     this list of conditions and the following disclaimer in the documentation
26//     and/or other materials provided with the distribution.
27//
28//   * The name of the copyright holders may not be used to endorse or promote products
29//     derived from this software without specific prior written permission.
30//
31// This software is provided by the copyright holders and contributors as is and
32// any express or implied warranties, including, but not limited to, the implied
33// warranties of merchantability and fitness for a particular purpose are disclaimed.
34// In no event shall the copyright holders or contributors be liable for any direct,
35// indirect, incidental, special, exemplary, or consequential damages
36// (including, but not limited to, procurement of substitute goods or services;
37// loss of use, data, or profits; or business interruption) however caused
38// and on any theory of liability, whether in contract, strict liability,
39// or tort (including negligence or otherwise) arising in any way out of
40// the use of this software, even if advised of the possibility of such damage.
41//
42//M*/
43
// When DOUBLE_SUPPORT is defined, enable whichever fp64 extension macro is
// present; cl_amd_fp64 is checked first, cl_khr_fp64 second.
#if defined(DOUBLE_SUPPORT)
#if defined(cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined(cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
51
// MIN_VAL / MAX_VAL: smallest and largest value of the accumulator range
// selected by the build-time `ddepth` macro. Any other `ddepth` is rejected
// at compile time.
// Negative bounds are parenthesized so the macros expand safely when they
// are used inside a larger expression.
#if ddepth == 0        // 0 .. 255
#define MIN_VAL 0
#define MAX_VAL 255
#elif ddepth == 1      // -128 .. 127
#define MIN_VAL (-128)
#define MAX_VAL 127
#elif ddepth == 2      // 0 .. 65535
#define MIN_VAL 0
#define MAX_VAL 65535
#elif ddepth == 3      // -32768 .. 32767
#define MIN_VAL (-32768)
#define MAX_VAL 32767
#elif ddepth == 4      // full int range
#define MIN_VAL INT_MIN
#define MAX_VAL INT_MAX
#elif ddepth == 5      // finite float range
#define MIN_VAL (-FLT_MAX)
#define MAX_VAL FLT_MAX
#elif ddepth == 6      // finite double range
#define MIN_VAL (-DBL_MAX)
#define MAX_VAL DBL_MAX
#else
#error "Unsupported depth"
#endif
76
77#define noconvert
78
79#if defined OCL_CV_REDUCE_SUM || defined OCL_CV_REDUCE_AVG
80#define INIT_VALUE 0
81#define PROCESS_ELEM(acc, value) acc += value
82#elif defined OCL_CV_REDUCE_MAX
83#define INIT_VALUE MIN_VAL
84#define PROCESS_ELEM(acc, value) acc = max(value, acc)
85#elif defined OCL_CV_REDUCE_MIN
86#define INIT_VALUE MAX_VAL
87#define PROCESS_ELEM(acc, value) acc = min(value, acc)
88#else
89#error "No operation is specified"
90#endif
91
92#ifdef OP_REDUCE_PRE
93
94__kernel void reduce_horz_opt(__global const uchar * srcptr, int src_step, int src_offset, int rows, int cols,
95                     __global uchar * dstptr, int dst_step, int dst_offset
96#ifdef OCL_CV_REDUCE_AVG
97                     , float fscale
98#endif
99                     )
100{
101    __local bufT lsmem[TILE_HEIGHT][BUF_COLS][cn];
102
103    int x = get_global_id(0);
104    int y = get_global_id(1);
105    int liy = get_local_id(1);
106    if ((x < BUF_COLS) && (y < rows))
107    {
108        int src_index = mad24(y, src_step, mad24(x, (int)sizeof(srcT) * cn, src_offset));
109
110        __global const srcT * src = (__global const srcT *)(srcptr + src_index);
111        bufT tmp[cn];
112        #pragma unroll
113        for (int c = 0; c < cn; ++c)
114            tmp[c] = INIT_VALUE;
115
116        int src_step_mul = BUF_COLS * cn;
117        for (int idx = x; idx < cols; idx += BUF_COLS, src += src_step_mul)
118        {
119            #pragma unroll
120            for (int c = 0; c < cn; ++c)
121            {
122                bufT value = convertToBufT(src[c]);
123                PROCESS_ELEM(tmp[c], value);
124            }
125        }
126
127        #pragma unroll
128        for (int c = 0; c < cn; ++c)
129            lsmem[liy][x][c] = tmp[c];
130    }
131    barrier(CLK_LOCAL_MEM_FENCE);
132    if ((x < BUF_COLS / 2) && (y < rows))
133    {
134        #pragma unroll
135        for (int c = 0; c < cn; ++c)
136        {
137            PROCESS_ELEM(lsmem[liy][x][c], lsmem[liy][x +  BUF_COLS / 2][c]);
138        }
139    }
140    barrier(CLK_LOCAL_MEM_FENCE);
141    if ((x == 0) && (y < rows))
142    {
143        int dst_index = mad24(y, dst_step, dst_offset);
144
145        __global dstT * dst = (__global dstT *)(dstptr + dst_index);
146        bufT tmp[cn];
147        #pragma unroll
148        for (int c = 0; c < cn; ++c)
149            tmp[c] = INIT_VALUE;
150
151        #pragma unroll
152        for (int xin = 0; xin < BUF_COLS / 2; xin ++)
153        {
154            #pragma unroll
155            for (int c = 0; c < cn; ++c)
156            {
157                PROCESS_ELEM(tmp[c], lsmem[liy][xin][c]);
158            }
159        }
160
161        #pragma unroll
162        for (int c = 0; c < cn; ++c)
163#ifdef OCL_CV_REDUCE_AVG
164            dst[c] = convertToDT(convertToWT(tmp[c]) * fscale);
165#else
166            dst[c] = convertToDT(tmp[c]);
167#endif
168    }
169}
170
171#else
172
173__kernel void reduce(__global const uchar * srcptr, int src_step, int src_offset, int rows, int cols,
174                     __global uchar * dstptr, int dst_step, int dst_offset
175#ifdef OCL_CV_REDUCE_AVG
176                     , float fscale
177#endif
178                     )
179{
180#if dim == 0 // reduce to a single row
181    int x = get_global_id(0);
182    if (x < cols)
183    {
184        int src_index = mad24(x, (int)sizeof(srcT) * cn, src_offset);
185        int dst_index = mad24(x, (int)sizeof(dstT0) * cn, dst_offset);
186
187        __global dstT0 * dst = (__global dstT0 *)(dstptr + dst_index);
188        dstT tmp[cn];
189        #pragma unroll
190        for (int c = 0; c < cn; ++c)
191            tmp[c] = INIT_VALUE;
192
193        for (int y = 0; y < rows; ++y, src_index += src_step)
194        {
195            __global const srcT * src = (__global const srcT *)(srcptr + src_index);
196            #pragma unroll
197            for (int c = 0; c < cn; ++c)
198            {
199                dstT value = convertToDT(src[c]);
200                PROCESS_ELEM(tmp[c], value);
201            }
202        }
203
204        #pragma unroll
205        for (int c = 0; c < cn; ++c)
206#ifdef OCL_CV_REDUCE_AVG
207            dst[c] = convertToDT0(convertToWT(tmp[c]) * fscale);
208#else
209            dst[c] = convertToDT0(tmp[c]);
210#endif
211    }
212#elif dim == 1 // reduce to a single column
213    int y = get_global_id(0);
214    if (y < rows)
215    {
216        int src_index = mad24(y, src_step, src_offset);
217        int dst_index = mad24(y, dst_step, dst_offset);
218
219        __global const srcT * src = (__global const srcT *)(srcptr + src_index);
220        __global dstT * dst = (__global dstT *)(dstptr + dst_index);
221        dstT tmp[cn];
222        #pragma unroll
223        for (int c = 0; c < cn; ++c)
224            tmp[c] = INIT_VALUE;
225
226        for (int x = 0; x < cols; ++x, src += cn)
227        {
228            #pragma unroll
229            for (int c = 0; c < cn; ++c)
230            {
231                dstT value = convertToDT(src[c]);
232                PROCESS_ELEM(tmp[c], value);
233            }
234        }
235
236        #pragma unroll
237        for (int c = 0; c < cn; ++c)
238#ifdef OCL_CV_REDUCE_AVG
239            dst[c] = convertToDT0(convertToWT(tmp[c]) * fscale);
240#else
241            dst[c] = convertToDT0(tmp[c]);
242#endif
243    }
244#else
245#error "Dims must be either 0 or 1"
246#endif
247}
248
249#endif
250