1// This file is part of OpenCV project.
2// It is subject to the license terms in the LICENSE file found in the top-level directory
3// of this distribution and at http://opencv.org/license.html.
4//
5// Copyright (C) 2014, Itseez, Inc, all rights reserved.
6
7//
8// Common preprocessors macro
9//
10
11//
12// TODO: Move this common code into "header" file
13//
14
15#ifndef NL // New Line: for preprocessor debugging
16#define NL
17#endif
18
19#define REF(x) x
20#define __CAT(x, y) x##y
21#define CAT(x, y) __CAT(x, y)
22
23//
24// All matrixes are come with this description ("name" is a name of matrix):
25// * name_CN - number of channels (1,2,3,4)
26// * name_DEPTH - numeric value of CV_MAT_DEPTH(type). See CV_8U, CV_32S, etc macro below.
27//
28// Currently we also pass these attributes (to reduce this macro block):
29// * name_T - datatype (int, float, uchar4, float4)
30// * name_T1 - datatype for one channel (int, float, uchar).
31//   It is equal to result of "T1(name_T)" macro
32// * name_TSIZE - CV_ELEM_SIZE(type).
33//   We can't use sizeof(name_T) here, because sizeof(float3) is usually equal to 8, not 6.
34// * name_T1SIZE - CV_ELEM_SIZE1(type)
35//
36
37//
38// Usage sample:
39//
40// #define workType TYPE(float, src_CN)
41// #define convertToWorkType CONVERT_TO(workType)
42// #define convertWorkTypeToDstType CONVERT(workType, dst_T)
43//
44// __kernel void kernelFn(DECLARE_MAT_ARG(src), DECLARE_MAT_ARG(dst))
45// {
46//     const int x = get_global_id(0);
47//     const int y = get_global_id(1);
48//
49//     if (x < srcWidth && y < srcHeight)
50//     {
51//         int src_byteOffset = MAT_BYTE_OFFSET(src, x, y);
52//         int dst_byteOffset = MAT_BYTE_OFFSET(dst, x, y);
53//         workType value = convertToWorkType(LOAD_MAT_AT(src, src_byteOffset));
54//
55//         ... value processing ...
56//
57//         STORE_MAT_AT(dst, dst_byteOffset, convertWorkTypeToDstType(value));
58//     }
59// }
60//
61
62#define DECLARE_MAT_ARG(name) \
63    __global uchar* restrict name ## Ptr, \
64    int name ## StepBytes, \
65    int name ## Offset, \
66    int name ## Height, \
67    int name ## Width NL
68
69#define MAT_BYTE_OFFSET(name, x, y) mad24((y)/* + name ## OffsetY*/, name ## StepBytes, ((x)/* + name ## OffsetX*/) * (int)(name ## _TSIZE) + name ## Offset)
70#define MAT_RELATIVE_BYTE_OFFSET(name, x, y) mad24(y, name ## StepBytes, (x) * (int)(name ## _TSIZE))
71
72#define __LOAD_MAT_AT(name, byteOffset) *((const __global name ## _T*)(name ## Ptr + (byteOffset)))
73#define __vload_CN__(name_cn) vload ## name_cn
74#define __vload_CN_(name_cn) __vload_CN__(name_cn)
75#define __vload_CN(name) __vload_CN_(name ## _CN)
76#define __LOAD_MAT_AT_vload(name, byteOffset) __vload_CN(name)(0, ((const __global name ## _T1*)(name ## Ptr + (byteOffset))))
77#define __LOAD_MAT_AT_1 __LOAD_MAT_AT
78#define __LOAD_MAT_AT_2 __LOAD_MAT_AT
79#define __LOAD_MAT_AT_3 __LOAD_MAT_AT_vload
80#define __LOAD_MAT_AT_4 __LOAD_MAT_AT
81#define __LOAD_MAT_AT_CN__(name_cn) __LOAD_MAT_AT_ ## name_cn
82#define __LOAD_MAT_AT_CN_(name_cn) __LOAD_MAT_AT_CN__(name_cn)
83#define __LOAD_MAT_AT_CN(name) __LOAD_MAT_AT_CN_(name ## _CN)
84#define LOAD_MAT_AT(name, byteOffset) __LOAD_MAT_AT_CN(name)(name, byteOffset)
85
86#define __STORE_MAT_AT(name, byteOffset, v) *((__global name ## _T*)(name ## Ptr + (byteOffset))) = v
87#define __vstore_CN__(name_cn) vstore ## name_cn
88#define __vstore_CN_(name_cn) __vstore_CN__(name_cn)
89#define __vstore_CN(name) __vstore_CN_(name ## _CN)
90#define __STORE_MAT_AT_vstore(name, byteOffset, v) __vstore_CN(name)(v, 0, ((__global name ## _T1*)(name ## Ptr + (byteOffset))))
91#define __STORE_MAT_AT_1 __STORE_MAT_AT
92#define __STORE_MAT_AT_2 __STORE_MAT_AT
93#define __STORE_MAT_AT_3 __STORE_MAT_AT_vstore
94#define __STORE_MAT_AT_4 __STORE_MAT_AT
95#define __STORE_MAT_AT_CN__(name_cn) __STORE_MAT_AT_ ## name_cn
96#define __STORE_MAT_AT_CN_(name_cn) __STORE_MAT_AT_CN__(name_cn)
97#define __STORE_MAT_AT_CN(name) __STORE_MAT_AT_CN_(name ## _CN)
98#define STORE_MAT_AT(name, byteOffset, v) __STORE_MAT_AT_CN(name)(name, byteOffset, v)
99
100#define T1_uchar uchar
101#define T1_uchar2 uchar
102#define T1_uchar3 uchar
103#define T1_uchar4 uchar
104#define T1_char char
105#define T1_char2 char
106#define T1_char3 char
107#define T1_char4 char
108#define T1_ushort ushort
109#define T1_ushort2 ushort
110#define T1_ushort3 ushort
111#define T1_ushort4 ushort
112#define T1_short short
113#define T1_short2 short
114#define T1_short3 short
115#define T1_short4 short
116#define T1_int int
117#define T1_int2 int
118#define T1_int3 int
119#define T1_int4 int
120#define T1_float float
121#define T1_float2 float
122#define T1_float3 float
123#define T1_float4 float
124#define T1_double double
125#define T1_double2 double
126#define T1_double3 double
127#define T1_double4 double
128#define T1(type) REF(CAT(T1_, REF(type)))
129
130#define uchar1 uchar
131#define char1 char
132#define short1 short
133#define ushort1 ushort
134#define int1 int
135#define float1 float
136#define double1 double
137#define TYPE(type, cn) REF(CAT(REF(type), REF(cn)))
138
139#define __CONVERT_MODE_uchar_uchar __NO_CONVERT
140#define __CONVERT_MODE_uchar_char __CONVERT_sat
141#define __CONVERT_MODE_uchar_ushort __CONVERT
142#define __CONVERT_MODE_uchar_short __CONVERT
143#define __CONVERT_MODE_uchar_int __CONVERT
144#define __CONVERT_MODE_uchar_float __CONVERT
145#define __CONVERT_MODE_uchar_double __CONVERT
146#define __CONVERT_MODE_char_uchar __CONVERT_sat
147#define __CONVERT_MODE_char_char __NO_CONVERT
148#define __CONVERT_MODE_char_ushort __CONVERT_sat
149#define __CONVERT_MODE_char_short __CONVERT
150#define __CONVERT_MODE_char_int __CONVERT
151#define __CONVERT_MODE_char_float __CONVERT
152#define __CONVERT_MODE_char_double __CONVERT
153#define __CONVERT_MODE_ushort_uchar __CONVERT_sat
154#define __CONVERT_MODE_ushort_char __CONVERT_sat
155#define __CONVERT_MODE_ushort_ushort __NO_CONVERT
156#define __CONVERT_MODE_ushort_short __CONVERT_sat
157#define __CONVERT_MODE_ushort_int __CONVERT
158#define __CONVERT_MODE_ushort_float __CONVERT
159#define __CONVERT_MODE_ushort_double __CONVERT
160#define __CONVERT_MODE_short_uchar __CONVERT_sat
161#define __CONVERT_MODE_short_char __CONVERT_sat
162#define __CONVERT_MODE_short_ushort __CONVERT_sat
163#define __CONVERT_MODE_short_short __NO_CONVERT
164#define __CONVERT_MODE_short_int __CONVERT
165#define __CONVERT_MODE_short_float __CONVERT
166#define __CONVERT_MODE_short_double __CONVERT
167#define __CONVERT_MODE_int_uchar __CONVERT_sat
168#define __CONVERT_MODE_int_char __CONVERT_sat
169#define __CONVERT_MODE_int_ushort __CONVERT_sat
170#define __CONVERT_MODE_int_short __CONVERT_sat
171#define __CONVERT_MODE_int_int __NO_CONVERT
172#define __CONVERT_MODE_int_float __CONVERT
173#define __CONVERT_MODE_int_double __CONVERT
174#define __CONVERT_MODE_float_uchar __CONVERT_sat_rte
175#define __CONVERT_MODE_float_char __CONVERT_sat_rte
176#define __CONVERT_MODE_float_ushort __CONVERT_sat_rte
177#define __CONVERT_MODE_float_short __CONVERT_sat_rte
178#define __CONVERT_MODE_float_int __CONVERT_rte
179#define __CONVERT_MODE_float_float __NO_CONVERT
180#define __CONVERT_MODE_float_double __CONVERT
181#define __CONVERT_MODE_double_uchar __CONVERT_sat_rte
182#define __CONVERT_MODE_double_char __CONVERT_sat_rte
183#define __CONVERT_MODE_double_ushort __CONVERT_sat_rte
184#define __CONVERT_MODE_double_short __CONVERT_sat_rte
185#define __CONVERT_MODE_double_int __CONVERT_rte
186#define __CONVERT_MODE_double_float __CONVERT
187#define __CONVERT_MODE_double_double __NO_CONVERT
188#define __CONVERT_MODE(srcType, dstType) CAT(__CONVERT_MODE_, CAT(REF(T1(srcType)), CAT(_, REF(T1(dstType)))))
189
190#define __ROUND_MODE__NO_CONVERT
191#define __ROUND_MODE__CONVERT // nothing
192#define __ROUND_MODE__CONVERT_rte _rte
193#define __ROUND_MODE__CONVERT_sat _sat
194#define __ROUND_MODE__CONVERT_sat_rte _sat_rte
195#define ROUND_MODE(srcType, dstType) CAT(__ROUND_MODE_, __CONVERT_MODE(srcType, dstType))
196
197#define __CONVERT_ROUND(dstType, roundMode) CAT(CAT(convert_, REF(dstType)), roundMode)
198#define __NO_CONVERT(dstType) // nothing
199#define __CONVERT(dstType) __CONVERT_ROUND(dstType,)
200#define __CONVERT_rte(dstType) __CONVERT_ROUND(dstType,_rte)
201#define __CONVERT_sat(dstType) __CONVERT_ROUND(dstType,_sat)
202#define __CONVERT_sat_rte(dstType) __CONVERT_ROUND(dstType,_sat_rte)
203#define CONVERT(srcType, dstType) REF(__CONVERT_MODE(srcType,dstType))(dstType)
204#define CONVERT_TO(dstType) __CONVERT_ROUND(dstType,)
205
206// OpenCV depths
207#define CV_8U   0
208#define CV_8S   1
209#define CV_16U  2
210#define CV_16S  3
211#define CV_32S  4
212#define CV_32F  5
213#define CV_64F  6
214
215//
216// End of common preprocessors macro
217//
218
219
220
221#if defined(DEFINE_feed)
222
223#define workType TYPE(weight_T1, src_CN)
224
225#if src_DEPTH == 3 && src_CN == 3
226#define convertSrcToWorkType convert_float3
227#else
228#define convertSrcToWorkType CONVERT_TO(workType)
229#endif
230
231#if dst_DEPTH == 3 && dst_CN == 3
232#define convertToDstType convert_short3
233#else
234#define convertToDstType CONVERT_TO(dst_T) // sat_rte provides incompatible results with CPU path
235#endif
236
237__kernel void feed(
238        DECLARE_MAT_ARG(src), DECLARE_MAT_ARG(weight),
239        DECLARE_MAT_ARG(dst), DECLARE_MAT_ARG(dstWeight)
240)
241{
242    const int x = get_global_id(0);
243    const int y = get_global_id(1);
244
245    if (x < srcWidth && y < srcHeight)
246    {
247        int src_byteOffset = MAT_BYTE_OFFSET(src, x, y);
248        int weight_byteOffset = MAT_BYTE_OFFSET(weight, x, y);
249        int dst_byteOffset = MAT_BYTE_OFFSET(dst, x, y);
250        int dstWeight_byteOffset = MAT_BYTE_OFFSET(dstWeight, x, y);
251
252        weight_T w = LOAD_MAT_AT(weight, weight_byteOffset);
253        workType src_value = convertSrcToWorkType(LOAD_MAT_AT(src, src_byteOffset));
254        STORE_MAT_AT(dst, dst_byteOffset, LOAD_MAT_AT(dst, dst_byteOffset) + convertToDstType(src_value * w));
255        STORE_MAT_AT(dstWeight, dstWeight_byteOffset, LOAD_MAT_AT(dstWeight, dstWeight_byteOffset) + w);
256    }
257}
258
259#endif
260
261#if defined(DEFINE_normalizeUsingWeightMap)
262
263#if mat_DEPTH == 3 && mat_CN == 3
264#define workType float3
265#define convertSrcToWorkType convert_float3
266#define convertToDstType convert_short3
267#else
268#define workType TYPE(weight_T1, mat_CN)
269#define convertSrcToWorkType CONVERT_TO(workType)
270#define convertToDstType CONVERT_TO(mat_T) // sat_rte provides incompatible results with CPU path
271#endif
272
273#if weight_DEPTH >= CV_32F
274#define WEIGHT_EPS 1e-5f
275#else
276#define WEIGHT_EPS 0
277#endif
278
279__kernel void normalizeUsingWeightMap(
280        DECLARE_MAT_ARG(mat), DECLARE_MAT_ARG(weight)
281)
282{
283    const int x = get_global_id(0);
284    const int y = get_global_id(1);
285
286    if (x < matWidth && y < matHeight)
287    {
288        int mat_byteOffset = MAT_BYTE_OFFSET(mat, x, y);
289        int weight_byteOffset = MAT_BYTE_OFFSET(weight, x, y);
290
291        weight_T w = LOAD_MAT_AT(weight, weight_byteOffset);
292        workType value = convertSrcToWorkType(LOAD_MAT_AT(mat, mat_byteOffset));
293        value = value / (w + WEIGHT_EPS);
294        STORE_MAT_AT(mat, mat_byteOffset, convertToDstType(value));
295    }
296}
297
298#endif
299