1793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler/*M///////////////////////////////////////////////////////////////////////////////////////
2793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
3793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
5793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//  By downloading, copying, installing or using the software you agree to this license.
6793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//  If you do not agree to this license, do not download, install,
7793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//  copy or use the software.
8793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
9793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
10793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//                           License Agreement
11793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//                For Open Source Computer Vision Library
12793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
13793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
14793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
15793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// Third party copyrights are property of their respective owners.
16793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
17793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// @Authors
18793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//    Wu Zailong, bullet@yeah.net
19793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
20793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// Redistribution and use in source and binary forms, with or without modification,
21793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// are permitted provided that the following conditions are met:
22793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
23793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//   * Redistribution's of source code must retain the above copyright notice,
24793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//     this list of conditions and the following disclaimer.
25793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
26793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//   * Redistribution's in binary form must reproduce the above copyright notice,
27793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//     this list of conditions and the following disclaimer in the documentation
28793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//     and/or other materials provided with the distribution.
29793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
30793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//   * The name of the copyright holders may not be used to endorse or promote products
31793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//     derived from this software without specific prior written permission.
32793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
33793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// This software is provided by the copyright holders and contributors as is and
34793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// any express or implied warranties, including, but not limited to, the implied
35793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// warranties of merchantability and fitness for a particular purpose are disclaimed.
36793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// In no event shall the Intel Corporation or contributors be liable for any direct,
37793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// indirect, incidental, special, exemplary, or consequential damages
38793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// (including, but not limited to, procurement of substitute goods or services;
39793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// loss of use, data, or profits; or business interruption) however caused
40793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// and on any theory of liability, whether in contract, strict liability,
41793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// or tort (including negligence or otherwise) arising in any way out of
42793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// the use of this software, even if advised of the possibility of such damage.
43793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
44793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//M*/
45793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
// Enable a double-precision extension when the host requested DOUBLE_SUPPORT.
// The AMD extension is preferred; the Khronos one is used only if the AMD one is not advertised.
#if defined(DOUBLE_SUPPORT) && defined(cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined(DOUBLE_SUPPORT) && defined(cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
53793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
// Conversion placeholder that expands to nothing.
#define noconvert

// Pixel access helpers.
// T, T1 and cn are not defined in this part of the file - presumably supplied as build options
// (T: pixel type, T1: its channel type, cn: channel count); confirm against the host code.
//
//   loadpix(addr)       - read one pixel from the global-memory address addr
//   storepix(val, addr) - write the pixel val to the global-memory address addr
//   TSIZE               - byte distance between two neighbouring pixels
//   convertScalar(a)    - turn the border scalar a into a value of type T
//
// Every expansion and every argument is parenthesized so the macros stay correct inside larger
// expressions (for example loadpix(p).x) and with compound arguments.
#if cn != 3
#define loadpix(addr)  (*(__global const T*)(addr))
#define storepix(val, addr)  (*(__global T*)(addr) = (val))
#define TSIZE ((int)sizeof(T))
#define convertScalar(a) (a)
#else
// 3-channel pixels are accessed with vload3/vstore3 on the channel type and occupy
// exactly 3 * sizeof(T1) bytes.
#define loadpix(addr)  vload3(0, (__global const T1*)(addr))
#define storepix(val, addr) vstore3((val), 0, (__global T1*)(addr))
#define TSIZE ((int)sizeof(T1)*3)
#define convertScalar(a) ((T)((a).x, (a).y, (a).z))
#endif
67793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
// Fixed-point interpolation table dimensions.
enum
{
    // number of bits used for one fractional coordinate
    INTER_BITS = 5,
    // number of distinct fractional positions along one axis (2^INTER_BITS)
    INTER_TAB_SIZE = (1 << INTER_BITS),
    // number of distinct (x, y) fractional position pairs
    INTER_TAB_SIZE2 = (1 << (2 * INTER_BITS))
};
74793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
// In nearest-neighbour mode convertToWT is defined here as an empty macro, so wrapping a value in it
// leaves the value unchanged. No definition is provided here for the other interpolation modes.
#if defined(INTER_NEAREST)
#define convertToWT
#endif
78793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
// EXTRAPOLATE(v2, v): produce the pixel value v for the out-of-range source coordinate v2 (an int2),
// according to the border mode selected at build time. Exactly one BORDER_* macro must be defined.
//
// The macro is a complete statement (or block), so call sites do not append a semicolon.
// It reads the following names from the invoking scope: scalar, srcptr, src_step, src_offset,
// src_cols and src_rows. In every mode except BORDER_CONSTANT, v2 is overwritten with the
// remapped in-range coordinate.

// Load the pixel at the (already in-range) coordinate v2 into v.
#define EXTRAPOLATE_FETCH(v2, v) \
    v = convertToWT(loadpix((__global const T*)(srcptr + mad24((v2).y, src_step, (v2).x * TSIZE + src_offset))))

#if defined(BORDER_CONSTANT)

// Constant border: every out-of-range sample is the border value.
#define EXTRAPOLATE(v2, v) v = scalar;

#elif defined(BORDER_REPLICATE)

// Replicate border: clamp the coordinate to the nearest edge pixel.
#define EXTRAPOLATE(v2, v) \
    { \
        v2 = max(min(v2, (int2)(src_cols - 1, src_rows - 1)), (int2)(0)); \
        EXTRAPOLATE_FETCH(v2, v); \
    }

#elif defined(BORDER_WRAP)

// Bring coordinate c into [0, n) by wrapping around: negative values are shifted up by a whole
// number of periods, values at or above n are reduced modulo n.
#define EXTRAPOLATE_WRAP(c, n) \
    if (c < 0) \
        c -= ((c - (n) + 1) / (n)) * (n); \
    if (c >= (n)) \
        c %= (n);

// Wrap border: the image repeats periodically along both axes.
#define EXTRAPOLATE(v2, v) \
    { \
        EXTRAPOLATE_WRAP(v2.x, src_cols) \
        EXTRAPOLATE_WRAP(v2.y, src_rows) \
        EXTRAPOLATE_FETCH(v2, v); \
    }

#elif defined(BORDER_REFLECT) || defined(BORDER_REFLECT_101)

// delta is 0 for BORDER_REFLECT (edge pixel is repeated) and 1 for BORDER_REFLECT_101
// (edge pixel is not repeated).
#if defined(BORDER_REFLECT)
#define DELTA int delta = 0
#else
#define DELTA int delta = 1
#endif

// Mirror coordinate c back into [0, n). A single-pixel axis always maps to 0; otherwise the
// coordinate is reflected about the violated edge until it lands inside the range.
// Expects a variable named delta in scope.
#define EXTRAPOLATE_REFLECT(c, n) \
    if ((n) == 1) \
    { \
        c = 0; \
    } \
    else \
    { \
        do \
        { \
            c = (c < 0) ? (-c - 1 + delta) : ((n) - 1 - (c - (n)) - delta); \
        } \
        while (c >= (n) || c < 0); \
    }

// Reflect border: mirror the coordinate along each axis independently.
#define EXTRAPOLATE(v2, v) \
    { \
        DELTA; \
        EXTRAPOLATE_REFLECT(v2.x, src_cols) \
        EXTRAPOLATE_REFLECT(v2.y, src_rows) \
        EXTRAPOLATE_FETCH(v2, v); \
    }

#else
#error No extrapolation method
#endif
138793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
// Non-zero when the source coordinate (gx, gy) lies outside the source image, i.e. outside
// [0, src_cols) x [0, src_rows). src_cols and src_rows are read from the invoking scope.
// Arguments are parenthesized so that compound expressions (e.g. a conditional) are evaluated
// as a unit before being compared.
#define NEED_EXTRAPOLATION(gx, gy) ((gx) >= src_cols || (gy) >= src_rows || (gx) < 0 || (gy) < 0)
140793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
141793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#ifdef INTER_NEAREST
142793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
// Nearest-neighbour remap driven by two separate float maps.
//
// For each destination pixel, map1 holds the source x coordinate and map2 the source y coordinate
// (one float each). Both are rounded to the nearest integer with saturation. If the resulting
// coordinate lies outside the source image the pixel is produced by EXTRAPOLATE (border mode chosen
// at build time); otherwise the source pixel is copied unchanged.
//
// Each work-item handles destination column get_global_id(0) and up to rowsPerWI consecutive rows
// starting at get_global_id(1) * rowsPerWI. All *_step and *_offset arguments are in bytes.
// nVal is the border value consumed by EXTRAPOLATE in BORDER_CONSTANT mode.
__kernel void remap_2_32FC1(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                            __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                            __global const uchar * map1ptr, int map1_step, int map1_offset,
                            __global const uchar * map2ptr, int map2_step, int map2_offset,
                            ST nVal)
{
    const int x = get_global_id(0);
    int y = get_global_id(1) * rowsPerWI;

    if (x >= dst_cols)
        return;

    // EXTRAPOLATE refers to this variable by name, so it must be called "scalar"
    T scalar = convertScalar(nVal);

    // byte offsets of the first row handled by this work-item
    int dst_index = mad24(y, dst_step, mad24(x, TSIZE, dst_offset));
    int map1_index = mad24(y, map1_step, mad24(x, (int)sizeof(float), map1_offset));
    int map2_index = mad24(y, map2_step, mad24(x, (int)sizeof(float), map2_offset));

    #pragma unroll
    for (int row = 0; row < rowsPerWI;
         ++row, ++y, map1_index += map1_step, map2_index += map2_step, dst_index += dst_step)
    {
        if (y >= dst_rows)
            continue;

        __global const float * mapx = (__global const float *)(map1ptr + map1_index);
        __global const float * mapy = (__global const float *)(map2ptr + map2_index);
        __global T * dst = (__global T *)(dstptr + dst_index);

        // source coordinate, rounded to nearest with saturation
        int2 srcxy = (int2)(convert_int_sat_rte(mapx[0]), convert_int_sat_rte(mapy[0]));
        const int sx = srcxy.x, sy = srcxy.y;

        T pix;
        if (NEED_EXTRAPOLATION(sx, sy))
        {
            EXTRAPOLATE(srcxy, pix)
        }
        else
        {
            const int src_index = mad24(sy, src_step, mad24(sx, TSIZE, src_offset));
            pix = loadpix((__global const T *)(srcptr + src_index));
        }
        storepix(pix, dst);
    }
}
189793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
// Nearest-neighbour remap driven by a single two-channel float map.
//
// For each destination pixel the map holds a float2 (x, y) source coordinate, which is rounded to
// the nearest integer with saturation. Out-of-range coordinates are resolved by EXTRAPOLATE (border
// mode chosen at build time); in-range coordinates copy the source pixel unchanged.
//
// Each work-item handles destination column get_global_id(0) and up to rowsPerWI consecutive rows
// starting at get_global_id(1) * rowsPerWI. All *_step and *_offset arguments are in bytes.
// nVal is the border value consumed by EXTRAPOLATE in BORDER_CONSTANT mode.
__kernel void remap_32FC2(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                          __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                          __global const uchar * mapptr, int map_step, int map_offset,
                          ST nVal)
{
    const int x = get_global_id(0);
    int y = get_global_id(1) * rowsPerWI;

    if (x >= dst_cols)
        return;

    // EXTRAPOLATE refers to this variable by name, so it must be called "scalar"
    T scalar = convertScalar(nVal);

    // byte offsets of the first row handled by this work-item
    int map_index = mad24(y, map_step, mad24(x, (int)sizeof(float2), map_offset));
    int dst_index = mad24(y, dst_step, mad24(x, TSIZE, dst_offset));

    #pragma unroll
    for (int row = 0; row < rowsPerWI;
         ++row, ++y, map_index += map_step, dst_index += dst_step)
    {
        if (y >= dst_rows)
            continue;

        __global const float2 * mapxy = (__global const float2 *)(mapptr + map_index);
        __global T * dst = (__global T *)(dstptr + dst_index);

        // source coordinate, rounded to nearest with saturation
        int2 srcxy = convert_int2_sat_rte(mapxy[0]);
        const int sx = srcxy.x, sy = srcxy.y;

        T pix;
        if (NEED_EXTRAPOLATION(sx, sy))
        {
            EXTRAPOLATE(srcxy, pix)
        }
        else
        {
            const int src_index = mad24(sy, src_step, mad24(sx, TSIZE, src_offset));
            pix = loadpix((__global const T *)(srcptr + src_index));
        }
        storepix(pix, dst);
    }
}
229793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
// Nearest-neighbour remap driven by a single two-channel 16-bit integer map.
//
// For each destination pixel the map holds a short2 (x, y) source coordinate, which is widened to
// int2 and used directly. Out-of-range coordinates are resolved by EXTRAPOLATE (border mode chosen
// at build time); in-range coordinates copy the source pixel unchanged.
//
// Each work-item handles destination column get_global_id(0) and up to rowsPerWI consecutive rows
// starting at get_global_id(1) * rowsPerWI. All *_step and *_offset arguments are in bytes.
// nVal is the border value consumed by EXTRAPOLATE in BORDER_CONSTANT mode.
__kernel void remap_16SC2(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                          __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                          __global const uchar * mapptr, int map_step, int map_offset,
                          ST nVal)
{
    const int x = get_global_id(0);
    int y = get_global_id(1) * rowsPerWI;

    if (x >= dst_cols)
        return;

    // EXTRAPOLATE refers to this variable by name, so it must be called "scalar"
    T scalar = convertScalar(nVal);

    // byte offsets of the first row handled by this work-item
    int map_index = mad24(y, map_step, mad24(x, (int)sizeof(short2), map_offset));
    int dst_index = mad24(y, dst_step, mad24(x, TSIZE, dst_offset));

    #pragma unroll
    for (int row = 0; row < rowsPerWI;
         ++row, ++y, map_index += map_step, dst_index += dst_step)
    {
        if (y >= dst_rows)
            continue;

        __global const short2 * mapxy = (__global const short2 *)(mapptr + map_index);
        __global T * dst = (__global T *)(dstptr + dst_index);

        // integer source coordinate taken straight from the map
        int2 srcxy = convert_int2(mapxy[0]);
        const int sx = srcxy.x, sy = srcxy.y;

        T pix;
        if (NEED_EXTRAPOLATION(sx, sy))
        {
            EXTRAPOLATE(srcxy, pix)
        }
        else
        {
            const int src_index = mad24(sy, src_step, mad24(sx, TSIZE, src_offset));
            pix = loadpix((__global const T *)(srcptr + src_index));
        }
        storepix(pix, dst);
    }
}
269793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
// Nearest-neighbour remap driven by a fixed-point map pair.
//
// map1 holds a short2 integer (x, y) coordinate per destination pixel; map2 holds a ushort whose low
// 2 * INTER_BITS bits encode the fractional position (x fraction in the low INTER_BITS bits, y
// fraction in the next INTER_BITS bits). The integer coordinate is incremented by one along an axis
// when that axis' fractional index is below half of INTER_TAB_SIZE.
// NOTE(review): incrementing when the fraction is *below* one half looks unusual for rounding;
// it is preserved as-is - confirm it matches the reference (CPU) implementation.
//
// Out-of-range coordinates are resolved by EXTRAPOLATE (border mode chosen at build time); in-range
// coordinates copy the source pixel unchanged.
//
// Each work-item handles destination column get_global_id(0) and up to rowsPerWI consecutive rows
// starting at get_global_id(1) * rowsPerWI. All *_step and *_offset arguments are in bytes.
// nVal is the border value consumed by EXTRAPOLATE in BORDER_CONSTANT mode.
__kernel void remap_16SC2_16UC1(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                                __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                                __global const uchar * map1ptr, int map1_step, int map1_offset,
                                __global const uchar * map2ptr, int map2_step, int map2_offset,
                                ST nVal)
{
    const int x = get_global_id(0);
    int y = get_global_id(1) * rowsPerWI;

    if (x >= dst_cols)
        return;

    // EXTRAPOLATE refers to this variable by name, so it must be called "scalar"
    T scalar = convertScalar(nVal);

    // byte offsets of the first row handled by this work-item
    int map1_index = mad24(y, map1_step, mad24(x, (int)sizeof(short2), map1_offset));
    int map2_index = mad24(y, map2_step, mad24(x, (int)sizeof(ushort), map2_offset));
    int dst_index = mad24(y, dst_step, mad24(x, TSIZE, dst_offset));

    #pragma unroll
    for (int row = 0; row < rowsPerWI;
         ++row, ++y, map1_index += map1_step, map2_index += map2_step, dst_index += dst_step)
    {
        if (y >= dst_rows)
            continue;

        __global const short2 * coord_map = (__global const short2 *)(map1ptr + map1_index);
        __global const ushort * frac_map = (__global const ushort *)(map2ptr + map2_index);
        __global T * dst = (__global T *)(dstptr + dst_index);

        // split the packed fractional index into its x and y parts
        const int frac = convert_int(frac_map[0]) & (INTER_TAB_SIZE2 - 1);
        const int frac_x = frac & (INTER_TAB_SIZE - 1);
        const int frac_y = frac >> INTER_BITS;
        const int half_tab = INTER_TAB_SIZE >> 1;

        int step_x = 0, step_y = 0;
        if (frac_x < half_tab)
            step_x = 1;
        if (frac_y < half_tab)
            step_y = 1;

        int2 srcxy = convert_int2(coord_map[0]) + (int2)(step_x, step_y);
        const int sx = srcxy.x, sy = srcxy.y;

        T pix;
        if (NEED_EXTRAPOLATION(sx, sy))
        {
            EXTRAPOLATE(srcxy, pix)
        }
        else
        {
            const int src_index = mad24(sy, src_step, mad24(sx, TSIZE, src_offset));
            pix = loadpix((__global const T *)(srcptr + src_index));
        }
        storepix(pix, dst);
    }
}
315793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
316793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#elif defined INTER_LINEAR
317793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
// Weight table with one pair of floats per fractional position k = 0 .. INTER_TAB_SIZE - 1:
// entry 2k is 1 - k/32 and entry 2k + 1 is k/32, so each pair sums to 1.
// Presumably these are the two linear-interpolation weights along one axis - confirm at the use site.
__constant float coeffs[2 * INTER_TAB_SIZE] =
{
    1.000000f, 0.000000f,   0.968750f, 0.031250f,   0.937500f, 0.062500f,   0.906250f, 0.093750f,   // k =  0 ..  3
    0.875000f, 0.125000f,   0.843750f, 0.156250f,   0.812500f, 0.187500f,   0.781250f, 0.218750f,   // k =  4 ..  7
    0.750000f, 0.250000f,   0.718750f, 0.281250f,   0.687500f, 0.312500f,   0.656250f, 0.343750f,   // k =  8 .. 11
    0.625000f, 0.375000f,   0.593750f, 0.406250f,   0.562500f, 0.437500f,   0.531250f, 0.468750f,   // k = 12 .. 15
    0.500000f, 0.500000f,   0.468750f, 0.531250f,   0.437500f, 0.562500f,   0.406250f, 0.593750f,   // k = 16 .. 19
    0.375000f, 0.625000f,   0.343750f, 0.656250f,   0.312500f, 0.687500f,   0.281250f, 0.718750f,   // k = 20 .. 23
    0.250000f, 0.750000f,   0.218750f, 0.781250f,   0.187500f, 0.812500f,   0.156250f, 0.843750f,   // k = 24 .. 27
    0.125000f, 0.875000f,   0.093750f, 0.906250f,   0.062500f, 0.937500f,   0.031250f, 0.968750f    // k = 28 .. 31
};
325793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
// Bilinear remap driven by a fixed-point map pair.
//
// map1 is read as short2 and gives the integer source coordinate of the
// top-left tap; map2 is read as ushort and carries the packed fractional
// offsets.  get_global_id(0) selects the destination column; each work-item
// then processes up to rowsPerWI consecutive rows starting at
// get_global_id(1) * rowsPerWI.
//
// All *_step / *_offset arguments are applied as byte offsets to the
// corresponding uchar pointers.  nVal is converted once to the working type WT
// and used as the initial value of every tap.
__kernel void remap_16SC2_16UC1(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                                __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                                __global const uchar * map1ptr, int map1_step, int map1_offset,
                                __global const uchar * map2ptr, int map2_step, int map2_offset,
                                ST nVal)
{
    int x = get_global_id(0);
    int y = get_global_id(1) * rowsPerWI;

    // Columns past the right edge of the destination have nothing to do.
    if (x >= dst_cols)
        return;

    // NOTE(review): the name `scalar` is kept as-is because the border macros
    // (defined outside this section) may refer to it by name -- confirm.
    WT scalar = convertToWT(convertScalar(nVal));

    // Byte offsets of the first row handled by this work-item.
    int dstOfs = mad24(y, dst_step, mad24(x, TSIZE, dst_offset));
    int coordOfs = mad24(y, map1_step, mad24(x, (int)sizeof(short2), map1_offset));
    int fracOfs = mad24(y, map2_step, mad24(x, (int)sizeof(ushort), map2_offset));

    #pragma unroll
    for (int r = 0; r < rowsPerWI; ++r)
    {
        if (y < dst_rows)
        {
            __global const short2 * coordMap = (__global const short2 *)(map1ptr + coordOfs);
            __global const ushort * fracMap = (__global const ushort *)(map2ptr + fracOfs);
            __global T * dstPix = (__global T *)(dstptr + dstOfs);

            // Integer coordinates of the 2x2 neighbourhood (x grows right, y grows down).
            int2 p00 = convert_int2(*coordMap);
            int2 p10 = (int2)(p00.x + 1, p00.y);
            int2 p01 = (int2)(p00.x, p00.y + 1);
            int2 p11 = (int2)(p00.x + 1, p00.y + 1);

            // Unpack the fractional offsets: the bits selected by
            // (INTER_TAB_SIZE - 1) give x, the bits above INTER_BITS give y;
            // both are scaled by 1 / INTER_TAB_SIZE.
            ushort fracBits = (ushort)(*fracMap & (INTER_TAB_SIZE2 - 1));
            WT2 weight = (WT2)(fracBits & (INTER_TAB_SIZE - 1), fracBits >> INTER_BITS) / (WT2)(INTER_TAB_SIZE);

            WT v00 = scalar, v10 = scalar, v01 = scalar, v11 = scalar;

            // Each tap is either loaded directly or produced by the border macro.
            if (!NEED_EXTRAPOLATION(p00.x, p00.y))
                v00 = convertToWT(loadpix((__global const T *)(srcptr + mad24(p00.y, src_step, p00.x * TSIZE + src_offset))));
            else
                EXTRAPOLATE(p00, v00);

            if (!NEED_EXTRAPOLATION(p10.x, p10.y))
                v10 = convertToWT(loadpix((__global const T *)(srcptr + mad24(p10.y, src_step, p10.x * TSIZE + src_offset))));
            else
                EXTRAPOLATE(p10, v10);

            if (!NEED_EXTRAPOLATION(p01.x, p01.y))
                v01 = convertToWT(loadpix((__global const T *)(srcptr + mad24(p01.y, src_step, p01.x * TSIZE + src_offset))));
            else
                EXTRAPOLATE(p01, v01);

            if (!NEED_EXTRAPOLATION(p11.x, p11.y))
                v11 = convertToWT(loadpix((__global const T *)(srcptr + mad24(p11.y, src_step, p11.x * TSIZE + src_offset))));
            else
                EXTRAPOLATE(p11, v11);

            // Weighted sum of the four taps; the evaluation order is kept
            // fixed so floating-point results do not change.
            WT blended = v00 * (1 - weight.x) * (1 - weight.y) +
                         v10 * (weight.x)     * (1 - weight.y) +
                         v01 * (1 - weight.x) * (weight.y) +
                         v11 * (weight.x)     * (weight.y);
            storepix(convertToT(blended), dstPix);
        }

        // Advance to the next destination row.
        ++y;
        coordOfs += map1_step;
        fracOfs += map2_step;
        dstOfs += dst_step;
    }
}
389793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
// Bilinear remap whose source coordinates are split across two float maps:
// map1 supplies the x coordinate and map2 the y coordinate of the source
// sample for each destination pixel.
//
// get_global_id(0) selects the destination column; each work-item processes
// up to rowsPerWI consecutive rows starting at get_global_id(1) * rowsPerWI.
//
// Parameters:
//   srcptr, src_step, src_offset  - source bytes, row stride and start offset (bytes)
//   src_rows, src_cols            - source dimensions used for bounds checks
//   dstptr, dst_step, dst_offset  - destination bytes, row stride and start offset (bytes)
//   dst_rows, dst_cols            - destination dimensions
//   map1ptr / map2ptr (+ step, offset) - x and y coordinate maps, read as float
//   nVal                          - border value, converted once to the working type WT
__kernel void remap_2_32FC1(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                            __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                            __global const uchar * map1ptr, int map1_step, int map1_offset,
                            __global const uchar * map2ptr, int map2_step, int map2_offset,
                            ST nVal)
{
    int x = get_global_id(0);
    int y = get_global_id(1) * rowsPerWI;

    if (x < dst_cols)
    {
        // Row-invariant border value; both border branches below share it.
        WT scalar = convertToWT(convertScalar(nVal));
        int dst_index = mad24(y, dst_step, mad24(x, TSIZE, dst_offset));
        int map1_index = mad24(y, map1_step, mad24(x, (int)sizeof(float), map1_offset));
        int map2_index = mad24(y, map2_step, mad24(x, (int)sizeof(float), map2_offset));

        #pragma unroll
        for (int i = 0; i < rowsPerWI; ++i, ++y,
            map1_index += map1_step, map2_index += map2_step, dst_index += dst_step)
            if (y < dst_rows)
            {
                __global const float * map1 = (__global const float *)(map1ptr + map1_index);
                __global const float * map2 = (__global const float *)(map2ptr + map2_index);
                __global T * dst = (__global T *)(dstptr + dst_index);

#if defined BORDER_CONSTANT
                float xf = map1[0], yf = map2[0];
                // Integer part of the coordinate, obtained in fixed point.
                // INTER_TAB_SIZE is an integer constant, so it is cast explicitly:
                // mad() requires all three arguments to share one floating type.
                int sx = convert_int_sat_rtz(mad(xf, (float)INTER_TAB_SIZE, 0.5f)) >> INTER_BITS;
                int sy = convert_int_sat_rtz(mad(yf, (float)INTER_TAB_SIZE, 0.5f)) >> INTER_BITS;

                // Each fractional position selects a pair of weights in the coeffs table.
                __constant float * coeffs_x = coeffs + ((convert_int_rte(xf * INTER_TAB_SIZE) & (INTER_TAB_SIZE - 1)) << 1);
                __constant float * coeffs_y = coeffs + ((convert_int_rte(yf * INTER_TAB_SIZE) & (INTER_TAB_SIZE - 1)) << 1);

                WT sum = (WT)(0), xsum;
                int src_index = mad24(sy, src_step, mad24(sx, TSIZE, src_offset));

                // Two source rows, two taps per row.
                #pragma unroll
                for (int yp = 0; yp < 2; ++yp, src_index += src_step)
                {
                    if (sy + yp >= 0 && sy + yp < src_rows)
                    {
                        xsum = (WT)(0);
                        if (sx >= 0 && sx + 2 < src_cols)
                        {
                            // Both taps are inside the row: no per-tap bounds checks.
#if depth == 0 && cn == 1
                            uchar2 value = vload2(0, srcptr + src_index);
                            xsum = dot(convert_float2(value), (float2)(coeffs_x[0], coeffs_x[1]));
#else
                            #pragma unroll
                            for (int xp = 0; xp < 2; ++xp)
                                xsum = fma(convertToWT(loadpix(srcptr + mad24(xp, TSIZE, src_index))), coeffs_x[xp], xsum);
#endif
                        }
                        else
                        {
                            // Near a left/right edge: out-of-range taps use the border value.
                            #pragma unroll
                            for (int xp = 0; xp < 2; ++xp)
                                xsum = fma(sx + xp >= 0 && sx + xp < src_cols ?
                                           convertToWT(loadpix(srcptr + mad24(xp, TSIZE, src_index))) : scalar, coeffs_x[xp], xsum);
                        }
                        sum = fma(xsum, coeffs_y[yp], sum);
                    }
                    else
                        // Whole source row is outside the image: it contributes the border value.
                        sum = fma(scalar, coeffs_y[yp], sum);
                }

                storepix(convertToT(sum), dst);
#else
                float2 map_data = (float2)(map1[0], map2[0]);

                // Top-left tap is the coordinate rounded toward negative infinity;
                // the other three taps are its right / lower / diagonal neighbours.
                int2 map_dataA = convert_int2_sat_rtn(map_data);
                int2 map_dataB = (int2)(map_dataA.x + 1, map_dataA.y);
                int2 map_dataC = (int2)(map_dataA.x, map_dataA.y + 1);
                int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y + 1);

                // Fractional offset, quantised to multiples of 1 / INTER_TAB_SIZE.
                float2 _u = map_data - convert_float2(map_dataA);
                WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)INTER_TAB_SIZE)) / (WT2)INTER_TAB_SIZE;
                // `scalar` from the enclosing scope is reused here; it was previously
                // re-declared (shadowed) with the same value on every row.
                WT a = scalar, b = scalar, c = scalar, d = scalar;

                if (!NEED_EXTRAPOLATION(map_dataA.x, map_dataA.y))
                    a = convertToWT(loadpix((__global const T *)(srcptr + mad24(map_dataA.y, src_step, map_dataA.x * TSIZE + src_offset))));
                else
                    EXTRAPOLATE(map_dataA, a);

                if (!NEED_EXTRAPOLATION(map_dataB.x, map_dataB.y))
                    b = convertToWT(loadpix((__global const T *)(srcptr + mad24(map_dataB.y, src_step, map_dataB.x * TSIZE + src_offset))));
                else
                    EXTRAPOLATE(map_dataB, b);

                if (!NEED_EXTRAPOLATION(map_dataC.x, map_dataC.y))
                    c = convertToWT(loadpix((__global const T *)(srcptr + mad24(map_dataC.y, src_step, map_dataC.x * TSIZE + src_offset))));
                else
                    EXTRAPOLATE(map_dataC, c);

                if (!NEED_EXTRAPOLATION(map_dataD.x, map_dataD.y))
                    d = convertToWT(loadpix((__global const T *)(srcptr + mad24(map_dataD.y, src_step, map_dataD.x * TSIZE + src_offset))));
                else
                    EXTRAPOLATE(map_dataD, d);

                // Bilinear blend of the four taps.
                WT dst_data = a * (1 - u.x) * (1 - u.y) +
                              b * (u.x)     * (1 - u.y) +
                              c * (1 - u.x) * (u.y) +
                              d * (u.x)     * (u.y);
                storepix(convertToT(dst_data), dst);
#endif
            }
    }
}
499793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
// Bilinear remap driven by a single two-channel float map: each map element
// is read as a float2 holding the (x, y) source coordinate for the matching
// destination pixel.
//
// get_global_id(0) selects the destination column; each work-item processes
// up to rowsPerWI consecutive rows starting at get_global_id(1) * rowsPerWI.
// All *_step / *_offset arguments are applied as byte offsets to the
// corresponding uchar pointers.  nVal is converted once to the working type WT
// and used as the initial value of every tap.
__kernel void remap_32FC2(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                          __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                          __global const uchar * mapptr, int map_step, int map_offset,
                          ST nVal)
{
    int x = get_global_id(0);
    int y = get_global_id(1) * rowsPerWI;

    // Columns past the right edge of the destination have nothing to do.
    if (x >= dst_cols)
        return;

    // NOTE(review): the name `scalar` is kept as-is because the border macros
    // (defined outside this section) may refer to it by name -- confirm.
    WT scalar = convertToWT(convertScalar(nVal));

    // Byte offsets of the first row handled by this work-item.
    int dstOfs = mad24(y, dst_step, mad24(x, TSIZE, dst_offset));
    int coordOfs = mad24(y, map_step, mad24(x, (int)sizeof(float2), map_offset));

    #pragma unroll
    for (int r = 0; r < rowsPerWI; ++r)
    {
        if (y < dst_rows)
        {
            __global const float2 * coordMap = (__global const float2 *)(mapptr + coordOfs);
            __global T * dstPix = (__global T *)(dstptr + dstOfs);

            // Top-left tap is the coordinate rounded toward negative infinity;
            // the remaining taps are its right, lower and diagonal neighbours.
            float2 pos = *coordMap;
            int2 p00 = convert_int2_sat_rtn(pos);
            int2 p10 = (int2)(p00.x + 1, p00.y);
            int2 p01 = (int2)(p00.x, p00.y + 1);
            int2 p11 = (int2)(p00.x + 1, p00.y + 1);

            // Fractional offset, quantised to multiples of 1 / INTER_TAB_SIZE.
            float2 rem = pos - convert_float2(p00);
            WT2 weight = convertToWT2(convert_int2_rte(convertToWT2(rem) * (WT2)INTER_TAB_SIZE)) / (WT2)INTER_TAB_SIZE;

            WT v00 = scalar, v10 = scalar, v01 = scalar, v11 = scalar;

            // Each tap is either loaded directly or produced by the border macro.
            if (!NEED_EXTRAPOLATION(p00.x, p00.y))
                v00 = convertToWT(loadpix((__global const T *)(srcptr + mad24(p00.y, src_step, p00.x * TSIZE + src_offset))));
            else
                EXTRAPOLATE(p00, v00);

            if (!NEED_EXTRAPOLATION(p10.x, p10.y))
                v10 = convertToWT(loadpix((__global const T *)(srcptr + mad24(p10.y, src_step, p10.x * TSIZE + src_offset))));
            else
                EXTRAPOLATE(p10, v10);

            if (!NEED_EXTRAPOLATION(p01.x, p01.y))
                v01 = convertToWT(loadpix((__global const T *)(srcptr + mad24(p01.y, src_step, p01.x * TSIZE + src_offset))));
            else
                EXTRAPOLATE(p01, v01);

            if (!NEED_EXTRAPOLATION(p11.x, p11.y))
                v11 = convertToWT(loadpix((__global const T *)(srcptr + mad24(p11.y, src_step, p11.x * TSIZE + src_offset))));
            else
                EXTRAPOLATE(p11, v11);

            // Weighted sum of the four taps; the evaluation order is kept
            // fixed so floating-point results do not change.
            WT blended = v00 * (1 - weight.x) * (1 - weight.y) +
                         v10 * (weight.x)     * (1 - weight.y) +
                         v01 * (1 - weight.x) * (weight.y) +
                         v11 * (weight.x)     * (weight.y);
            storepix(convertToT(blended), dstPix);
        }

        // Advance to the next destination row.
        ++y;
        coordOfs += map_step;
        dstOfs += dst_step;
    }
}
560793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
561793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#endif
562