1793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler/*M///////////////////////////////////////////////////////////////////////////////////////
2793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
3793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
5793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//  By downloading, copying, installing or using the software you agree to this license.
6793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//  If you do not agree to this license, do not download, install,
7793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//  copy or use the software.
8793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
9793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
10793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//                           License Agreement
11793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//                For Open Source Computer Vision Library
12793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
13793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
14793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
15793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// Third party copyrights are property of their respective owners.
16793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
17793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// @Authors
18793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//    Zhang Ying, zhangying913@gmail.com
19793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//	  Niko Li, newlife20080214@gmail.com
20793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// Redistribution and use in source and binary forms, with or without modification,
21793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// are permitted provided that the following conditions are met:
22793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
23793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//   * Redistribution's of source code must retain the above copyright notice,
24793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//     this list of conditions and the following disclaimer.
25793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
26793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//   * Redistribution's in binary form must reproduce the above copyright notice,
27793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//     this list of conditions and the following disclaimer in the documentation
28793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//     and/or other materials provided with the distribution.
29793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
30793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//   * The name of the copyright holders may not be used to endorse or promote products
31793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//     derived from this software without specific prior written permission.
32793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
33793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// This software is provided by the copyright holders and contributors as is and
34793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// any express or implied warranties, including, but not limited to, the implied
35793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// warranties of merchantability and fitness for a particular purpose are disclaimed.
36793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// In no event shall the Intel Corporation or contributors be liable for any direct,
37793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// indirect, incidental, special, exemplary, or consequential damages
38793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// (including, but not limited to, procurement of substitute goods or services;
39793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// loss of use, data, or profits; or business interruption) however caused
40793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// and on any theory of liability, whether in contract, strict liability,
41793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// or tort (including negligence or otherwise) arising in any way out of
42793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// the use of this software, even if advised of the possibility of such damage.
43793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
44793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//M*/
45793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
46793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#ifdef DOUBLE_SUPPORT
47793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#ifdef cl_amd_fp64
48793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#pragma OPENCL EXTENSION cl_amd_fp64:enable
49793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#elif defined (cl_khr_fp64)
50793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#pragma OPENCL EXTENSION cl_khr_fp64:enable
51793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#endif
52793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#endif
53793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
// Fixed-point scale factor, 2^INTER_RESIZE_COEF_BITS. INTER_RESIZE_COEF_BITS is not
// defined in this file; it is expected to be supplied through the build options.
#define INTER_RESIZE_COEF_SCALE (1 << (INTER_RESIZE_COEF_BITS))
// Shift that undoes the product of two fixed-point weights (twice the coefficient bits).
#define CAST_BITS ((INTER_RESIZE_COEF_BITS) << 1)
// Index following x, clamped so it never exceeds the last valid index l-1.
// Arguments are parenthesized so that operator expressions can be passed safely.
#define INC(x,l) min((x) + 1, (l) - 1)

// Expands to nothing, so `noconvert(expr)` reduces to `(expr)`.
// NOTE(review): presumably used as the value of a convertToXX build option when no
// conversion is required - confirm against the host code.
#define noconvert
59793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
// Pixel access helpers.
//   loadpix(addr)       - read one pixel from the byte address addr
//   storepix(val, addr) - write one pixel to the byte address addr
//   TSIZE               - byte distance between two consecutive pixels, as an int
// For every channel count except 3 a pixel is accessed directly as a T. For 3 channels
// the pixel is read/written with vload3/vstore3 over T1 elements and the pixel stride is
// cn * sizeof(T1) (sizeof a 3-component OpenCL vector type is not 3 * sizeof(T1)).
// All expression-like expansions are fully parenthesized so they compose safely with
// surrounding operators (e.g. `loadpix(p).x`, `n % TSIZE`).
#if cn != 3
#define loadpix(addr)  (*(__global const T *)(addr))
#define storepix(val, addr)  (*(__global T *)(addr) = (val))
#define TSIZE ((int)sizeof(T))
#else
#define loadpix(addr)  vload3(0, (__global const T1 *)(addr))
#define storepix(val, addr) vstore3(val, 0, (__global T1 *)(addr))
#define TSIZE ((int)sizeof(T1) * cn)
#endif
69793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
70793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#if defined USE_SAMPLER
71793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
// Token-pasting helpers and a scalar alias so that "float" + channel count can be
// spelled uniformly (float1 == float). They are kept for the CAT-based definition of
// INTERMEDIATE_TYPE, which is currently disabled in favour of the explicit table below:
//     #define INTERMEDIATE_TYPE CAT(float, cn)
#define __CAT(x, y) x##y
#define CAT(x, y) __CAT(x, y)
#define float1 float

// Per channel count: the float type holding one sampled pixel, and the swizzle that
// extracts exactly cn components from the float4 returned by read_imagef.
#if cn == 1
#define INTERMEDIATE_TYPE  float
#define READ_IMAGE(X,Y,Z)  read_imagef(X,Y,Z).x
#elif cn == 2
#define INTERMEDIATE_TYPE  float2
#define READ_IMAGE(X,Y,Z)  read_imagef(X,Y,Z).xy
#elif cn == 3
#define INTERMEDIATE_TYPE  float3
#define READ_IMAGE(X,Y,Z)  read_imagef(X,Y,Z).xyz
#elif cn == 4
#define INTERMEDIATE_TYPE  float4
#define READ_IMAGE(X,Y,Z)  read_imagef(X,Y,Z)
#endif

// Factor applied to the sampled value before it is converted to the destination type.
// NOTE(review): the constants look like the maximum values of 8-bit unsigned, 8-bit
// signed, 16-bit unsigned and 16-bit signed data for depth 0..3 (i.e. undoing a
// normalized image read) - confirm against the host-side image format selection.
#if depth == 0
#define RESULT_SCALE    255.0f
#elif depth == 1
#define RESULT_SCALE    127.0f
#elif depth == 2
#define RESULT_SCALE    65535.0f
#elif depth == 3
#define RESULT_SCALE    32767.0f
#else
#define RESULT_SCALE    1.0f
#endif
102793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
// Resizes an image by sampling the source through the hardware sampler.
//
//   srcImage           - source image object, read with linear filtering, unnormalized
//                        coordinates and clamp-to-edge addressing
//   dstptr             - destination buffer (byte pointer)
//   dststep, dstoffset - destination row pitch and start offset, both in bytes
//   dstrows, dstcols   - destination size in pixels
//   ifx, ify           - factors that map a destination pixel centre to a source
//                        coordinate (source = (dst + 0.5) * factor)
//
// One work-item produces one destination pixel; work-items outside the destination
// rectangle sample the image but store nothing.
__kernel void resizeSampler(__read_only image2d_t srcImage,
                            __global uchar* dstptr, int dststep, int dstoffset,
                            int dstrows, int dstcols,
                            float ifx, float ify)
{
    const sampler_t linearSampler = CLK_FILTER_LINEAR |
                                    CLK_ADDRESS_CLAMP_TO_EDGE |
                                    CLK_NORMALIZED_COORDS_FALSE;

    const int col = get_global_id(0);
    const int row = get_global_id(1);

    // Source position of this destination pixel's centre.
    const float2 srcPos = (float2)((col + 0.5f) * ifx, (row + 0.5f) * ify);

    INTERMEDIATE_TYPE scaled = READ_IMAGE(srcImage, linearSampler, srcPos) * RESULT_SCALE;

#if depth <= 4
    // NOTE(review): depth <= 4 presumably selects the integer destination types, which
    // need rounding to nearest before conversion - confirm the depth encoding.
    scaled = round(scaled);
#endif
    T pixel = convertToDT(scaled);

    if (col >= dstcols || row >= dstrows)
        return;

    storepix(pixel, dstptr + mad24(row, dststep, dstoffset + col*TSIZE));
}
130793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
131793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#elif defined INTER_LINEAR_INTEGER
132793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
// Bilinear resize using precomputed integer offsets and fixed-point weights.
//
//   srcptr, src_step, src_offset, src_rows, src_cols - source buffer, row pitch (bytes),
//                                                      start offset (bytes) and size
//   dstptr, dst_step, dst_offset, dst_rows, dst_cols - same for the destination
//   buffer - lookup tables laid out back to back:
//              int   xofs[dst_cols]      left source column for each destination column
//              int   yofs[dst_rows]      top source row for each destination row
//              short ialpha[2*dst_cols]  horizontal weight pair per destination column
//              short ibeta[2*dst_rows]   vertical weight pair per destination row
//
// One work-item produces one destination pixel. Both the row and the column indices of
// the 2x2 source neighbourhood are clamped to the image, so the right/bottom neighbours
// never address memory beyond the last column/row even when the table entry already
// points at the image border.
__kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                       __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                       __global const uchar * buffer)
{
    int dx = get_global_id(0);
    int dy = get_global_id(1);

    if (dx < dst_cols && dy < dst_rows)
    {
        __global const int * xofs = (__global const int *)(buffer), * yofs = xofs + dst_cols;
        __global const short * ialpha = (__global const short *)(yofs + dst_rows);
        __global const short * ibeta = ialpha + ((dst_cols + dy) << 1);
        ialpha += dx << 1;

        // Clamp the neighbourhood on both axes. Previously only the rows were clamped and
        // the right neighbour was fetched at "left + TSIZE", which reads past the end of
        // the row when the left column is already the last one.
        int sx0 = clamp(xofs[dx], 0, src_cols - 1);
        int sx1 = min(sx0 + 1, src_cols - 1);
        int sy0 = clamp(yofs[dy], 0, src_rows - 1);
        int sy1 = clamp(yofs[dy] + 1, 0, src_rows - 1);

        short a0 = ialpha[0], a1 = ialpha[1];
        short b0 = ibeta[0], b1 = ibeta[1];

        WT data0 = convertToWT(loadpix(srcptr + mad24(sy0, src_step, mad24(sx0, TSIZE, src_offset))));
        WT data1 = convertToWT(loadpix(srcptr + mad24(sy0, src_step, mad24(sx1, TSIZE, src_offset))));
        WT data2 = convertToWT(loadpix(srcptr + mad24(sy1, src_step, mad24(sx0, TSIZE, src_offset))));
        WT data3 = convertToWT(loadpix(srcptr + mad24(sy1, src_step, mad24(sx1, TSIZE, src_offset))));

        // Horizontal blend, then vertical blend, dropping fractional bits in stages
        // (4 + 16 here, 2 more with rounding at the store: 22 bits in total).
        // NOTE(review): this assumes the host builds both weight tables with 11
        // fractional bits - confirm against the code that fills `buffer`.
        WT val = ( (((data0 * a0 + data1 * a1) >> 4) * b0) >> 16) +
                 ( (((data2 * a0 + data3 * a1) >> 4) * b1) >> 16);

        storepix(convertToDT((val + 2) >> 2),
                 dstptr + mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset)));
    }
}
166793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
167793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#elif defined INTER_LINEAR
168793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
// Bilinear resize computing source coordinates and weights on the fly.
//
//   srcptr, src_step, src_offset, src_rows, src_cols - source buffer, row pitch (bytes),
//                                                      start offset (bytes) and size
//   dstptr, dst_step, dst_offset, dst_rows, dst_cols - same for the destination
//   ifx, ify - factors mapping destination pixel centres to source coordinates
//
// One work-item produces one destination pixel. For depth <= 4 the blend is done with
// integer weights scaled by INTER_RESIZE_COEF_SCALE; otherwise plain float weights are
// used.
__kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                       __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                       float ifx, float ify)
{
    const int dx = get_global_id(0);
    const int dy = get_global_id(1);

    if (dx >= dst_cols || dy >= dst_rows)
        return;

    // Source position of the destination pixel centre, in source pixel-centre units.
    const float sx = (dx + 0.5f) * ifx - 0.5f;
    const float sy = (dy + 0.5f) * ify - 0.5f;

    int x = (int)floor(sx);
    int y = (int)floor(sy);

    // Fractional parts are the weights of the right / bottom neighbours.
    float u = sx - x;
    float v = sy - y;

    // Outside the image: snap to the border pixel and drop the fractional weight.
    if (x < 0)
    {
        x = 0;
        u = 0;
    }
    if (x >= src_cols)
    {
        x = src_cols - 1;
        u = 0;
    }
    if (y < 0)
    {
        y = 0;
        v = 0;
    }
    if (y >= src_rows)
    {
        y = src_rows - 1;
        v = 0;
    }

    // Right / bottom neighbours, clamped to the last column / row.
    const int x_ = min(x + 1, src_cols - 1);
    const int y_ = min(y + 1, src_rows - 1);

    // The 2x2 neighbourhood is the same for both arithmetic variants.
    WT data0 = convertToWT(loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset))));
    WT data1 = convertToWT(loadpix(srcptr + mad24(y, src_step, mad24(x_, TSIZE, src_offset))));
    WT data2 = convertToWT(loadpix(srcptr + mad24(y_, src_step, mad24(x, TSIZE, src_offset))));
    WT data3 = convertToWT(loadpix(srcptr + mad24(y_, src_step, mad24(x_, TSIZE, src_offset))));

#if depth <= 4
    // Fixed-point path: weights become integers in [0, INTER_RESIZE_COEF_SCALE].
    const float su = u * INTER_RESIZE_COEF_SCALE;
    const float sv = v * INTER_RESIZE_COEF_SCALE;

    const int U = rint(su);
    const int V = rint(sv);
    const int U1 = rint(INTER_RESIZE_COEF_SCALE - su);
    const int V1 = rint(INTER_RESIZE_COEF_SCALE - sv);

    const WT w00 = (WT)mul24(U1, V1);
    const WT w01 = (WT)mul24(U, V1);
    const WT w10 = (WT)mul24(U1, V);
    const WT w11 = (WT)mul24(U, V);

    WT val = mul24(w00, data0) + mul24(w01, data1) +
             mul24(w10, data2) + mul24(w11, data3);

    // Round to nearest while removing the 2 * INTER_RESIZE_COEF_BITS fractional bits.
    T uval = convertToDT((val + (1<<(CAST_BITS-1)))>>CAST_BITS);
#else
    const float u1 = 1.f - u;
    const float v1 = 1.f - v;

    T uval = u1 * v1 * data0 + u * v1 * data1 + u1 * v *data2 + u * v *data3;
#endif

    storepix(uval, dstptr + mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset)));
}
222793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
223793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#elif defined INTER_NEAREST
224793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
// Nearest-neighbour resize.
//
//   srcptr, src_step, src_offset, src_rows, src_cols - source buffer, row pitch (bytes),
//                                                      start offset (bytes) and size
//   dstptr, dst_step, dst_offset, dst_rows, dst_cols - same for the destination
//   ifx, ify - factors mapping a destination index to a source coordinate
//
// One work-item copies one source pixel to one destination pixel. The source index is
// the destination index times the factor, truncated toward zero and limited to the last
// column / row.
__kernel void resizeNN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                       __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                       float ifx, float ify)
{
    const int dx = get_global_id(0);
    const int dy = get_global_id(1);

    if (dx >= dst_cols || dy >= dst_rows)
        return;

    const float fx = dx * ifx;
    const float fy = dy * ify;

    const int sx = min(convert_int_rtz(fx), src_cols - 1);
    const int sy = min(convert_int_rtz(fy), src_rows - 1);

    const int src_index = mad24(sy, src_step, mad24(sx, TSIZE, src_offset));
    const int dst_index = mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset));

    storepix(loadpix(srcptr + src_index), dstptr + dst_index);
}
243793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
244793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#elif defined INTER_AREA
245793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
246793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#ifdef INTER_AREA_FAST
247793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
// Area resize for integer shrink factors: every destination pixel is the sum of an
// XSCALE x YSCALE block of source pixels multiplied by SCALE.
//
//   src, src_step, src_offset, src_rows, src_cols - source buffer, row pitch (bytes),
//                                                   start offset (bytes) and size
//   dst, dst_step, dst_offset, dst_rows, dst_cols - same for the destination
//
// XSCALE, YSCALE and SCALE are not defined in this file (expected from the build
// options). NOTE(review): SCALE is presumably 1 / (XSCALE * YSCALE) - confirm on the host.
__kernel void resizeAREA_FAST(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
                              __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
{
    const int dx = get_global_id(0);
    const int dy = get_global_id(1);

    if (dx >= dst_cols || dy >= dst_rows)
        return;

    // Top-left corner of the source block covered by this destination pixel.
    const int block_x = XSCALE * dx;
    const int block_y = YSCALE * dy;

    WTV acc = (WTV)(0);

    #pragma unroll
    for (int j = 0; j < YSCALE; ++j)
    {
        // Rows past the bottom edge reuse the last source row.
        const int row = min(block_y + j, src_rows - 1);
        const int row_index = mad24(row, src_step, src_offset);

        #pragma unroll
        for (int i = 0; i < XSCALE; ++i)
        {
            // Columns past the right edge reuse the last source column.
            const int col = min(block_x + i, src_cols - 1);
            acc += convertToWTV(loadpix(src + row_index + col*TSIZE));
        }
    }

    const int dst_row_index = mad24(dy, dst_step, dst_offset);
    storepix(convertToT(convertToWT2V(acc) * (WT2V)(SCALE)), dst + mad24(dx, TSIZE, dst_row_index));
}
278793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
279793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#else
280793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
// General area resize driven by precomputed tables.
//
//   src, src_step, src_offset, src_rows, src_cols - source buffer, row pitch (bytes),
//                                                   start offset (bytes) and size
//   dst, dst_step, dst_offset, dst_rows, dst_cols - same for the destination
//   ifx, ify  - not used by this kernel
//   ofs_tab   - x ranges (dst_cols + 1 entries) followed by y ranges; entries dx and
//               dx + 1 (resp. dy and dy + 1) delimit this pixel's slice of the map and
//               alpha tables
//   map_tab   - source column indices, then (after 2 * src_cols entries) source row indices
//   alpha_tab - horizontal weights, then (after 2 * src_cols entries) vertical weights
//
// One work-item produces one destination pixel as the weighted sum of the source
// rectangle [sx0..sx1] x [sy0..sy1] looked up from the tables.
__kernel void resizeAREA(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
                         __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                         float ifx, float ify, __global const int * ofs_tab,
                         __global const int * map_tab, __global const float * alpha_tab)
{
    const int dx = get_global_id(0);
    const int dy = get_global_id(1);

    if (dx >= dst_cols || dy >= dst_rows)
        return;

    // Split each combined table into its horizontal and vertical halves.
    __global const int * xofs_tab = ofs_tab;
    __global const int * yofs_tab = (__global const int *)(ofs_tab + dst_cols + 1);
    __global const int * xmap_tab = map_tab;
    __global const int * ymap_tab = (__global const int *)(map_tab + (src_cols << 1));
    __global const float * xalpha_tab = alpha_tab;
    __global const float * yalpha_tab = (__global const float *)(alpha_tab + (src_cols << 1));

    // Slice of the map/alpha tables that belongs to this destination pixel.
    const int xk0 = xofs_tab[dx], xk1 = xofs_tab[dx + 1];
    const int yk0 = yofs_tab[dy], yk1 = yofs_tab[dy + 1];

    // First and last source column / row contributing to this pixel.
    const int sx0 = xmap_tab[xk0], sx1 = xmap_tab[xk1 - 1];
    const int sy0 = ymap_tab[yk0], sy1 = ymap_tab[yk1 - 1];

    WTV total = (WTV)(0);
    int row_index = mad24(sy0, src_step, src_offset);
    int yk = yk0;

    for (int sy = sy0; sy <= sy1; ++sy)
    {
        const WTV beta = (WTV)(yalpha_tab[yk]);
        WTV row_sum = (WTV)(0);
        int xk = xk0;

        for (int sx = sx0; sx <= sx1; ++sx)
        {
            const WTV alpha = (WTV)(xalpha_tab[xk]);
            row_sum += convertToWTV(loadpix(src + mad24(sx, TSIZE, row_index))) * alpha;
            ++xk;
        }

        total += row_sum * beta;

        // Advance to the next source row and its vertical weight.
        row_index += src_step;
        ++yk;
    }

    const int dst_index = mad24(dy, dst_step, dst_offset);
    storepix(convertToT(total), dst + mad24(dx, TSIZE, dst_index));
}
325793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
326793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#endif
327793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
328793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#endif
329