/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Wu Zailong, bullet@yeah.net
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

// Enable a double-precision extension when the build defines DOUBLE_SUPPORT.
// The AMD extension is preferred when its feature macro is present, otherwise
// the Khronos one is used if available.
#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif

// Expands to nothing, so `noconvert(x)` is simply `(x)`.
// Presumably selected by the build options as a conversion macro when no
// conversion is needed -- TODO confirm against the host-side code.
#define noconvert

// Pixel access helpers.
//
// T, T1 and cn are not defined in this file; they are expected to be supplied
// from outside (presumably -D build options -- confirm with the host code).
//
//   loadpix(addr)        read one pixel from the address `addr`
//   storepix(val, addr)  write the pixel `val` to the address `addr`
//   TSIZE                size of one stored pixel in bytes, as an int
//   convertScalar(a)     turn the kernel's scalar argument into a value of type T
//
// When cn != 3 a pixel is a single object of type T and is accessed by a plain
// pointer dereference. When cn == 3 a pixel is stored as three consecutive T1
// elements, accessed with vload3/vstore3, and convertScalar builds a T from the
// .x/.y/.z components of its argument.
//
// Macro arguments and expression-like expansions are parenthesized so the
// macros compose safely with arbitrary argument expressions.
#if cn != 3
#define loadpix(addr) (*(__global const T*)(addr))
#define storepix(val, addr) (*(__global T*)(addr) = (val))
#define TSIZE ((int)sizeof(T))
#define convertScalar(a) (a)
#else
#define loadpix(addr) vload3(0, (__global const T1*)(addr))
#define storepix(val, addr) vstore3((val), 0, (__global T1*)(addr))
#define TSIZE ((int)sizeof(T1)*3)
#define convertScalar(a) (T)((a).x, (a).y, (a).z)
#endif

// Constants describing how fractional coordinates are encoded for the kernels
// below: INTER_BITS bits per axis, INTER_TAB_SIZE (= 32) steps per axis, and
// INTER_TAB_SIZE2 (= 1024) combined x/y steps.
enum
{
    INTER_BITS = 5,
    INTER_TAB_SIZE = 1 << INTER_BITS,
    INTER_TAB_SIZE2 = INTER_TAB_SIZE * INTER_TAB_SIZE
};

// With nearest-neighbour sampling no working-type conversion is performed:
// convertToWT expands to nothing, so convertToWT(x) is just (x).
// For other modes convertToWT is not defined here and is expected to come from
// outside this file (presumably the build options -- confirm).
#ifdef INTER_NEAREST
#define convertToWT
#endif

// EXTRAPOLATE(v2, v): produce into `v` the value for a source coordinate `v2`
// (an int2) that lies outside the source image. Exactly one BORDER_* macro must
// be defined, otherwise compilation stops with #error.
//
// The expansions refer to names that must exist at the point of use:
// `scalar`, `srcptr`, `src_step`, `src_offset`, `src_cols`, `src_rows`.
// All variants except BORDER_CONSTANT modify `v2` in place.
//
// Note: no comments are placed inside the multi-line macro bodies because a
// `//` comment followed by a line continuation would swallow the next line.

#ifdef BORDER_CONSTANT
// Constant border: ignore the coordinate and use the border value `scalar`.
#define EXTRAPOLATE(v2, v) v = scalar;
#elif defined BORDER_REPLICATE
// Replicate border: clamp the coordinate into [0, cols-1] x [0, rows-1] and
// read the source pixel there.
#define EXTRAPOLATE(v2, v) \
    { \
        v2 = max(min(v2, (int2)(src_cols - 1, src_rows - 1)), (int2)(0)); \
        v = convertToWT(loadpix((__global const T*)(srcptr + mad24(v2.y, src_step, v2.x * TSIZE + src_offset)))); \
    }
#elif defined BORDER_WRAP
// Wrap border: negative coordinates are shifted up by a multiple of the image
// size, coordinates past the end are reduced with %, then the source pixel at
// the resulting position is read.
#define EXTRAPOLATE(v2, v) \
    { \
        if (v2.x < 0) \
            v2.x -= ((v2.x - src_cols + 1) / src_cols) * src_cols; \
        if (v2.x >= src_cols) \
            v2.x %= src_cols; \
        \
        if (v2.y < 0) \
            v2.y -= ((v2.y - src_rows + 1) / src_rows) * src_rows; \
        if (v2.y >= src_rows) \
            v2.y %= src_rows; \
        v = convertToWT(loadpix((__global const T*)(srcptr + mad24(v2.y, src_step, v2.x * TSIZE + src_offset)))); \
    }
#elif defined(BORDER_REFLECT) || defined(BORDER_REFLECT_101)
// Reflect borders: the coordinate is mirrored about the image edges until it
// lands inside the image. `delta` is 0 for BORDER_REFLECT and 1 for
// BORDER_REFLECT_101. An axis of size 1 always maps to index 0.
// NOTE(review): each pass of the do/while moves the coordinate by roughly one
// image size, so a coordinate very far outside the image (e.g. a saturated
// conversion result) needs many iterations -- confirm this is acceptable.
#ifdef BORDER_REFLECT
#define DELTA int delta = 0
#else
#define DELTA int delta = 1
#endif
#define EXTRAPOLATE(v2, v) \
    { \
        DELTA; \
        if (src_cols == 1) \
            v2.x = 0; \
        else \
            do \
            { \
                if (v2.x < 0) \
                    v2.x = -v2.x - 1 + delta; \
                else \
                    v2.x = src_cols - 1 - (v2.x - src_cols) - delta; \
            } \
            while (v2.x >= src_cols || v2.x < 0); \
        \
        if (src_rows == 1) \
            v2.y = 0; \
        else \
            do \
            { \
                if (v2.y < 0) \
                    v2.y = -v2.y - 1 + delta; \
                else \
                    v2.y = src_rows - 1 - (v2.y - src_rows) - delta; \
            } \
            while (v2.y >= src_rows || v2.y < 0); \
        v = convertToWT(loadpix((__global const T*)(srcptr + mad24(v2.y, src_step, v2.x * TSIZE + src_offset)))); \
    }
#else
#error No extrapolation method
#endif

// True when (gx, gy) lies outside the source image; relies on `src_cols` and
// `src_rows` being in scope. Arguments are parenthesized so that arbitrary
// expressions can be passed safely.
#define NEED_EXTRAPOLATION(gx, gy) ((gx) >= src_cols || (gy) >= src_rows || (gx) < 0 || (gy) < 0)

#ifdef INTER_NEAREST

// Nearest-neighbour remap driven by two float maps.
//
// Work-item layout (shared by every kernel in this file): global id 0 is the
// destination column x; global id 1 selects a group of rowsPerWI consecutive
// destination rows starting at get_global_id(1) * rowsPerWI. Columns and rows
// outside dst_cols / dst_rows are skipped. rowsPerWI, T and ST are not defined
// in this file (presumably build options -- confirm).
//
// For each destination pixel the x coordinate is read from map1 and the y
// coordinate from map2 (one float each), rounded with saturation to the
// nearest-even integer, and the source pixel at that position is copied.
// Coordinates outside the source image are handled by EXTRAPOLATE; nVal is the
// border value used by the BORDER_CONSTANT variant.
__kernel void remap_2_32FC1(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                            __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                            __global const uchar * map1ptr, int map1_step, int map1_offset,
                            __global const uchar * map2ptr, int map2_step, int map2_offset,
                            ST nVal)
{
    int x = get_global_id(0);
    int y = get_global_id(1) * rowsPerWI;

    if (x < dst_cols)
    {
        // Referenced by name inside the BORDER_CONSTANT form of EXTRAPOLATE.
        T scalar = convertScalar(nVal);

        // Byte offsets of the first row handled by this work-item.
        int map1_index = mad24(y, map1_step, mad24(x, (int)sizeof(float), map1_offset));
        int map2_index = mad24(y, map2_step, mad24(x, (int)sizeof(float), map2_offset));
        int dst_index = mad24(y, dst_step, mad24(x, TSIZE, dst_offset));

        #pragma unroll
        for (int i = 0; i < rowsPerWI; ++i, ++y,
            map1_index += map1_step, map2_index += map2_step, dst_index += dst_step)
            if (y < dst_rows)
            {
                __global const float * map1 = (__global const float *)(map1ptr + map1_index);
                __global const float * map2 = (__global const float *)(map2ptr + map2_index);
                __global T * dst = (__global T *)(dstptr + dst_index);

                int gx = convert_int_sat_rte(map1[0]);
                int gy = convert_int_sat_rte(map2[0]);

                if (NEED_EXTRAPOLATION(gx, gy))
                {
                    // The constant-border macro never touches its first
                    // argument, so the vector is only built for other modes.
#ifndef BORDER_CONSTANT
                    int2 gxy = (int2)(gx, gy);
#endif
                    T v;
                    EXTRAPOLATE(gxy, v)
                    storepix(v, dst);
                }
                else
                {
                    int src_index = mad24(gy, src_step, mad24(gx, TSIZE, src_offset));
                    storepix(loadpix((__global const T*)(srcptr + src_index)), dst);
                }
            }
    }
}

// Nearest-neighbour remap driven by a single map of float2 elements.
// Each map element is converted to an int2 with saturation and round-to-
// nearest-even; its .x/.y are used as the source column/row. Work-item layout
// and border handling are the same as in remap_2_32FC1 above.
__kernel void remap_32FC2(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                          __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                          __global const uchar * mapptr, int map_step, int map_offset,
                          ST nVal)
{
    int x = get_global_id(0);
    int y = get_global_id(1) * rowsPerWI;

    if (x < dst_cols)
    {
        // Referenced by name inside the BORDER_CONSTANT form of EXTRAPOLATE.
        T scalar = convertScalar(nVal);
        int dst_index = mad24(y, dst_step, mad24(x, TSIZE, dst_offset));
        int map_index = mad24(y, map_step, mad24(x, (int)sizeof(float2), map_offset));

        #pragma unroll
        for (int i = 0; i < rowsPerWI; ++i, ++y,
            map_index += map_step, dst_index += dst_step)
            if (y < dst_rows)
            {
                __global const float2 * map = (__global const float2 *)(mapptr + map_index);
                __global T * dst = (__global T *)(dstptr + dst_index);

                int2 gxy = convert_int2_sat_rte(map[0]);
                int gx = gxy.x, gy = gxy.y;

                if (NEED_EXTRAPOLATION(gx, gy))
                {
                    T v;
                    EXTRAPOLATE(gxy, v)
                    storepix(v, dst);
                }
                else
                {
                    int src_index = mad24(gy, src_step, mad24(gx, TSIZE, src_offset));
                    storepix(loadpix((__global const T *)(srcptr + src_index)), dst);
                }
            }
    }
}

// Nearest-neighbour remap driven by a single map of short2 elements.
// The map already holds integer coordinates; they are widened to int2 and used
// directly as the source column/row. Work-item layout and border handling are
// the same as in remap_2_32FC1 above.
__kernel void remap_16SC2(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                          __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                          __global const uchar * mapptr, int map_step, int map_offset,
                          ST nVal)
{
    int x = get_global_id(0);
    int y = get_global_id(1) * rowsPerWI;

    if (x < dst_cols)
    {
        // Referenced by name inside the BORDER_CONSTANT form of EXTRAPOLATE.
        T scalar = convertScalar(nVal);
        int dst_index = mad24(y, dst_step, mad24(x, TSIZE, dst_offset));
        int map_index = mad24(y, map_step, mad24(x, (int)sizeof(short2), map_offset));

        #pragma unroll
        for (int i = 0; i < rowsPerWI; ++i, ++y,
            map_index += map_step, dst_index += dst_step)
            if (y < dst_rows)
            {
                __global const short2 * map = (__global const short2 *)(mapptr + map_index);
                __global T * dst = (__global T *)(dstptr + dst_index);

                int2 gxy = convert_int2(map[0]);
                int gx = gxy.x, gy = gxy.y;

                if (NEED_EXTRAPOLATION(gx, gy))
                {
                    T v;
                    EXTRAPOLATE(gxy, v)
                    storepix(v, dst);
                }
                else
                {
                    int src_index = mad24(gy, src_step, mad24(gx, TSIZE, src_offset));
                    storepix(loadpix((__global const T *)(srcptr + src_index)), dst);
                }
            }
    }
}

// Nearest-neighbour remap driven by a short2 map (integer coordinates) plus a
// ushort map (packed fractional part).
//
// The ushort value is masked to its low 2 * INTER_BITS bits; the low INTER_BITS
// bits are the x field and the next INTER_BITS bits are the y field. For each
// axis an offset of 1 is added to the integer coordinate when that field is
// below INTER_TAB_SIZE / 2, and 0 otherwise.
// NOTE(review): the offset is applied when the fractional field is *below*
// half -- confirm this is the intended rounding direction.
// Work-item layout and border handling are the same as in remap_2_32FC1 above.
__kernel void remap_16SC2_16UC1(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                                __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                                __global const uchar * map1ptr, int map1_step, int map1_offset,
                                __global const uchar * map2ptr, int map2_step, int map2_offset,
                                ST nVal)
{
    int x = get_global_id(0);
    int y = get_global_id(1) * rowsPerWI;

    if (x < dst_cols)
    {
        // Referenced by name inside the BORDER_CONSTANT form of EXTRAPOLATE.
        T scalar = convertScalar(nVal);
        int dst_index = mad24(y, dst_step, mad24(x, TSIZE, dst_offset));
        int map1_index = mad24(y, map1_step, mad24(x, (int)sizeof(short2), map1_offset));
        int map2_index = mad24(y, map2_step, mad24(x, (int)sizeof(ushort), map2_offset));

        #pragma unroll
        for (int i = 0; i < rowsPerWI; ++i, ++y,
            map1_index += map1_step, map2_index += map2_step, dst_index += dst_step)
            if (y < dst_rows)
            {
                __global const short2 * map1 = (__global const short2 *)(map1ptr + map1_index);
                __global const ushort * map2 = (__global const ushort *)(map2ptr + map2_index);
                __global T * dst = (__global T *)(dstptr + dst_index);

                int map2Value = convert_int(map2[0]) & (INTER_TAB_SIZE2 - 1);
                int dx = (map2Value & (INTER_TAB_SIZE - 1)) < (INTER_TAB_SIZE >> 1) ? 1 : 0;
                int dy = (map2Value >> INTER_BITS) < (INTER_TAB_SIZE >> 1) ? 1 : 0;
                int2 gxy = convert_int2(map1[0]) + (int2)(dx, dy);
                int gx = gxy.x, gy = gxy.y;

                if (NEED_EXTRAPOLATION(gx, gy))
                {
                    T v;
                    EXTRAPOLATE(gxy, v)
                    storepix(v, dst);
                }
                else
                {
                    int src_index = mad24(gy, src_step, mad24(gx, TSIZE, src_offset));
                    storepix(loadpix((__global const T *)(srcptr + src_index)), dst);
                }
            }
    }
}

#elif defined INTER_LINEAR

// Linear interpolation weights: 32 consecutive pairs (1 - k/32, k/32) for
// k = 0..31, i.e. the weights of the lower and upper neighbour for each of the
// INTER_TAB_SIZE fractional steps.
__constant float coeffs[64] =
{ 1.000000f, 0.000000f, 0.968750f, 0.031250f, 0.937500f, 0.062500f, 0.906250f, 0.093750f, 0.875000f, 0.125000f, 0.843750f, 0.156250f,
  0.812500f, 0.187500f, 0.781250f, 0.218750f, 0.750000f, 0.250000f, 0.718750f, 0.281250f, 0.687500f, 0.312500f, 0.656250f, 0.343750f,
  0.625000f, 0.375000f, 0.593750f, 0.406250f, 0.562500f, 0.437500f, 0.531250f, 0.468750f, 0.500000f, 0.500000f, 0.468750f, 0.531250f,
  0.437500f, 0.562500f, 0.406250f, 0.593750f, 0.375000f, 0.625000f, 0.343750f, 0.656250f, 0.312500f, 0.687500f, 0.281250f, 0.718750f,
  0.250000f, 0.750000f, 0.218750f, 0.781250f, 0.187500f, 0.812500f, 0.156250f, 0.843750f, 0.125000f, 0.875000f, 0.093750f, 0.906250f,
  0.062500f, 0.937500f, 0.031250f, 0.968750f };

// Bilinear remap driven by a short2 map (integer coordinates of the top-left
// neighbour) plus a ushort map (packed fractional part).
//
// For every destination pixel the four neighbours A = (x, y), B = (x+1, y),
// C = (x, y+1), D = (x+1, y+1) are fetched in the working type WT; each
// neighbour outside the source image is produced by EXTRAPOLATE. The ushort
// value, masked to 2 * INTER_BITS bits, gives the fractions
// u.x = low INTER_BITS bits / INTER_TAB_SIZE and
// u.y = next INTER_BITS bits / INTER_TAB_SIZE, which weight the neighbours.
// The result is converted back with convertToT and stored.
//
// WT, WT2, convertToWT and convertToT are not defined in this file for this
// mode (presumably build options -- confirm). Work-item layout: global id 0 is
// the destination column, global id 1 selects rowsPerWI consecutive rows.
__kernel void remap_16SC2_16UC1(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                                __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                                __global const uchar * map1ptr, int map1_step, int map1_offset,
                                __global const uchar * map2ptr, int map2_step, int map2_offset,
                                ST nVal)
{
    int x = get_global_id(0);
    int y = get_global_id(1) * rowsPerWI;

    if (x < dst_cols)
    {
        // Border value in the working type; referenced by name inside the
        // BORDER_CONSTANT form of EXTRAPOLATE.
        WT scalar = convertToWT(convertScalar(nVal));
        int dst_index = mad24(y, dst_step, mad24(x, TSIZE, dst_offset));
        int map1_index = mad24(y, map1_step, mad24(x, (int)sizeof(short2), map1_offset));
        int map2_index = mad24(y, map2_step, mad24(x, (int)sizeof(ushort), map2_offset));

        #pragma unroll
        for (int i = 0; i < rowsPerWI; ++i, ++y,
            map1_index += map1_step, map2_index += map2_step, dst_index += dst_step)
            if (y < dst_rows)
            {
                __global const short2 * map1 = (__global const short2 *)(map1ptr + map1_index);
                __global const ushort * map2 = (__global const ushort *)(map2ptr + map2_index);
                __global T * dst = (__global T *)(dstptr + dst_index);

                // Coordinates of the 2x2 neighbourhood.
                int2 map_dataA = convert_int2(map1[0]);
                int2 map_dataB = (int2)(map_dataA.x + 1, map_dataA.y);
                int2 map_dataC = (int2)(map_dataA.x, map_dataA.y + 1);
                int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y + 1);

                // Fractional offsets in [0, 1) along x and y.
                ushort map2Value = (ushort)(map2[0] & (INTER_TAB_SIZE2 - 1));
                WT2 u = (WT2)(map2Value & (INTER_TAB_SIZE - 1), map2Value >> INTER_BITS) / (WT2)(INTER_TAB_SIZE);

                WT a = scalar, b = scalar, c = scalar, d = scalar;

                if (!NEED_EXTRAPOLATION(map_dataA.x, map_dataA.y))
                    a = convertToWT(loadpix((__global const T *)(srcptr + mad24(map_dataA.y, src_step, map_dataA.x * TSIZE + src_offset))));
                else
                    EXTRAPOLATE(map_dataA, a);

                if (!NEED_EXTRAPOLATION(map_dataB.x, map_dataB.y))
                    b = convertToWT(loadpix((__global const T *)(srcptr + mad24(map_dataB.y, src_step, map_dataB.x * TSIZE + src_offset))));
                else
                    EXTRAPOLATE(map_dataB, b);

                if (!NEED_EXTRAPOLATION(map_dataC.x, map_dataC.y))
                    c = convertToWT(loadpix((__global const T *)(srcptr + mad24(map_dataC.y, src_step, map_dataC.x * TSIZE + src_offset))));
                else
                    EXTRAPOLATE(map_dataC, c);

                if (!NEED_EXTRAPOLATION(map_dataD.x, map_dataD.y))
                    d = convertToWT(loadpix((__global const T *)(srcptr + mad24(map_dataD.y, src_step, map_dataD.x * TSIZE + src_offset))));
                else
                    EXTRAPOLATE(map_dataD, d);

                // Bilinear blend of the four neighbours.
                WT dst_data = a * (1 - u.x) * (1 - u.y) +
                              b * (u.x)     * (1 - u.y) +
                              c * (1 - u.x) * (u.y) +
                              d * (u.x)     * (u.y);
                storepix(convertToT(dst_data), dst);
            }
    }
}

__kernel void remap_2_32FC1(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                            __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                            __global const uchar * map1ptr, int map1_step, int map1_offset,
                            __global const uchar * map2ptr, int map2_step, int map2_offset,
                            ST nVal)
{
    int x = get_global_id(0);
    int y = get_global_id(1) * rowsPerWI;

    if (x < dst_cols)
    {
        WT scalar = convertToWT(convertScalar(nVal));
        int dst_index = mad24(y, dst_step, mad24(x, TSIZE, dst_offset));
        int map1_index = mad24(y, map1_step, mad24(x, (int)sizeof(float), map1_offset));
        int map2_index = mad24(y, map2_step, mad24(x, (int)sizeof(float), map2_offset));

        #pragma unroll
        for (int i = 0; i < rowsPerWI; ++i, ++y,
            map1_index += map1_step, map2_index += map2_step, dst_index += dst_step)
            if (y < dst_rows)
            {
                __global const float * map1 = (__global const float *)(map1ptr + map1_index);
                __global const float * map2 = (__global const float *)(map2ptr + map2_index);
                __global T * dst = (__global T *)(dstptr + dst_index);

#if defined BORDER_CONSTANT
                float xf = map1[0], yf = map2[0];
                int sx = convert_int_sat_rtz(mad(xf, INTER_TAB_SIZE, 0.5f)) >> INTER_BITS;
                int sy = convert_int_sat_rtz(mad(yf, INTER_TAB_SIZE, 0.5f)) >> INTER_BITS;

                __constant float * coeffs_x = coeffs + ((convert_int_rte(xf * INTER_TAB_SIZE) & (INTER_TAB_SIZE - 1)) << 1);
                __constant float * coeffs_y = coeffs + ((convert_int_rte(yf * INTER_TAB_SIZE) &
(INTER_TAB_SIZE - 1)) << 1); 422793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 423793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler WT sum = (WT)(0), xsum; 424793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int src_index = mad24(sy, src_step, mad24(sx, TSIZE, src_offset)); 425793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 426793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler #pragma unroll 427793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for (int yp = 0; yp < 2; ++yp, src_index += src_step) 428793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 429793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if (sy + yp >= 0 && sy + yp < src_rows) 430793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 431793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler xsum = (WT)(0); 432793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if (sx >= 0 && sx + 2 < src_cols) 433793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 434793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#if depth == 0 && cn == 1 435793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler uchar2 value = vload2(0, srcptr + src_index); 436793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler xsum = dot(convert_float2(value), (float2)(coeffs_x[0], coeffs_x[1])); 437793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#else 438793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler #pragma unroll 439793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for (int xp = 0; xp < 2; ++xp) 440793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler xsum = fma(convertToWT(loadpix(srcptr + mad24(xp, TSIZE, src_index))), coeffs_x[xp], xsum); 441793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#endif 442793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 443793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler else 444793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 445793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler #pragma unroll 446793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 
for (int xp = 0; xp < 2; ++xp) 447793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler xsum = fma(sx + xp >= 0 && sx + xp < src_cols ? 448793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler convertToWT(loadpix(srcptr + mad24(xp, TSIZE, src_index))) : scalar, coeffs_x[xp], xsum); 449793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 450793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler sum = fma(xsum, coeffs_y[yp], sum); 451793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 452793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler else 453793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler sum = fma(scalar, coeffs_y[yp], sum); 454793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 455793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 456793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler storepix(convertToT(sum), dst); 457793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#else 458793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler float2 map_data = (float2)(map1[0], map2[0]); 459793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 460793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int2 map_dataA = convert_int2_sat_rtn(map_data); 461793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int2 map_dataB = (int2)(map_dataA.x + 1, map_dataA.y); 462793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int2 map_dataC = (int2)(map_dataA.x, map_dataA.y + 1); 463793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y + 1); 464793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 465793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler float2 _u = map_data - convert_float2(map_dataA); 466793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)INTER_TAB_SIZE)) / (WT2)INTER_TAB_SIZE; 467793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler WT scalar = convertToWT(convertScalar(nVal)); 468793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler WT a = scalar, 
b = scalar, c = scalar, d = scalar; 469793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 470793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if (!NEED_EXTRAPOLATION(map_dataA.x, map_dataA.y)) 471793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler a = convertToWT(loadpix((__global const T *)(srcptr + mad24(map_dataA.y, src_step, map_dataA.x * TSIZE + src_offset)))); 472793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler else 473793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler EXTRAPOLATE(map_dataA, a); 474793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 475793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if (!NEED_EXTRAPOLATION(map_dataB.x, map_dataB.y)) 476793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler b = convertToWT(loadpix((__global const T *)(srcptr + mad24(map_dataB.y, src_step, map_dataB.x * TSIZE + src_offset)))); 477793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler else 478793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler EXTRAPOLATE(map_dataB, b); 479793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 480793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if (!NEED_EXTRAPOLATION(map_dataC.x, map_dataC.y)) 481793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler c = convertToWT(loadpix((__global const T *)(srcptr + mad24(map_dataC.y, src_step, map_dataC.x * TSIZE + src_offset)))); 482793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler else 483793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler EXTRAPOLATE(map_dataC, c); 484793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 485793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if (!NEED_EXTRAPOLATION(map_dataD.x, map_dataD.y)) 486793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler d = convertToWT(loadpix((__global const T *)(srcptr + mad24(map_dataD.y, src_step, map_dataD.x * TSIZE + src_offset)))); 487793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler else 488793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler EXTRAPOLATE(map_dataD, d); 
489793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 490793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler WT dst_data = a * (1 - u.x) * (1 - u.y) + 491793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler b * (u.x) * (1 - u.y) + 492793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler c * (1 - u.x) * (u.y) + 493793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler d * (u.x) * (u.y); 494793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler storepix(convertToT(dst_data), dst); 495793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#endif 496793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 497793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 498793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler} 499793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 500793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler__kernel void remap_32FC2(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols, 501793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols, 502793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler __global const uchar * mapptr, int map_step, int map_offset, 503793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler ST nVal) 504793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler{ 505793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int x = get_global_id(0); 506793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int y = get_global_id(1) * rowsPerWI; 507793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 508793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if (x < dst_cols) 509793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 510793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler WT scalar = convertToWT(convertScalar(nVal)); 511793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int dst_index = mad24(y, dst_step, mad24(x, TSIZE, dst_offset)); 512793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int map_index = mad24(y, map_step, 
mad24(x, (int)sizeof(float2), map_offset)); 513793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 514793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler #pragma unroll 515793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for (int i = 0; i < rowsPerWI; ++i, ++y, 516793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler map_index += map_step, dst_index += dst_step) 517793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if (y < dst_rows) 518793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 519793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler __global const float2 * map = (__global const float2 *)(mapptr + map_index); 520793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler __global T * dst = (__global T *)(dstptr + dst_index); 521793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 522793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler float2 map_data = map[0]; 523793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int2 map_dataA = convert_int2_sat_rtn(map_data); 524793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int2 map_dataB = (int2)(map_dataA.x + 1, map_dataA.y); 525793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int2 map_dataC = (int2)(map_dataA.x, map_dataA.y + 1); 526793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y + 1); 527793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 528793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler float2 _u = map_data - convert_float2(map_dataA); 529793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)INTER_TAB_SIZE)) / (WT2)INTER_TAB_SIZE; 530793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler WT a = scalar, b = scalar, c = scalar, d = scalar; 531793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 532793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if (!NEED_EXTRAPOLATION(map_dataA.x, map_dataA.y)) 533793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler a = 
convertToWT(loadpix((__global const T *)(srcptr + mad24(map_dataA.y, src_step, map_dataA.x * TSIZE + src_offset)))); 534793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler else 535793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler EXTRAPOLATE(map_dataA, a); 536793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 537793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if (!NEED_EXTRAPOLATION(map_dataB.x, map_dataB.y)) 538793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler b = convertToWT(loadpix((__global const T *)(srcptr + mad24(map_dataB.y, src_step, map_dataB.x * TSIZE + src_offset)))); 539793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler else 540793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler EXTRAPOLATE(map_dataB, b); 541793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 542793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if (!NEED_EXTRAPOLATION(map_dataC.x, map_dataC.y)) 543793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler c = convertToWT(loadpix((__global const T *)(srcptr + mad24(map_dataC.y, src_step, map_dataC.x * TSIZE + src_offset)))); 544793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler else 545793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler EXTRAPOLATE(map_dataC, c); 546793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 547793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if (!NEED_EXTRAPOLATION(map_dataD.x, map_dataD.y)) 548793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler d = convertToWT(loadpix((__global const T *)(srcptr + mad24(map_dataD.y, src_step, map_dataD.x * TSIZE + src_offset)))); 549793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler else 550793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler EXTRAPOLATE(map_dataD, d); 551793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 552793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler WT dst_data = a * (1 - u.x) * (1 - u.y) + 553793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler b * (u.x) * (1 - u.y) + 554793ee12c6df9cad3806238d32528c49a3ff9331dNoah 
Presler c * (1 - u.x) * (u.y) + 555793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler d * (u.x) * (u.y); 556793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler storepix(convertToT(dst_data), dst); 557793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 558793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 559793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler} 560793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 561793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#endif 562