/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
//    Zhang Ying, zhangying913@gmail.com
//    Niko Li, newlife20080214@gmail.com
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

// Image-resize kernels. Exactly one kernel is compiled per program build, selected
// by which of USE_SAMPLER, INTER_LINEAR_INTEGER, INTER_LINEAR, INTER_NEAREST or
// INTER_AREA (optionally together with INTER_AREA_FAST) is defined.
//
// NOTE(review): T, T1, WT, WTV, WT2V, cn, depth, the convertTo* helpers,
// INTER_RESIZE_COEF_BITS, XSCALE, YSCALE and SCALE are not defined in this file;
// presumably the host passes them as build options -- confirm against the caller.

#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif

// Fixed-point scale applied to the interpolation weights in the INTER_LINEAR kernel.
#define INTER_RESIZE_COEF_SCALE (1 << INTER_RESIZE_COEF_BITS)
// A product of two weights carries twice the fractional bits of a single weight.
#define CAST_BITS (INTER_RESIZE_COEF_BITS << 1)
// Next index along an axis of length l, clamped to the last valid index.
#define INC(x,l) min((x)+1,(l)-1)

// Expands to nothing.
// NOTE(review): presumably lets a convertTo* build option name "no conversion" -- confirm.
#define noconvert

// Pixel access through byte addresses. Three-channel pixels go through
// vload3/vstore3 on the scalar type T1; every other channel count is accessed as T.
// TSIZE is the size of one pixel in bytes.
#if cn != 3
#define loadpix(addr) (*(__global const T *)(addr))
#define storepix(val, addr) *(__global T *)(addr) = (val)
#define TSIZE ((int)sizeof(T))
#else
#define loadpix(addr) vload3(0, (__global const T1 *)(addr))
#define storepix(val, addr) vstore3(val, 0, (__global T1 *)(addr))
#define TSIZE ((int)sizeof(T1)*cn)
#endif

#if defined USE_SAMPLER

// Keep only the first cn channels of the float4 returned by read_imagef.
#if cn == 1
#define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z).x
#define INTERMEDIATE_TYPE float
#elif cn == 2
#define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z).xy
#define INTERMEDIATE_TYPE float2
#elif cn == 3
#define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z).xyz
#define INTERMEDIATE_TYPE float3
#elif cn == 4
#define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z)
#define INTERMEDIATE_TYPE float4
#endif

#define __CAT(x, y) x##y
#define CAT(x, y) __CAT(x, y)
//#define INTERMEDIATE_TYPE CAT(float, cn)
#define float1 float

// Factor applied to the sampled float value before conversion to the destination type.
// NOTE(review): the constants look like the maxima of 8/16-bit unsigned and signed
// integers, i.e. undoing read_imagef normalization -- confirm the depth numbering.
#if depth == 0
#define RESULT_SCALE 255.0f
#elif depth == 1
#define RESULT_SCALE 127.0f
#elif depth == 2
#define RESULT_SCALE 65535.0f
#elif depth == 3
#define RESULT_SCALE 32767.0f
#else
#define RESULT_SCALE 1.0f
#endif

/*
 * Resize by sampling the source image with a linear-filtering, clamp-to-edge
 * sampler that takes unnormalized coordinates.
 *
 * srcImage            source image object
 * dstptr              destination buffer, addressed in bytes
 * dststep, dstoffset  destination row pitch and start offset, in bytes
 * dstrows, dstcols    destination size in pixels
 * ifx, ify            factors mapping destination to source coordinates
 *
 * One work-item produces one destination pixel; work-items outside the
 * destination do nothing.
 */
__kernel void resizeSampler(__read_only image2d_t srcImage,
                            __global uchar* dstptr, int dststep, int dstoffset,
                            int dstrows, int dstcols,
                            float ifx, float ify)
{
    const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
                              CLK_ADDRESS_CLAMP_TO_EDGE |
                              CLK_FILTER_LINEAR;

    int dx = get_global_id(0);
    int dy = get_global_id(1);

    // Check bounds before sampling so out-of-range work-items skip the image read.
    if (dx < dstcols && dy < dstrows)
    {
        // Sample at the centre of the destination pixel, mapped into source space.
        float sx = ((dx+0.5f) * ifx), sy = ((dy+0.5f) * ify);

        INTERMEDIATE_TYPE intermediate = READ_IMAGE(srcImage, sampler, (float2)(sx, sy));

#if depth <= 4
        T uval = convertToDT(round(intermediate * RESULT_SCALE));
#else
        T uval = convertToDT(intermediate * RESULT_SCALE);
#endif

        storepix(uval, dstptr + mad24(dy, dststep, dstoffset + dx*TSIZE));
    }
}

#elif defined INTER_LINEAR_INTEGER

/*
 * Bilinear resize using precomputed integer offsets and 16-bit weights.
 *
 * srcptr / dstptr   source and destination buffers, addressed in bytes
 * *_step, *_offset  row pitch and start offset, in bytes
 * *_rows, *_cols    image sizes in pixels
 * buffer            lookup tables, laid out as read below:
 *                     int   xofs[dst_cols]        source column per destination column
 *                     int   yofs[dst_rows]        source row per destination row
 *                     short ialpha[2 * dst_cols]  horizontal weight pair per column
 *                     short ibeta[2 * dst_rows]   vertical weight pair per row
 *
 * Source rows and columns are clamped to the image, so the right and bottom
 * neighbours never address memory outside the current row or the image.
 */
__kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                       __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                       __global const uchar * buffer)
{
    int dx = get_global_id(0);
    int dy = get_global_id(1);

    if (dx < dst_cols && dy < dst_rows)
    {
        __global const int * xofs = (__global const int *)(buffer), * yofs = xofs + dst_cols;
        __global const short * ialpha = (__global const short *)(yofs + dst_rows);
        __global const short * ibeta = ialpha + ((dst_cols + dy) << 1);
        ialpha += dx << 1;

        // Clamp X the same way as Y. Previously the right neighbour was read at
        // sx0 + 1 unconditionally, which runs past the row when sx0 is the last column.
        int sx0 = clamp(xofs[dx], 0, src_cols - 1),
            sx1 = clamp(xofs[dx] + 1, 0, src_cols - 1);
        int sy0 = clamp(yofs[dy], 0, src_rows - 1),
            sy1 = clamp(yofs[dy] + 1, 0, src_rows - 1);
        short a0 = ialpha[0], a1 = ialpha[1];
        short b0 = ibeta[0], b1 = ibeta[1];

        int row0 = mad24(sy0, src_step, src_offset),
            row1 = mad24(sy1, src_step, src_offset);
        WT data0 = convertToWT(loadpix(srcptr + mad24(sx0, TSIZE, row0)));
        WT data1 = convertToWT(loadpix(srcptr + mad24(sx1, TSIZE, row0)));
        WT data2 = convertToWT(loadpix(srcptr + mad24(sx0, TSIZE, row1)));
        WT data3 = convertToWT(loadpix(srcptr + mad24(sx1, TSIZE, row1)));

        // Horizontal blend, then vertical blend. The shifts total 4 + 16 + 2 = 22 bits.
        // NOTE(review): presumably the weights carry 11 fractional bits each -- confirm.
        WT val = ( (((data0 * a0 + data1 * a1) >> 4) * b0) >> 16) +
                 ( (((data2 * a0 + data3 * a1) >> 4) * b1) >> 16);

        storepix(convertToDT((val + 2) >> 2),
                 dstptr + mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset)));
    }
}

#elif defined INTER_LINEAR

/*
 * Bilinear resize computing the source coordinates and weights per work-item.
 *
 * srcptr / dstptr   source and destination buffers, addressed in bytes
 * *_step, *_offset  row pitch and start offset, in bytes
 * *_rows, *_cols    image sizes in pixels
 * ifx, ify          factors mapping destination to source coordinates
 *
 * For depth <= 4 the weights are converted to fixed point
 * (INTER_RESIZE_COEF_BITS fractional bits) and combined with integer arithmetic;
 * otherwise the blend is done in floating point.
 */
__kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                       __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                       float ifx, float ify)
{
    int dx = get_global_id(0);
    int dy = get_global_id(1);

    if (dx < dst_cols && dy < dst_rows)
    {
        // Map the destination pixel centre to source coordinates.
        float sx = ((dx+0.5f) * ifx - 0.5f), sy = ((dy+0.5f) * ify - 0.5f);
        int x = floor(sx), y = floor(sy);

        float u = sx - x, v = sy - y;

        // Outside the source image: snap to the border pixel and drop the fraction.
        if (x < 0) { x = 0; u = 0; }
        if (x >= src_cols) { x = src_cols - 1; u = 0; }
        if (y < 0) { y = 0; v = 0; }
        if (y >= src_rows) { y = src_rows - 1; v = 0; }

        int y_ = INC(y, src_rows);
        int x_ = INC(x, src_cols);

        // The four neighbours are the same for both arithmetic paths.
        WT data0 = convertToWT(loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset))));
        WT data1 = convertToWT(loadpix(srcptr + mad24(y, src_step, mad24(x_, TSIZE, src_offset))));
        WT data2 = convertToWT(loadpix(srcptr + mad24(y_, src_step, mad24(x, TSIZE, src_offset))));
        WT data3 = convertToWT(loadpix(srcptr + mad24(y_, src_step, mad24(x_, TSIZE, src_offset))));

#if depth <= 4
        u = u * INTER_RESIZE_COEF_SCALE;
        v = v * INTER_RESIZE_COEF_SCALE;

        int U = rint(u);
        int V = rint(v);
        int U1 = rint(INTER_RESIZE_COEF_SCALE - u);
        int V1 = rint(INTER_RESIZE_COEF_SCALE - v);

        // NOTE(review): mul24 uses only the low 24 bits of each operand, so this
        // presumably relies on pixel values and weight products staying within
        // that range -- confirm for the wider integer depths.
        WT val = mul24((WT)mul24(U1, V1), data0) + mul24((WT)mul24(U, V1), data1) +
                 mul24((WT)mul24(U1, V), data2) + mul24((WT)mul24(U, V), data3);

        // Round to nearest while removing the 2 * INTER_RESIZE_COEF_BITS fractional bits.
        T uval = convertToDT((val + (1<<(CAST_BITS-1)))>>CAST_BITS);
#else
        float u1 = 1.f - u;
        float v1 = 1.f - v;

        T uval = u1 * v1 * data0 + u * v1 * data1 + u1 * v *data2 + u * v *data3;
#endif
        storepix(uval, dstptr + mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset)));
    }
}

#elif defined INTER_NEAREST

/*
 * Nearest-neighbour resize: each destination pixel copies the source pixel at
 * (trunc(dx * ifx), trunc(dy * ify)), clamped to the last source column and row.
 *
 * srcptr / dstptr   source and destination buffers, addressed in bytes
 * *_step, *_offset  row pitch and start offset, in bytes
 * *_rows, *_cols    image sizes in pixels
 * ifx, ify          factors mapping destination to source coordinates
 */
__kernel void resizeNN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                       __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                       float ifx, float ify)
{
    int dx = get_global_id(0);
    int dy = get_global_id(1);

    if (dx < dst_cols && dy < dst_rows)
    {
        float s1 = dx * ifx;
        float s2 = dy * ify;
        int sx = min(convert_int_rtz(s1), src_cols - 1);
        int sy = min(convert_int_rtz(s2), src_rows - 1);

        storepix(loadpix(srcptr + mad24(sy, src_step, mad24(sx, TSIZE, src_offset))),
                 dstptr + mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset)));
    }
}

#elif defined INTER_AREA

#ifdef INTER_AREA_FAST

/*
 * Area resize for compile-time integer factors: each destination pixel is the
 * sum of an XSCALE x YSCALE block of source pixels multiplied by SCALE.
 * Block coordinates beyond the source are clamped to the last row and column.
 *
 * src / dst         source and destination buffers, addressed in bytes
 * *_step, *_offset  row pitch and start offset, in bytes
 * *_rows, *_cols    image sizes in pixels
 *
 * NOTE(review): SCALE is presumably 1 / (XSCALE * YSCALE) -- confirm against the host.
 */
__kernel void resizeAREA_FAST(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
                              __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
{
    int dx = get_global_id(0);
    int dy = get_global_id(1);

    if (dx < dst_cols && dy < dst_rows)
    {
        int dst_index = mad24(dy, dst_step, dst_offset);

        // Top-left corner of the source block for this destination pixel.
        int sx = XSCALE * dx;
        int sy = YSCALE * dy;
        WTV sum = (WTV)(0);

        #pragma unroll
        for (int py = 0; py < YSCALE; ++py)
        {
            int y = min(sy + py, src_rows - 1);
            int src_index = mad24(y, src_step, src_offset);
            #pragma unroll
            for (int px = 0; px < XSCALE; ++px)
            {
                int x = min(sx + px, src_cols - 1);
                sum += convertToWTV(loadpix(src + src_index + x*TSIZE));
            }
        }

        storepix(convertToT(convertToWT2V(sum) * (WT2V)(SCALE)), dst + mad24(dx, TSIZE, dst_index));
    }
}

#else

/*
 * Area resize driven by precomputed tables: each destination pixel is a
 * weighted sum over a rectangle of source pixels.
 *
 * src / dst         source and destination buffers, addressed in bytes
 * *_step, *_offset  row pitch and start offset, in bytes
 * *_rows, *_cols    image sizes in pixels
 * ifx, ify          not used by this kernel
 * ofs_tab           x offsets (dst_cols + 1 entries) followed by y offsets; entries
 *                   [d] and [d + 1] bound the map/alpha range of destination index d
 * map_tab           source x indices, then source y indices starting at 2 * src_cols
 * alpha_tab         x weights, then y weights starting at 2 * src_cols
 */
__kernel void resizeAREA(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
                         __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                         float ifx, float ify, __global const int * ofs_tab,
                         __global const int * map_tab, __global const float * alpha_tab)
{
    int dx = get_global_id(0);
    int dy = get_global_id(1);

    if (dx < dst_cols && dy < dst_rows)
    {
        int dst_index = mad24(dy, dst_step, dst_offset);

        __global const int * xmap_tab = map_tab;
        __global const int * ymap_tab = (__global const int *)(map_tab + (src_cols << 1));
        __global const float * xalpha_tab = alpha_tab;
        __global const float * yalpha_tab = (__global const float *)(alpha_tab + (src_cols << 1));
        __global const int * xofs_tab = ofs_tab;
        __global const int * yofs_tab = (__global const int *)(ofs_tab + dst_cols + 1);

        // Half-open ranges [k0, k1) into the map and alpha tables for this pixel.
        int xk0 = xofs_tab[dx], xk1 = xofs_tab[dx + 1];
        int yk0 = yofs_tab[dy], yk1 = yofs_tab[dy + 1];

        // First and last source row and column contributing to this pixel.
        int sy0 = ymap_tab[yk0], sy1 = ymap_tab[yk1 - 1];
        int sx0 = xmap_tab[xk0], sx1 = xmap_tab[xk1 - 1];

        WTV sum = (WTV)(0);
        int src_index = mad24(sy0, src_step, src_offset);

        for (int sy = sy0, yk = yk0; sy <= sy1; ++sy, src_index += src_step, ++yk)
        {
            WTV beta = (WTV)(yalpha_tab[yk]);
            WTV buf = (WTV)(0);

            // Weighted sum along the row, then weight the row as a whole.
            for (int sx = sx0, xk = xk0; sx <= sx1; ++sx, ++xk)
            {
                WTV alpha = (WTV)(xalpha_tab[xk]);
                buf += convertToWTV(loadpix(src + mad24(sx, TSIZE, src_index))) * alpha;
            }
            sum += buf * beta;
        }

        storepix(convertToT(sum), dst + mad24(dx, TSIZE, dst_index));
    }
}

#endif

#endif