1793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler/*M/////////////////////////////////////////////////////////////////////////////////////// 2793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 3793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 5793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// By downloading, copying, installing or using the software you agree to this license. 6793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// If you do not agree to this license, do not download, install, 7793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// copy or use the software. 8793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 9793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 10793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// License Agreement 11793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// For Open Source Computer Vision Library 12793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 13793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// Copyright (C) 2013, OpenCV Foundation, all rights reserved. 16793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// Third party copyrights are property of their respective owners. 17793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 18793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// Redistribution and use in source and binary forms, with or without modification, 19793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// are permitted provided that the following conditions are met: 20793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 21793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// * Redistribution's of source code must retain the above copyright notice, 22793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// this list of conditions and the following disclaimer. 23793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 24793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// * Redistribution's in binary form must reproduce the above copyright notice, 25793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// this list of conditions and the following disclaimer in the documentation 26793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// and/or other materials provided with the distribution. 27793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 28793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// * The name of the copyright holders may not be used to endorse or promote products 29793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// derived from this software without specific prior written permission. 30793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 31793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// This software is provided by the copyright holders and contributors "as is" and 32793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// any express or implied warranties, including, but not limited to, the implied 33793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// warranties of merchantability and fitness for a particular purpose are disclaimed. 34793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// In no event shall the Intel Corporation or contributors be liable for any direct, 35793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// indirect, incidental, special, exemplary, or consequential damages 36793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// (including, but not limited to, procurement of substitute goods or services; 37793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// loss of use, data, or profits; or business interruption) however caused 38793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// and on any theory of liability, whether in contract, strict liability, 39793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// or tort (including negligence or otherwise) arising in any way out of 40793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// the use of this software, even if advised of the possibility of such damage. 41793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 42793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//M*/ 43793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 44793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#pragma once 45793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 46793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#ifndef __OPENCV_CUDEV_GRID_PYR_UP_DETAIL_HPP__ 47793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#define __OPENCV_CUDEV_GRID_PYR_UP_DETAIL_HPP__ 48793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 49793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#include "../../common.hpp" 50793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#include "../../util/vec_traits.hpp" 51793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#include "../../util/saturate_cast.hpp" 52793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#include "../../util/type_traits.hpp" 53793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#include "../../ptr2d/glob.hpp" 54793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#include "../../ptr2d/traits.hpp" 55793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 56793ee12c6df9cad3806238d32528c49a3ff9331dNoah Preslernamespace cv { namespace cudev { 57793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 58793ee12c6df9cad3806238d32528c49a3ff9331dNoah Preslernamespace pyramids_detail 59793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler{ 60793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler template <class SrcPtr, typename DstType> 61793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler __global__ void pyrUp(const SrcPtr src, GlobPtr<DstType> dst, const int src_rows, const int src_cols, const int dst_rows, const int dst_cols) 62793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 63793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler typedef typename PtrTraits<SrcPtr>::value_type src_type; 64793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler typedef typename VecTraits<src_type>::elem_type src_elem_type; 65793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler typedef typename LargerType<float, src_elem_type>::type work_elem_type; 66793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler typedef typename MakeVec<work_elem_type, VecTraits<src_type>::cn>::type work_type; 67793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 68793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const int x = blockIdx.x * blockDim.x + threadIdx.x; 69793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const int y = blockIdx.y * blockDim.y + threadIdx.y; 70793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 71793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler __shared__ work_type s_srcPatch[10][10]; 72793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler __shared__ work_type s_dstPatch[20][16]; 73793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 74793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if (threadIdx.x < 10 && threadIdx.y < 10) 75793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 76793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int srcx = static_cast<int>((blockIdx.x * blockDim.x) / 2 + threadIdx.x) - 1; 77793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int srcy = static_cast<int>((blockIdx.y * blockDim.y) / 2 + threadIdx.y) - 1; 78793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 79793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler srcx = ::abs(srcx); 80793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler srcx = ::min(src_cols - 1, srcx); 81793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 82793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler srcy = ::abs(srcy); 83793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler srcy = ::min(src_rows - 1, srcy); 84793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 85793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler s_srcPatch[threadIdx.y][threadIdx.x] = saturate_cast<work_type>(src(srcy, srcx)); 86793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 87793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 88793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler __syncthreads(); 89793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 90793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler work_type sum = VecTraits<work_type>::all(0); 91793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 92793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const int evenFlag = static_cast<int>((threadIdx.x & 1) == 0); 93793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const int oddFlag = static_cast<int>((threadIdx.x & 1) != 0); 94793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const bool eveny = ((threadIdx.y & 1) == 0); 95793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const int tidx = threadIdx.x; 96793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 97793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if (eveny) 98793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 99793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx - 2) >> 1)]; 100793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx - 1) >> 1)]; 101793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler sum = sum + (evenFlag * 0.375f ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx ) >> 1)]; 102793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx + 1) >> 1)]; 103793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx + 2) >> 1)]; 104793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 105793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 106793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler s_dstPatch[2 + threadIdx.y][threadIdx.x] = sum; 107793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 108793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if (threadIdx.y < 2) 109793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 110793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler sum = VecTraits<work_type>::all(0); 111793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 112793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if (eveny) 113793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 114793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx - 2) >> 1)]; 115793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)]; 116793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler sum = sum + (evenFlag * 0.375f ) * s_srcPatch[0][1 + ((tidx ) >> 1)]; 117793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)]; 118793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx + 2) >> 1)]; 119793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 120793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 121793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler s_dstPatch[threadIdx.y][threadIdx.x] = sum; 122793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 123793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 124793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if (threadIdx.y > 13) 125793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 126793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler sum = VecTraits<work_type>::all(0); 127793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 128793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if (eveny) 129793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 130793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx - 2) >> 1)]; 131793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx - 1) >> 1)]; 132793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler sum = sum + (evenFlag * 0.375f ) * s_srcPatch[9][1 + ((tidx ) >> 1)]; 133793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx + 1) >> 1)]; 134793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx + 2) >> 1)]; 135793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 136793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 137793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler s_dstPatch[4 + threadIdx.y][threadIdx.x] = sum; 138793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 139793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 140793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler __syncthreads(); 141793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 142793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler sum = VecTraits<work_type>::all(0); 143793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 144793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const int tidy = threadIdx.y; 145793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 146793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler sum = sum + 0.0625f * s_dstPatch[2 + tidy - 2][threadIdx.x]; 147793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler sum = sum + 0.25f * s_dstPatch[2 + tidy - 1][threadIdx.x]; 148793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler sum = sum + 0.375f * s_dstPatch[2 + tidy ][threadIdx.x]; 149793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler sum = sum + 0.25f * s_dstPatch[2 + tidy + 1][threadIdx.x]; 150793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][threadIdx.x]; 151793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 152793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if (x < dst_cols && y < dst_rows) 153793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler dst(y, x) = saturate_cast<DstType>(4.0f * sum); 154793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 155793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 156793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler template <class SrcPtr, typename DstType> 157793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler __host__ void pyrUp(const SrcPtr& src, const GlobPtr<DstType>& dst, int src_rows, int src_cols, int dst_rows, int dst_cols, cudaStream_t stream) 158793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 159793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const dim3 block(16, 16); 160793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const dim3 grid(divUp(dst_cols, block.x), divUp(dst_rows, block.y)); 161793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 162793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler pyrUp<<<grid, block, 0, stream>>>(src, dst, src_rows, src_cols, dst_rows, dst_cols); 163793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler CV_CUDEV_SAFE_CALL( cudaGetLastError() ); 164793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 165793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if (stream == 0) 166793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() ); 167793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 168793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler} 169793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 170793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler}} 171793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 172793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#endif 173