1793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler/*M/////////////////////////////////////////////////////////////////////////////////////// 2793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 3793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 5793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// By downloading, copying, installing or using the software you agree to this license. 6793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// If you do not agree to this license, do not download, install, 7793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// copy or use the software. 8793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 9793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 10793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// License Agreement 11793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// For Open Source Computer Vision Library 12793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 13793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// Third party copyrights are property of their respective owners. 16793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 17793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// Redistribution and use in source and binary forms, with or without modification, 18793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// are permitted provided that the following conditions are met: 19793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 20793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// * Redistribution's of source code must retain the above copyright notice, 21793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// this list of conditions and the following disclaimer. 22793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 23793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// * Redistribution's in binary form must reproduce the above copyright notice, 24793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// this list of conditions and the following disclaimer in the documentation 25793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// and/or other materials provided with the distribution. 26793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 27793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// * The name of the copyright holders may not be used to endorse or promote products 28793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// derived from this software without specific prior written permission. 29793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 30793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// This software is provided by the copyright holders and contributors "as is" and 31793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// any express or implied warranties, including, but not limited to, the implied 32793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// warranties of merchantability and fitness for a particular purpose are disclaimed. 33793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// In no event shall the Intel Corporation or contributors be liable for any direct, 34793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// indirect, incidental, special, exemplary, or consequential damages 35793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// (including, but not limited to, procurement of substitute goods or services; 36793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// loss of use, data, or profits; or business interruption) however caused 37793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// and on any theory of liability, whether in contract, strict liability, 38793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// or tort (including negligence or otherwise) arising in any way out of 39793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// the use of this software, even if advised of the possibility of such damage. 40793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// 41793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//M*/ 42793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 43793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#if !defined CUDA_DISABLER 44793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 45793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#include "opencv2/core/cuda/common.hpp" 46793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#include "opencv2/core/cuda/transform.hpp" 47793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#include "opencv2/core/cuda/functional.hpp" 48793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#include "opencv2/core/cuda/reduce.hpp" 49793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 50793ee12c6df9cad3806238d32528c49a3ff9331dNoah Preslernamespace cv { namespace cuda { namespace device 51793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler{ 52793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler #define SOLVE_PNP_RANSAC_MAX_NUM_ITERS 200 53793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 54793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler namespace transform_points 55793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 56793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler __constant__ float3 crot0; 57793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler __constant__ float3 crot1; 58793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler __constant__ float3 crot2; 59793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler __constant__ float3 ctransl; 60793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 61793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler struct TransformOp : unary_function<float3, float3> 62793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 63793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler __device__ __forceinline__ float3 operator()(const float3& p) const 64793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 65793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler return make_float3( 66793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler crot0.x * p.x + crot0.y * p.y + crot0.z * p.z + ctransl.x, 67793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler crot1.x * p.x + crot1.y * p.y + crot1.z * p.z + ctransl.y, 68793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler crot2.x * p.x + crot2.y * p.y + crot2.z * p.z + ctransl.z); 69793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 70793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler __host__ __device__ __forceinline__ TransformOp() {} 71793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler __host__ __device__ __forceinline__ TransformOp(const TransformOp&) {} 72793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler }; 73793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 74793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler void call(const PtrStepSz<float3> src, const float* rot, 75793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const float* transl, PtrStepSz<float3> dst, 76793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler cudaStream_t stream) 77793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 78793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler cudaSafeCall(cudaMemcpyToSymbol(crot0, rot, sizeof(float) * 3)); 79793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler cudaSafeCall(cudaMemcpyToSymbol(crot1, rot + 3, sizeof(float) * 3)); 80793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler cudaSafeCall(cudaMemcpyToSymbol(crot2, rot + 6, sizeof(float) * 3)); 81793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3)); 82793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler cv::cuda::device::transform(src, dst, TransformOp(), WithOutMask(), stream); 83793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 84793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } // namespace transform_points 85793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 86793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler namespace project_points 87793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 88793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler __constant__ float3 crot0; 89793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler __constant__ float3 crot1; 90793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler __constant__ float3 crot2; 91793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler __constant__ float3 ctransl; 92793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler __constant__ float3 cproj0; 93793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler __constant__ float3 cproj1; 94793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 95793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler struct ProjectOp : unary_function<float3, float3> 96793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 97793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler __device__ __forceinline__ float2 operator()(const float3& p) const 98793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 99793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler // Rotate and translate in 3D 100793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler float3 t = make_float3( 101793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler crot0.x * p.x + crot0.y * p.y + crot0.z * p.z + ctransl.x, 102793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler crot1.x * p.x + crot1.y * p.y + crot1.z * p.z + ctransl.y, 103793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler crot2.x * p.x + crot2.y * p.y + crot2.z * p.z + ctransl.z); 104793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler // Project on 2D plane 105793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler return make_float2( 106793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler (cproj0.x * t.x + cproj0.y * t.y) / t.z + cproj0.z, 107793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler (cproj1.x * t.x + cproj1.y * t.y) / t.z + cproj1.z); 108793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 109793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler __host__ __device__ __forceinline__ ProjectOp() {} 110793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler __host__ __device__ __forceinline__ ProjectOp(const ProjectOp&) {} 111793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler }; 112793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 113793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler void call(const PtrStepSz<float3> src, const float* rot, 114793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const float* transl, const float* proj, PtrStepSz<float2> dst, 115793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler cudaStream_t stream) 116793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 117793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler cudaSafeCall(cudaMemcpyToSymbol(crot0, rot, sizeof(float) * 3)); 118793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler cudaSafeCall(cudaMemcpyToSymbol(crot1, rot + 3, sizeof(float) * 3)); 119793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler cudaSafeCall(cudaMemcpyToSymbol(crot2, rot + 6, sizeof(float) * 3)); 120793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3)); 121793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler cudaSafeCall(cudaMemcpyToSymbol(cproj0, proj, sizeof(float) * 3)); 122793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler cudaSafeCall(cudaMemcpyToSymbol(cproj1, proj + 3, sizeof(float) * 3)); 123793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler cv::cuda::device::transform(src, dst, ProjectOp(), WithOutMask(), stream); 124793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 125793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } // namespace project_points 126793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 127793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler namespace solve_pnp_ransac 128793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 129793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler __constant__ float3 crot_matrices[SOLVE_PNP_RANSAC_MAX_NUM_ITERS * 3]; 130793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler __constant__ float3 ctransl_vectors[SOLVE_PNP_RANSAC_MAX_NUM_ITERS]; 131793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 132793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int maxNumIters() 133793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 134793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler return SOLVE_PNP_RANSAC_MAX_NUM_ITERS; 135793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 136793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 137793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler __device__ __forceinline__ float sqr(float x) 138793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 139793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler return x * x; 140793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 141793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 142793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler template <int BLOCK_SIZE> 143793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler __global__ void computeHypothesisScoresKernel( 144793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const int num_points, const float3* object, const float2* image, 145793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const float dist_threshold, int* g_num_inliers) 146793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 147793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const float3* const &rot_mat = crot_matrices + blockIdx.x * 3; 148793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const float3 &transl_vec = ctransl_vectors[blockIdx.x]; 149793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler int num_inliers = 0; 150793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 151793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler for (int i = threadIdx.x; i < num_points; i += blockDim.x) 152793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 153793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler float3 p = object[i]; 154793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler p = make_float3( 155793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler rot_mat[0].x * p.x + rot_mat[0].y * p.y + rot_mat[0].z * p.z + transl_vec.x, 156793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler rot_mat[1].x * p.x + rot_mat[1].y * p.y + rot_mat[1].z * p.z + transl_vec.y, 157793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler rot_mat[2].x * p.x + rot_mat[2].y * p.y + rot_mat[2].z * p.z + transl_vec.z); 158793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler p.x /= p.z; 159793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler p.y /= p.z; 160793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler float2 image_p = image[i]; 161793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if (sqr(p.x - image_p.x) + sqr(p.y - image_p.y) < dist_threshold) 162793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler ++num_inliers; 163793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 164793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 165793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler __shared__ int s_num_inliers[BLOCK_SIZE]; 166793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler reduce<BLOCK_SIZE>(s_num_inliers, num_inliers, threadIdx.x, plus<int>()); 167793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 168793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler if (threadIdx.x == 0) 169793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler g_num_inliers[blockIdx.x] = num_inliers; 170793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 171793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 172793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler void computeHypothesisScores( 173793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const int num_hypotheses, const int num_points, const float* rot_matrices, 174793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const float3* transl_vectors, const float3* object, const float2* image, 175793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler const float dist_threshold, int* hypothesis_scores) 176793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler { 177793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler cudaSafeCall(cudaMemcpyToSymbol(crot_matrices, rot_matrices, num_hypotheses * 3 * sizeof(float3))); 178793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler cudaSafeCall(cudaMemcpyToSymbol(ctransl_vectors, transl_vectors, num_hypotheses * sizeof(float3))); 179793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 180793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler dim3 threads(256); 181793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler dim3 grid(num_hypotheses); 182793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 183793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler computeHypothesisScoresKernel<256><<<grid, threads>>>( 184793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler num_points, object, image, dist_threshold, hypothesis_scores); 185793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler cudaSafeCall( cudaGetLastError() ); 186793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 187793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler cudaSafeCall( cudaDeviceSynchronize() ); 188793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } 189793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler } // namespace solvepnp_ransac 190793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler}}} // namespace cv { namespace cuda { namespace cudev 191793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 192793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler 193793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#endif /* CUDA_DISABLER */ 194