1793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler/*M///////////////////////////////////////////////////////////////////////////////////////
2793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
3793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
5793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//  By downloading, copying, installing or using the software you agree to this license.
6793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//  If you do not agree to this license, do not download, install,
7793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//  copy or use the software.
8793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
9793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
10793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//                           License Agreement
11793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//                For Open Source Computer Vision Library
12793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
13793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// Third party copyrights are property of their respective owners.
16793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
17793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// Redistribution and use in source and binary forms, with or without modification,
18793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// are permitted provided that the following conditions are met:
19793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
20793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//   * Redistribution's of source code must retain the above copyright notice,
21793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//     this list of conditions and the following disclaimer.
22793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
23793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//   * Redistribution's in binary form must reproduce the above copyright notice,
24793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//     this list of conditions and the following disclaimer in the documentation
25793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//     and/or other materials provided with the distribution.
26793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
27793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//   * The name of the copyright holders may not be used to endorse or promote products
28793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//     derived from this software without specific prior written permission.
29793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
30793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// This software is provided by the copyright holders and contributors "as is" and
31793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// any express or implied warranties, including, but not limited to, the implied
32793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// warranties of merchantability and fitness for a particular purpose are disclaimed.
33793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// In no event shall the Intel Corporation or contributors be liable for any direct,
34793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// indirect, incidental, special, exemplary, or consequential damages
35793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// (including, but not limited to, procurement of substitute goods or services;
36793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// loss of use, data, or profits; or business interruption) however caused
37793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// and on any theory of liability, whether in contract, strict liability,
38793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// or tort (including negligence or otherwise) arising in any way out of
39793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// the use of this software, even if advised of the possibility of such damage.
40793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
41793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//M*/
42793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
43793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#if !defined CUDA_DISABLER
44793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
45793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#include "opencv2/core/cuda/common.hpp"
46793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#include "opencv2/core/cuda/transform.hpp"
47793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#include "opencv2/core/cuda/functional.hpp"
48793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#include "opencv2/core/cuda/reduce.hpp"
49793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
50793ee12c6df9cad3806238d32528c49a3ff9331dNoah Preslernamespace cv { namespace cuda { namespace device
51793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler{
52793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    #define SOLVE_PNP_RANSAC_MAX_NUM_ITERS 200
53793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
54793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    namespace transform_points
55793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    {
56793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        __constant__ float3 crot0;
57793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        __constant__ float3 crot1;
58793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        __constant__ float3 crot2;
59793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        __constant__ float3 ctransl;
60793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
61793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        struct TransformOp : unary_function<float3, float3>
62793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        {
63793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            __device__ __forceinline__ float3 operator()(const float3& p) const
64793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            {
65793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                return make_float3(
66793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                        crot0.x * p.x + crot0.y * p.y + crot0.z * p.z + ctransl.x,
67793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                        crot1.x * p.x + crot1.y * p.y + crot1.z * p.z + ctransl.y,
68793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                        crot2.x * p.x + crot2.y * p.y + crot2.z * p.z + ctransl.z);
69793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            }
70793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            __host__ __device__ __forceinline__ TransformOp() {}
71793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            __host__ __device__ __forceinline__ TransformOp(const TransformOp&) {}
72793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        };
73793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
74793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        void call(const PtrStepSz<float3> src, const float* rot,
75793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                  const float* transl, PtrStepSz<float3> dst,
76793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                  cudaStream_t stream)
77793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        {
78793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            cudaSafeCall(cudaMemcpyToSymbol(crot0, rot, sizeof(float) * 3));
79793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            cudaSafeCall(cudaMemcpyToSymbol(crot1, rot + 3, sizeof(float) * 3));
80793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            cudaSafeCall(cudaMemcpyToSymbol(crot2, rot + 6, sizeof(float) * 3));
81793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
82793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            cv::cuda::device::transform(src, dst, TransformOp(), WithOutMask(), stream);
83793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        }
84793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    } // namespace transform_points
85793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
86793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    namespace project_points
87793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    {
88793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        __constant__ float3 crot0;
89793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        __constant__ float3 crot1;
90793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        __constant__ float3 crot2;
91793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        __constant__ float3 ctransl;
92793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        __constant__ float3 cproj0;
93793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        __constant__ float3 cproj1;
94793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
95793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        struct ProjectOp : unary_function<float3, float3>
96793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        {
97793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            __device__ __forceinline__ float2 operator()(const float3& p) const
98793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            {
99793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                // Rotate and translate in 3D
100793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                float3 t = make_float3(
101793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                        crot0.x * p.x + crot0.y * p.y + crot0.z * p.z + ctransl.x,
102793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                        crot1.x * p.x + crot1.y * p.y + crot1.z * p.z + ctransl.y,
103793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                        crot2.x * p.x + crot2.y * p.y + crot2.z * p.z + ctransl.z);
104793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                // Project on 2D plane
105793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                return make_float2(
106793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                        (cproj0.x * t.x + cproj0.y * t.y) / t.z + cproj0.z,
107793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                        (cproj1.x * t.x + cproj1.y * t.y) / t.z + cproj1.z);
108793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            }
109793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            __host__ __device__ __forceinline__ ProjectOp() {}
110793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            __host__ __device__ __forceinline__ ProjectOp(const ProjectOp&) {}
111793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        };
112793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
113793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        void call(const PtrStepSz<float3> src, const float* rot,
114793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                  const float* transl, const float* proj, PtrStepSz<float2> dst,
115793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                  cudaStream_t stream)
116793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        {
117793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            cudaSafeCall(cudaMemcpyToSymbol(crot0, rot, sizeof(float) * 3));
118793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            cudaSafeCall(cudaMemcpyToSymbol(crot1, rot + 3, sizeof(float) * 3));
119793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            cudaSafeCall(cudaMemcpyToSymbol(crot2, rot + 6, sizeof(float) * 3));
120793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
121793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            cudaSafeCall(cudaMemcpyToSymbol(cproj0, proj, sizeof(float) * 3));
122793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            cudaSafeCall(cudaMemcpyToSymbol(cproj1, proj + 3, sizeof(float) * 3));
123793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            cv::cuda::device::transform(src, dst, ProjectOp(), WithOutMask(), stream);
124793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        }
125793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    } // namespace project_points
126793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
127793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    namespace solve_pnp_ransac
128793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    {
129793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        __constant__ float3 crot_matrices[SOLVE_PNP_RANSAC_MAX_NUM_ITERS * 3];
130793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        __constant__ float3 ctransl_vectors[SOLVE_PNP_RANSAC_MAX_NUM_ITERS];
131793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
132793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        int maxNumIters()
133793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        {
134793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            return SOLVE_PNP_RANSAC_MAX_NUM_ITERS;
135793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        }
136793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
137793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        __device__ __forceinline__ float sqr(float x)
138793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        {
139793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            return x * x;
140793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        }
141793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
142793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        template <int BLOCK_SIZE>
143793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        __global__ void computeHypothesisScoresKernel(
144793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                const int num_points, const float3* object, const float2* image,
145793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                const float dist_threshold, int* g_num_inliers)
146793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        {
147793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            const float3* const &rot_mat = crot_matrices + blockIdx.x * 3;
148793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            const float3 &transl_vec = ctransl_vectors[blockIdx.x];
149793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            int num_inliers = 0;
150793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
151793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            for (int i = threadIdx.x; i < num_points; i += blockDim.x)
152793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            {
153793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                float3 p = object[i];
154793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                p = make_float3(
155793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                        rot_mat[0].x * p.x + rot_mat[0].y * p.y + rot_mat[0].z * p.z + transl_vec.x,
156793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                        rot_mat[1].x * p.x + rot_mat[1].y * p.y + rot_mat[1].z * p.z + transl_vec.y,
157793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                        rot_mat[2].x * p.x + rot_mat[2].y * p.y + rot_mat[2].z * p.z + transl_vec.z);
158793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                p.x /= p.z;
159793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                p.y /= p.z;
160793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                float2 image_p = image[i];
161793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                if (sqr(p.x - image_p.x) + sqr(p.y - image_p.y) < dist_threshold)
162793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                    ++num_inliers;
163793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            }
164793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
165793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            __shared__ int s_num_inliers[BLOCK_SIZE];
166793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            reduce<BLOCK_SIZE>(s_num_inliers, num_inliers, threadIdx.x, plus<int>());
167793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
168793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            if (threadIdx.x == 0)
169793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                g_num_inliers[blockIdx.x] = num_inliers;
170793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        }
171793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
172793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        void computeHypothesisScores(
173793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                const int num_hypotheses, const int num_points, const float* rot_matrices,
174793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                const float3* transl_vectors, const float3* object, const float2* image,
175793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                const float dist_threshold, int* hypothesis_scores)
176793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        {
177793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            cudaSafeCall(cudaMemcpyToSymbol(crot_matrices, rot_matrices, num_hypotheses * 3 * sizeof(float3)));
178793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            cudaSafeCall(cudaMemcpyToSymbol(ctransl_vectors, transl_vectors, num_hypotheses * sizeof(float3)));
179793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
180793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            dim3 threads(256);
181793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            dim3 grid(num_hypotheses);
182793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
183793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            computeHypothesisScoresKernel<256><<<grid, threads>>>(
184793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                    num_points, object, image, dist_threshold, hypothesis_scores);
185793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            cudaSafeCall( cudaGetLastError() );
186793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
187793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            cudaSafeCall( cudaDeviceSynchronize() );
188793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        }
189793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    } // namespace solvepnp_ransac
190793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler}}} // namespace cv { namespace cuda { namespace cudev
191793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
192793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
193793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#endif /* CUDA_DISABLER */
194