1/*M/////////////////////////////////////////////////////////////////////////////////////// 2// 3// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4// 5// By downloading, copying, installing or using the software you agree to this license. 6// If you do not agree to this license, do not download, install, 7// copy or use the software. 8// 9// 10// License Agreement 11// For Open Source Computer Vision Library 12// 13// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14// Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15// Third party copyrights are property of their respective owners. 16// 17// Redistribution and use in source and binary forms, with or without modification, 18// are permitted provided that the following conditions are met: 19// 20// * Redistribution's of source code must retain the above copyright notice, 21// this list of conditions and the following disclaimer. 22// 23// * Redistribution's in binary form must reproduce the above copyright notice, 24// this list of conditions and the following disclaimer in the documentation 25// and/or other materials provided with the distribution. 26// 27// * The name of the copyright holders may not be used to endorse or promote products 28// derived from this software without specific prior written permission. 29// 30// This software is provided by the copyright holders and contributors "as is" and 31// any express or implied warranties, including, but not limited to, the implied 32// warranties of merchantability and fitness for a particular purpose are disclaimed. 33// In no event shall the Intel Corporation or contributors be liable for any direct, 34// indirect, incidental, special, exemplary, or consequential damages 35// (including, but not limited to, procurement of substitute goods or services; 36// loss of use, data, or profits; or business interruption) however caused 37// and on any theory of liability, whether in contract, strict liability, 38// or tort (including negligence or otherwise) arising in any way out of 39// the use of this software, even if advised of the possibility of such damage. 40// 41//M*/ 42 43#if !defined CUDA_DISABLER 44 45#include "opencv2/core/cuda/common.hpp" 46#include "opencv2/core/cuda/utility.hpp" 47#include "opencv2/core/cuda/limits.hpp" 48#include "opencv2/core/cuda/vec_distance.hpp" 49#include "opencv2/core/cuda/datamov_utils.hpp" 50 51namespace cv { namespace cuda { namespace device 52{ 53 namespace bf_radius_match 54 { 55 /////////////////////////////////////////////////////////////////////////////// 56 // Match Unrolled 57 58 template <int BLOCK_SIZE, int MAX_DESC_LEN, bool SAVE_IMG_IDX, typename Dist, typename T, typename Mask> 59 __global__ void matchUnrolled(const PtrStepSz<T> query, int imgIdx, const PtrStepSz<T> train, float maxDistance, const Mask mask, 60 PtrStepi bestTrainIdx, PtrStepi bestImgIdx, PtrStepf bestDistance, unsigned int* nMatches, int maxCount) 61 { 62 extern __shared__ int smem[]; 63 64 const int queryIdx = blockIdx.y * BLOCK_SIZE + threadIdx.y; 65 const int trainIdx = blockIdx.x * BLOCK_SIZE + threadIdx.x; 66 67 typename Dist::value_type* s_query = (typename Dist::value_type*)(smem); 68 typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * BLOCK_SIZE); 69 70 Dist dist; 71 72 #pragma unroll 73 for (int i = 0; i < MAX_DESC_LEN / BLOCK_SIZE; ++i) 74 { 75 const int loadX = threadIdx.x + i * BLOCK_SIZE; 76 77 s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = 0; 78 s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0; 79 80 if (loadX < query.cols) 81 { 82 T val; 83 84 ForceGlob<T>::Load(query.ptr(::min(queryIdx, query.rows - 1)), loadX, val); 85 s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = val; 86 87 ForceGlob<T>::Load(train.ptr(::min(blockIdx.x * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val); 88 s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val; 89 } 90 91 __syncthreads(); 92 93 #pragma unroll 94 for (int j = 0; j < BLOCK_SIZE; ++j) 95 dist.reduceIter(s_query[threadIdx.y * BLOCK_SIZE + j], s_train[j * BLOCK_SIZE + threadIdx.x]); 96 97 __syncthreads(); 98 } 99 100 float distVal = (typename Dist::result_type)dist; 101 102 if (queryIdx < query.rows && trainIdx < train.rows && mask(queryIdx, trainIdx) && distVal < maxDistance) 103 { 104 unsigned int ind = atomicInc(nMatches + queryIdx, (unsigned int) -1); 105 if (ind < maxCount) 106 { 107 bestTrainIdx.ptr(queryIdx)[ind] = trainIdx; 108 if (SAVE_IMG_IDX) bestImgIdx.ptr(queryIdx)[ind] = imgIdx; 109 bestDistance.ptr(queryIdx)[ind] = distVal; 110 } 111 } 112 } 113 114 template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask> 115 void matchUnrolled(const PtrStepSz<T>& query, const PtrStepSz<T>& train, float maxDistance, const Mask& mask, 116 const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream) 117 { 118 const dim3 block(BLOCK_SIZE, BLOCK_SIZE); 119 const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE)); 120 121 const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int); 122 123 matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, false, Dist><<<grid, block, smemSize, stream>>>(query, 0, train, maxDistance, mask, 124 trainIdx, PtrStepi(), distance, nMatches.data, trainIdx.cols); 125 cudaSafeCall( cudaGetLastError() ); 126 127 if (stream == 0) 128 cudaSafeCall( cudaDeviceSynchronize() ); 129 } 130 131 template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T> 132 void matchUnrolled(const PtrStepSz<T>& query, const PtrStepSz<T>* trains, int n, float maxDistance, const PtrStepSzb* masks, 133 const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, 134 cudaStream_t stream) 135 { 136 const dim3 block(BLOCK_SIZE, BLOCK_SIZE); 137 138 const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int); 139 140 for (int i = 0; i < n; ++i) 141 { 142 const PtrStepSz<T> train = trains[i]; 143 144 const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE)); 145 146 if (masks != 0 && masks[i].data) 147 { 148 matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, true, Dist><<<grid, block, smemSize, stream>>>(query, i, train, maxDistance, SingleMask(masks[i]), 149 trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols); 150 } 151 else 152 { 153 matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, true, Dist><<<grid, block, smemSize, stream>>>(query, i, train, maxDistance, WithOutMask(), 154 trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols); 155 } 156 cudaSafeCall( cudaGetLastError() ); 157 } 158 159 if (stream == 0) 160 cudaSafeCall( cudaDeviceSynchronize() ); 161 } 162 163 /////////////////////////////////////////////////////////////////////////////// 164 // Match 165 166 template <int BLOCK_SIZE, bool SAVE_IMG_IDX, typename Dist, typename T, typename Mask> 167 __global__ void match(const PtrStepSz<T> query, int imgIdx, const PtrStepSz<T> train, float maxDistance, const Mask mask, 168 PtrStepi bestTrainIdx, PtrStepi bestImgIdx, PtrStepf bestDistance, unsigned int* nMatches, int maxCount) 169 { 170 extern __shared__ int smem[]; 171 172 const int queryIdx = blockIdx.y * BLOCK_SIZE + threadIdx.y; 173 const int trainIdx = blockIdx.x * BLOCK_SIZE + threadIdx.x; 174 175 typename Dist::value_type* s_query = (typename Dist::value_type*)(smem); 176 typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * BLOCK_SIZE); 177 178 Dist dist; 179 180 for (int i = 0, endi = (query.cols + BLOCK_SIZE - 1) / BLOCK_SIZE; i < endi; ++i) 181 { 182 const int loadX = threadIdx.x + i * BLOCK_SIZE; 183 184 s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = 0; 185 s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0; 186 187 if (loadX < query.cols) 188 { 189 T val; 190 191 ForceGlob<T>::Load(query.ptr(::min(queryIdx, query.rows - 1)), loadX, val); 192 s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = val; 193 194 ForceGlob<T>::Load(train.ptr(::min(blockIdx.x * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val); 195 s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val; 196 } 197 198 __syncthreads(); 199 200 #pragma unroll 201 for (int j = 0; j < BLOCK_SIZE; ++j) 202 dist.reduceIter(s_query[threadIdx.y * BLOCK_SIZE + j], s_train[j * BLOCK_SIZE + threadIdx.x]); 203 204 __syncthreads(); 205 } 206 207 float distVal = (typename Dist::result_type)dist; 208 209 if (queryIdx < query.rows && trainIdx < train.rows && mask(queryIdx, trainIdx) && distVal < maxDistance) 210 { 211 unsigned int ind = atomicInc(nMatches + queryIdx, (unsigned int) -1); 212 if (ind < maxCount) 213 { 214 bestTrainIdx.ptr(queryIdx)[ind] = trainIdx; 215 if (SAVE_IMG_IDX) bestImgIdx.ptr(queryIdx)[ind] = imgIdx; 216 bestDistance.ptr(queryIdx)[ind] = distVal; 217 } 218 } 219 } 220 221 template <int BLOCK_SIZE, typename Dist, typename T, typename Mask> 222 void match(const PtrStepSz<T>& query, const PtrStepSz<T>& train, float maxDistance, const Mask& mask, 223 const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, 224 cudaStream_t stream) 225 { 226 const dim3 block(BLOCK_SIZE, BLOCK_SIZE); 227 const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE)); 228 229 const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int); 230 231 match<BLOCK_SIZE, false, Dist><<<grid, block, smemSize, stream>>>(query, 0, train, maxDistance, mask, 232 trainIdx, PtrStepi(), distance, nMatches.data, trainIdx.cols); 233 cudaSafeCall( cudaGetLastError() ); 234 235 if (stream == 0) 236 cudaSafeCall( cudaDeviceSynchronize() ); 237 } 238 239 template <int BLOCK_SIZE, typename Dist, typename T> 240 void match(const PtrStepSz<T>& query, const PtrStepSz<T>* trains, int n, float maxDistance, const PtrStepSzb* masks, 241 const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, 242 cudaStream_t stream) 243 { 244 const dim3 block(BLOCK_SIZE, BLOCK_SIZE); 245 246 const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int); 247 248 for (int i = 0; i < n; ++i) 249 { 250 const PtrStepSz<T> train = trains[i]; 251 252 const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE)); 253 254 if (masks != 0 && masks[i].data) 255 { 256 match<BLOCK_SIZE, true, Dist><<<grid, block, smemSize, stream>>>(query, i, train, maxDistance, SingleMask(masks[i]), 257 trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols); 258 } 259 else 260 { 261 match<BLOCK_SIZE, true, Dist><<<grid, block, smemSize, stream>>>(query, i, train, maxDistance, WithOutMask(), 262 trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols); 263 } 264 cudaSafeCall( cudaGetLastError() ); 265 } 266 267 if (stream == 0) 268 cudaSafeCall( cudaDeviceSynchronize() ); 269 } 270 271 /////////////////////////////////////////////////////////////////////////////// 272 // Match dispatcher 273 274 template <typename Dist, typename T, typename Mask> 275 void matchDispatcher(const PtrStepSz<T>& query, const PtrStepSz<T>& train, float maxDistance, const Mask& mask, 276 const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, 277 cudaStream_t stream) 278 { 279 if (query.cols <= 64) 280 { 281 matchUnrolled<16, 64, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream); 282 } 283 else if (query.cols <= 128) 284 { 285 matchUnrolled<16, 128, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream); 286 } 287 /*else if (query.cols <= 256) 288 { 289 matchUnrolled<16, 256, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream); 290 } 291 else if (query.cols <= 512) 292 { 293 matchUnrolled<16, 512, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream); 294 } 295 else if (query.cols <= 1024) 296 { 297 matchUnrolled<16, 1024, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream); 298 }*/ 299 else 300 { 301 match<16, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream); 302 } 303 } 304 305 template <typename Dist, typename T> 306 void matchDispatcher(const PtrStepSz<T>& query, const PtrStepSz<T>* trains, int n, float maxDistance, const PtrStepSzb* masks, 307 const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, 308 cudaStream_t stream) 309 { 310 if (query.cols <= 64) 311 { 312 matchUnrolled<16, 64, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream); 313 } 314 else if (query.cols <= 128) 315 { 316 matchUnrolled<16, 128, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream); 317 } 318 /*else if (query.cols <= 256) 319 { 320 matchUnrolled<16, 256, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream); 321 } 322 else if (query.cols <= 512) 323 { 324 matchUnrolled<16, 512, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream); 325 } 326 else if (query.cols <= 1024) 327 { 328 matchUnrolled<16, 1024, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream); 329 }*/ 330 else 331 { 332 match<16, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream); 333 } 334 } 335 336 /////////////////////////////////////////////////////////////////////////////// 337 // Radius Match caller 338 339 template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask, 340 const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, 341 cudaStream_t stream) 342 { 343 if (mask.data) 344 { 345 matchDispatcher< L1Dist<T> >(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, SingleMask(mask), 346 trainIdx, distance, nMatches, 347 stream); 348 } 349 else 350 { 351 matchDispatcher< L1Dist<T> >(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, WithOutMask(), 352 trainIdx, distance, nMatches, 353 stream); 354 } 355 } 356 357 template void matchL1_gpu<uchar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 358 //template void matchL1_gpu<schar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 359 template void matchL1_gpu<ushort>(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 360 template void matchL1_gpu<short >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 361 template void matchL1_gpu<int >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 362 template void matchL1_gpu<float >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 363 364 template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask, 365 const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, 366 cudaStream_t stream) 367 { 368 if (mask.data) 369 { 370 matchDispatcher<L2Dist>(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, SingleMask(mask), 371 trainIdx, distance, nMatches, 372 stream); 373 } 374 else 375 { 376 matchDispatcher<L2Dist>(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, WithOutMask(), 377 trainIdx, distance, nMatches, 378 stream); 379 } 380 } 381 382 //template void matchL2_gpu<uchar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 383 //template void matchL2_gpu<schar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 384 //template void matchL2_gpu<ushort>(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 385 //template void matchL2_gpu<short >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 386 //template void matchL2_gpu<int >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 387 template void matchL2_gpu<float >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 388 389 template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask, 390 const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, 391 cudaStream_t stream) 392 { 393 if (mask.data) 394 { 395 matchDispatcher<HammingDist>(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, SingleMask(mask), 396 trainIdx, distance, nMatches, 397 stream); 398 } 399 else 400 { 401 matchDispatcher<HammingDist>(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, WithOutMask(), 402 trainIdx, distance, nMatches, 403 stream); 404 } 405 } 406 407 template void matchHamming_gpu<uchar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 408 //template void matchHamming_gpu<schar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 409 template void matchHamming_gpu<ushort>(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 410 //template void matchHamming_gpu<short >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 411 template void matchHamming_gpu<int >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 412 413 template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, 414 const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, 415 cudaStream_t stream) 416 { 417 matchDispatcher< L1Dist<T> >(static_cast< PtrStepSz<T> >(query), (const PtrStepSz<T>*)trains, n, maxDistance, masks, 418 trainIdx, imgIdx, distance, nMatches, 419 stream); 420 } 421 422 template void matchL1_gpu<uchar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 423 //template void matchL1_gpu<schar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 424 template void matchL1_gpu<ushort>(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 425 template void matchL1_gpu<short >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 426 template void matchL1_gpu<int >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 427 template void matchL1_gpu<float >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 428 429 template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, 430 const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, 431 cudaStream_t stream) 432 { 433 matchDispatcher<L2Dist>(static_cast< PtrStepSz<T> >(query), (const PtrStepSz<T>*)trains, n, maxDistance, masks, 434 trainIdx, imgIdx, distance, nMatches, 435 stream); 436 } 437 438 //template void matchL2_gpu<uchar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 439 //template void matchL2_gpu<schar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 440 //template void matchL2_gpu<ushort>(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 441 //template void matchL2_gpu<short >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 442 //template void matchL2_gpu<int >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 443 template void matchL2_gpu<float >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 444 445 template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, 446 const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, 447 cudaStream_t stream) 448 { 449 matchDispatcher<HammingDist>(static_cast< PtrStepSz<T> >(query), (const PtrStepSz<T>*)trains, n, maxDistance, masks, 450 trainIdx, imgIdx, distance, nMatches, 451 stream); 452 } 453 454 template void matchHamming_gpu<uchar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 455 //template void matchHamming_gpu<schar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 456 template void matchHamming_gpu<ushort>(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 457 //template void matchHamming_gpu<short >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 458 template void matchHamming_gpu<int >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 459 } // namespace bf_radius_match 460}}} // namespace cv { namespace cuda { namespace cudev 461 462 463#endif /* CUDA_DISABLER */ 464