1/*M/////////////////////////////////////////////////////////////////////////////////////// 2// 3// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4// 5// By downloading, copying, installing or using the software you agree to this license. 6// If you do not agree to this license, do not download, install, 7// copy or use the software. 8// 9// 10// Intel License Agreement 11// For Open Source Computer Vision Library 12// 13// Copyright (C) 2000, Intel Corporation, all rights reserved. 14// Copyright (C) 2014, Itseez Inc., all rights reserved. 15// Third party copyrights are property of their respective owners. 16// 17// Redistribution and use in source and binary forms, with or without modification, 18// are permitted provided that the following conditions are met: 19// 20// * Redistribution's of source code must retain the above copyright notice, 21// this list of conditions and the following disclaimer. 22// 23// * Redistribution's in binary form must reproduce the above copyright notice, 24// this list of conditions and the following disclaimer in the documentation 25// and/or other materials provided with the distribution. 26// 27// * The name of Intel Corporation may not be used to endorse or promote products 28// derived from this software without specific prior written permission. 29// 30// This software is provided by the copyright holders and contributors "as is" and 31// any express or implied warranties, including, but not limited to, the implied 32// warranties of merchantability and fitness for a particular purpose are disclaimed. 33// In no event shall the Intel Corporation or contributors be liable for any direct, 34// indirect, incidental, special, exemplary, or consequential damages 35// (including, but not limited to, procurement of substitute goods or services; 36// loss of use, data, or profits; or business interruption) however caused 37// and on any theory of liability, whether in contract, strict liability, 38// or tort (including negligence or otherwise) arising in any way out of 39// the use of this software, even if advised of the possibility of such damage. 40// 41//M*/ 42 43#include "precomp.hpp" 44#include "opencl_kernels_imgproc.hpp" 45 46 47#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) 48#define USE_IPP_CANNY 1 49#else 50#undef USE_IPP_CANNY 51#endif 52 53 54namespace cv 55{ 56 57#ifdef USE_IPP_CANNY 58static bool ippCanny(const Mat& _src, Mat& _dst, float low, float high) 59{ 60 int size = 0, size1 = 0; 61 IppiSize roi = { _src.cols, _src.rows }; 62 63 if (ippiFilterSobelNegVertGetBufferSize_8u16s_C1R(roi, ippMskSize3x3, &size) < 0) 64 return false; 65 if (ippiFilterSobelHorizGetBufferSize_8u16s_C1R(roi, ippMskSize3x3, &size1) < 0) 66 return false; 67 size = std::max(size, size1); 68 69 if (ippiCannyGetSize(roi, &size1) < 0) 70 return false; 71 size = std::max(size, size1); 72 73 AutoBuffer<uchar> buf(size + 64); 74 uchar* buffer = alignPtr((uchar*)buf, 32); 75 76 Mat _dx(_src.rows, _src.cols, CV_16S); 77 if( ippiFilterSobelNegVertBorder_8u16s_C1R(_src.ptr(), (int)_src.step, 78 _dx.ptr<short>(), (int)_dx.step, roi, 79 ippMskSize3x3, ippBorderRepl, 0, buffer) < 0 ) 80 return false; 81 82 Mat _dy(_src.rows, _src.cols, CV_16S); 83 if( ippiFilterSobelHorizBorder_8u16s_C1R(_src.ptr(), (int)_src.step, 84 _dy.ptr<short>(), (int)_dy.step, roi, 85 ippMskSize3x3, ippBorderRepl, 0, buffer) < 0 ) 86 return false; 87 88 if( ippiCanny_16s8u_C1R(_dx.ptr<short>(), (int)_dx.step, 89 _dy.ptr<short>(), (int)_dy.step, 90 _dst.ptr(), (int)_dst.step, roi, low, high, buffer) < 0 ) 91 return false; 92 return true; 93} 94#endif 95 96#ifdef HAVE_OPENCL 97 98static bool ocl_Canny(InputArray _src, OutputArray _dst, float low_thresh, float high_thresh, 99 int aperture_size, bool L2gradient, int cn, const Size & size) 100{ 101 UMat map; 102 103 const ocl::Device &dev = ocl::Device::getDefault(); 104 int max_wg_size = (int)dev.maxWorkGroupSize(); 105 106 int lSizeX = 32; 107 int lSizeY = max_wg_size / 32; 108 109 if (lSizeY == 0) 110 { 111 lSizeX = 16; 112 lSizeY = max_wg_size / 16; 113 } 114 if (lSizeY == 0) 115 { 116 lSizeY = 1; 117 } 118 119 if (L2gradient) 120 { 121 low_thresh = std::min(32767.0f, low_thresh); 122 high_thresh = std::min(32767.0f, high_thresh); 123 124 if (low_thresh > 0) 125 low_thresh *= low_thresh; 126 if (high_thresh > 0) 127 high_thresh *= high_thresh; 128 } 129 int low = cvFloor(low_thresh), high = cvFloor(high_thresh); 130 131 if (aperture_size == 3 && !_src.isSubmatrix()) 132 { 133 /* 134 stage1_with_sobel: 135 Sobel operator 136 Calc magnitudes 137 Non maxima suppression 138 Double thresholding 139 */ 140 char cvt[40]; 141 ocl::Kernel with_sobel("stage1_with_sobel", ocl::imgproc::canny_oclsrc, 142 format("-D WITH_SOBEL -D cn=%d -D TYPE=%s -D convert_floatN=%s -D floatN=%s -D GRP_SIZEX=%d -D GRP_SIZEY=%d%s", 143 cn, ocl::memopTypeToStr(_src.depth()), 144 ocl::convertTypeStr(_src.depth(), CV_32F, cn, cvt), 145 ocl::typeToStr(CV_MAKE_TYPE(CV_32F, cn)), 146 lSizeX, lSizeY, 147 L2gradient ? " -D L2GRAD" : "")); 148 if (with_sobel.empty()) 149 return false; 150 151 UMat src = _src.getUMat(); 152 map.create(size, CV_32S); 153 with_sobel.args(ocl::KernelArg::ReadOnly(src), 154 ocl::KernelArg::WriteOnlyNoSize(map), 155 (float) low, (float) high); 156 157 size_t globalsize[2] = { size.width, size.height }, 158 localsize[2] = { lSizeX, lSizeY }; 159 160 if (!with_sobel.run(2, globalsize, localsize, false)) 161 return false; 162 } 163 else 164 { 165 /* 166 stage1_without_sobel: 167 Calc magnitudes 168 Non maxima suppression 169 Double thresholding 170 */ 171 UMat dx, dy; 172 Sobel(_src, dx, CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE); 173 Sobel(_src, dy, CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE); 174 175 ocl::Kernel without_sobel("stage1_without_sobel", ocl::imgproc::canny_oclsrc, 176 format("-D WITHOUT_SOBEL -D cn=%d -D GRP_SIZEX=%d -D GRP_SIZEY=%d%s", 177 cn, lSizeX, lSizeY, L2gradient ? " -D L2GRAD" : "")); 178 if (without_sobel.empty()) 179 return false; 180 181 map.create(size, CV_32S); 182 without_sobel.args(ocl::KernelArg::ReadOnlyNoSize(dx), ocl::KernelArg::ReadOnlyNoSize(dy), 183 ocl::KernelArg::WriteOnly(map), 184 low, high); 185 186 size_t globalsize[2] = { size.width, size.height }, 187 localsize[2] = { lSizeX, lSizeY }; 188 189 if (!without_sobel.run(2, globalsize, localsize, false)) 190 return false; 191 } 192 193 int PIX_PER_WI = 8; 194 /* 195 stage2: 196 hysteresis (add weak edges if they are connected with strong edges) 197 */ 198 199 int sizey = lSizeY / PIX_PER_WI; 200 if (sizey == 0) 201 sizey = 1; 202 203 size_t globalsize[2] = { size.width, (size.height + PIX_PER_WI - 1) / PIX_PER_WI }, localsize[2] = { lSizeX, sizey }; 204 205 ocl::Kernel edgesHysteresis("stage2_hysteresis", ocl::imgproc::canny_oclsrc, 206 format("-D STAGE2 -D PIX_PER_WI=%d -D LOCAL_X=%d -D LOCAL_Y=%d", 207 PIX_PER_WI, lSizeX, sizey)); 208 209 if (edgesHysteresis.empty()) 210 return false; 211 212 edgesHysteresis.args(ocl::KernelArg::ReadWrite(map)); 213 if (!edgesHysteresis.run(2, globalsize, localsize, false)) 214 return false; 215 216 // get edges 217 218 ocl::Kernel getEdgesKernel("getEdges", ocl::imgproc::canny_oclsrc, 219 format("-D GET_EDGES -D PIX_PER_WI=%d", PIX_PER_WI)); 220 if (getEdgesKernel.empty()) 221 return false; 222 223 _dst.create(size, CV_8UC1); 224 UMat dst = _dst.getUMat(); 225 226 getEdgesKernel.args(ocl::KernelArg::ReadOnly(map), ocl::KernelArg::WriteOnlyNoSize(dst)); 227 228 return getEdgesKernel.run(2, globalsize, NULL, false); 229} 230 231#endif 232 233#ifdef HAVE_TBB 234 235// Queue with peaks that will processed serially. 236static tbb::concurrent_queue<uchar*> borderPeaks; 237 238class tbbCanny 239{ 240public: 241 tbbCanny(const Range _boundaries, const Mat& _src, uchar* _map, int _low, 242 int _high, int _aperture_size, bool _L2gradient) 243 : boundaries(_boundaries), src(_src), map(_map), low(_low), high(_high), 244 aperture_size(_aperture_size), L2gradient(_L2gradient) 245 {} 246 247 // This parallel version of Canny algorithm splits the src image in threadsNumber horizontal slices. 248 // The first row of each slice contains the last row of the previous slice and 249 // the last row of each slice contains the first row of the next slice 250 // so that each slice is independent and no mutexes are required. 251 void operator()() const 252 { 253#if CV_SSE2 254 bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2); 255#endif 256 257 const int type = src.type(), cn = CV_MAT_CN(type); 258 259 Mat dx, dy; 260 261 ptrdiff_t mapstep = src.cols + 2; 262 263 // In sobel transform we calculate ksize2 extra lines for the first and last rows of each slice 264 // because IPPDerivSobel expects only isolated ROIs, in contrast with the opencv version which 265 // uses the pixels outside of the ROI to form a border. 266 uchar ksize2 = aperture_size / 2; 267 268 if (boundaries.start == 0 && boundaries.end == src.rows) 269 { 270 Mat tempdx(boundaries.end - boundaries.start + 2, src.cols, CV_16SC(cn)); 271 Mat tempdy(boundaries.end - boundaries.start + 2, src.cols, CV_16SC(cn)); 272 273 memset(tempdx.ptr<short>(0), 0, cn * src.cols*sizeof(short)); 274 memset(tempdy.ptr<short>(0), 0, cn * src.cols*sizeof(short)); 275 memset(tempdx.ptr<short>(tempdx.rows - 1), 0, cn * src.cols*sizeof(short)); 276 memset(tempdy.ptr<short>(tempdy.rows - 1), 0, cn * src.cols*sizeof(short)); 277 278 Sobel(src, tempdx.rowRange(1, tempdx.rows - 1), CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE); 279 Sobel(src, tempdy.rowRange(1, tempdy.rows - 1), CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE); 280 281 dx = tempdx; 282 dy = tempdy; 283 } 284 else if (boundaries.start == 0) 285 { 286 Mat tempdx(boundaries.end - boundaries.start + 2 + ksize2, src.cols, CV_16SC(cn)); 287 Mat tempdy(boundaries.end - boundaries.start + 2 + ksize2, src.cols, CV_16SC(cn)); 288 289 memset(tempdx.ptr<short>(0), 0, cn * src.cols*sizeof(short)); 290 memset(tempdy.ptr<short>(0), 0, cn * src.cols*sizeof(short)); 291 292 Sobel(src.rowRange(boundaries.start, boundaries.end + 1 + ksize2), tempdx.rowRange(1, tempdx.rows), 293 CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE); 294 Sobel(src.rowRange(boundaries.start, boundaries.end + 1 + ksize2), tempdy.rowRange(1, tempdy.rows), 295 CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE); 296 297 dx = tempdx.rowRange(0, tempdx.rows - ksize2); 298 dy = tempdy.rowRange(0, tempdy.rows - ksize2); 299 } 300 else if (boundaries.end == src.rows) 301 { 302 Mat tempdx(boundaries.end - boundaries.start + 2 + ksize2, src.cols, CV_16SC(cn)); 303 Mat tempdy(boundaries.end - boundaries.start + 2 + ksize2, src.cols, CV_16SC(cn)); 304 305 memset(tempdx.ptr<short>(tempdx.rows - 1), 0, cn * src.cols*sizeof(short)); 306 memset(tempdy.ptr<short>(tempdy.rows - 1), 0, cn * src.cols*sizeof(short)); 307 308 Sobel(src.rowRange(boundaries.start - 1 - ksize2, boundaries.end), tempdx.rowRange(0, tempdx.rows - 1), 309 CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE); 310 Sobel(src.rowRange(boundaries.start - 1 - ksize2, boundaries.end), tempdy.rowRange(0, tempdy.rows - 1), 311 CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE); 312 313 dx = tempdx.rowRange(ksize2, tempdx.rows); 314 dy = tempdy.rowRange(ksize2, tempdy.rows); 315 } 316 else 317 { 318 Mat tempdx(boundaries.end - boundaries.start + 2 + 2*ksize2, src.cols, CV_16SC(cn)); 319 Mat tempdy(boundaries.end - boundaries.start + 2 + 2*ksize2, src.cols, CV_16SC(cn)); 320 321 Sobel(src.rowRange(boundaries.start - 1 - ksize2, boundaries.end + 1 + ksize2), tempdx, 322 CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE); 323 Sobel(src.rowRange(boundaries.start - 1 - ksize2, boundaries.end + 1 + ksize2), tempdy, 324 CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE); 325 326 dx = tempdx.rowRange(ksize2, tempdx.rows - ksize2); 327 dy = tempdy.rowRange(ksize2, tempdy.rows - ksize2); 328 } 329 330 int maxsize = std::max(1 << 10, src.cols * (boundaries.end - boundaries.start) / 10); 331 std::vector<uchar*> stack(maxsize); 332 uchar **stack_top = &stack[0]; 333 uchar **stack_bottom = &stack[0]; 334 335 AutoBuffer<uchar> buffer(cn * mapstep * 3 * sizeof(int)); 336 337 int* mag_buf[3]; 338 mag_buf[0] = (int*)(uchar*)buffer; 339 mag_buf[1] = mag_buf[0] + mapstep*cn; 340 mag_buf[2] = mag_buf[1] + mapstep*cn; 341 342 // calculate magnitude and angle of gradient, perform non-maxima suppression. 343 // fill the map with one of the following values: 344 // 0 - the pixel might belong to an edge 345 // 1 - the pixel can not belong to an edge 346 // 2 - the pixel does belong to an edge 347 for (int i = boundaries.start - 1; i <= boundaries.end; i++) 348 { 349 int* _norm = mag_buf[(i > boundaries.start) - (i == boundaries.start - 1) + 1] + 1; 350 351 short* _dx = dx.ptr<short>(i - boundaries.start + 1); 352 short* _dy = dy.ptr<short>(i - boundaries.start + 1); 353 354 if (!L2gradient) 355 { 356 int j = 0, width = src.cols * cn; 357#if CV_SSE2 358 if (haveSSE2) 359 { 360 __m128i v_zero = _mm_setzero_si128(); 361 for ( ; j <= width - 8; j += 8) 362 { 363 __m128i v_dx = _mm_loadu_si128((const __m128i *)(_dx + j)); 364 __m128i v_dy = _mm_loadu_si128((const __m128i *)(_dy + j)); 365 v_dx = _mm_max_epi16(v_dx, _mm_sub_epi16(v_zero, v_dx)); 366 v_dy = _mm_max_epi16(v_dy, _mm_sub_epi16(v_zero, v_dy)); 367 368 __m128i v_norm = _mm_add_epi32(_mm_unpacklo_epi16(v_dx, v_zero), _mm_unpacklo_epi16(v_dy, v_zero)); 369 _mm_storeu_si128((__m128i *)(_norm + j), v_norm); 370 371 v_norm = _mm_add_epi32(_mm_unpackhi_epi16(v_dx, v_zero), _mm_unpackhi_epi16(v_dy, v_zero)); 372 _mm_storeu_si128((__m128i *)(_norm + j + 4), v_norm); 373 } 374 } 375#elif CV_NEON 376 for ( ; j <= width - 8; j += 8) 377 { 378 int16x8_t v_dx = vld1q_s16(_dx + j), v_dy = vld1q_s16(_dy + j); 379 vst1q_s32(_norm + j, vaddq_s32(vabsq_s32(vmovl_s16(vget_low_s16(v_dx))), 380 vabsq_s32(vmovl_s16(vget_low_s16(v_dy))))); 381 vst1q_s32(_norm + j + 4, vaddq_s32(vabsq_s32(vmovl_s16(vget_high_s16(v_dx))), 382 vabsq_s32(vmovl_s16(vget_high_s16(v_dy))))); 383 } 384#endif 385 for ( ; j < width; ++j) 386 _norm[j] = std::abs(int(_dx[j])) + std::abs(int(_dy[j])); 387 } 388 else 389 { 390 int j = 0, width = src.cols * cn; 391#if CV_SSE2 392 if (haveSSE2) 393 { 394 for ( ; j <= width - 8; j += 8) 395 { 396 __m128i v_dx = _mm_loadu_si128((const __m128i *)(_dx + j)); 397 __m128i v_dy = _mm_loadu_si128((const __m128i *)(_dy + j)); 398 399 __m128i v_dx_ml = _mm_mullo_epi16(v_dx, v_dx), v_dx_mh = _mm_mulhi_epi16(v_dx, v_dx); 400 __m128i v_dy_ml = _mm_mullo_epi16(v_dy, v_dy), v_dy_mh = _mm_mulhi_epi16(v_dy, v_dy); 401 402 __m128i v_norm = _mm_add_epi32(_mm_unpacklo_epi16(v_dx_ml, v_dx_mh), _mm_unpacklo_epi16(v_dy_ml, v_dy_mh)); 403 _mm_storeu_si128((__m128i *)(_norm + j), v_norm); 404 405 v_norm = _mm_add_epi32(_mm_unpackhi_epi16(v_dx_ml, v_dx_mh), _mm_unpackhi_epi16(v_dy_ml, v_dy_mh)); 406 _mm_storeu_si128((__m128i *)(_norm + j + 4), v_norm); 407 } 408 } 409#elif CV_NEON 410 for ( ; j <= width - 8; j += 8) 411 { 412 int16x8_t v_dx = vld1q_s16(_dx + j), v_dy = vld1q_s16(_dy + j); 413 int16x4_t v_dxp = vget_low_s16(v_dx), v_dyp = vget_low_s16(v_dy); 414 int32x4_t v_dst = vmlal_s16(vmull_s16(v_dxp, v_dxp), v_dyp, v_dyp); 415 vst1q_s32(_norm + j, v_dst); 416 417 v_dxp = vget_high_s16(v_dx), v_dyp = vget_high_s16(v_dy); 418 v_dst = vmlal_s16(vmull_s16(v_dxp, v_dxp), v_dyp, v_dyp); 419 vst1q_s32(_norm + j + 4, v_dst); 420 } 421#endif 422 for ( ; j < width; ++j) 423 _norm[j] = int(_dx[j])*_dx[j] + int(_dy[j])*_dy[j]; 424 } 425 426 if (cn > 1) 427 { 428 for(int j = 0, jn = 0; j < src.cols; ++j, jn += cn) 429 { 430 int maxIdx = jn; 431 for(int k = 1; k < cn; ++k) 432 if(_norm[jn + k] > _norm[maxIdx]) maxIdx = jn + k; 433 _norm[j] = _norm[maxIdx]; 434 _dx[j] = _dx[maxIdx]; 435 _dy[j] = _dy[maxIdx]; 436 } 437 } 438 _norm[-1] = _norm[src.cols] = 0; 439 440 // at the very beginning we do not have a complete ring 441 // buffer of 3 magnitude rows for non-maxima suppression 442 if (i <= boundaries.start) 443 continue; 444 445 uchar* _map = map + mapstep*i + 1; 446 _map[-1] = _map[src.cols] = 1; 447 448 int* _mag = mag_buf[1] + 1; // take the central row 449 ptrdiff_t magstep1 = mag_buf[2] - mag_buf[1]; 450 ptrdiff_t magstep2 = mag_buf[0] - mag_buf[1]; 451 452 const short* _x = dx.ptr<short>(i - boundaries.start); 453 const short* _y = dy.ptr<short>(i - boundaries.start); 454 455 if ((stack_top - stack_bottom) + src.cols > maxsize) 456 { 457 int sz = (int)(stack_top - stack_bottom); 458 maxsize = std::max(maxsize * 3/2, sz + src.cols); 459 stack.resize(maxsize); 460 stack_bottom = &stack[0]; 461 stack_top = stack_bottom + sz; 462 } 463 464#define CANNY_PUSH(d) *(d) = uchar(2), *stack_top++ = (d) 465#define CANNY_POP(d) (d) = *--stack_top 466 467 int prev_flag = 0; 468 bool canny_push = false; 469 for (int j = 0; j < src.cols; j++) 470 { 471 #define CANNY_SHIFT 15 472 const int TG22 = (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5); 473 474 int m = _mag[j]; 475 476 if (m > low) 477 { 478 int xs = _x[j]; 479 int ys = _y[j]; 480 int x = std::abs(xs); 481 int y = std::abs(ys) << CANNY_SHIFT; 482 483 int tg22x = x * TG22; 484 485 if (y < tg22x) 486 { 487 if (m > _mag[j-1] && m >= _mag[j+1]) canny_push = true; 488 } 489 else 490 { 491 int tg67x = tg22x + (x << (CANNY_SHIFT+1)); 492 if (y > tg67x) 493 { 494 if (m > _mag[j+magstep2] && m >= _mag[j+magstep1]) canny_push = true; 495 } 496 else 497 { 498 int s = (xs ^ ys) < 0 ? -1 : 1; 499 if (m > _mag[j+magstep2-s] && m > _mag[j+magstep1+s]) canny_push = true; 500 } 501 } 502 } 503 if (!canny_push) 504 { 505 prev_flag = 0; 506 _map[j] = uchar(1); 507 continue; 508 } 509 else 510 { 511 // _map[j-mapstep] is short-circuited at the start because previous thread is 512 // responsible for initializing it. 513 if (!prev_flag && m > high && (i <= boundaries.start+1 || _map[j-mapstep] != 2) ) 514 { 515 CANNY_PUSH(_map + j); 516 prev_flag = 1; 517 } 518 else 519 _map[j] = 0; 520 521 canny_push = false; 522 } 523 } 524 525 // scroll the ring buffer 526 _mag = mag_buf[0]; 527 mag_buf[0] = mag_buf[1]; 528 mag_buf[1] = mag_buf[2]; 529 mag_buf[2] = _mag; 530 } 531 532 // now track the edges (hysteresis thresholding) 533 while (stack_top > stack_bottom) 534 { 535 if ((stack_top - stack_bottom) + 8 > maxsize) 536 { 537 int sz = (int)(stack_top - stack_bottom); 538 maxsize = maxsize * 3/2; 539 stack.resize(maxsize); 540 stack_bottom = &stack[0]; 541 stack_top = stack_bottom + sz; 542 } 543 544 uchar* m; 545 CANNY_POP(m); 546 547 // Stops thresholding from expanding to other slices by sending pixels in the borders of each 548 // slice in a queue to be serially processed later. 549 if ( (m < map + (boundaries.start + 2) * mapstep) || (m >= map + boundaries.end * mapstep) ) 550 { 551 borderPeaks.push(m); 552 continue; 553 } 554 555 if (!m[-1]) CANNY_PUSH(m - 1); 556 if (!m[1]) CANNY_PUSH(m + 1); 557 if (!m[-mapstep-1]) CANNY_PUSH(m - mapstep - 1); 558 if (!m[-mapstep]) CANNY_PUSH(m - mapstep); 559 if (!m[-mapstep+1]) CANNY_PUSH(m - mapstep + 1); 560 if (!m[mapstep-1]) CANNY_PUSH(m + mapstep - 1); 561 if (!m[mapstep]) CANNY_PUSH(m + mapstep); 562 if (!m[mapstep+1]) CANNY_PUSH(m + mapstep + 1); 563 } 564 } 565 566private: 567 const Range boundaries; 568 const Mat& src; 569 uchar* map; 570 int low; 571 int high; 572 int aperture_size; 573 bool L2gradient; 574}; 575 576#endif 577 578} // namespace cv 579 580void cv::Canny( InputArray _src, OutputArray _dst, 581 double low_thresh, double high_thresh, 582 int aperture_size, bool L2gradient ) 583{ 584 const int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); 585 const Size size = _src.size(); 586 587 CV_Assert( depth == CV_8U ); 588 _dst.create(size, CV_8U); 589 590 if (!L2gradient && (aperture_size & CV_CANNY_L2_GRADIENT) == CV_CANNY_L2_GRADIENT) 591 { 592 // backward compatibility 593 aperture_size &= ~CV_CANNY_L2_GRADIENT; 594 L2gradient = true; 595 } 596 597 if ((aperture_size & 1) == 0 || (aperture_size != -1 && (aperture_size < 3 || aperture_size > 7))) 598 CV_Error(CV_StsBadFlag, "Aperture size should be odd"); 599 600 if (low_thresh > high_thresh) 601 std::swap(low_thresh, high_thresh); 602 603 CV_OCL_RUN(_dst.isUMat() && (cn == 1 || cn == 3), 604 ocl_Canny(_src, _dst, (float)low_thresh, (float)high_thresh, aperture_size, L2gradient, cn, size)) 605 606 Mat src = _src.getMat(), dst = _dst.getMat(); 607 608#ifdef HAVE_TEGRA_OPTIMIZATION 609 if (tegra::useTegra() && tegra::canny(src, dst, low_thresh, high_thresh, aperture_size, L2gradient)) 610 return; 611#endif 612 613#ifdef USE_IPP_CANNY 614 CV_IPP_CHECK() 615 { 616 if( aperture_size == 3 && !L2gradient && 1 == cn ) 617 { 618 if (ippCanny(src, dst, (float)low_thresh, (float)high_thresh)) 619 { 620 CV_IMPL_ADD(CV_IMPL_IPP); 621 return; 622 } 623 setIppErrorStatus(); 624 } 625 } 626#endif 627 628#ifdef HAVE_TBB 629 630if (L2gradient) 631{ 632 low_thresh = std::min(32767.0, low_thresh); 633 high_thresh = std::min(32767.0, high_thresh); 634 635 if (low_thresh > 0) low_thresh *= low_thresh; 636 if (high_thresh > 0) high_thresh *= high_thresh; 637} 638int low = cvFloor(low_thresh); 639int high = cvFloor(high_thresh); 640 641ptrdiff_t mapstep = src.cols + 2; 642AutoBuffer<uchar> buffer((src.cols+2)*(src.rows+2)); 643 644uchar* map = (uchar*)buffer; 645memset(map, 1, mapstep); 646memset(map + mapstep*(src.rows + 1), 1, mapstep); 647 648int threadsNumber = tbb::task_scheduler_init::default_num_threads(); 649int grainSize = src.rows / threadsNumber; 650 651// Make a fallback for pictures with too few rows. 652uchar ksize2 = aperture_size / 2; 653int minGrainSize = 1 + ksize2; 654int maxGrainSize = src.rows - 2 - 2*ksize2; 655if ( !( minGrainSize <= grainSize && grainSize <= maxGrainSize ) ) 656{ 657 threadsNumber = 1; 658 grainSize = src.rows; 659} 660 661tbb::task_group g; 662 663for (int i = 0; i < threadsNumber; ++i) 664{ 665 if (i < threadsNumber - 1) 666 g.run(tbbCanny(Range(i * grainSize, (i + 1) * grainSize), src, map, low, high, aperture_size, L2gradient)); 667 else 668 g.run(tbbCanny(Range(i * grainSize, src.rows), src, map, low, high, aperture_size, L2gradient)); 669} 670 671g.wait(); 672 673#define CANNY_PUSH_SERIAL(d) *(d) = uchar(2), borderPeaks.push(d) 674 675// now track the edges (hysteresis thresholding) 676uchar* m; 677while (borderPeaks.try_pop(m)) 678{ 679 if (!m[-1]) CANNY_PUSH_SERIAL(m - 1); 680 if (!m[1]) CANNY_PUSH_SERIAL(m + 1); 681 if (!m[-mapstep-1]) CANNY_PUSH_SERIAL(m - mapstep - 1); 682 if (!m[-mapstep]) CANNY_PUSH_SERIAL(m - mapstep); 683 if (!m[-mapstep+1]) CANNY_PUSH_SERIAL(m - mapstep + 1); 684 if (!m[mapstep-1]) CANNY_PUSH_SERIAL(m + mapstep - 1); 685 if (!m[mapstep]) CANNY_PUSH_SERIAL(m + mapstep); 686 if (!m[mapstep+1]) CANNY_PUSH_SERIAL(m + mapstep + 1); 687} 688 689#else 690 691 Mat dx(src.rows, src.cols, CV_16SC(cn)); 692 Mat dy(src.rows, src.cols, CV_16SC(cn)); 693 694 Sobel(src, dx, CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE); 695 Sobel(src, dy, CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE); 696 697 if (L2gradient) 698 { 699 low_thresh = std::min(32767.0, low_thresh); 700 high_thresh = std::min(32767.0, high_thresh); 701 702 if (low_thresh > 0) low_thresh *= low_thresh; 703 if (high_thresh > 0) high_thresh *= high_thresh; 704 } 705 int low = cvFloor(low_thresh); 706 int high = cvFloor(high_thresh); 707 708 ptrdiff_t mapstep = src.cols + 2; 709 AutoBuffer<uchar> buffer((src.cols+2)*(src.rows+2) + cn * mapstep * 3 * sizeof(int)); 710 711 int* mag_buf[3]; 712 mag_buf[0] = (int*)(uchar*)buffer; 713 mag_buf[1] = mag_buf[0] + mapstep*cn; 714 mag_buf[2] = mag_buf[1] + mapstep*cn; 715 memset(mag_buf[0], 0, /* cn* */mapstep*sizeof(int)); 716 717 uchar* map = (uchar*)(mag_buf[2] + mapstep*cn); 718 memset(map, 1, mapstep); 719 memset(map + mapstep*(src.rows + 1), 1, mapstep); 720 721 int maxsize = std::max(1 << 10, src.cols * src.rows / 10); 722 std::vector<uchar*> stack(maxsize); 723 uchar **stack_top = &stack[0]; 724 uchar **stack_bottom = &stack[0]; 725 726 /* sector numbers 727 (Top-Left Origin) 728 729 1 2 3 730 * * * 731 * * * 732 0*******0 733 * * * 734 * * * 735 3 2 1 736 */ 737 738 #define CANNY_PUSH(d) *(d) = uchar(2), *stack_top++ = (d) 739 #define CANNY_POP(d) (d) = *--stack_top 740 741#if CV_SSE2 742 bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2); 743#endif 744 745 // calculate magnitude and angle of gradient, perform non-maxima suppression. 746 // fill the map with one of the following values: 747 // 0 - the pixel might belong to an edge 748 // 1 - the pixel can not belong to an edge 749 // 2 - the pixel does belong to an edge 750 for (int i = 0; i <= src.rows; i++) 751 { 752 int* _norm = mag_buf[(i > 0) + 1] + 1; 753 if (i < src.rows) 754 { 755 short* _dx = dx.ptr<short>(i); 756 short* _dy = dy.ptr<short>(i); 757 758 if (!L2gradient) 759 { 760 int j = 0, width = src.cols * cn; 761#if CV_SSE2 762 if (haveSSE2) 763 { 764 __m128i v_zero = _mm_setzero_si128(); 765 for ( ; j <= width - 8; j += 8) 766 { 767 __m128i v_dx = _mm_loadu_si128((const __m128i *)(_dx + j)); 768 __m128i v_dy = _mm_loadu_si128((const __m128i *)(_dy + j)); 769 v_dx = _mm_max_epi16(v_dx, _mm_sub_epi16(v_zero, v_dx)); 770 v_dy = _mm_max_epi16(v_dy, _mm_sub_epi16(v_zero, v_dy)); 771 772 __m128i v_norm = _mm_add_epi32(_mm_unpacklo_epi16(v_dx, v_zero), _mm_unpacklo_epi16(v_dy, v_zero)); 773 _mm_storeu_si128((__m128i *)(_norm + j), v_norm); 774 775 v_norm = _mm_add_epi32(_mm_unpackhi_epi16(v_dx, v_zero), _mm_unpackhi_epi16(v_dy, v_zero)); 776 _mm_storeu_si128((__m128i *)(_norm + j + 4), v_norm); 777 } 778 } 779#elif CV_NEON 780 for ( ; j <= width - 8; j += 8) 781 { 782 int16x8_t v_dx = vld1q_s16(_dx + j), v_dy = vld1q_s16(_dy + j); 783 vst1q_s32(_norm + j, vaddq_s32(vabsq_s32(vmovl_s16(vget_low_s16(v_dx))), 784 vabsq_s32(vmovl_s16(vget_low_s16(v_dy))))); 785 vst1q_s32(_norm + j + 4, vaddq_s32(vabsq_s32(vmovl_s16(vget_high_s16(v_dx))), 786 vabsq_s32(vmovl_s16(vget_high_s16(v_dy))))); 787 } 788#endif 789 for ( ; j < width; ++j) 790 _norm[j] = std::abs(int(_dx[j])) + std::abs(int(_dy[j])); 791 } 792 else 793 { 794 int j = 0, width = src.cols * cn; 795#if CV_SSE2 796 if (haveSSE2) 797 { 798 for ( ; j <= width - 8; j += 8) 799 { 800 __m128i v_dx = _mm_loadu_si128((const __m128i *)(_dx + j)); 801 __m128i v_dy = _mm_loadu_si128((const __m128i *)(_dy + j)); 802 803 __m128i v_dx_ml = _mm_mullo_epi16(v_dx, v_dx), v_dx_mh = _mm_mulhi_epi16(v_dx, v_dx); 804 __m128i v_dy_ml = _mm_mullo_epi16(v_dy, v_dy), v_dy_mh = _mm_mulhi_epi16(v_dy, v_dy); 805 806 __m128i v_norm = _mm_add_epi32(_mm_unpacklo_epi16(v_dx_ml, v_dx_mh), _mm_unpacklo_epi16(v_dy_ml, v_dy_mh)); 807 _mm_storeu_si128((__m128i *)(_norm + j), v_norm); 808 809 v_norm = _mm_add_epi32(_mm_unpackhi_epi16(v_dx_ml, v_dx_mh), _mm_unpackhi_epi16(v_dy_ml, v_dy_mh)); 810 _mm_storeu_si128((__m128i *)(_norm + j + 4), v_norm); 811 } 812 } 813#elif CV_NEON 814 for ( ; j <= width - 8; j += 8) 815 { 816 int16x8_t v_dx = vld1q_s16(_dx + j), v_dy = vld1q_s16(_dy + j); 817 int16x4_t v_dxp = vget_low_s16(v_dx), v_dyp = vget_low_s16(v_dy); 818 int32x4_t v_dst = vmlal_s16(vmull_s16(v_dxp, v_dxp), v_dyp, v_dyp); 819 vst1q_s32(_norm + j, v_dst); 820 821 v_dxp = vget_high_s16(v_dx), v_dyp = vget_high_s16(v_dy); 822 v_dst = vmlal_s16(vmull_s16(v_dxp, v_dxp), v_dyp, v_dyp); 823 vst1q_s32(_norm + j + 4, v_dst); 824 } 825#endif 826 for ( ; j < width; ++j) 827 _norm[j] = int(_dx[j])*_dx[j] + int(_dy[j])*_dy[j]; 828 } 829 830 if (cn > 1) 831 { 832 for(int j = 0, jn = 0; j < src.cols; ++j, jn += cn) 833 { 834 int maxIdx = jn; 835 for(int k = 1; k < cn; ++k) 836 if(_norm[jn + k] > _norm[maxIdx]) maxIdx = jn + k; 837 _norm[j] = _norm[maxIdx]; 838 _dx[j] = _dx[maxIdx]; 839 _dy[j] = _dy[maxIdx]; 840 } 841 } 842 _norm[-1] = _norm[src.cols] = 0; 843 } 844 else 845 memset(_norm-1, 0, /* cn* */mapstep*sizeof(int)); 846 847 // at the very beginning we do not have a complete ring 848 // buffer of 3 magnitude rows for non-maxima suppression 849 if (i == 0) 850 continue; 851 852 uchar* _map = map + mapstep*i + 1; 853 _map[-1] = _map[src.cols] = 1; 854 855 int* _mag = mag_buf[1] + 1; // take the central row 856 ptrdiff_t magstep1 = mag_buf[2] - mag_buf[1]; 857 ptrdiff_t magstep2 = mag_buf[0] - mag_buf[1]; 858 859 const short* _x = dx.ptr<short>(i-1); 860 const short* _y = dy.ptr<short>(i-1); 861 862 if ((stack_top - stack_bottom) + src.cols > maxsize) 863 { 864 int sz = (int)(stack_top - stack_bottom); 865 maxsize = std::max(maxsize * 3/2, sz + src.cols); 866 stack.resize(maxsize); 867 stack_bottom = &stack[0]; 868 stack_top = stack_bottom + sz; 869 } 870 871 int prev_flag = 0; 872 for (int j = 0; j < src.cols; j++) 873 { 874 #define CANNY_SHIFT 15 875 const int TG22 = (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5); 876 877 int m = _mag[j]; 878 879 if (m > low) 880 { 881 int xs = _x[j]; 882 int ys = _y[j]; 883 int x = std::abs(xs); 884 int y = std::abs(ys) << CANNY_SHIFT; 885 886 int tg22x = x * TG22; 887 888 if (y < tg22x) 889 { 890 if (m > _mag[j-1] && m >= _mag[j+1]) goto __ocv_canny_push; 891 } 892 else 893 { 894 int tg67x = tg22x + (x << (CANNY_SHIFT+1)); 895 if (y > tg67x) 896 { 897 if (m > _mag[j+magstep2] && m >= _mag[j+magstep1]) goto __ocv_canny_push; 898 } 899 else 900 { 901 int s = (xs ^ ys) < 0 ? -1 : 1; 902 if (m > _mag[j+magstep2-s] && m > _mag[j+magstep1+s]) goto __ocv_canny_push; 903 } 904 } 905 } 906 prev_flag = 0; 907 _map[j] = uchar(1); 908 continue; 909__ocv_canny_push: 910 if (!prev_flag && m > high && _map[j-mapstep] != 2) 911 { 912 CANNY_PUSH(_map + j); 913 prev_flag = 1; 914 } 915 else 916 _map[j] = 0; 917 } 918 919 // scroll the ring buffer 920 _mag = mag_buf[0]; 921 mag_buf[0] = mag_buf[1]; 922 mag_buf[1] = mag_buf[2]; 923 mag_buf[2] = _mag; 924 } 925 926 // now track the edges (hysteresis thresholding) 927 while (stack_top > stack_bottom) 928 { 929 uchar* m; 930 if ((stack_top - stack_bottom) + 8 > maxsize) 931 { 932 int sz = (int)(stack_top - stack_bottom); 933 maxsize = maxsize * 3/2; 934 stack.resize(maxsize); 935 stack_bottom = &stack[0]; 936 stack_top = stack_bottom + sz; 937 } 938 939 CANNY_POP(m); 940 941 if (!m[-1]) CANNY_PUSH(m - 1); 942 if (!m[1]) CANNY_PUSH(m + 1); 943 if (!m[-mapstep-1]) CANNY_PUSH(m - mapstep - 1); 944 if (!m[-mapstep]) CANNY_PUSH(m - mapstep); 945 if (!m[-mapstep+1]) CANNY_PUSH(m - mapstep + 1); 946 if (!m[mapstep-1]) CANNY_PUSH(m + mapstep - 1); 947 if (!m[mapstep]) CANNY_PUSH(m + mapstep); 948 if (!m[mapstep+1]) CANNY_PUSH(m + mapstep + 1); 949 } 950 951#endif 952 953 // the final pass, form the final image 954 const uchar* pmap = map + mapstep + 1; 955 uchar* pdst = dst.ptr(); 956 for (int i = 0; i < src.rows; i++, pmap += mapstep, pdst += dst.step) 957 { 958 for (int j = 0; j < src.cols; j++) 959 pdst[j] = (uchar)-(pmap[j] >> 1); 960 } 961} 962 963void cvCanny( const CvArr* image, CvArr* edges, double threshold1, 964 double threshold2, int aperture_size ) 965{ 966 cv::Mat src = cv::cvarrToMat(image), dst = cv::cvarrToMat(edges); 967 CV_Assert( src.size == dst.size && src.depth() == CV_8U && dst.type() == CV_8U ); 968 969 cv::Canny(src, dst, threshold1, threshold2, aperture_size & 255, 970 (aperture_size & CV_CANNY_L2_GRADIENT) != 0); 971} 972 973/* End of file. */ 974