/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2014, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"
#include "opencl_kernels_core.hpp"

///////////////////////////////// UMat implementation ///////////////////////////////

namespace cv {

// it should be a prime number for the best hash function
enum { UMAT_NLOCKS = 31 };
static Mutex umatLocks[UMAT_NLOCKS];

UMatData::UMatData(const MatAllocator* allocator)
{
    prevAllocator = currAllocator = allocator;
    urefcount = refcount = 0;
    data = origdata = 0;
    size = 0;
    flags = 0;
    handle = 0;
    userdata = 0;
    allocatorFlags_ = 0;
}

UMatData::~UMatData()
{
    prevAllocator = currAllocator = 0;
    urefcount = refcount = 0;
    data = origdata = 0;
    size = 0;
    flags = 0;
    handle = 0;
    userdata = 0;
    allocatorFlags_ = 0;
}

void UMatData::lock()
{
    umatLocks[(size_t)(void*)this % UMAT_NLOCKS].lock();
}

void UMatData::unlock()
{
    umatLocks[(size_t)(void*)this % UMAT_NLOCKS].unlock();
}
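/* Illustrative sketch (not part of the library): lock()/unlock() above use lock
   striping - each UMatData object hashes its own address onto one of UMAT_NLOCKS
   global mutexes, so unrelated objects rarely contend and no per-object mutex
   storage is needed. A standalone model of the same idea, with hypothetical
   names (Shared, NLOCKS):

    #include <mutex>
    #include <cstddef>

    enum { NLOCKS = 31 };              // prime, spreads addresses more evenly
    static std::mutex stripedLocks[NLOCKS];

    struct Shared
    {
        void lock()   { stripedLocks[(size_t)(void*)this % NLOCKS].lock(); }
        void unlock() { stripedLocks[(size_t)(void*)this % NLOCKS].unlock(); }
    };

   Two distinct objects share a mutex only when their addresses collide modulo
   NLOCKS; occasional false contention is the accepted trade-off of the scheme. */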
MatAllocator* UMat::getStdAllocator()
{
#ifdef HAVE_OPENCL
    if( ocl::haveOpenCL() && ocl::useOpenCL() )
        return ocl::getOpenCLAllocator();
#endif
    return Mat::getStdAllocator();
}

void swap( UMat& a, UMat& b )
{
    std::swap(a.flags, b.flags);
    std::swap(a.dims, b.dims);
    std::swap(a.rows, b.rows);
    std::swap(a.cols, b.cols);
    std::swap(a.allocator, b.allocator);
    std::swap(a.u, b.u);
    std::swap(a.offset, b.offset);

    std::swap(a.size.p, b.size.p);
    std::swap(a.step.p, b.step.p);
    std::swap(a.step.buf[0], b.step.buf[0]);
    std::swap(a.step.buf[1], b.step.buf[1]);

    if( a.step.p == b.step.buf )
    {
        a.step.p = a.step.buf;
        a.size.p = &a.rows;
    }

    if( b.step.p == a.step.buf )
    {
        b.step.p = b.step.buf;
        b.size.p = &b.rows;
    }
}


static inline void setSize( UMat& m, int _dims, const int* _sz,
                            const size_t* _steps, bool autoSteps=false )
{
    CV_Assert( 0 <= _dims && _dims <= CV_MAX_DIM );
    if( m.dims != _dims )
    {
        if( m.step.p != m.step.buf )
        {
            fastFree(m.step.p);
            m.step.p = m.step.buf;
            m.size.p = &m.rows;
        }
        if( _dims > 2 )
        {
            m.step.p = (size_t*)fastMalloc(_dims*sizeof(m.step.p[0]) + (_dims+1)*sizeof(m.size.p[0]));
            m.size.p = (int*)(m.step.p + _dims) + 1;
            m.size.p[-1] = _dims;
            m.rows = m.cols = -1;
        }
    }

    m.dims = _dims;
    if( !_sz )
        return;

    size_t esz = CV_ELEM_SIZE(m.flags), total = esz;
    int i;
    for( i = _dims-1; i >= 0; i-- )
    {
        int s = _sz[i];
        CV_Assert( s >= 0 );
        m.size.p[i] = s;

        if( _steps )
            m.step.p[i] = i < _dims-1 ? _steps[i] : esz;
        else if( autoSteps )
        {
            m.step.p[i] = total;
            int64 total1 = (int64)total*s;
            if( (uint64)total1 != (size_t)total1 )
                CV_Error( CV_StsOutOfRange, "The total matrix size does not fit to \"size_t\" type" );
            total = (size_t)total1;
        }
    }

    if( _dims == 1 )
    {
        m.dims = 2;
        m.cols = 1;
        m.step[1] = esz;
    }
}

static void updateContinuityFlag(UMat& m)
{
    int i, j;
    for( i = 0; i < m.dims; i++ )
    {
        if( m.size[i] > 1 )
            break;
    }

    for( j = m.dims-1; j > i; j-- )
    {
        if( m.step[j]*m.size[j] < m.step[j-1] )
            break;
    }

    uint64 total = (uint64)m.step[0]*m.size[0];
    if( j <= i && total == (size_t)total )
        m.flags |= UMat::CONTINUOUS_FLAG;
    else
        m.flags &= ~UMat::CONTINUOUS_FLAG;
}


static void finalizeHdr(UMat& m)
{
    updateContinuityFlag(m);
    int d = m.dims;
    if( d > 2 )
        m.rows = m.cols = -1;
}

UMat Mat::getUMat(int accessFlags, UMatUsageFlags usageFlags) const
{
    UMat hdr;
    if(!data)
        return hdr;
    UMatData* temp_u = u;
    if(!temp_u)
    {
        MatAllocator *a = allocator, *a0 = getStdAllocator();
        if(!a)
            a = a0;
        temp_u = a->allocate(dims, size.p, type(), data, step.p, accessFlags, usageFlags);
        temp_u->refcount = 1;
    }
    UMat::getStdAllocator()->allocate(temp_u, accessFlags, usageFlags); // TODO result is not checked
    hdr.flags = flags;
    setSize(hdr, dims, size.p, step.p);
    finalizeHdr(hdr);
    hdr.u = temp_u;
    hdr.offset = data - datastart;
    hdr.addref();
    return hdr;
}
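/* Usage sketch (illustrative, hypothetical function name): getUMat() wraps
   existing Mat memory in a UMat header without copying the pixels, so a
   CPU-resident image can feed a T-API code path. The wrapping Mat must outlive
   the returned UMat:

    #include <opencv2/core.hpp>

    void process_via_umat_example(const cv::Mat& cpuImage, cv::Mat& cpuResult)
    {
        cv::UMat usrc = cpuImage.getUMat(cv::ACCESS_READ);   // zero-copy header
        cv::UMat udst = usrc.clone();                        // any UMat processing
        cpuResult = udst.getMat(cv::ACCESS_READ).clone();    // map back to host
    }
*/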
void UMat::create(int d, const int* _sizes, int _type, UMatUsageFlags _usageFlags)
{
    this->usageFlags = _usageFlags;

    int i;
    CV_Assert(0 <= d && d <= CV_MAX_DIM && _sizes);
    _type = CV_MAT_TYPE(_type);

    if( u && (d == dims || (d == 1 && dims <= 2)) && _type == type() )
    {
        if( d == 2 && rows == _sizes[0] && cols == _sizes[1] )
            return;
        for( i = 0; i < d; i++ )
            if( size[i] != _sizes[i] )
                break;
        if( i == d && (d > 1 || size[1] == 1))
            return;
    }

    release();
    if( d == 0 )
        return;
    flags = (_type & CV_MAT_TYPE_MASK) | MAGIC_VAL;
    setSize(*this, d, _sizes, 0, true);
    offset = 0;

    if( total() > 0 )
    {
        MatAllocator *a = allocator, *a0 = getStdAllocator();
        if(!a)
            a = a0;
        try
        {
            u = a->allocate(dims, size, _type, 0, step.p, 0, usageFlags);
            CV_Assert(u != 0);
        }
        catch(...)
        {
            if(a != a0)
                u = a0->allocate(dims, size, _type, 0, step.p, 0, usageFlags);
            CV_Assert(u != 0);
        }
        CV_Assert( step[dims-1] == (size_t)CV_ELEM_SIZE(flags) );
    }

    finalizeHdr(*this);
    addref();
}

void UMat::copySize(const UMat& m)
{
    setSize(*this, m.dims, 0, 0);
    for( int i = 0; i < dims; i++ )
    {
        size[i] = m.size[i];
        step[i] = m.step[i];
    }
}


UMat::~UMat()
{
    release();
    if( step.p != step.buf )
        fastFree(step.p);
}

void UMat::deallocate()
{
    u->currAllocator->deallocate(u);
    u = NULL;
}


UMat::UMat(const UMat& m, const Range& _rowRange, const Range& _colRange)
    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(USAGE_DEFAULT), u(0), offset(0), size(&rows)
{
    CV_Assert( m.dims >= 2 );
    if( m.dims > 2 )
    {
        AutoBuffer<Range> rs(m.dims);
        rs[0] = _rowRange;
        rs[1] = _colRange;
        for( int i = 2; i < m.dims; i++ )
            rs[i] = Range::all();
        *this = m(rs);
        return;
    }

    *this = m;
    if( _rowRange != Range::all() && _rowRange != Range(0,rows) )
    {
        CV_Assert( 0 <= _rowRange.start && _rowRange.start <= _rowRange.end && _rowRange.end <= m.rows );
        rows = _rowRange.size();
        offset += step*_rowRange.start;
        flags |= SUBMATRIX_FLAG;
    }

    if( _colRange != Range::all() && _colRange != Range(0,cols) )
    {
        CV_Assert( 0 <= _colRange.start && _colRange.start <= _colRange.end && _colRange.end <= m.cols );
        cols = _colRange.size();
        offset += _colRange.start*elemSize();
        flags &= cols < m.cols ? ~CONTINUOUS_FLAG : -1;
        flags |= SUBMATRIX_FLAG;
    }

    if( rows == 1 )
        flags |= CONTINUOUS_FLAG;

    if( rows <= 0 || cols <= 0 )
    {
        release();
        rows = cols = 0;
    }
}
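/* Illustrative sketch (hypothetical function name): the range/Rect constructors
   above build a new header over the same UMatData buffer - only offset, sizes
   and flags change, no pixels are copied:

    #include <opencv2/core.hpp>

    void roi_header_example()
    {
        cv::UMat big(480, 640, CV_8UC3, cv::Scalar::all(0));
        cv::UMat band = big(cv::Range(100, 200), cv::Range::all()); // row band
        cv::UMat roi  = big(cv::Rect(10, 20, 64, 48));              // rectangle
        // roi.isSubmatrix() == true; writing into roi writes into big
        roi.setTo(cv::Scalar(255, 0, 0));
        (void)band;
    }
*/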
UMat::UMat(const UMat& m, const Rect& roi)
    : flags(m.flags), dims(2), rows(roi.height), cols(roi.width),
    allocator(m.allocator), usageFlags(m.usageFlags), u(m.u), offset(m.offset + roi.y*m.step[0]), size(&rows)
{
    CV_Assert( m.dims <= 2 );
    flags &= roi.width < m.cols ? ~CONTINUOUS_FLAG : -1;
    flags |= roi.height == 1 ? CONTINUOUS_FLAG : 0;

    size_t esz = CV_ELEM_SIZE(flags);
    offset += roi.x*esz;
    CV_Assert( 0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= m.cols &&
              0 <= roi.y && 0 <= roi.height && roi.y + roi.height <= m.rows );
    if( u )
        CV_XADD(&(u->urefcount), 1);
    if( roi.width < m.cols || roi.height < m.rows )
        flags |= SUBMATRIX_FLAG;

    step[0] = m.step[0]; step[1] = esz;

    if( rows <= 0 || cols <= 0 )
    {
        release();
        rows = cols = 0;
    }
}


UMat::UMat(const UMat& m, const Range* ranges)
    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(USAGE_DEFAULT), u(0), offset(0), size(&rows)
{
    int i, d = m.dims;

    CV_Assert(ranges);
    for( i = 0; i < d; i++ )
    {
        Range r = ranges[i];
        CV_Assert( r == Range::all() || (0 <= r.start && r.start < r.end && r.end <= m.size[i]) );
    }
    *this = m;
    for( i = 0; i < d; i++ )
    {
        Range r = ranges[i];
        if( r != Range::all() && r != Range(0, size.p[i]))
        {
            size.p[i] = r.end - r.start;
            offset += r.start*step.p[i];
            flags |= SUBMATRIX_FLAG;
        }
    }
    updateContinuityFlag(*this);
}

UMat UMat::diag(int d) const
{
    CV_Assert( dims <= 2 );
    UMat m = *this;
    size_t esz = elemSize();
    int len;

    if( d >= 0 )
    {
        len = std::min(cols - d, rows);
        m.offset += esz*d;
    }
    else
    {
        len = std::min(rows + d, cols);
        m.offset -= step[0]*d;
    }
    CV_DbgAssert( len > 0 );

    m.size[0] = m.rows = len;
    m.size[1] = m.cols = 1;
    m.step[0] += (len > 1 ? esz : 0);

    if( m.rows > 1 )
        m.flags &= ~CONTINUOUS_FLAG;
    else
        m.flags |= CONTINUOUS_FLAG;

    if( size() != Size(1,1) )
        m.flags |= SUBMATRIX_FLAG;

    return m;
}

void UMat::locateROI( Size& wholeSize, Point& ofs ) const
{
    CV_Assert( dims <= 2 && step[0] > 0 );
    size_t esz = elemSize(), minstep;
    ptrdiff_t delta1 = (ptrdiff_t)offset, delta2 = (ptrdiff_t)u->size;

    if( delta1 == 0 )
        ofs.x = ofs.y = 0;
    else
    {
        ofs.y = (int)(delta1/step[0]);
        ofs.x = (int)((delta1 - step[0]*ofs.y)/esz);
        CV_DbgAssert( offset == (size_t)(ofs.y*step[0] + ofs.x*esz) );
    }
    minstep = (ofs.x + cols)*esz;
    wholeSize.height = (int)((delta2 - minstep)/step[0] + 1);
    wholeSize.height = std::max(wholeSize.height, ofs.y + rows);
    wholeSize.width = (int)((delta2 - step*(wholeSize.height-1))/esz);
    wholeSize.width = std::max(wholeSize.width, ofs.x + cols);
}


UMat& UMat::adjustROI( int dtop, int dbottom, int dleft, int dright )
{
    CV_Assert( dims <= 2 && step[0] > 0 );
    Size wholeSize; Point ofs;
    size_t esz = elemSize();
    locateROI( wholeSize, ofs );
    int row1 = std::max(ofs.y - dtop, 0), row2 = std::min(ofs.y + rows + dbottom, wholeSize.height);
    int col1 = std::max(ofs.x - dleft, 0), col2 = std::min(ofs.x + cols + dright, wholeSize.width);
    offset += (row1 - ofs.y)*step + (col1 - ofs.x)*esz;
    rows = row2 - row1; cols = col2 - col1;
    size.p[0] = rows; size.p[1] = cols;
    if( esz*cols == step[0] || rows == 1 )
        flags |= CONTINUOUS_FLAG;
    else
        flags &= ~CONTINUOUS_FLAG;
    return *this;
}
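/* Usage sketch (illustrative, hypothetical function name): locateROI() recovers
   the parent extent of a submatrix from its offset and step, and adjustROI()
   grows or shrinks the view in place within that parent:

    #include <opencv2/core.hpp>

    void adjust_roi_example()
    {
        cv::UMat big(100, 100, CV_8UC1, cv::Scalar(0));
        cv::UMat roi = big(cv::Rect(40, 40, 20, 20));

        cv::Size wholeSize; cv::Point ofs;
        roi.locateROI(wholeSize, ofs);   // wholeSize == 100x100, ofs == (40,40)

        roi.adjustROI(2, 2, 2, 2);       // expand by 2 px on every side
        // roi is now the 24x24 window at (38,38) of the same buffer
    }
*/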
UMat UMat::reshape(int new_cn, int new_rows) const
{
    int cn = channels();
    UMat hdr = *this;

    if( dims > 2 && new_rows == 0 && new_cn != 0 && size[dims-1]*cn % new_cn == 0 )
    {
        hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn-1) << CV_CN_SHIFT);
        hdr.step[dims-1] = CV_ELEM_SIZE(hdr.flags);
        hdr.size[dims-1] = hdr.size[dims-1]*cn / new_cn;
        return hdr;
    }

    CV_Assert( dims <= 2 );

    if( new_cn == 0 )
        new_cn = cn;

    int total_width = cols * cn;

    if( (new_cn > total_width || total_width % new_cn != 0) && new_rows == 0 )
        new_rows = rows * total_width / new_cn;

    if( new_rows != 0 && new_rows != rows )
    {
        int total_size = total_width * rows;
        if( !isContinuous() )
            CV_Error( CV_BadStep,
                "The matrix is not continuous, thus its number of rows can not be changed" );

        if( (unsigned)new_rows > (unsigned)total_size )
            CV_Error( CV_StsOutOfRange, "Bad new number of rows" );

        total_width = total_size / new_rows;

        if( total_width * new_rows != total_size )
            CV_Error( CV_StsBadArg, "The total number of matrix elements "
                                    "is not divisible by the new number of rows" );

        hdr.rows = new_rows;
        hdr.step[0] = total_width * elemSize1();
    }

    int new_width = total_width / new_cn;

    if( new_width * new_cn != total_width )
        CV_Error( CV_BadNumChannels,
            "The total width is not divisible by the new number of channels" );

    hdr.cols = new_width;
    hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn-1) << CV_CN_SHIFT);
    hdr.step[1] = CV_ELEM_SIZE(hdr.flags);
    return hdr;
}

UMat UMat::diag(const UMat& d)
{
    CV_Assert( d.cols == 1 || d.rows == 1 );
    int len = d.rows + d.cols - 1;
    UMat m(len, len, d.type(), Scalar(0));
    UMat md = m.diag();
    if( d.cols == 1 )
        d.copyTo(md);
    else
        transpose(d, md);
    return m;
}

int UMat::checkVector(int _elemChannels, int _depth, bool _requireContinuous) const
{
    return (depth() == _depth || _depth <= 0) &&
        (isContinuous() || !_requireContinuous) &&
        ((dims == 2 && (((rows == 1 || cols == 1) && channels() == _elemChannels) ||
                        (cols == _elemChannels && channels() == 1))) ||
        (dims == 3 && channels() == 1 && size.p[2] == _elemChannels && (size.p[0] == 1 || size.p[1] == 1) &&
         (isContinuous() || step.p[1] == step.p[2]*size.p[2])))
    ? (int)(total()*channels()/_elemChannels) : -1;
}

UMat UMat::reshape(int _cn, int _newndims, const int* _newsz) const
{
    if(_newndims == dims)
    {
        if(_newsz == 0)
            return reshape(_cn);
        if(_newndims == 2)
            return reshape(_cn, _newsz[0]);
    }

    CV_Error(CV_StsNotImplemented, "");
    // TBD
    return UMat();
}


Mat UMat::getMat(int accessFlags) const
{
    if(!u)
        return Mat();
    u->currAllocator->map(u, accessFlags | ACCESS_READ); // TODO Support ACCESS_WRITE without unnecessary data transfers
    CV_Assert(u->data != 0);
    Mat hdr(dims, size.p, type(), u->data + offset, step.p);
    hdr.flags = flags;
    hdr.u = u;
    hdr.datastart = u->data;
    hdr.data = u->data + offset;
    hdr.datalimit = hdr.dataend = u->data + u->size;
    CV_XADD(&hdr.u->refcount, 1);
    return hdr;
}
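/* Usage sketch (illustrative, hypothetical function name): reshape()
   re-interprets the same buffer with a different channel and/or row count;
   nothing is copied, so the total number of elements must stay constant:

    #include <opencv2/core.hpp>

    void reshape_example()
    {
        cv::UMat rgb(4, 6, CV_8UC3);           // 4x6, 3 channels = 72 bytes
        cv::UMat flat = rgb.reshape(1);        // 4x18, 1 channel, same buffer
        cv::UMat col  = rgb.reshape(3, 24);    // 24x1, 3 channels
        CV_Assert(flat.total()*flat.channels() == rgb.total()*rgb.channels());
        (void)col;
    }
*/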
void* UMat::handle(int accessFlags) const
{
    if( !u )
        return 0;

    // check flags: if CPU copy is newer, copy it back to GPU.
    if( u->deviceCopyObsolete() )
    {
        CV_Assert(u->refcount == 0);
        u->currAllocator->unmap(u);
    }

    if ((accessFlags & ACCESS_WRITE) != 0)
        u->markHostCopyObsolete(true);

    return u->handle;
}

void UMat::ndoffset(size_t* ofs) const
{
    // offset = step[0]*ofs[0] + step[1]*ofs[1] + step[2]*ofs[2] + ...;
    size_t val = offset;
    for( int i = 0; i < dims; i++ )
    {
        size_t s = step.p[i];
        ofs[i] = val / s;
        val -= ofs[i]*s;
    }
}

void UMat::copyTo(OutputArray _dst) const
{
    int dtype = _dst.type();
    if( _dst.fixedType() && dtype != type() )
    {
        CV_Assert( channels() == CV_MAT_CN(dtype) );
        convertTo( _dst, dtype );
        return;
    }

    if( empty() )
    {
        _dst.release();
        return;
    }

    size_t i, sz[CV_MAX_DIM], srcofs[CV_MAX_DIM], dstofs[CV_MAX_DIM], esz = elemSize();
    for( i = 0; i < (size_t)dims; i++ )
        sz[i] = size.p[i];
    sz[dims-1] *= esz;
    ndoffset(srcofs);
    srcofs[dims-1] *= esz;

    _dst.create( dims, size.p, type() );
    if( _dst.isUMat() )
    {
        UMat dst = _dst.getUMat();
        if( u == dst.u && dst.offset == offset )
            return;

        if (u->currAllocator == dst.u->currAllocator)
        {
            dst.ndoffset(dstofs);
            dstofs[dims-1] *= esz;
            u->currAllocator->copy(u, dst.u, dims, sz, srcofs, step.p, dstofs, dst.step.p, false);
            return;
        }
    }

    Mat dst = _dst.getMat();
    u->currAllocator->download(u, dst.ptr(), dims, sz, srcofs, step.p, dst.step.p);
}

void UMat::copyTo(OutputArray _dst, InputArray _mask) const
{
    if( _mask.empty() )
    {
        copyTo(_dst);
        return;
    }
#ifdef HAVE_OPENCL
    int cn = channels(), mtype = _mask.type(), mdepth = CV_MAT_DEPTH(mtype), mcn = CV_MAT_CN(mtype);
    CV_Assert( mdepth == CV_8U && (mcn == 1 || mcn == cn) );

    if (ocl::useOpenCL() && _dst.isUMat() && dims <= 2)
    {
        UMatData * prevu = _dst.getUMat().u;
        _dst.create( dims, size, type() );

        UMat dst = _dst.getUMat();

        bool haveDstUninit = false;
        if( prevu != dst.u ) // do not leave dst uninitialized
            haveDstUninit = true;

        String opts = format("-D COPY_TO_MASK -D T1=%s -D scn=%d -D mcn=%d%s",
                             ocl::memopTypeToStr(depth()), cn, mcn,
                             haveDstUninit ? " -D HAVE_DST_UNINIT" : "");

        ocl::Kernel k("copyToMask", ocl::core::copyset_oclsrc, opts);
        if (!k.empty())
        {
            k.args(ocl::KernelArg::ReadOnlyNoSize(*this),
                   ocl::KernelArg::ReadOnlyNoSize(_mask.getUMat()),
                   haveDstUninit ? ocl::KernelArg::WriteOnly(dst) :
                                   ocl::KernelArg::ReadWrite(dst));

            // cast explicitly: brace-initializing size_t from int is a narrowing
            // conversion in C++11
            size_t globalsize[2] = { (size_t)cols, (size_t)rows };
            if (k.run(2, globalsize, NULL, false))
            {
                CV_IMPL_ADD(CV_IMPL_OCL);
                return;
            }
        }
    }
#endif
    Mat src = getMat(ACCESS_READ);
    src.copyTo(_dst, _mask);
}
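/* Usage sketch (illustrative, hypothetical function name): the OpenCL branch
   above dispatches the "copyToMask" kernel when source and destination are
   UMats; otherwise it falls back to the Mat implementation. Either way the
   observable behavior is:

    #include <opencv2/core.hpp>

    void masked_copy_example(const cv::UMat& src)
    {
        cv::UMat mask(src.size(), CV_8UC1, cv::Scalar(0));
        mask(cv::Rect(0, 0, src.cols/2, src.rows)).setTo(cv::Scalar(255));

        cv::UMat dst(src.size(), src.type(), cv::Scalar::all(0));
        src.copyTo(dst, mask);   // copies only where mask != 0
    }
*/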
void UMat::convertTo(OutputArray _dst, int _type, double alpha, double beta) const
{
    bool noScale = std::fabs(alpha - 1) < DBL_EPSILON && std::fabs(beta) < DBL_EPSILON;
    int stype = type(), cn = CV_MAT_CN(stype);

    if( _type < 0 )
        _type = _dst.fixedType() ? _dst.type() : stype;
    else
        _type = CV_MAKETYPE(CV_MAT_DEPTH(_type), cn);

    int sdepth = CV_MAT_DEPTH(stype), ddepth = CV_MAT_DEPTH(_type);
    if( sdepth == ddepth && noScale )
    {
        copyTo(_dst);
        return;
    }
#ifdef HAVE_OPENCL
    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
    bool needDouble = sdepth == CV_64F || ddepth == CV_64F;
    if( dims <= 2 && cn && _dst.isUMat() && ocl::useOpenCL() &&
        ((needDouble && doubleSupport) || !needDouble) )
    {
        int wdepth = std::max(CV_32F, sdepth), rowsPerWI = 4;

        char cvt[2][40];
        ocl::Kernel k("convertTo", ocl::core::convert_oclsrc,
                      format("-D srcT=%s -D WT=%s -D dstT=%s -D convertToWT=%s -D convertToDT=%s%s",
                             ocl::typeToStr(sdepth), ocl::typeToStr(wdepth), ocl::typeToStr(ddepth),
                             ocl::convertTypeStr(sdepth, wdepth, 1, cvt[0]),
                             ocl::convertTypeStr(wdepth, ddepth, 1, cvt[1]),
                             doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
        if (!k.empty())
        {
            UMat src = *this;
            _dst.create( size(), _type );
            UMat dst = _dst.getUMat();

            float alphaf = (float)alpha, betaf = (float)beta;
            ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
                dstarg = ocl::KernelArg::WriteOnly(dst, cn);

            if (wdepth == CV_32F)
                k.args(srcarg, dstarg, alphaf, betaf, rowsPerWI);
            else
                k.args(srcarg, dstarg, alpha, beta, rowsPerWI);

            // cast explicitly to avoid C++11 narrowing in the braced initializer
            size_t globalsize[2] = { (size_t)(dst.cols * cn),
                                     (size_t)((dst.rows + rowsPerWI - 1) / rowsPerWI) };
            if (k.run(2, globalsize, NULL, false))
            {
                CV_IMPL_ADD(CV_IMPL_OCL);
                return;
            }
        }
    }
#endif
    Mat m = getMat(ACCESS_READ);
    m.convertTo(_dst, _type, alpha, beta);
}
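/* Usage sketch (illustrative, hypothetical function name): convertTo()
   computes dst = saturate_cast(src*alpha + beta) with an optional depth
   change; on a capable device the "convertTo" kernel above handles it,
   otherwise the Mat fallback does:

    #include <opencv2/core.hpp>

    void convert_example(const cv::UMat& src8u)
    {
        cv::UMat dst32f;
        src8u.convertTo(dst32f, CV_32F, 1.0/255.0);  // scale 8-bit to [0,1]
    }
*/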
UMat& UMat::setTo(InputArray _value, InputArray _mask)
{
    bool haveMask = !_mask.empty();
#ifdef HAVE_OPENCL
    int tp = type(), cn = CV_MAT_CN(tp), d = CV_MAT_DEPTH(tp);

    if( dims <= 2 && cn <= 4 && CV_MAT_DEPTH(tp) < CV_64F && ocl::useOpenCL() )
    {
        Mat value = _value.getMat();
        CV_Assert( checkScalar(value, type(), _value.kind(), _InputArray::UMAT) );
        int kercn = haveMask || cn == 3 ? cn : std::max(cn, ocl::predictOptimalVectorWidth(*this)),
            kertp = CV_MAKE_TYPE(d, kercn);

        double buf[16] = { 0, 0, 0, 0, 0, 0, 0, 0,
                           0, 0, 0, 0, 0, 0, 0, 0 };
        convertAndUnrollScalar(value, tp, (uchar *)buf, kercn / cn);

        int scalarcn = kercn == 3 ? 4 : kercn, rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
        String opts = format("-D dstT=%s -D rowsPerWI=%d -D dstST=%s -D dstT1=%s -D cn=%d",
                             ocl::memopTypeToStr(kertp), rowsPerWI,
                             ocl::memopTypeToStr(CV_MAKETYPE(d, scalarcn)),
                             ocl::memopTypeToStr(d), kercn);

        ocl::Kernel setK(haveMask ? "setMask" : "set", ocl::core::copyset_oclsrc, opts);
        if( !setK.empty() )
        {
            ocl::KernelArg scalararg(0, 0, 0, 0, buf, CV_ELEM_SIZE(d) * scalarcn);
            UMat mask;

            if( haveMask )
            {
                mask = _mask.getUMat();
                CV_Assert( mask.size() == size() && mask.type() == CV_8UC1 );
                ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask),
                    dstarg = ocl::KernelArg::ReadWrite(*this);
                setK.args(maskarg, dstarg, scalararg);
            }
            else
            {
                ocl::KernelArg dstarg = ocl::KernelArg::WriteOnly(*this, cn, kercn);
                setK.args(dstarg, scalararg);
            }

            // cast explicitly to avoid C++11 narrowing in the braced initializer
            size_t globalsize[] = { (size_t)(cols * cn / kercn),
                                    (size_t)((rows + rowsPerWI - 1) / rowsPerWI) };
            if( setK.run(2, globalsize, NULL, false) )
            {
                CV_IMPL_ADD(CV_IMPL_OCL);
                return *this;
            }
        }
    }
#endif
    Mat m = getMat(haveMask ? ACCESS_RW : ACCESS_WRITE);
    m.setTo(_value, _mask);
    return *this;
}

UMat& UMat::operator = (const Scalar& s)
{
    setTo(s);
    return *this;
}

UMat UMat::t() const
{
    UMat m;
    transpose(*this, m);
    return m;
}

UMat UMat::inv(int method) const
{
    UMat m;
    invert(*this, m, method);
    return m;
}

UMat UMat::mul(InputArray m, double scale) const
{
    UMat dst;
    multiply(*this, m, dst, scale);
    return dst;
}

#ifdef HAVE_OPENCL

static bool ocl_dot( InputArray _src1, InputArray _src2, double & res )
{
    UMat src1 = _src1.getUMat().reshape(1), src2 = _src2.getUMat().reshape(1);

    int type = src1.type(), depth = CV_MAT_DEPTH(type),
        kercn = ocl::predictOptimalVectorWidth(src1, src2);
    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

    if ( !doubleSupport && depth == CV_64F )
        return false;

    int dbsize = ocl::Device::getDefault().maxComputeUnits();
    size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
    int ddepth = std::max(CV_32F, depth);

    int wgs2_aligned = 1;
    while (wgs2_aligned < (int)wgs)
        wgs2_aligned <<= 1;
    wgs2_aligned >>= 1;

    char cvt[40];
    ocl::Kernel k("reduce", ocl::core::reduce_oclsrc,
                  format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstTK=%s -D ddepth=%d -D convertToDT=%s -D OP_DOT "
                         "-D WGS=%d -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d",
                         ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), ocl::typeToStr(depth),
                         ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)),
                         ddepth, ocl::convertTypeStr(depth, ddepth, kercn, cvt),
                         (int)wgs, wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "",
                         _src1.isContinuous() ? " -D HAVE_SRC_CONT" : "",
                         _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", kercn));
    if (k.empty())
        return false;

    UMat db(1, dbsize, ddepth);

    ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1),
        src2arg = ocl::KernelArg::ReadOnlyNoSize(src2),
        dbarg = ocl::KernelArg::PtrWriteOnly(db);

    k.args(src1arg, src1.cols, (int)src1.total(), dbsize, dbarg, src2arg);

    size_t globalsize = dbsize * wgs;
    if (k.run(1, &globalsize, &wgs, false))
    {
        res = sum(db.getMat(ACCESS_READ))[0];
        return true;
    }
    return false;
}

#endif

double UMat::dot(InputArray m) const
{
    CV_Assert(m.sameSize(*this) && m.type() == type());

#ifdef HAVE_OPENCL
    double r = 0;
    CV_OCL_RUN_(dims <= 2, ocl_dot(*this, m, r), r)
#endif

    return getMat(ACCESS_READ).dot(m);
}

UMat UMat::zeros(int rows, int cols, int type)
{
    return UMat(rows, cols, type, Scalar::all(0));
}

UMat UMat::zeros(Size size, int type)
{
    return UMat(size, type, Scalar::all(0));
}

UMat UMat::zeros(int ndims, const int* sz, int type)
{
    return UMat(ndims, sz, type, Scalar::all(0));
}

UMat UMat::ones(int rows, int cols, int type)
{
    return UMat::ones(Size(cols, rows), type);
}

UMat UMat::ones(Size size, int type)
{
    return UMat(size, type, Scalar(1));
}

UMat UMat::ones(int ndims, const int* sz, int type)
{
    return UMat(ndims, sz, type, Scalar(1));
}

UMat UMat::eye(int rows, int cols, int type)
{
    return UMat::eye(Size(cols, rows), type);
}

UMat UMat::eye(Size size, int type)
{
    UMat m(size, type);
    setIdentity(m);
    return m;
}

}

/* End of file. */