1/*M///////////////////////////////////////////////////////////////////////////////////////
2//
3//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4//
5//  By downloading, copying, installing or using the software you agree to this license.
6//  If you do not agree to this license, do not download, install,
7//  copy or use the software.
8//
9//
10//                           License Agreement
11//                For Open Source Computer Vision Library
12//
13// Copyright (C) 2014, Itseez Inc., all rights reserved.
14// Third party copyrights are property of their respective owners.
15//
16// Redistribution and use in source and binary forms, with or without modification,
17// are permitted provided that the following conditions are met:
18//
19//   * Redistribution's of source code must retain the above copyright notice,
20//     this list of conditions and the following disclaimer.
21//
22//   * Redistribution's in binary form must reproduce the above copyright notice,
23//     this list of conditions and the following disclaimer in the documentation
24//     and/or other materials provided with the distribution.
25//
26//   * The name of the copyright holders may not be used to endorse or promote products
27//     derived from this software without specific prior written permission.
28//
29// This software is provided by the copyright holders and contributors "as is" and
30// any express or implied warranties, including, but not limited to, the implied
31// warranties of merchantability and fitness for a particular purpose are disclaimed.
32// In no event shall the Intel Corporation or contributors be liable for any direct,
33// indirect, incidental, special, exemplary, or consequential damages
34// (including, but not limited to, procurement of substitute goods or services;
35// loss of use, data, or profits; or business interruption) however caused
36// and on any theory of liability, whether in contract, strict liability,
37// or tort (including negligence or otherwise) arising in any way out of
38// the use of this software, even if advised of the possibility of such damage.
39//
40//M*/
41
42#include "precomp.hpp"
43#include "opencl_kernels_core.hpp"
44
45///////////////////////////////// UMat implementation ///////////////////////////////
46
47namespace cv {
48
49// it should be a prime number for the best hash function
50enum { UMAT_NLOCKS = 31 };
51static Mutex umatLocks[UMAT_NLOCKS];
52
53UMatData::UMatData(const MatAllocator* allocator)
54{
55    prevAllocator = currAllocator = allocator;
56    urefcount = refcount = 0;
57    data = origdata = 0;
58    size = 0;
59    flags = 0;
60    handle = 0;
61    userdata = 0;
62    allocatorFlags_ = 0;
63}
64
65UMatData::~UMatData()
66{
67    prevAllocator = currAllocator = 0;
68    urefcount = refcount = 0;
69    data = origdata = 0;
70    size = 0;
71    flags = 0;
72    handle = 0;
73    userdata = 0;
74    allocatorFlags_ = 0;
75}
76
77void UMatData::lock()
78{
79    umatLocks[(size_t)(void*)this % UMAT_NLOCKS].lock();
80}
81
82void UMatData::unlock()
83{
84    umatLocks[(size_t)(void*)this % UMAT_NLOCKS].unlock();
85}
86
87
88MatAllocator* UMat::getStdAllocator()
89{
90#ifdef HAVE_OPENCL
91    if( ocl::haveOpenCL() && ocl::useOpenCL() )
92        return ocl::getOpenCLAllocator();
93#endif
94    return Mat::getStdAllocator();
95}
96
97void swap( UMat& a, UMat& b )
98{
99    std::swap(a.flags, b.flags);
100    std::swap(a.dims, b.dims);
101    std::swap(a.rows, b.rows);
102    std::swap(a.cols, b.cols);
103    std::swap(a.allocator, b.allocator);
104    std::swap(a.u, b.u);
105    std::swap(a.offset, b.offset);
106
107    std::swap(a.size.p, b.size.p);
108    std::swap(a.step.p, b.step.p);
109    std::swap(a.step.buf[0], b.step.buf[0]);
110    std::swap(a.step.buf[1], b.step.buf[1]);
111
112    if( a.step.p == b.step.buf )
113    {
114        a.step.p = a.step.buf;
115        a.size.p = &a.rows;
116    }
117
118    if( b.step.p == a.step.buf )
119    {
120        b.step.p = b.step.buf;
121        b.size.p = &b.rows;
122    }
123}
124
125
126static inline void setSize( UMat& m, int _dims, const int* _sz,
127                            const size_t* _steps, bool autoSteps=false )
128{
129    CV_Assert( 0 <= _dims && _dims <= CV_MAX_DIM );
130    if( m.dims != _dims )
131    {
132        if( m.step.p != m.step.buf )
133        {
134            fastFree(m.step.p);
135            m.step.p = m.step.buf;
136            m.size.p = &m.rows;
137        }
138        if( _dims > 2 )
139        {
140            m.step.p = (size_t*)fastMalloc(_dims*sizeof(m.step.p[0]) + (_dims+1)*sizeof(m.size.p[0]));
141            m.size.p = (int*)(m.step.p + _dims) + 1;
142            m.size.p[-1] = _dims;
143            m.rows = m.cols = -1;
144        }
145    }
146
147    m.dims = _dims;
148    if( !_sz )
149        return;
150
151    size_t esz = CV_ELEM_SIZE(m.flags), total = esz;
152    int i;
153    for( i = _dims-1; i >= 0; i-- )
154    {
155        int s = _sz[i];
156        CV_Assert( s >= 0 );
157        m.size.p[i] = s;
158
159        if( _steps )
160            m.step.p[i] = i < _dims-1 ? _steps[i] : esz;
161        else if( autoSteps )
162        {
163            m.step.p[i] = total;
164            int64 total1 = (int64)total*s;
165            if( (uint64)total1 != (size_t)total1 )
166                CV_Error( CV_StsOutOfRange, "The total matrix size does not fit to \"size_t\" type" );
167            total = (size_t)total1;
168        }
169    }
170
171    if( _dims == 1 )
172    {
173        m.dims = 2;
174        m.cols = 1;
175        m.step[1] = esz;
176    }
177}
178
179static void updateContinuityFlag(UMat& m)
180{
181    int i, j;
182    for( i = 0; i < m.dims; i++ )
183    {
184        if( m.size[i] > 1 )
185            break;
186    }
187
188    for( j = m.dims-1; j > i; j-- )
189    {
190        if( m.step[j]*m.size[j] < m.step[j-1] )
191            break;
192    }
193
194    uint64 total = (uint64)m.step[0]*m.size[0];
195    if( j <= i && total == (size_t)total )
196        m.flags |= UMat::CONTINUOUS_FLAG;
197    else
198        m.flags &= ~UMat::CONTINUOUS_FLAG;
199}
200
201
202static void finalizeHdr(UMat& m)
203{
204    updateContinuityFlag(m);
205    int d = m.dims;
206    if( d > 2 )
207        m.rows = m.cols = -1;
208}
209
210UMat Mat::getUMat(int accessFlags, UMatUsageFlags usageFlags) const
211{
212    UMat hdr;
213    if(!data)
214        return hdr;
215    UMatData* temp_u = u;
216    if(!temp_u)
217    {
218        MatAllocator *a = allocator, *a0 = getStdAllocator();
219        if(!a)
220            a = a0;
221        temp_u = a->allocate(dims, size.p, type(), data, step.p, accessFlags, usageFlags);
222        temp_u->refcount = 1;
223    }
224    UMat::getStdAllocator()->allocate(temp_u, accessFlags, usageFlags); // TODO result is not checked
225    hdr.flags = flags;
226    setSize(hdr, dims, size.p, step.p);
227    finalizeHdr(hdr);
228    hdr.u = temp_u;
229    hdr.offset = data - datastart;
230    hdr.addref();
231    return hdr;
232}
233
234void UMat::create(int d, const int* _sizes, int _type, UMatUsageFlags _usageFlags)
235{
236    this->usageFlags = _usageFlags;
237
238    int i;
239    CV_Assert(0 <= d && d <= CV_MAX_DIM && _sizes);
240    _type = CV_MAT_TYPE(_type);
241
242    if( u && (d == dims || (d == 1 && dims <= 2)) && _type == type() )
243    {
244        if( d == 2 && rows == _sizes[0] && cols == _sizes[1] )
245            return;
246        for( i = 0; i < d; i++ )
247            if( size[i] != _sizes[i] )
248                break;
249        if( i == d && (d > 1 || size[1] == 1))
250            return;
251    }
252
253    release();
254    if( d == 0 )
255        return;
256    flags = (_type & CV_MAT_TYPE_MASK) | MAGIC_VAL;
257    setSize(*this, d, _sizes, 0, true);
258    offset = 0;
259
260    if( total() > 0 )
261    {
262        MatAllocator *a = allocator, *a0 = getStdAllocator();
263        if(!a)
264            a = a0;
265        try
266        {
267            u = a->allocate(dims, size, _type, 0, step.p, 0, usageFlags);
268            CV_Assert(u != 0);
269        }
270        catch(...)
271        {
272            if(a != a0)
273                u = a0->allocate(dims, size, _type, 0, step.p, 0, usageFlags);
274            CV_Assert(u != 0);
275        }
276        CV_Assert( step[dims-1] == (size_t)CV_ELEM_SIZE(flags) );
277    }
278
279    finalizeHdr(*this);
280    addref();
281}
282
283void UMat::copySize(const UMat& m)
284{
285    setSize(*this, m.dims, 0, 0);
286    for( int i = 0; i < dims; i++ )
287    {
288        size[i] = m.size[i];
289        step[i] = m.step[i];
290    }
291}
292
293
294UMat::~UMat()
295{
296    release();
297    if( step.p != step.buf )
298        fastFree(step.p);
299}
300
301void UMat::deallocate()
302{
303    u->currAllocator->deallocate(u);
304    u = NULL;
305}
306
307
308UMat::UMat(const UMat& m, const Range& _rowRange, const Range& _colRange)
309    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(USAGE_DEFAULT), u(0), offset(0), size(&rows)
310{
311    CV_Assert( m.dims >= 2 );
312    if( m.dims > 2 )
313    {
314        AutoBuffer<Range> rs(m.dims);
315        rs[0] = _rowRange;
316        rs[1] = _colRange;
317        for( int i = 2; i < m.dims; i++ )
318            rs[i] = Range::all();
319        *this = m(rs);
320        return;
321    }
322
323    *this = m;
324    if( _rowRange != Range::all() && _rowRange != Range(0,rows) )
325    {
326        CV_Assert( 0 <= _rowRange.start && _rowRange.start <= _rowRange.end && _rowRange.end <= m.rows );
327        rows = _rowRange.size();
328        offset += step*_rowRange.start;
329        flags |= SUBMATRIX_FLAG;
330    }
331
332    if( _colRange != Range::all() && _colRange != Range(0,cols) )
333    {
334        CV_Assert( 0 <= _colRange.start && _colRange.start <= _colRange.end && _colRange.end <= m.cols );
335        cols = _colRange.size();
336        offset += _colRange.start*elemSize();
337        flags &= cols < m.cols ? ~CONTINUOUS_FLAG : -1;
338        flags |= SUBMATRIX_FLAG;
339    }
340
341    if( rows == 1 )
342        flags |= CONTINUOUS_FLAG;
343
344    if( rows <= 0 || cols <= 0 )
345    {
346        release();
347        rows = cols = 0;
348    }
349}
350
351
352UMat::UMat(const UMat& m, const Rect& roi)
353    : flags(m.flags), dims(2), rows(roi.height), cols(roi.width),
354    allocator(m.allocator), usageFlags(m.usageFlags), u(m.u), offset(m.offset + roi.y*m.step[0]), size(&rows)
355{
356    CV_Assert( m.dims <= 2 );
357    flags &= roi.width < m.cols ? ~CONTINUOUS_FLAG : -1;
358    flags |= roi.height == 1 ? CONTINUOUS_FLAG : 0;
359
360    size_t esz = CV_ELEM_SIZE(flags);
361    offset += roi.x*esz;
362    CV_Assert( 0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= m.cols &&
363              0 <= roi.y && 0 <= roi.height && roi.y + roi.height <= m.rows );
364    if( u )
365        CV_XADD(&(u->urefcount), 1);
366    if( roi.width < m.cols || roi.height < m.rows )
367        flags |= SUBMATRIX_FLAG;
368
369    step[0] = m.step[0]; step[1] = esz;
370
371    if( rows <= 0 || cols <= 0 )
372    {
373        release();
374        rows = cols = 0;
375    }
376}
377
378
379UMat::UMat(const UMat& m, const Range* ranges)
380    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(USAGE_DEFAULT), u(0), offset(0), size(&rows)
381{
382    int i, d = m.dims;
383
384    CV_Assert(ranges);
385    for( i = 0; i < d; i++ )
386    {
387        Range r = ranges[i];
388        CV_Assert( r == Range::all() || (0 <= r.start && r.start < r.end && r.end <= m.size[i]) );
389    }
390    *this = m;
391    for( i = 0; i < d; i++ )
392    {
393        Range r = ranges[i];
394        if( r != Range::all() && r != Range(0, size.p[i]))
395        {
396            size.p[i] = r.end - r.start;
397            offset += r.start*step.p[i];
398            flags |= SUBMATRIX_FLAG;
399        }
400    }
401    updateContinuityFlag(*this);
402}
403
404UMat UMat::diag(int d) const
405{
406    CV_Assert( dims <= 2 );
407    UMat m = *this;
408    size_t esz = elemSize();
409    int len;
410
411    if( d >= 0 )
412    {
413        len = std::min(cols - d, rows);
414        m.offset += esz*d;
415    }
416    else
417    {
418        len = std::min(rows + d, cols);
419        m.offset -= step[0]*d;
420    }
421    CV_DbgAssert( len > 0 );
422
423    m.size[0] = m.rows = len;
424    m.size[1] = m.cols = 1;
425    m.step[0] += (len > 1 ? esz : 0);
426
427    if( m.rows > 1 )
428        m.flags &= ~CONTINUOUS_FLAG;
429    else
430        m.flags |= CONTINUOUS_FLAG;
431
432    if( size() != Size(1,1) )
433        m.flags |= SUBMATRIX_FLAG;
434
435    return m;
436}
437
438void UMat::locateROI( Size& wholeSize, Point& ofs ) const
439{
440    CV_Assert( dims <= 2 && step[0] > 0 );
441    size_t esz = elemSize(), minstep;
442    ptrdiff_t delta1 = (ptrdiff_t)offset, delta2 = (ptrdiff_t)u->size;
443
444    if( delta1 == 0 )
445        ofs.x = ofs.y = 0;
446    else
447    {
448        ofs.y = (int)(delta1/step[0]);
449        ofs.x = (int)((delta1 - step[0]*ofs.y)/esz);
450        CV_DbgAssert( offset == (size_t)(ofs.y*step[0] + ofs.x*esz) );
451    }
452    minstep = (ofs.x + cols)*esz;
453    wholeSize.height = (int)((delta2 - minstep)/step[0] + 1);
454    wholeSize.height = std::max(wholeSize.height, ofs.y + rows);
455    wholeSize.width = (int)((delta2 - step*(wholeSize.height-1))/esz);
456    wholeSize.width = std::max(wholeSize.width, ofs.x + cols);
457}
458
459
460UMat& UMat::adjustROI( int dtop, int dbottom, int dleft, int dright )
461{
462    CV_Assert( dims <= 2 && step[0] > 0 );
463    Size wholeSize; Point ofs;
464    size_t esz = elemSize();
465    locateROI( wholeSize, ofs );
466    int row1 = std::max(ofs.y - dtop, 0), row2 = std::min(ofs.y + rows + dbottom, wholeSize.height);
467    int col1 = std::max(ofs.x - dleft, 0), col2 = std::min(ofs.x + cols + dright, wholeSize.width);
468    offset += (row1 - ofs.y)*step + (col1 - ofs.x)*esz;
469    rows = row2 - row1; cols = col2 - col1;
470    size.p[0] = rows; size.p[1] = cols;
471    if( esz*cols == step[0] || rows == 1 )
472        flags |= CONTINUOUS_FLAG;
473    else
474        flags &= ~CONTINUOUS_FLAG;
475    return *this;
476}
477
478
479UMat UMat::reshape(int new_cn, int new_rows) const
480{
481    int cn = channels();
482    UMat hdr = *this;
483
484    if( dims > 2 && new_rows == 0 && new_cn != 0 && size[dims-1]*cn % new_cn == 0 )
485    {
486        hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn-1) << CV_CN_SHIFT);
487        hdr.step[dims-1] = CV_ELEM_SIZE(hdr.flags);
488        hdr.size[dims-1] = hdr.size[dims-1]*cn / new_cn;
489        return hdr;
490    }
491
492    CV_Assert( dims <= 2 );
493
494    if( new_cn == 0 )
495        new_cn = cn;
496
497    int total_width = cols * cn;
498
499    if( (new_cn > total_width || total_width % new_cn != 0) && new_rows == 0 )
500        new_rows = rows * total_width / new_cn;
501
502    if( new_rows != 0 && new_rows != rows )
503    {
504        int total_size = total_width * rows;
505        if( !isContinuous() )
506            CV_Error( CV_BadStep,
507            "The matrix is not continuous, thus its number of rows can not be changed" );
508
509        if( (unsigned)new_rows > (unsigned)total_size )
510            CV_Error( CV_StsOutOfRange, "Bad new number of rows" );
511
512        total_width = total_size / new_rows;
513
514        if( total_width * new_rows != total_size )
515            CV_Error( CV_StsBadArg, "The total number of matrix elements "
516                                    "is not divisible by the new number of rows" );
517
518        hdr.rows = new_rows;
519        hdr.step[0] = total_width * elemSize1();
520    }
521
522    int new_width = total_width / new_cn;
523
524    if( new_width * new_cn != total_width )
525        CV_Error( CV_BadNumChannels,
526        "The total width is not divisible by the new number of channels" );
527
528    hdr.cols = new_width;
529    hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn-1) << CV_CN_SHIFT);
530    hdr.step[1] = CV_ELEM_SIZE(hdr.flags);
531    return hdr;
532}
533
534UMat UMat::diag(const UMat& d)
535{
536    CV_Assert( d.cols == 1 || d.rows == 1 );
537    int len = d.rows + d.cols - 1;
538    UMat m(len, len, d.type(), Scalar(0));
539    UMat md = m.diag();
540    if( d.cols == 1 )
541        d.copyTo(md);
542    else
543        transpose(d, md);
544    return m;
545}
546
547int UMat::checkVector(int _elemChannels, int _depth, bool _requireContinuous) const
548{
549    return (depth() == _depth || _depth <= 0) &&
550        (isContinuous() || !_requireContinuous) &&
551        ((dims == 2 && (((rows == 1 || cols == 1) && channels() == _elemChannels) ||
552                        (cols == _elemChannels && channels() == 1))) ||
553        (dims == 3 && channels() == 1 && size.p[2] == _elemChannels && (size.p[0] == 1 || size.p[1] == 1) &&
554         (isContinuous() || step.p[1] == step.p[2]*size.p[2])))
555    ? (int)(total()*channels()/_elemChannels) : -1;
556}
557
558UMat UMat::reshape(int _cn, int _newndims, const int* _newsz) const
559{
560    if(_newndims == dims)
561    {
562        if(_newsz == 0)
563            return reshape(_cn);
564        if(_newndims == 2)
565            return reshape(_cn, _newsz[0]);
566    }
567
568    CV_Error(CV_StsNotImplemented, "");
569    // TBD
570    return UMat();
571}
572
573
574Mat UMat::getMat(int accessFlags) const
575{
576    if(!u)
577        return Mat();
578    u->currAllocator->map(u, accessFlags | ACCESS_READ); // TODO Support ACCESS_WRITE without unnecessary data transfers
579    CV_Assert(u->data != 0);
580    Mat hdr(dims, size.p, type(), u->data + offset, step.p);
581    hdr.flags = flags;
582    hdr.u = u;
583    hdr.datastart = u->data;
584    hdr.data = u->data + offset;
585    hdr.datalimit = hdr.dataend = u->data + u->size;
586    CV_XADD(&hdr.u->refcount, 1);
587    return hdr;
588}
589
590void* UMat::handle(int accessFlags) const
591{
592    if( !u )
593        return 0;
594
595    // check flags: if CPU copy is newer, copy it back to GPU.
596    if( u->deviceCopyObsolete() )
597    {
598        CV_Assert(u->refcount == 0);
599        u->currAllocator->unmap(u);
600    }
601
602    if ((accessFlags & ACCESS_WRITE) != 0)
603        u->markHostCopyObsolete(true);
604
605    return u->handle;
606}
607
608void UMat::ndoffset(size_t* ofs) const
609{
610    // offset = step[0]*ofs[0] + step[1]*ofs[1] + step[2]*ofs[2] + ...;
611    size_t val = offset;
612    for( int i = 0; i < dims; i++ )
613    {
614        size_t s = step.p[i];
615        ofs[i] = val / s;
616        val -= ofs[i]*s;
617    }
618}
619
620void UMat::copyTo(OutputArray _dst) const
621{
622    int dtype = _dst.type();
623    if( _dst.fixedType() && dtype != type() )
624    {
625        CV_Assert( channels() == CV_MAT_CN(dtype) );
626        convertTo( _dst, dtype );
627        return;
628    }
629
630    if( empty() )
631    {
632        _dst.release();
633        return;
634    }
635
636    size_t i, sz[CV_MAX_DIM], srcofs[CV_MAX_DIM], dstofs[CV_MAX_DIM], esz = elemSize();
637    for( i = 0; i < (size_t)dims; i++ )
638        sz[i] = size.p[i];
639    sz[dims-1] *= esz;
640    ndoffset(srcofs);
641    srcofs[dims-1] *= esz;
642
643    _dst.create( dims, size.p, type() );
644    if( _dst.isUMat() )
645    {
646        UMat dst = _dst.getUMat();
647        if( u == dst.u && dst.offset == offset )
648            return;
649
650        if (u->currAllocator == dst.u->currAllocator)
651        {
652            dst.ndoffset(dstofs);
653            dstofs[dims-1] *= esz;
654            u->currAllocator->copy(u, dst.u, dims, sz, srcofs, step.p, dstofs, dst.step.p, false);
655            return;
656        }
657    }
658
659    Mat dst = _dst.getMat();
660    u->currAllocator->download(u, dst.ptr(), dims, sz, srcofs, step.p, dst.step.p);
661}
662
663void UMat::copyTo(OutputArray _dst, InputArray _mask) const
664{
665    if( _mask.empty() )
666    {
667        copyTo(_dst);
668        return;
669    }
670#ifdef HAVE_OPENCL
671    int cn = channels(), mtype = _mask.type(), mdepth = CV_MAT_DEPTH(mtype), mcn = CV_MAT_CN(mtype);
672    CV_Assert( mdepth == CV_8U && (mcn == 1 || mcn == cn) );
673
674    if (ocl::useOpenCL() && _dst.isUMat() && dims <= 2)
675    {
676        UMatData * prevu = _dst.getUMat().u;
677        _dst.create( dims, size, type() );
678
679        UMat dst = _dst.getUMat();
680
681        bool haveDstUninit = false;
682        if( prevu != dst.u ) // do not leave dst uninitialized
683            haveDstUninit = true;
684
685        String opts = format("-D COPY_TO_MASK -D T1=%s -D scn=%d -D mcn=%d%s",
686                             ocl::memopTypeToStr(depth()), cn, mcn,
687                             haveDstUninit ? " -D HAVE_DST_UNINIT" : "");
688
689        ocl::Kernel k("copyToMask", ocl::core::copyset_oclsrc, opts);
690        if (!k.empty())
691        {
692            k.args(ocl::KernelArg::ReadOnlyNoSize(*this),
693                   ocl::KernelArg::ReadOnlyNoSize(_mask.getUMat()),
694                   haveDstUninit ? ocl::KernelArg::WriteOnly(dst) :
695                                   ocl::KernelArg::ReadWrite(dst));
696
697            size_t globalsize[2] = { cols, rows };
698            if (k.run(2, globalsize, NULL, false))
699            {
700                CV_IMPL_ADD(CV_IMPL_OCL);
701                return;
702            }
703        }
704    }
705#endif
706    Mat src = getMat(ACCESS_READ);
707    src.copyTo(_dst, _mask);
708}
709
710void UMat::convertTo(OutputArray _dst, int _type, double alpha, double beta) const
711{
712    bool noScale = std::fabs(alpha - 1) < DBL_EPSILON && std::fabs(beta) < DBL_EPSILON;
713    int stype = type(), cn = CV_MAT_CN(stype);
714
715    if( _type < 0 )
716        _type = _dst.fixedType() ? _dst.type() : stype;
717    else
718        _type = CV_MAKETYPE(CV_MAT_DEPTH(_type), cn);
719
720    int sdepth = CV_MAT_DEPTH(stype), ddepth = CV_MAT_DEPTH(_type);
721    if( sdepth == ddepth && noScale )
722    {
723        copyTo(_dst);
724        return;
725    }
726#ifdef HAVE_OPENCL
727    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
728    bool needDouble = sdepth == CV_64F || ddepth == CV_64F;
729    if( dims <= 2 && cn && _dst.isUMat() && ocl::useOpenCL() &&
730            ((needDouble && doubleSupport) || !needDouble) )
731    {
732        int wdepth = std::max(CV_32F, sdepth), rowsPerWI = 4;
733
734        char cvt[2][40];
735        ocl::Kernel k("convertTo", ocl::core::convert_oclsrc,
736                      format("-D srcT=%s -D WT=%s -D dstT=%s -D convertToWT=%s -D convertToDT=%s%s",
737                             ocl::typeToStr(sdepth), ocl::typeToStr(wdepth), ocl::typeToStr(ddepth),
738                             ocl::convertTypeStr(sdepth, wdepth, 1, cvt[0]),
739                             ocl::convertTypeStr(wdepth, ddepth, 1, cvt[1]),
740                             doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
741        if (!k.empty())
742        {
743            UMat src = *this;
744            _dst.create( size(), _type );
745            UMat dst = _dst.getUMat();
746
747            float alphaf = (float)alpha, betaf = (float)beta;
748            ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
749                    dstarg = ocl::KernelArg::WriteOnly(dst, cn);
750
751            if (wdepth == CV_32F)
752                k.args(srcarg, dstarg, alphaf, betaf, rowsPerWI);
753            else
754                k.args(srcarg, dstarg, alpha, beta, rowsPerWI);
755
756            size_t globalsize[2] = { dst.cols * cn, (dst.rows + rowsPerWI - 1) / rowsPerWI };
757            if (k.run(2, globalsize, NULL, false))
758            {
759                CV_IMPL_ADD(CV_IMPL_OCL);
760                return;
761            }
762        }
763    }
764#endif
765    Mat m = getMat(ACCESS_READ);
766    m.convertTo(_dst, _type, alpha, beta);
767}
768
769UMat& UMat::setTo(InputArray _value, InputArray _mask)
770{
771    bool haveMask = !_mask.empty();
772#ifdef HAVE_OPENCL
773    int tp = type(), cn = CV_MAT_CN(tp), d = CV_MAT_DEPTH(tp);
774
775    if( dims <= 2 && cn <= 4 && CV_MAT_DEPTH(tp) < CV_64F && ocl::useOpenCL() )
776    {
777        Mat value = _value.getMat();
778        CV_Assert( checkScalar(value, type(), _value.kind(), _InputArray::UMAT) );
779        int kercn = haveMask || cn == 3 ? cn : std::max(cn, ocl::predictOptimalVectorWidth(*this)),
780                kertp = CV_MAKE_TYPE(d, kercn);
781
782        double buf[16] = { 0, 0, 0, 0, 0, 0, 0, 0,
783                           0, 0, 0, 0, 0, 0, 0, 0 };
784        convertAndUnrollScalar(value, tp, (uchar *)buf, kercn / cn);
785
786        int scalarcn = kercn == 3 ? 4 : kercn, rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
787        String opts = format("-D dstT=%s -D rowsPerWI=%d -D dstST=%s -D dstT1=%s -D cn=%d",
788                             ocl::memopTypeToStr(kertp), rowsPerWI,
789                             ocl::memopTypeToStr(CV_MAKETYPE(d, scalarcn)),
790                             ocl::memopTypeToStr(d), kercn);
791
792        ocl::Kernel setK(haveMask ? "setMask" : "set", ocl::core::copyset_oclsrc, opts);
793        if( !setK.empty() )
794        {
795            ocl::KernelArg scalararg(0, 0, 0, 0, buf, CV_ELEM_SIZE(d) * scalarcn);
796            UMat mask;
797
798            if( haveMask )
799            {
800                mask = _mask.getUMat();
801                CV_Assert( mask.size() == size() && mask.type() == CV_8UC1 );
802                ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask),
803                        dstarg = ocl::KernelArg::ReadWrite(*this);
804                setK.args(maskarg, dstarg, scalararg);
805            }
806            else
807            {
808                ocl::KernelArg dstarg = ocl::KernelArg::WriteOnly(*this, cn, kercn);
809                setK.args(dstarg, scalararg);
810            }
811
812            size_t globalsize[] = { cols * cn / kercn, (rows + rowsPerWI - 1) / rowsPerWI };
813            if( setK.run(2, globalsize, NULL, false) )
814            {
815                CV_IMPL_ADD(CV_IMPL_OCL);
816                return *this;
817            }
818        }
819    }
820#endif
821    Mat m = getMat(haveMask ? ACCESS_RW : ACCESS_WRITE);
822    m.setTo(_value, _mask);
823    return *this;
824}
825
826UMat& UMat::operator = (const Scalar& s)
827{
828    setTo(s);
829    return *this;
830}
831
832UMat UMat::t() const
833{
834    UMat m;
835    transpose(*this, m);
836    return m;
837}
838
839UMat UMat::inv(int method) const
840{
841    UMat m;
842    invert(*this, m, method);
843    return m;
844}
845
846UMat UMat::mul(InputArray m, double scale) const
847{
848    UMat dst;
849    multiply(*this, m, dst, scale);
850    return dst;
851}
852
853#ifdef HAVE_OPENCL
854
855static bool ocl_dot( InputArray _src1, InputArray _src2, double & res )
856{
857    UMat src1 = _src1.getUMat().reshape(1), src2 = _src2.getUMat().reshape(1);
858
859    int type = src1.type(), depth = CV_MAT_DEPTH(type),
860            kercn = ocl::predictOptimalVectorWidth(src1, src2);
861    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
862
863    if ( !doubleSupport && depth == CV_64F )
864        return false;
865
866    int dbsize = ocl::Device::getDefault().maxComputeUnits();
867    size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
868    int ddepth = std::max(CV_32F, depth);
869
870    int wgs2_aligned = 1;
871    while (wgs2_aligned < (int)wgs)
872        wgs2_aligned <<= 1;
873    wgs2_aligned >>= 1;
874
875    char cvt[40];
876    ocl::Kernel k("reduce", ocl::core::reduce_oclsrc,
877                  format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstTK=%s -D ddepth=%d -D convertToDT=%s -D OP_DOT "
878                         "-D WGS=%d -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d",
879                         ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), ocl::typeToStr(depth),
880                         ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)),
881                         ddepth, ocl::convertTypeStr(depth, ddepth, kercn, cvt),
882                         (int)wgs, wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "",
883                         _src1.isContinuous() ? " -D HAVE_SRC_CONT" : "",
884                         _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", kercn));
885    if (k.empty())
886        return false;
887
888    UMat db(1, dbsize, ddepth);
889
890    ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1),
891            src2arg = ocl::KernelArg::ReadOnlyNoSize(src2),
892            dbarg = ocl::KernelArg::PtrWriteOnly(db);
893
894    k.args(src1arg, src1.cols, (int)src1.total(), dbsize, dbarg, src2arg);
895
896    size_t globalsize = dbsize * wgs;
897    if (k.run(1, &globalsize, &wgs, false))
898    {
899        res = sum(db.getMat(ACCESS_READ))[0];
900        return true;
901    }
902    return false;
903}
904
905#endif
906
907double UMat::dot(InputArray m) const
908{
909    CV_Assert(m.sameSize(*this) && m.type() == type());
910
911#ifdef HAVE_OPENCL
912    double r = 0;
913    CV_OCL_RUN_(dims <= 2, ocl_dot(*this, m, r), r)
914#endif
915
916    return getMat(ACCESS_READ).dot(m);
917}
918
919UMat UMat::zeros(int rows, int cols, int type)
920{
921    return UMat(rows, cols, type, Scalar::all(0));
922}
923
924UMat UMat::zeros(Size size, int type)
925{
926    return UMat(size, type, Scalar::all(0));
927}
928
929UMat UMat::zeros(int ndims, const int* sz, int type)
930{
931    return UMat(ndims, sz, type, Scalar::all(0));
932}
933
934UMat UMat::ones(int rows, int cols, int type)
935{
936    return UMat::ones(Size(cols, rows), type);
937}
938
939UMat UMat::ones(Size size, int type)
940{
941    return UMat(size, type, Scalar(1));
942}
943
944UMat UMat::ones(int ndims, const int* sz, int type)
945{
946    return UMat(ndims, sz, type, Scalar(1));
947}
948
949UMat UMat::eye(int rows, int cols, int type)
950{
951    return UMat::eye(Size(cols, rows), type);
952}
953
954UMat UMat::eye(Size size, int type)
955{
956    UMat m(size, type);
957    setIdentity(m);
958    return m;
959}
960
961}
962
963/* End of file. */
964