16acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn/*M///////////////////////////////////////////////////////////////////////////////////////
26acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn//
36acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
46acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn//
56acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn//  By downloading, copying, installing or using the software you agree to this license.
66acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn//  If you do not agree to this license, do not download, install,
76acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn//  copy or use the software.
86acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn//
96acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn//
106acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn//                        Intel License Agreement
116acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn//                For Open Source Computer Vision Library
126acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn//
136acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn// Copyright (C) 2000, Intel Corporation, all rights reserved.
146acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn// Third party copyrights are property of their respective owners.
156acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn//
166acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn// Redistribution and use in source and binary forms, with or without modification,
176acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn// are permitted provided that the following conditions are met:
186acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn//
196acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn//   * Redistribution's of source code must retain the above copyright notice,
206acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn//     this list of conditions and the following disclaimer.
216acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn//
226acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn//   * Redistribution's in binary form must reproduce the above copyright notice,
236acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn//     this list of conditions and the following disclaimer in the documentation
246acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn//     and/or other materials provided with the distribution.
256acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn//
266acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn//   * The name of Intel Corporation may not be used to endorse or promote products
276acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn//     derived from this software without specific prior written permission.
286acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn//
296acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn// This software is provided by the copyright holders and contributors "as is" and
306acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn// any express or implied warranties, including, but not limited to, the implied
316acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn// warranties of merchantability and fitness for a particular purpose are disclaimed.
326acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn// In no event shall the Intel Corporation or contributors be liable for any direct,
336acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn// indirect, incidental, special, exemplary, or consequential damages
346acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn// (including, but not limited to, procurement of substitute goods or services;
356acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn// loss of use, data, or profits; or business interruption) however caused
366acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn// and on any theory of liability, whether in contract, strict liability,
376acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn// or tort (including negligence or otherwise) arising in any way out of
386acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn// the use of this software, even if advised of the possibility of such damage.
396acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn//
406acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn//M*/
416acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
#include "_cxcore.h"
#include <string.h>
436acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
446acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn/****************************************************************************************\
456acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn*                                         cvGEMM                                         *
466acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn\****************************************************************************************/
476acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
486acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennicvBLAS_GEMM_32f_t icvBLAS_GEMM_32f_p = 0;
496acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennicvBLAS_GEMM_64f_t icvBLAS_GEMM_64f_p = 0;
506acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennicvBLAS_GEMM_32fc_t icvBLAS_GEMM_32fc_p = 0;
516acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennicvBLAS_GEMM_64fc_t icvBLAS_GEMM_64fc_p = 0;
526acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
536acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Rennstatic void
546acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennicvGEMM_CopyBlock( const uchar* src, int src_step,
556acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                   uchar* dst, int dst_step,
566acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                   CvSize size, int pix_size )
576acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn{
586acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    int j;
596acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    size.width = size.width * (pix_size / sizeof(int));
606acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
616acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    for( ; size.height--; src += src_step, dst += dst_step )
626acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
636acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        for( j = 0; j <= size.width - 4; j += 4 )
646acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
656acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            int t0 = ((const int*)src)[j];
666acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            int t1 = ((const int*)src)[j+1];
676acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            ((int*)dst)[j] = t0;
686acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            ((int*)dst)[j+1] = t1;
696acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            t0 = ((const int*)src)[j+2];
706acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            t1 = ((const int*)src)[j+3];
716acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            ((int*)dst)[j+2] = t0;
726acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            ((int*)dst)[j+3] = t1;
736acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
746acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
756acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        for( ; j < size.width; j++ )
766acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            ((int*)dst)[j] = ((const int*)src)[j];
776acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
786acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn}
796acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
806acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
816acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Rennstatic void
826acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennicvGEMM_TransposeBlock( const uchar* src, int src_step,
836acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        uchar* dst, int dst_step,
846acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        CvSize size, int pix_size )
856acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn{
866acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    int i, j;
876acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    for( i = 0; i < size.width; i++, dst += dst_step, src += pix_size )
886acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
896acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        const uchar* _src = src;
906acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        switch( pix_size )
916acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
926acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        case sizeof(int):
936acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            for( j = 0; j < size.height; j++, _src += src_step )
946acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                ((int*)dst)[j] = ((int*)_src)[0];
956acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            break;
966acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        case sizeof(int)*2:
976acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            for( j = 0; j < size.height*2; j += 2, _src += src_step )
986acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {
996acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                int t0 = ((int*)_src)[0];
1006acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                int t1 = ((int*)_src)[1];
1016acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                ((int*)dst)[j] = t0;
1026acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                ((int*)dst)[j+1] = t1;
1036acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }
1046acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            break;
1056acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        case sizeof(int)*4:
1066acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            for( j = 0; j < size.height*4; j += 4, _src += src_step )
1076acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {
1086acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                int t0 = ((int*)_src)[0];
1096acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                int t1 = ((int*)_src)[1];
1106acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                ((int*)dst)[j] = t0;
1116acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                ((int*)dst)[j+1] = t1;
1126acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                t0 = ((int*)_src)[2];
1136acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                t1 = ((int*)_src)[3];
1146acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                ((int*)dst)[j+2] = t0;
1156acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                ((int*)dst)[j+3] = t1;
1166acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }
1176acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            break;
1186acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        default:
1196acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            assert(0);
1206acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            return;
1216acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
1226acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
1236acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn}
1246acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
1256acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn#define ICV_DEF_GEMM_SINGLE_MUL( flavor, arrtype, worktype )                \
1266acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Rennstatic CvStatus CV_STDCALL                                                  \
1276acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennicvGEMMSingleMul_##flavor( const arrtype* a_data, size_t a_step,            \
1286acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                         const arrtype* b_data, size_t b_step,              \
1296acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                         const arrtype* c_data, size_t c_step,              \
1306acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                         arrtype* d_data, size_t d_step,                    \
1316acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                         CvSize a_size, CvSize d_size,                      \
1326acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                         double alpha, double beta, int flags )             \
1336acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn{                                                                           \
1346acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    int i, j, k, n = a_size.width, m = d_size.width, drows = d_size.height; \
1356acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    const arrtype *_a_data = a_data, *_b_data = b_data, *_c_data = c_data;  \
1366acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    arrtype* a_buf = 0;                                                     \
1376acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    size_t a_step0, a_step1, c_step0, c_step1, t_step;                      \
1386acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                            \
1396acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    a_step /= sizeof(a_data[0]);                                            \
1406acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    b_step /= sizeof(b_data[0]);                                            \
1416acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    c_step /= sizeof(c_data[0]);                                            \
1426acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    d_step /= sizeof(d_data[0]);                                            \
1436acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    a_step0 = a_step;                                                       \
1446acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    a_step1 = 1;                                                            \
1456acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                            \
1466acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !c_data )                                                           \
1476acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        c_step0 = c_step1 = 0;                                              \
1486acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    else if( !(flags & CV_GEMM_C_T) )                                       \
1496acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        c_step0 = c_step, c_step1 = 1;                                      \
1506acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    else                                                                    \
1516acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        c_step0 = 1, c_step1 = c_step;                                      \
1526acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                            \
1536acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( flags & CV_GEMM_A_T )                                               \
1546acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {                                                                       \
1556acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_SWAP( a_step0, a_step1, t_step );                                \
1566acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        n = a_size.height;                                                  \
1576acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( a_step > 1 && n > 1 )                                           \
1586acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            a_buf = (arrtype*)cvStackAlloc(n*sizeof(a_data[0]));            \
1596acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }                                                                       \
1606acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                            \
1616acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( n == 1 ) /* external product */                                     \
1626acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {                                                                       \
1636acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        arrtype* b_buf = 0;                                                 \
1646acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                            \
1656acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( a_step > 1 )                                                    \
1666acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {                                                                   \
1676acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            a_buf = (arrtype*)cvStackAlloc(drows*sizeof(a_data[0]));        \
1686acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            for( k = 0; k < drows; k++ )                                    \
1696acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                a_buf[k] = a_data[a_step*k];                                \
1706acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            a_data = a_buf;                                                 \
1716acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }                                                                   \
1726acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                            \
1736acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( b_step > 1 )                                                    \
1746acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {                                                                   \
1756acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            b_buf = (arrtype*)cvStackAlloc(d_size.width*sizeof(b_buf[0]) ); \
1766acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            for( j = 0; j < d_size.width; j++ )                             \
1776acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                b_buf[j] = b_data[j*b_step];                                \
1786acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            b_data = b_buf;                                                 \
1796acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }                                                                   \
1806acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                            \
1816acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        for( i = 0; i < drows; i++, _c_data += c_step0,                     \
1826acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                    d_data += d_step )                      \
1836acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {                                                                   \
1846acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            worktype al = worktype(a_data[i])*alpha;                        \
1856acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            c_data = _c_data;                                               \
1866acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            for( j = 0; j <= d_size.width - 2; j += 2, c_data += 2*c_step1 )\
1876acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {                                                               \
1886acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                worktype s0 = al*b_data[j];                                 \
1896acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                worktype s1 = al*b_data[j+1];                               \
1906acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                if( !c_data )                                               \
1916acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {                                                           \
1926acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    d_data[j] = arrtype(s0);                                \
1936acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    d_data[j+1] = arrtype(s1);                              \
1946acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }                                                           \
1956acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                else                                                        \
1966acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {                                                           \
1976acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    d_data[j] = arrtype(s0 + c_data[0]*beta);               \
1986acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    d_data[j+1] = arrtype(s1 + c_data[c_step1]*beta);       \
1996acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }                                                           \
2006acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }                                                               \
2016acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                            \
2026acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            for( ; j < d_size.width; j++, c_data += c_step1 )               \
2036acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {                                                               \
2046acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                worktype s0 = al*b_data[j];                                 \
2056acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                if( !c_data )                                               \
2066acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    d_data[j] = arrtype(s0);                                \
2076acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                else                                                        \
2086acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    d_data[j] = arrtype(s0 + c_data[0]*beta);               \
2096acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }                                                               \
2106acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }                                                                   \
2116acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }                                                                       \
2126acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    else if( flags & CV_GEMM_B_T ) /* A * Bt */                             \
2136acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {                                                                       \
2146acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        for( i = 0; i < drows; i++, _a_data += a_step0,                     \
2156acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                    _c_data += c_step0,                     \
2166acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                    d_data += d_step )                      \
2176acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {                                                                   \
2186acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            a_data = _a_data;                                               \
2196acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            b_data = _b_data;                                               \
2206acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            c_data = _c_data;                                               \
2216acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                            \
2226acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( a_buf )                                                     \
2236acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {                                                               \
2246acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                for( k = 0; k < n; k++ )                                    \
2256acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    a_buf[k] = a_data[a_step1*k];                           \
2266acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                a_data = a_buf;                                             \
2276acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }                                                               \
2286acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                            \
2296acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            for( j = 0; j < d_size.width; j++, b_data += b_step,            \
2306acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                               c_data += c_step1 )          \
2316acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {                                                               \
2326acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                worktype s0(0), s1(0), s2(0), s3(0);                        \
2336acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                            \
2346acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                for( k = 0; k <= n - 4; k += 4 )                            \
2356acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {                                                           \
2366acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    s0 += worktype(a_data[k])*b_data[k];                    \
2376acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    s1 += worktype(a_data[k+1])*b_data[k+1];                \
2386acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    s2 += worktype(a_data[k+2])*b_data[k+2];                \
2396acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    s3 += worktype(a_data[k+3])*b_data[k+3];                \
2406acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }                                                           \
2416acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                            \
2426acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                for( ; k < n; k++ )                                         \
2436acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    s0 += worktype(a_data[k])*b_data[k];                    \
2446acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                s0 = (s0+s1+s2+s3)*alpha;                                   \
2456acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                            \
2466acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                if( !c_data )                                               \
2476acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    d_data[j] = arrtype(s0);                                \
2486acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                else                                                        \
2496acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    d_data[j] = arrtype(s0 + c_data[0]*beta);               \
2506acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }                                                               \
2516acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }                                                                   \
2526acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }                                                                       \
2536acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    else if( d_size.width*sizeof(d_data[0]) <= 1600 )                       \
2546acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {                                                                       \
2556acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        for( i = 0; i < drows; i++, _a_data += a_step0,                     \
2566acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                    _c_data += c_step0,                     \
2576acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                    d_data += d_step )                      \
2586acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {                                                                   \
2596acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            a_data = _a_data, c_data = _c_data;                             \
2606acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                            \
2616acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( a_buf )                                                     \
2626acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {                                                               \
2636acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                for( k = 0; k < n; k++ )                                    \
2646acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    a_buf[k] = a_data[a_step1*k];                           \
2656acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                a_data = a_buf;                                             \
2666acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }                                                               \
2676acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                            \
2686acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            for( j = 0; j <= m - 4; j += 4, c_data += 4*c_step1 )           \
2696acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {                                                               \
2706acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                const arrtype* b = _b_data + j;                             \
2716acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                worktype s0(0), s1(0), s2(0), s3(0);                        \
2726acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                            \
2736acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                for( k = 0; k < n; k++, b += b_step )                       \
2746acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {                                                           \
2756acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    worktype a(a_data[k]);                                  \
2766acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    s0 += a * b[0]; s1 += a * b[1];                         \
2776acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    s2 += a * b[2]; s3 += a * b[3];                         \
2786acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }                                                           \
2796acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                            \
2806acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                if( !c_data )                                               \
2816acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {                                                           \
2826acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    d_data[j] = arrtype(s0*alpha);                          \
2836acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    d_data[j+1] = arrtype(s1*alpha);                        \
2846acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    d_data[j+2] = arrtype(s2*alpha);                        \
2856acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    d_data[j+3] = arrtype(s3*alpha);                        \
2866acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }                                                           \
2876acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                else                                                        \
2886acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {                                                           \
2896acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    s0 = s0*alpha; s1 = s1*alpha;                           \
2906acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    s2 = s2*alpha; s3 = s3*alpha;                           \
2916acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    d_data[j] = arrtype(s0 + c_data[0]*beta);               \
2926acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    d_data[j+1] = arrtype(s1 + c_data[c_step1]*beta);       \
2936acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    d_data[j+2] = arrtype(s2 + c_data[c_step1*2]*beta);     \
2946acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    d_data[j+3] = arrtype(s3 + c_data[c_step1*3]*beta);     \
2956acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }                                                           \
2966acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }                                                               \
2976acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                            \
2986acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            for( ; j < m; j++, c_data += c_step1 )                          \
2996acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {                                                               \
3006acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                const arrtype* b = _b_data + j;                             \
3016acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                worktype s0(0);                                             \
3026acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                            \
3036acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                for( k = 0; k < n; k++, b += b_step )                       \
3046acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    s0 += worktype(a_data[k]) * b[0];                       \
3056acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                            \
3066acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                s0 = s0*alpha;                                              \
3076acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                if( !c_data )                                               \
3086acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    d_data[j] = arrtype(s0);                                \
3096acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                else                                                        \
3106acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    d_data[j] = arrtype(s0 + c_data[0]*beta);               \
3116acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }                                                               \
3126acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }                                                                   \
3136acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }                                                                       \
3146acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    else                                                                    \
3156acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {                                                                       \
3166acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        worktype* d_buf = (worktype*)cvStackAlloc(m*sizeof(d_buf[0]));      \
3176acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                            \
3186acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        for( i = 0; i < drows; i++, _a_data += a_step0,                     \
3196acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                            _c_data += c_step0,             \
3206acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                            d_data += d_step )              \
3216acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {                                                                   \
3226acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            a_data = _a_data;                                               \
3236acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            b_data = _b_data;                                               \
3246acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            c_data = _c_data;                                               \
3256acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                            \
3266acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( a_buf )                                                     \
3276acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {                                                               \
3286acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                for( k = 0; k < n; k++ )                                    \
3296acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    a_buf[k] = _a_data[a_step1*k];                          \
3306acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                a_data = a_buf;                                             \
3316acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }                                                               \
3326acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                            \
3336acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            for( j = 0; j < m; j++ )                                        \
3346acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                d_buf[j] = worktype(0);                                     \
3356acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                            \
3366acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            for( k = 0; k < n; k++, b_data += b_step )                      \
3376acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {                                                               \
3386acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                worktype al(a_data[k]);                                     \
3396acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                            \
3406acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                for( j = 0; j <= m - 4; j += 4 )                            \
3416acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {                                                           \
3426acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    worktype t0 = d_buf[j] + b_data[j]*al;                  \
3436acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    worktype t1 = d_buf[j+1] + b_data[j+1]*al;              \
3446acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    d_buf[j] = t0;                                          \
3456acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    d_buf[j+1] = t1;                                        \
3466acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    t0 = d_buf[j+2] + b_data[j+2]*al;                       \
3476acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    t1 = d_buf[j+3] + b_data[j+3]*al;                       \
3486acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    d_buf[j+2] = t0;                                        \
3496acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    d_buf[j+3] = t1;                                        \
3506acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }                                                           \
3516acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                            \
3526acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                for( ; j < m; j++ )                                         \
3536acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    d_buf[j] += b_data[j]*al;                               \
3546acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }                                                               \
3556acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                            \
3566acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( !c_data )                                                   \
3576acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                for( j = 0; j < m; j++ )                                    \
3586acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    d_data[j] = arrtype(d_buf[j]*alpha);                    \
3596acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            else                                                            \
3606acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                for( j = 0; j < m; j++, c_data += c_step1 )                 \
3616acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {                                                           \
3626acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    worktype t = d_buf[j]*alpha;                            \
3636acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    d_data[j] = arrtype(t + c_data[0]*beta);                \
3646acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }                                                           \
3656acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }                                                                   \
3666acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }                                                                       \
3676acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    return CV_OK;                                                           \
3686acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn}
3696acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
3706acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
/*
 * ICV_DEF_GEMM_BLOCK_MUL( flavor, arrtype, worktype )
 *
 * Expands to a static function icvGEMMBlockMul_<flavor> that computes the
 * un-scaled matrix product of A and B and leaves it in d_data.  Note that
 * d_data holds worktype (accumulator) elements, not arrtype.
 *
 *   a_data, a_step - first operand (arrtype) and its row step in bytes
 *   b_data, b_step - second operand (arrtype) and its row step in bytes
 *   d_data, d_step - output accumulator (worktype) and its row step in bytes
 *   a_size         - size of A as stored; the inner-product length n is
 *                    a_size.width, or a_size.height when CV_GEMM_A_T is set
 *   d_size         - d_size.height rows of d_size.width elements are written
 *   flags          - CV_GEMM_A_T / CV_GEMM_B_T select transposed access to
 *                    A / B; bit value 16 makes every output element start
 *                    from the value already in d_data instead of from zero
 *
 * Always returns CV_OK; no argument validation is performed here.
 *
 * NOTE(review): cvStackAlloc is defined outside this macro and the buffer it
 * returns is never released in this function - presumably stack storage;
 * confirm before relying on it for large n.
 */
3716acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn#define ICV_DEF_GEMM_BLOCK_MUL( flavor, arrtype, worktype )         \
3726acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Rennstatic CvStatus CV_STDCALL                                          \
3736acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennicvGEMMBlockMul_##flavor( const arrtype* a_data, size_t a_step,     \
3746acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        const arrtype* b_data, size_t b_step,       \
3756acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        worktype* d_data, size_t d_step,            \
3766acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        CvSize a_size, CvSize d_size, int flags )   \
3776acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn{                                                                   \
3786acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    int i, j, k, n = a_size.width, m = d_size.width;                \
3796acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    const arrtype *_a_data = a_data, *_b_data = b_data;             \
3806acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    arrtype* a_buf = 0;                                             \
3816acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    size_t a_step0, a_step1, t_step;                                \
    /* non-zero: add the products to what d_data already contains */ \
3826acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    int do_acc = flags & 16;                                        \
3836acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                    \
    /* steps arrive in bytes; convert them to element counts */     \
3846acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    a_step /= sizeof(a_data[0]);                                    \
3856acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    b_step /= sizeof(b_data[0]);                                    \
3866acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    d_step /= sizeof(d_data[0]);                                    \
3876acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                    \
    /* a_step0: stride between output rows, a_step1: stride along k */ \
3886acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    a_step0 = a_step;                                               \
3896acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    a_step1 = 1;                                                    \
3906acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                    \
    /* transposed A: swap the two strides, take n from the height   \
       and allocate a_buf so each strided row can be gathered into  \
       contiguous storage before the inner loops run */             \
3916acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( flags & CV_GEMM_A_T )                                       \
3926acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {                                                               \
3936acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_SWAP( a_step0, a_step1, t_step );                        \
3946acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        n = a_size.height;                                          \
3956acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        a_buf = (arrtype*)cvStackAlloc(n*sizeof(a_data[0]));        \
3966acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }                                                               \
3976acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                    \
3986acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( flags & CV_GEMM_B_T )                                       \
3996acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {                                                               \
4006acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        /* second operand is transposed */                          \
4016acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        for( i = 0; i < d_size.height; i++, _a_data += a_step0,     \
4026acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                            d_data += d_step )      \
4036acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {                                                           \
4046acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            a_data = _a_data; b_data = _b_data;                     \
4056acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                    \
            /* gather the strided A row into a_buf (A_T case only) */ \
4066acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( a_buf )                                             \
4076acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {                                                       \
4086acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                for( k = 0; k < n; k++ )                            \
4096acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    a_buf[k] = a_data[a_step1*k];                   \
4106acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                a_data = a_buf;                                     \
4116acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }                                                       \
4126acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                    \
            /* output element j is the dot product of the A row with \
               row j of B; b_data moves down one B row per j */      \
4136acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            for( j = 0; j < d_size.width; j++, b_data += b_step )   \
4146acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {                                                       \
                /* two partial sums: s0 takes even k (and the prior \
                   d_data value when accumulating), s1 takes odd k */ \
4156acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                worktype s0 = do_acc ? d_data[j]:worktype(0), s1(0);\
4166acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                for( k = 0; k <= n - 2; k += 2 )                    \
4176acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {                                                   \
4186acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    s0 += worktype(a_data[k])*b_data[k];            \
4196acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    s1 += worktype(a_data[k+1])*b_data[k+1];        \
4206acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }                                                   \
4216acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                    \
                /* leftover term when n is odd */                   \
4226acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                for( ; k < n; k++ )                                 \
4236acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    s0 += worktype(a_data[k])*b_data[k];            \
4246acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                    \
4256acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                d_data[j] = s0 + s1;                                \
4266acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }                                                       \
4276acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }                                                           \
4286acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }                                                               \
4296acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    else                                                            \
4306acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {                                                               \
        /* B is not transposed: column j of B is walked with stride \
           b_step while k runs along the A row */                   \
4316acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        for( i = 0; i < d_size.height; i++, _a_data += a_step0,     \
4326acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                            d_data += d_step )      \
4336acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {                                                           \
4346acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            a_data = _a_data, b_data = _b_data;                     \
4356acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                    \
            /* gather the strided A row into a_buf (A_T case only) */ \
4366acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( a_buf )                                             \
4376acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {                                                       \
4386acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                for( k = 0; k < n; k++ )                            \
4396acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    a_buf[k] = a_data[a_step1*k];                   \
4406acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                a_data = a_buf;                                     \
4416acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }                                                       \
4426acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                    \
            /* main loop: four adjacent output columns per pass, so \
               each a_data[k] is converted once and used four times */ \
4436acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            for( j = 0; j <= m - 4; j += 4 )                        \
4446acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {                                                       \
4456acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                worktype s0, s1, s2, s3;                            \
4466acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                const arrtype* b = b_data + j;                      \
4476acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                    \
                /* seed the sums from d_data when accumulating */   \
4486acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                if( do_acc )                                        \
4496acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {                                                   \
4506acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    s0 = d_data[j]; s1 = d_data[j+1];               \
4516acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    s2 = d_data[j+2]; s3 = d_data[j+3];             \
4526acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }                                                   \
4536acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                else                                                \
4546acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    s0 = s1 = s2 = s3 = worktype(0);                \
4556acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                    \
4566acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                for( k = 0; k < n; k++, b += b_step )               \
4576acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {                                                   \
4586acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    worktype a(a_data[k]);                          \
4596acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    s0 += a * b[0]; s1 += a * b[1];                 \
4606acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    s2 += a * b[2]; s3 += a * b[3];                 \
4616acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }                                                   \
4626acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                    \
4636acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                d_data[j] = s0; d_data[j+1] = s1;                   \
4646acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                d_data[j+2] = s2; d_data[j+3] = s3;                 \
4656acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }                                                       \
4666acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                    \
            /* remaining (m mod 4) columns, one at a time */        \
4676acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            for( ; j < m; j++ )                                     \
4686acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {                                                       \
4696acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                const arrtype* b = b_data + j;                      \
4706acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                worktype s0 = do_acc ? d_data[j] : worktype(0);     \
4716acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                    \
4726acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                for( k = 0; k < n; k++, b += b_step )               \
4736acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    s0 += worktype(a_data[k]) * b[0];               \
4746acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                    \
4756acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                d_data[j] = s0;                                     \
4766acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }                                                       \
4776acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }                                                           \
4786acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }                                                               \
4796acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                    \
4806acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    return CV_OK;                                                   \
4816acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn}
4826acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
4836acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
/*
 * ICV_DEF_GEMM_STORE( flavor, arrtype, worktype )
 *
 * Expands to a static function icvGEMMStore_<flavor> that turns a worktype
 * accumulator buffer into the arrtype destination, row by row:
 *
 *     d_data[j] = arrtype( alpha*d_buf[j] + beta*C[j] )
 *
 * or simply arrtype( alpha*d_buf[j] ) when c_data is NULL.
 *
 *   c_data, c_step     - optional addend C (arrtype), row step in bytes;
 *                        may be NULL
 *   d_buf, d_buf_step  - accumulator input (worktype), row step in bytes
 *   d_data, d_step     - destination (arrtype), row step in bytes
 *   d_size             - number of rows / columns to store
 *   alpha, beta        - scale factors for d_buf and C respectively
 *   flags              - only CV_GEMM_C_T is examined: C is read transposed
 *
 * Always returns CV_OK; no argument validation is performed here.
 *
 * NOTE(review): the 4-wide loop converts C with worktype(c_data[..]) before
 * multiplying by beta, while the tail loop multiplies beta by c_data[0]
 * directly - confirm both forms are equivalent for every instantiated type.
 */
4846acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn#define ICV_DEF_GEMM_STORE( flavor, arrtype, worktype )             \
4856acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Rennstatic CvStatus CV_STDCALL                                          \
4866acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennicvGEMMStore_##flavor( const arrtype* c_data, size_t c_step,        \
4876acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                       const worktype* d_buf, size_t d_buf_step,    \
4886acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                       arrtype* d_data, size_t d_step, CvSize d_size,\
4896acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                       double alpha, double beta, int flags )       \
4906acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn{                                                                   \
4916acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    const arrtype* _c_data = c_data;                                \
4926acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    int j;                                                          \
4936acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    size_t c_step0, c_step1;                                        \
4946acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                    \
    /* steps arrive in bytes; convert them to element counts */     \
4956acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    c_step /= sizeof(c_data[0]);                                    \
4966acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    d_buf_step /= sizeof(d_buf[0]);                                 \
4976acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    d_step /= sizeof(d_data[0]);                                    \
4986acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                    \
    /* c_step0: stride between rows of C, c_step1: stride between   \
       consecutive elements of a row.  Both are zero without C, so  \
       _c_data stays NULL for every row; they are swapped when C is \
       to be read transposed */                                     \
4996acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !c_data )                                                   \
5006acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        c_step0 = c_step1 = 0;                                      \
5016acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    else if( !(flags & CV_GEMM_C_T) )                               \
5026acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        c_step0 = c_step, c_step1 = 1;                              \
5036acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    else                                                            \
5046acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        c_step0 = 1, c_step1 = c_step;                              \
5056acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                    \
    /* d_size is a by-value copy, so its height doubles as the row  \
       countdown */                                                 \
5066acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    for( ; d_size.height--; _c_data += c_step0,                     \
5076acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                            d_buf += d_buf_step,                    \
5086acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                            d_data += d_step )                      \
5096acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {                                                               \
5106acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( _c_data )                                               \
5116acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {                                                           \
            /* with C: d = alpha*d_buf + beta*C, four elements per  \
               pass using two temporaries */                        \
5126acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            c_data = _c_data;                                       \
5136acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            for( j = 0; j <= d_size.width - 4; j += 4, c_data += 4*c_step1 )\
5146acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {                                                       \
5156acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                worktype t0 = alpha*d_buf[j];                       \
5166acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                worktype t1 = alpha*d_buf[j+1];                     \
5176acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                t0 += beta*worktype(c_data[0]);                     \
5186acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                t1 += beta*worktype(c_data[c_step1]);               \
5196acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                d_data[j] = arrtype(t0);                            \
5206acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                d_data[j+1] = arrtype(t1);                          \
5216acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                t0 = alpha*d_buf[j+2];                              \
5226acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                t1 = alpha*d_buf[j+3];                              \
5236acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                t0 += beta*worktype(c_data[c_step1*2]);             \
5246acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                t1 += beta*worktype(c_data[c_step1*3]);             \
5256acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                d_data[j+2] = arrtype(t0);                          \
5266acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                d_data[j+3] = arrtype(t1);                          \
5276acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }                                                       \
            /* remaining (width mod 4) elements */                  \
5286acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            for( ; j < d_size.width; j++, c_data += c_step1 )       \
5296acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {                                                       \
5306acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                worktype t0 = alpha*d_buf[j];                       \
5316acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                d_data[j] = arrtype(t0 + beta*c_data[0]);           \
5326acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }                                                       \
5336acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }                                                           \
5346acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        else                                                        \
5356acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {                                                           \
            /* no C: d = alpha*d_buf, same 4-wide + tail layout */  \
5366acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            for( j = 0; j <= d_size.width - 4; j += 4 )             \
5376acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {                                                       \
5386acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                worktype t0 = alpha*d_buf[j];                       \
5396acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                worktype t1 = alpha*d_buf[j+1];                     \
5406acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                d_data[j] = arrtype(t0);                            \
5416acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                d_data[j+1] = arrtype(t1);                          \
5426acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                t0 = alpha*d_buf[j+2];                              \
5436acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                t1 = alpha*d_buf[j+3];                              \
5446acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                d_data[j+2] = arrtype(t0);                          \
5456acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                d_data[j+3] = arrtype(t1);                          \
5466acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }                                                       \
5476acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            for( ; j < d_size.width; j++ )                          \
5486acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                d_data[j] = arrtype(alpha*d_buf[j]);                \
5496acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }                                                           \
5506acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }                                                               \
5516acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    return CV_OK;                                                   \
5526acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn}
5536acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
5546acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
5556acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_GEMM_SINGLE_MUL( 32f_C1R, float, double)
5566acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_GEMM_BLOCK_MUL( 32f_C1R, float, double)
5576acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_GEMM_STORE( 32f_C1R, float, double)
5586acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
5596acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_GEMM_SINGLE_MUL( 64f_C1R, double, double)
5606acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_GEMM_BLOCK_MUL( 64f_C1R, double, double)
5616acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_GEMM_STORE( 64f_C1R, double, double)
5626acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
5636acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_GEMM_SINGLE_MUL( 32f_C2R, CvComplex32f, CvComplex64f)
5646acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_GEMM_BLOCK_MUL( 32f_C2R, CvComplex32f, CvComplex64f)
5656acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_GEMM_STORE( 32f_C2R, CvComplex32f, CvComplex64f)
5666acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
5676acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_GEMM_SINGLE_MUL( 64f_C2R, CvComplex64f, CvComplex64f)
5686acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_GEMM_BLOCK_MUL( 64f_C2R, CvComplex64f, CvComplex64f)
5696acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_GEMM_STORE( 64f_C2R, CvComplex64f, CvComplex64f)
5706acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
5716acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renntypedef CvStatus (CV_STDCALL *CvGEMMSingleMulFunc)( const void* src1, size_t step1,
5726acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                   const void* src2, size_t step2, const void* src3, size_t step3,
5736acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                   void* dst, size_t dststep, CvSize srcsize, CvSize dstsize,
5746acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                   double alpha, double beta, int flags );
5756acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
5766acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renntypedef CvStatus (CV_STDCALL *CvGEMMBlockMulFunc)( const void* src1, size_t step1,
5776acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                   const void* src2, size_t step2, void* dst, size_t dststep,
5786acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                   CvSize srcsize, CvSize dstsize, int flags );
5796acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
5806acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renntypedef CvStatus (CV_STDCALL *CvGEMMStoreFunc)( const void* src1, size_t step1,
5816acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                   const void* src2, size_t step2, void* dst, size_t dststep,
5826acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                   CvSize dstsize, double alpha, double beta, int flags );
5836acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
5846acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
5856acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Rennstatic void icvInitGEMMTable( CvBigFuncTable* single_mul_tab,
5866acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                              CvBigFuncTable* block_mul_tab,
5876acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                              CvBigFuncTable* store_tab )
5886acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn{
5896acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    single_mul_tab->fn_2d[CV_32FC1] = (void*)icvGEMMSingleMul_32f_C1R;
5906acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    single_mul_tab->fn_2d[CV_64FC1] = (void*)icvGEMMSingleMul_64f_C1R;
5916acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    single_mul_tab->fn_2d[CV_32FC2] = (void*)icvGEMMSingleMul_32f_C2R;
5926acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    single_mul_tab->fn_2d[CV_64FC2] = (void*)icvGEMMSingleMul_64f_C2R;
5936acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    block_mul_tab->fn_2d[CV_32FC1] = (void*)icvGEMMBlockMul_32f_C1R;
5946acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    block_mul_tab->fn_2d[CV_64FC1] = (void*)icvGEMMBlockMul_64f_C1R;
5956acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    block_mul_tab->fn_2d[CV_32FC2] = (void*)icvGEMMBlockMul_32f_C2R;
5966acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    block_mul_tab->fn_2d[CV_64FC2] = (void*)icvGEMMBlockMul_64f_C2R;
5976acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    store_tab->fn_2d[CV_32FC1] = (void*)icvGEMMStore_32f_C1R;
5986acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    store_tab->fn_2d[CV_64FC1] = (void*)icvGEMMStore_64f_C1R;
5996acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    store_tab->fn_2d[CV_32FC2] = (void*)icvGEMMStore_32f_C2R;
6006acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    store_tab->fn_2d[CV_64FC2] = (void*)icvGEMMStore_64f_C2R;
6016acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn}
6026acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
6036acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
6046acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennCV_IMPL void
6056acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RenncvGEMM( const CvArr* Aarr, const CvArr* Barr, double alpha,
6066acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        const CvArr* Carr, double beta, CvArr* Darr, int flags )
6076acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn{
6086acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    const int block_lin_size = 128;
6096acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    const int block_size = block_lin_size * block_lin_size;
6106acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
6116acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    static CvBigFuncTable single_mul_tab, block_mul_tab, store_tab;
6126acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    static int inittab = 0;
6136acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    static double zero[] = {0,0,0,0};
6146acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    static float zerof[] = {0,0,0,0};
6156acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
6166acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    uchar* buffer = 0;
6176acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    int local_alloc = 0;
6186acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    uchar* block_buffer = 0;
6196acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
6206acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CV_FUNCNAME( "cvGEMM" );
6216acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
6226acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    __BEGIN__;
6236acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
6246acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvMat *A = (CvMat*)Aarr;
6256acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvMat *B = (CvMat*)Barr;
6266acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvMat *C = (CvMat*)Carr;
6276acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvMat *D = (CvMat*)Darr;
6286acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    int len = 0;
6296acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
6306acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvMat stub, stub1, stub2, stub3;
6316acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvSize a_size, d_size;
6326acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    int type;
6336acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
6346acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !CV_IS_MAT( A ))
6356acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
6366acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        int coi = 0;
6376acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_CALL( A = cvGetMat( A, &stub1, &coi ));
6386acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
6396acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( coi != 0 )
6406acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_ERROR( CV_BadCOI, "" );
6416acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
6426acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
6436acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !CV_IS_MAT( B ))
6446acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
6456acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        int coi = 0;
6466acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_CALL( B = cvGetMat( B, &stub2, &coi ));
6476acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
6486acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( coi != 0 )
6496acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_ERROR( CV_BadCOI, "" );
6506acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
6516acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
6526acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !CV_IS_MAT( D ))
6536acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
6546acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        int coi = 0;
6556acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_CALL( D = cvGetMat( D, &stub, &coi ));
6566acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
6576acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( coi != 0 )
6586acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_ERROR( CV_BadCOI, "" );
6596acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
6606acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
6616acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( beta == 0 )
6626acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        C = 0;
6636acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
6646acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( C )
6656acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
6666acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( !CV_IS_MAT( C ))
6676acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
6686acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            int coi = 0;
6696acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_CALL( C = cvGetMat( C, &stub3, &coi ));
6706acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
6716acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( coi != 0 )
6726acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                CV_ERROR( CV_BadCOI, "" );
6736acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
6746acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
6756acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( !CV_ARE_TYPES_EQ( C, D ))
6766acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_ERROR( CV_StsUnmatchedFormats, "" );
6776acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
6786acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( ((flags&CV_GEMM_C_T) == 0 && (C->cols != D->cols || C->rows != D->rows)) ||
6796acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            ((flags&CV_GEMM_C_T) != 0 && (C->rows != D->cols || C->cols != D->rows)))
6806acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_ERROR( CV_StsUnmatchedSizes, "" );
6816acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
6826acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( (flags & CV_GEMM_C_T) != 0 && C->data.ptr == D->data.ptr )
6836acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
6846acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            cvTranspose( C, D );
6856acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            C = D;
6866acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            flags &= ~CV_GEMM_C_T;
6876acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
6886acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
6896acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    else
6906acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
6916acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        C = &stub3;
6926acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        C->data.ptr = 0;
6936acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        C->step = 0;
6946acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        C->type = CV_MAT_CONT_FLAG;
6956acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
6966acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
6976acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    type = CV_MAT_TYPE(A->type);
6986acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !CV_ARE_TYPES_EQ( A, B ) || !CV_ARE_TYPES_EQ( A, D ) )
6996acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_ERROR( CV_StsUnmatchedFormats, "" );
7006acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
7016acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    a_size.width = A->cols;
7026acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    a_size.height = A->rows;
7036acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    d_size.width = D->cols;
7046acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    d_size.height = D->rows;
7056acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
7066acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    switch( flags & (CV_GEMM_A_T|CV_GEMM_B_T) )
7076acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
7086acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    case 0:
7096acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        len = B->rows;
7106acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( a_size.width != len ||
7116acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            B->cols != d_size.width ||
7126acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            a_size.height != d_size.height )
7136acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_ERROR( CV_StsUnmatchedSizes, "" );
7146acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        break;
7156acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    case 1:
7166acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        len = B->rows;
7176acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( a_size.height != len ||
7186acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            B->cols != d_size.width ||
7196acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            a_size.width != d_size.height )
7206acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_ERROR( CV_StsUnmatchedSizes, "" );
7216acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        break;
7226acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    case 2:
7236acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        len = B->cols;
7246acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( a_size.width != len ||
7256acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            B->rows != d_size.width ||
7266acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            a_size.height != d_size.height )
7276acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_ERROR( CV_StsUnmatchedSizes, "" );
7286acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        break;
7296acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    case 3:
7306acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        len = B->cols;
7316acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( a_size.height != len ||
7326acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            B->rows != d_size.width ||
7336acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            a_size.width != d_size.height )
7346acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_ERROR( CV_StsUnmatchedSizes, "" );
7356acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        break;
7366acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
7376acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
7386acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( flags == 0 && 2 <= len && len <= 4 && (len == d_size.width || len == d_size.height) )
7396acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
7406acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        int i;
7416acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( type == CV_64F )
7426acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
7436acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            double* d = D->data.db;
7446acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            const double *a = A->data.db, *b = B->data.db, *c = C->data.db;
7456acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            size_t d_step = D->step/sizeof(d[0]),
7466acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                   a_step = A->step/sizeof(a[0]),
7476acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                   b_step = B->step/sizeof(b[0]),
7486acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                   c_step = C->step/sizeof(c[0]);
7496acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
7506acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( !c )
7516acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                c = zero;
7526acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
7536acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            switch( len )
7546acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {
7556acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            case 2:
7566acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                if( len == d_size.width && b != d )
7576acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {
7586acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    for( i = 0; i < d_size.height; i++, d += d_step, a += a_step, c += c_step )
7596acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    {
7606acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        double t0 = a[0]*b[0] + a[1]*b[b_step];
7616acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        double t1 = a[0]*b[1] + a[1]*b[b_step+1];
7626acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[0] = t0*alpha + c[0]*beta;
7636acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[1] = t1*alpha + c[1]*beta;
7646acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    }
7656acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }
7666acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                else if( a != d )
7676acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {
7686acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    int c_step0 = 1;
7696acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    if( c == zero )
7706acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    {
7716acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        c_step0 = 0;
7726acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        c_step = 1;
7736acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    }
7746acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
7756acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    for( i = 0; i < d_size.width; i++, d++, b++, c += c_step0 )
7766acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    {
7776acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        double t0 = a[0]*b[0] + a[1]*b[b_step];
7786acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        double t1 = a[a_step]*b[0] + a[a_step+1]*b[b_step];
7796acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[0] = t0*alpha + c[0]*beta;
7806acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[d_step] = t1*alpha + c[c_step]*beta;
7816acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    }
7826acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }
7836acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                else
7846acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    break;
7856acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                EXIT;
7866acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            case 3:
7876acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                if( len == d_size.width && b != d )
7886acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {
7896acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    for( i = 0; i < d_size.height; i++, d += d_step, a += a_step, c += c_step )
7906acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    {
7916acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        double t0 = a[0]*b[0] + a[1]*b[b_step] + a[2]*b[b_step*2];
7926acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        double t1 = a[0]*b[1] + a[1]*b[b_step+1] + a[2]*b[b_step*2+1];
7936acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        double t2 = a[0]*b[2] + a[1]*b[b_step+2] + a[2]*b[b_step*2+2];
7946acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[0] = t0*alpha + c[0]*beta;
7956acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[1] = t1*alpha + c[1]*beta;
7966acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[2] = t2*alpha + c[2]*beta;
7976acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    }
7986acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }
7996acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                else if( a != d )
8006acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {
8016acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    int c_step0 = 1;
8026acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    if( c == zero )
8036acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    {
8046acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        c_step0 = 0;
8056acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        c_step = 1;
8066acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    }
8076acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
8086acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    for( i = 0; i < d_size.width; i++, d++, b++, c += c_step0 )
8096acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    {
8106acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        double t0 = a[0]*b[0] + a[1]*b[b_step] + a[2]*b[b_step*2];
8116acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        double t1 = a[a_step]*b[0] + a[a_step+1]*b[b_step] + a[a_step+2]*b[b_step*2];
8126acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        double t2 = a[a_step*2]*b[0] + a[a_step*2+1]*b[b_step] + a[a_step*2+2]*b[b_step*2];
8136acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
8146acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[0] = t0*alpha + c[0]*beta;
8156acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[d_step] = t1*alpha + c[c_step]*beta;
8166acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[d_step*2] = t2*alpha + c[c_step*2]*beta;
8176acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    }
8186acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }
8196acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                else
8206acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    break;
8216acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                EXIT;
8226acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            case 4:
8236acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                if( len == d_size.width && b != d )
8246acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {
8256acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    for( i = 0; i < d_size.height; i++, d += d_step, a += a_step, c += c_step )
8266acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    {
8276acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        double t0 = a[0]*b[0] + a[1]*b[b_step] + a[2]*b[b_step*2] + a[3]*b[b_step*3];
8286acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        double t1 = a[0]*b[1] + a[1]*b[b_step+1] + a[2]*b[b_step*2+1] + a[3]*b[b_step*3+1];
8296acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        double t2 = a[0]*b[2] + a[1]*b[b_step+2] + a[2]*b[b_step*2+2] + a[3]*b[b_step*3+2];
8306acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        double t3 = a[0]*b[3] + a[1]*b[b_step+3] + a[2]*b[b_step*2+3] + a[3]*b[b_step*3+3];
8316acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[0] = t0*alpha + c[0]*beta;
8326acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[1] = t1*alpha + c[1]*beta;
8336acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[2] = t2*alpha + c[2]*beta;
8346acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[3] = t3*alpha + c[3]*beta;
8356acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    }
8366acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }
8376acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                else if( d_size.width <= 16 && a != d )
8386acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {
8396acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    int c_step0 = 1;
8406acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    if( c == zero )
8416acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    {
8426acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        c_step0 = 0;
8436acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        c_step = 1;
8446acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    }
8456acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
8466acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    for( i = 0; i < d_size.width; i++, d++, b++, c += c_step0 )
8476acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    {
8486acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        double t0 = a[0]*b[0] + a[1]*b[b_step] + a[2]*b[b_step*2] + a[3]*b[b_step*3];
8496acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        double t1 = a[a_step]*b[0] + a[a_step+1]*b[b_step] +
8506acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                    a[a_step+2]*b[b_step*2] + a[a_step+3]*b[b_step*3];
8516acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        double t2 = a[a_step*2]*b[0] + a[a_step*2+1]*b[b_step] +
8526acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                    a[a_step*2+2]*b[b_step*2] + a[a_step*2+3]*b[b_step*3];
8536acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        double t3 = a[a_step*3]*b[0] + a[a_step*3+1]*b[b_step] +
8546acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                    a[a_step*3+2]*b[b_step*2] + a[a_step*3+3]*b[b_step*3];
8556acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[0] = t0*alpha + c[0]*beta;
8566acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[d_step] = t1*alpha + c[c_step]*beta;
8576acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[d_step*2] = t2*alpha + c[c_step*2]*beta;
8586acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[d_step*3] = t3*alpha + c[c_step*3]*beta;
8596acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    }
8606acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }
8616acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                else
8626acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    break;
8636acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                EXIT;
8646acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }
8656acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
8666acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
8676acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( type == CV_32F )
8686acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
8696acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            float* d = D->data.fl;
8706acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            const float *a = A->data.fl, *b = B->data.fl, *c = C->data.fl;
8716acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            size_t d_step = D->step/sizeof(d[0]),
8726acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                   a_step = A->step/sizeof(a[0]),
8736acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                   b_step = B->step/sizeof(b[0]),
8746acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                   c_step = C->step/sizeof(c[0]);
8756acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
8766acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( !c )
8776acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                c = zerof;
8786acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
8796acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            switch( len )
8806acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {
8816acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            case 2:
8826acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                if( len == d_size.width && b != d )
8836acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {
8846acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    for( i = 0; i < d_size.height; i++, d += d_step, a += a_step, c += c_step )
8856acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    {
8866acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        float t0 = a[0]*b[0] + a[1]*b[b_step];
8876acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        float t1 = a[0]*b[1] + a[1]*b[b_step+1];
8886acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[0] = (float)(t0*alpha + c[0]*beta);
8896acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[1] = (float)(t1*alpha + c[1]*beta);
8906acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    }
8916acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }
8926acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                else if( a != d )
8936acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {
8946acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    int c_step0 = 1;
8956acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    if( c == zerof )
8966acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    {
8976acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        c_step0 = 0;
8986acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        c_step = 1;
8996acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    }
9006acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
9016acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    for( i = 0; i < d_size.width; i++, d++, b++, c += c_step0 )
9026acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    {
9036acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        float t0 = a[0]*b[0] + a[1]*b[b_step];
9046acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        float t1 = a[a_step]*b[0] + a[a_step+1]*b[b_step];
9056acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[0] = (float)(t0*alpha + c[0]*beta);
9066acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[d_step] = (float)(t1*alpha + c[c_step]*beta);
9076acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    }
9086acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }
9096acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                else
9106acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    break;
9116acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                EXIT;
9126acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            case 3:
9136acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                if( len == d_size.width && b != d )
9146acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {
9156acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    for( i = 0; i < d_size.height; i++, d += d_step, a += a_step, c += c_step )
9166acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    {
9176acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        float t0 = a[0]*b[0] + a[1]*b[b_step] + a[2]*b[b_step*2];
9186acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        float t1 = a[0]*b[1] + a[1]*b[b_step+1] + a[2]*b[b_step*2+1];
9196acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        float t2 = a[0]*b[2] + a[1]*b[b_step+2] + a[2]*b[b_step*2+2];
9206acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[0] = (float)(t0*alpha + c[0]*beta);
9216acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[1] = (float)(t1*alpha + c[1]*beta);
9226acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[2] = (float)(t2*alpha + c[2]*beta);
9236acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    }
9246acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }
9256acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                else if( a != d )
9266acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {
9276acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    int c_step0 = 1;
9286acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    if( c == zerof )
9296acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    {
9306acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        c_step0 = 0;
9316acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        c_step = 1;
9326acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    }
9336acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
9346acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    for( i = 0; i < d_size.width; i++, d++, b++, c += c_step0 )
9356acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    {
9366acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        float t0 = a[0]*b[0] + a[1]*b[b_step] + a[2]*b[b_step*2];
9376acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        float t1 = a[a_step]*b[0] + a[a_step+1]*b[b_step] + a[a_step+2]*b[b_step*2];
9386acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        float t2 = a[a_step*2]*b[0] + a[a_step*2+1]*b[b_step] + a[a_step*2+2]*b[b_step*2];
9396acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
9406acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[0] = (float)(t0*alpha + c[0]*beta);
9416acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[d_step] = (float)(t1*alpha + c[c_step]*beta);
9426acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[d_step*2] = (float)(t2*alpha + c[c_step*2]*beta);
9436acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    }
9446acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }
9456acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                else
9466acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    break;
9476acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                EXIT;
9486acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            case 4:
9496acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                if( len == d_size.width && b != d )
9506acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {
9516acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    for( i = 0; i < d_size.height; i++, d += d_step, a += a_step, c += c_step )
9526acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    {
9536acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        float t0 = a[0]*b[0] + a[1]*b[b_step] + a[2]*b[b_step*2] + a[3]*b[b_step*3];
9546acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        float t1 = a[0]*b[1] + a[1]*b[b_step+1] + a[2]*b[b_step*2+1] + a[3]*b[b_step*3+1];
9556acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        float t2 = a[0]*b[2] + a[1]*b[b_step+2] + a[2]*b[b_step*2+2] + a[3]*b[b_step*3+2];
9566acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        float t3 = a[0]*b[3] + a[1]*b[b_step+3] + a[2]*b[b_step*2+3] + a[3]*b[b_step*3+3];
9576acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[0] = (float)(t0*alpha + c[0]*beta);
9586acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[1] = (float)(t1*alpha + c[1]*beta);
9596acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[2] = (float)(t2*alpha + c[2]*beta);
9606acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[3] = (float)(t3*alpha + c[3]*beta);
9616acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    }
9626acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }
9636acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                else if( len <= 16 && a != d )
9646acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {
9656acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    int c_step0 = 1;
9666acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    if( c == zerof )
9676acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    {
9686acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        c_step0 = 0;
9696acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        c_step = 1;
9706acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    }
9716acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
9726acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    for( i = 0; i < d_size.width; i++, d++, b++, c += c_step0 )
9736acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    {
9746acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        float t0 = a[0]*b[0] + a[1]*b[b_step] + a[2]*b[b_step*2] + a[3]*b[b_step*3];
9756acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        float t1 = a[a_step]*b[0] + a[a_step+1]*b[b_step] +
9766acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                   a[a_step+2]*b[b_step*2] + a[a_step+3]*b[b_step*3];
9776acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        float t2 = a[a_step*2]*b[0] + a[a_step*2+1]*b[b_step] +
9786acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                   a[a_step*2+2]*b[b_step*2] + a[a_step*2+3]*b[b_step*3];
9796acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        float t3 = a[a_step*3]*b[0] + a[a_step*3+1]*b[b_step] +
9806acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                   a[a_step*3+2]*b[b_step*2] + a[a_step*3+3]*b[b_step*3];
9816acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[0] = (float)(t0*alpha + c[0]*beta);
9826acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[d_step] = (float)(t1*alpha + c[c_step]*beta);
9836acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[d_step*2] = (float)(t2*alpha + c[c_step*2]*beta);
9846acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        d[d_step*3] = (float)(t3*alpha + c[c_step*3]*beta);
9856acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    }
9866acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }
9876acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                else
9886acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    break;
9896acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                EXIT;
9906acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }
9916acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
9926acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
9936acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
9946acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
9956acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        int b_step = B->step;
9966acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CvGEMMSingleMulFunc single_mul_func;
9976acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CvMat tmat, *D0 = D;
9986acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        icvBLAS_GEMM_32f_t blas_func = 0;
9996acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
10006acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( !inittab )
10016acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
10026acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            icvInitGEMMTable( &single_mul_tab, &block_mul_tab, &store_tab );
10036acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            inittab = 1;
10046acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
10056acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
10066acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        single_mul_func = (CvGEMMSingleMulFunc)single_mul_tab.fn_2d[type];
10076acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( !single_mul_func )
10086acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_ERROR( CV_StsUnsupportedFormat, "" );
10096acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
10106acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( D->data.ptr == A->data.ptr || D->data.ptr == B->data.ptr )
10116acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
10126acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            int buf_size = d_size.width*d_size.height*CV_ELEM_SIZE(type);
10136acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( d_size.width <= CV_MAX_LOCAL_MAT_SIZE )
10146acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {
10156acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                buffer = (uchar*)cvStackAlloc( buf_size );
10166acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                local_alloc = 1;
10176acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }
10186acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            else
10196acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                CV_CALL( buffer = (uchar*)cvAlloc( buf_size ));
10206acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
10216acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            tmat = cvMat( d_size.height, d_size.width, type, buffer );
10226acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            D = &tmat;
10236acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
10246acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
10256acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( (d_size.width == 1 || len == 1) && !(flags & CV_GEMM_B_T) && CV_IS_MAT_CONT(B->type) )
10266acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
10276acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            b_step = d_size.width == 1 ? 0 : CV_ELEM_SIZE(type);
10286acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            flags |= CV_GEMM_B_T;
10296acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
10306acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
10316acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( (d_size.width | d_size.height | len) >= 16 && icvBLAS_GEMM_32f_p != 0 )
10326acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
10336acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            blas_func = type == CV_32FC1 ? (icvBLAS_GEMM_32f_t)icvBLAS_GEMM_32f_p :
10346acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        type == CV_64FC1 ? (icvBLAS_GEMM_32f_t)icvBLAS_GEMM_64f_p :
10356acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        type == CV_32FC2 ? (icvBLAS_GEMM_32f_t)icvBLAS_GEMM_32fc_p :
10366acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        type == CV_64FC2 ? (icvBLAS_GEMM_32f_t)icvBLAS_GEMM_64fc_p : 0;
10376acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
10386acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
10396acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( blas_func )
10406acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
10416acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            const char* transa = flags & CV_GEMM_A_T ? "t" : "n";
10426acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            const char* transb = flags & CV_GEMM_B_T ? "t" : "n";
10436acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            int lda, ldb, ldd;
10446acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
10456acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( C->data.ptr )
10466acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {
10476acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                if( C->data.ptr != D->data.ptr )
10486acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {
10496acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    if( !(flags & CV_GEMM_C_T) )
10506acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        cvCopy( C, D );
10516acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    else
10526acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        cvTranspose( C, D );
10536acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }
10546acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }
10556acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
10566acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( CV_MAT_DEPTH(type) == CV_32F )
10576acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {
10586acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                CvComplex32f _alpha, _beta;
10596acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
10606acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                lda = A->step/sizeof(float);
10616acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                ldb = b_step/sizeof(float);
10626acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                ldd = D->step/sizeof(float);
10636acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                _alpha.re = (float)alpha;
10646acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                _alpha.im = 0;
10656acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                _beta.re = C->data.ptr ? (float)beta : 0;
10666acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                _beta.im = 0;
10676acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                if( CV_MAT_CN(type) == 2 )
10686acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    lda /= 2, ldb /= 2, ldd /= 2;
10696acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
10706acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                blas_func( transb, transa, &d_size.width, &d_size.height, &len,
10716acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                       &_alpha, B->data.ptr, &ldb, A->data.ptr, &lda,
10726acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                       &_beta, D->data.ptr, &ldd );
10736acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }
10746acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            else
10756acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {
10766acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                CvComplex64f _alpha, _beta;
10776acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
10786acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                lda = A->step/sizeof(double);
10796acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                ldb = b_step/sizeof(double);
10806acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                ldd = D->step/sizeof(double);
10816acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                _alpha.re = alpha;
10826acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                _alpha.im = 0;
10836acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                _beta.re = C->data.ptr ? beta : 0;
10846acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                _beta.im = 0;
10856acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                if( CV_MAT_CN(type) == 2 )
10866acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    lda /= 2, ldb /= 2, ldd /= 2;
10876acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
10886acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                blas_func( transb, transa, &d_size.width, &d_size.height, &len,
10896acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                       &_alpha, B->data.ptr, &ldb, A->data.ptr, &lda,
10906acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                       &_beta, D->data.ptr, &ldd );
10916acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }
10926acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
10936acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        else if( ((d_size.height <= block_lin_size/2 || d_size.width <= block_lin_size/2) &&
10946acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            len <= 10000) || len <= 10 ||
10956acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            (d_size.width <= block_lin_size && d_size.height <= block_lin_size && len <= block_lin_size) )
10966acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
10976acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            single_mul_func( A->data.ptr, A->step, B->data.ptr, b_step,
10986acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                             C->data.ptr, C->step, D->data.ptr, D->step,
10996acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                             a_size, d_size, alpha, beta, flags );
11006acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
11016acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        else
11026acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
11036acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            int is_a_t = flags & CV_GEMM_A_T;
11046acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            int is_b_t = flags & CV_GEMM_B_T;
11056acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            int elem_size = CV_ELEM_SIZE(type);
11066acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            int dk0_1, dk0_2;
11076acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            int a_buf_size = 0, b_buf_size, d_buf_size;
11086acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            uchar* a_buf = 0;
11096acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            uchar* b_buf = 0;
11106acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            uchar* d_buf = 0;
11116acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            int i, j, k, di = 0, dj = 0, dk = 0;
11126acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            int dm0, dn0, dk0;
11136acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            int a_step0, a_step1, b_step0, b_step1, c_step0, c_step1;
11146acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            int work_elem_size = elem_size << (CV_MAT_DEPTH(type) == CV_32F ? 1 : 0);
11156acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CvGEMMBlockMulFunc block_mul_func = (CvGEMMBlockMulFunc)block_mul_tab.fn_2d[type];
11166acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CvGEMMStoreFunc store_func = (CvGEMMStoreFunc)store_tab.fn_2d[type];
11176acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
11186acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            assert( block_mul_func && store_func );
11196acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
11206acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( !is_a_t )
11216acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                a_step0 = A->step, a_step1 = elem_size;
11226acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            else
11236acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                a_step0 = elem_size, a_step1 = A->step;
11246acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
11256acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( !is_b_t )
11266acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                b_step0 = b_step, b_step1 = elem_size;
11276acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            else
11286acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                b_step0 = elem_size, b_step1 = b_step;
11296acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
11306acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( !C->data.ptr )
11316acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {
11326acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                c_step0 = c_step1 = 0;
11336acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                flags &= ~CV_GEMM_C_T;
11346acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }
11356acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            else if( !(flags & CV_GEMM_C_T) )
11366acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                c_step0 = C->step, c_step1 = elem_size;
11376acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            else
11386acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                c_step0 = elem_size, c_step1 = C->step;
11396acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
11406acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            dm0 = MIN( block_lin_size, d_size.height );
11416acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            dn0 = MIN( block_lin_size, d_size.width );
11426acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            dk0_1 = block_size / dm0;
11436acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            dk0_2 = block_size / dn0;
11446acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            dk0 = MAX( dk0_1, dk0_2 );
11456acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            dk0 = MIN( dk0, len );
11466acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( dk0*dm0 > block_size )
11476acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                dm0 = block_size / dk0;
11486acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( dk0*dn0 > block_size )
11496acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                dn0 = block_size / dk0;
11506acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
11516acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            dk0_1 = (dn0+dn0/8+2) & -2;
11526acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            b_buf_size = (dk0+dk0/8+1)*dk0_1*elem_size;
11536acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            d_buf_size = (dk0+dk0/8+1)*dk0_1*work_elem_size;
11546acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
11556acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( is_a_t )
11566acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {
11576acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                a_buf_size = (dm0+dm0/8+1)*((dk0+dk0/8+2)&-2)*elem_size;
11586acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                flags &= ~CV_GEMM_A_T;
11596acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }
11606acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
11616acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_CALL( block_buffer = (uchar*)cvAlloc(a_buf_size + b_buf_size + d_buf_size));
11626acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            d_buf = block_buffer;
11636acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            b_buf = d_buf + d_buf_size;
11646acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
11656acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( is_a_t )
11666acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                a_buf = b_buf + b_buf_size;
11676acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
11686acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            for( i = 0; i < d_size.height; i += di )
11696acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {
11706acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                di = dm0;
11716acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                if( i + di >= d_size.height || 8*(i + di) + di > 8*d_size.height )
11726acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    di = d_size.height - i;
11736acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
11746acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                for( j = 0; j < d_size.width; j += dj )
11756acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {
11766acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    uchar* _d = D->data.ptr + i*D->step + j*elem_size;
11776acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    const uchar* _c = C->data.ptr + i*c_step0 + j*c_step1;
11786acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    int _d_step = D->step;
11796acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    dj = dn0;
11806acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
11816acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    if( j + dj >= d_size.width || 8*(j + dj) + dj > 8*d_size.width )
11826acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        dj = d_size.width - j;
11836acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
11846acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    flags &= 15;
11856acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    if( dk0 < len )
11866acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    {
11876acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        _d = d_buf;
11886acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        _d_step = dj*work_elem_size;
11896acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    }
11906acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
11916acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    for( k = 0; k < len; k += dk )
11926acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    {
11936acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        const uchar* _a = A->data.ptr + i*a_step0 + k*a_step1;
11946acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        int _a_step = A->step;
11956acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        const uchar* _b = B->data.ptr + k*b_step0 + j*b_step1;
11966acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        int _b_step = b_step;
11976acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        CvSize a_bl_size;
11986acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
11996acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        dk = dk0;
12006acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        if( k + dk >= len || 8*(k + dk) + dk > 8*len )
12016acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                            dk = len - k;
12026acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
12036acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        if( !is_a_t )
12046acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                            a_bl_size.width = dk, a_bl_size.height = di;
12056acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        else
12066acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                            a_bl_size.width = di, a_bl_size.height = dk;
12076acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
12086acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        if( a_buf && is_a_t )
12096acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        {
12106acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                            int t;
12116acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                            _a_step = dk*elem_size;
12126acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                            icvGEMM_TransposeBlock( _a, A->step, a_buf, _a_step, a_bl_size, elem_size );
12136acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                            CV_SWAP( a_bl_size.width, a_bl_size.height, t );
12146acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                            _a = a_buf;
12156acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        }
12166acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
12176acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        if( dj < d_size.width )
12186acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        {
12196acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                            CvSize b_size;
12206acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                            if( !is_b_t )
12216acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                b_size.width = dj, b_size.height = dk;
12226acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                            else
12236acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                b_size.width = dk, b_size.height = dj;
12246acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
12256acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                            _b_step = b_size.width*elem_size;
12266acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                            icvGEMM_CopyBlock( _b, b_step, b_buf, _b_step, b_size, elem_size );
12276acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                            _b = b_buf;
12286acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        }
12296acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
12306acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        if( dk0 < len )
12316acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                            block_mul_func( _a, _a_step, _b, _b_step, _d, _d_step,
12326acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                            a_bl_size, cvSize(dj,di), flags );
12336acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        else
12346acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                            single_mul_func( _a, _a_step, _b, _b_step, _c, C->step, _d, _d_step,
12356acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                             a_bl_size, cvSize(dj,di), alpha, beta, flags );
12366acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        flags |= 16;
12376acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    }
12386acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
12396acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    if( dk0 < len )
12406acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        store_func( _c, C->step, _d, _d_step, D->data.ptr + i*D->step + j*elem_size,
12416acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                    D->step, cvSize(dj,di), alpha, beta, flags );
12426acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }
12436acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }
12446acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
12456acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
12466acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( D0 != D )
12476acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_CALL( cvCopy( D, D0 ));
12486acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
12496acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
12506acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    __END__;
12516acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
12526acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( buffer && !local_alloc )
12536acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        cvFree( &buffer );
12546acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( block_buffer )
12556acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        cvFree( &block_buffer );
12566acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn}
12576acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
12586acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
12596acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn/****************************************************************************************\
12606acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn*                                        cvTransform                                     *
12616acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn\****************************************************************************************/
12626acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
/* Row kernel for a 1-channel source.
   Each source sample src[i] is loaded with _ld_ and expanded into dst_cn
   output channels; channel k is _cast_macro2_(_cast_macro1_(
   mat[2*k]*value + mat[2*k+1] )), i.e. mat is read as dst_cn rows of
   {scale, offset}.
   The expansion site must provide src, dst, mat, size, dst_cn and the
   scratch ints i and k.  On exit src has moved forward by size.width
   elements and dst by size.width*dst_cn elements.
   arrtype is accepted for signature symmetry but is not used here. */
#define  ICV_DEF_TRANSFORM_CASE_C1( arrtype, temptype, _ld_,        \
                                   _cast_macro1_, _cast_macro2_ )   \
{                                                                   \
    for( i = 0; i < size.width; i++ )                               \
    {                                                               \
        const double _in0 = _ld_(src[i]);                           \
        for( k = 0; k < dst_cn; k++ )                               \
        {                                                           \
            const double* _row = mat + k*2;                         \
            temptype _acc = _cast_macro1_(_row[0]*_in0 + _row[1]);  \
            dst[k] = _cast_macro2_(_acc);                           \
        }                                                           \
        dst += dst_cn;                                              \
    }                                                               \
    src += size.width;                                              \
}
12786acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
12796acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
/* Row kernel of the diagonal (scale + offset only) transform, 1 channel:
   dst[i] = _cast_macro2_(_cast_macro1_( mat[0]*_ld_(src[i]) + mat[1] )).
   The expansion site must provide src, dst, mat, size and the scratch
   int i.  Neither src nor dst is advanced by this macro.
   arrtype is not used here. */
#define  ICV_DEF_DIAG_TRANSFORM_CASE_C1( arrtype, temptype, _ld_,   \
                                  _cast_macro1_, _cast_macro2_ )    \
    for( i = 0; i < size.width; i++ )                               \
    {                                                               \
        const double _lin = mat[0]*_ld_(src[i]) + mat[1];           \
        const temptype _tmp = _cast_macro1_(_lin);                  \
        dst[i] = _cast_macro2_(_tmp);                               \
    }
12906acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
12916acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
/* Row kernel for a 2-channel source.
   mat is read as dst_cn rows of {c0, c1, offset}; output channel k of a
   pixel is _cast_macro2_(_cast_macro1_( c0*ch0 + c1*ch1 + offset )), where
   ch0 and ch1 are the pixel's two samples loaded with _ld_.
   dst_cn == 2 takes an unrolled path that indexes src/dst with i and
   advances both pointers once after the row; any other dst_cn takes the
   generic path that advances src by 2 and dst by dst_cn per pixel.
   Either way src ends size.width*2 elements further and dst ends
   size.width*dst_cn elements further.
   The expansion site must provide src, dst, mat, size, dst_cn and the
   scratch ints i and k.  arrtype is not used here. */
#define  ICV_DEF_TRANSFORM_CASE_C2( arrtype, temptype, _ld_,        \
                                  _cast_macro1_, _cast_macro2_ )    \
if( dst_cn == 2 )                                                   \
{                                                                   \
    for( i = 0; i < size.width*2; i += 2 )                          \
    {                                                               \
        double ft0, ft1;                                            \
        temptype t0, t1;                                            \
        ft0 = mat[0]*_ld_(src[i]) + mat[1]*_ld_(src[i+1]) + mat[2]; \
        ft1 = mat[3]*_ld_(src[i]) + mat[4]*_ld_(src[i+1]) + mat[5]; \
        t0 = _cast_macro1_(ft0);                                    \
        t1 = _cast_macro1_(ft1);                                    \
        dst[i] = _cast_macro2_(t0);                                 \
        dst[i+1] = _cast_macro2_(t1);                               \
    }                                                               \
    src += size.width*2; dst += size.width*2;                       \
}                                                                   \
else                                                                \
    for( i = 0; i < size.width; i++, src += 2, dst += dst_cn )      \
    {                                                               \
        const double* _mat = mat;                                   \
        /* both samples go through _ld_, matching the dst_cn == 2 */\
        /* path above, so the two paths agree for any loader      */\
        double v0 = _ld_(src[0]), v1 = _ld_(src[1]);                \
        for( k = 0; k < dst_cn; k++, _mat += 3 )                    \
        {                                                           \
            temptype t0 =                                           \
                _cast_macro1_(_mat[0]*v0 + _mat[1]*v1 + _mat[2]);   \
            dst[k] = _cast_macro2_(t0);                             \
        }                                                           \
    }
13216acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
13226acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
/* Row kernel of the diagonal transform for 2-channel data.
   Only the diagonal and offset entries of the 2x3 coefficient layout are
   read: channel 0 uses mat[0] and mat[2], channel 1 uses mat[4] and mat[5].
   Both samples of a pixel are computed before either result is stored.
   The expansion site must provide src, dst, mat, size and the scratch
   int i; src and dst are not advanced.  arrtype is not used here. */
#define  ICV_DEF_DIAG_TRANSFORM_CASE_C2( arrtype, temptype, _ld_,   \
                                  _cast_macro1_, _cast_macro2_ )    \
    for( i = 0; i < size.width*2; i += 2 )                          \
    {                                                               \
        const double _f0 = mat[0]*_ld_(src[i]) + mat[2];            \
        const double _f1 = mat[4]*_ld_(src[i+1]) + mat[5];          \
        const temptype _r0 = _cast_macro1_(_f0);                    \
        const temptype _r1 = _cast_macro1_(_f1);                    \
        dst[i] = _cast_macro2_(_r0);                                \
        dst[i+1] = _cast_macro2_(_r1);                              \
    }
13366acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
13376acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
/* Row kernel for a 3-channel source.
   mat is read as dst_cn rows of {c0, c1, c2, offset}; output channel k of
   a pixel is _cast_macro2_(_cast_macro1_( c0*ch0 + c1*ch1 + c2*ch2 +
   offset )) with ch0..ch2 loaded through _ld_.
   Three paths are provided:
     dst_cn == 3  - unrolled, src/dst indexed by i and advanced after the row;
     dst_cn == 1  - src advanced per pixel, dst indexed by i and advanced
                    after the row;
     otherwise    - generic loop advancing src by 3 and dst by dst_cn per
                    pixel.
   In every path src ends size.width*3 elements further and dst ends
   size.width*dst_cn elements further.
   The expansion site must provide src, dst, mat, size, dst_cn and the
   scratch ints i and k.  arrtype is not used here. */
#define  ICV_DEF_TRANSFORM_CASE_C3( arrtype, temptype, _ld_,        \
                                  _cast_macro1_, _cast_macro2_ )    \
if( dst_cn == 3 )                                                   \
{                                                                   \
    for( i = 0; i < size.width*3; i += 3 )                          \
    {                                                               \
        const double _x = _ld_(src[i]);                             \
        const double _y = _ld_(src[i+1]);                           \
        const double _z = _ld_(src[i+2]);                           \
        const double _f0 = mat[0]*_x + mat[1]*_y +                  \
                           mat[2]*_z + mat[3];                      \
        const double _f1 = mat[4]*_x + mat[5]*_y +                  \
                           mat[6]*_z + mat[7];                      \
        const double _f2 = mat[8]*_x + mat[9]*_y +                  \
                           mat[10]*_z + mat[11];                    \
        const temptype _r0 = _cast_macro1_(_f0);                    \
        const temptype _r1 = _cast_macro1_(_f1);                    \
        const temptype _r2 = _cast_macro1_(_f2);                    \
        dst[i] = _cast_macro2_(_r0);                                \
        dst[i+1] = _cast_macro2_(_r1);                              \
        dst[i+2] = _cast_macro2_(_r2);                              \
    }                                                               \
    src += size.width*3;                                            \
    dst += size.width*3;                                            \
}                                                                   \
else if( dst_cn == 1 )                                              \
{                                                                   \
    for( i = 0; i < size.width; i++ )                               \
    {                                                               \
        const double _x = _ld_(src[0]);                             \
        const double _y = _ld_(src[1]);                             \
        const double _z = _ld_(src[2]);                             \
        temptype _r0 = _cast_macro1_(mat[0]*_x +                    \
            mat[1]*_y + mat[2]*_z + mat[3]);                        \
        dst[i] = _cast_macro2_(_r0);                                \
        src += 3;                                                   \
    }                                                               \
    dst += size.width;                                              \
}                                                                   \
else                                                                \
{                                                                   \
    for( i = 0; i < size.width; i++ )                               \
    {                                                               \
        const double _x = _ld_(src[0]);                             \
        const double _y = _ld_(src[1]);                             \
        const double _z = _ld_(src[2]);                             \
        for( k = 0; k < dst_cn; k++ )                               \
        {                                                           \
            const double* _row = mat + k*4;                         \
            temptype _acc = _cast_macro1_(_row[0]*_x +              \
                    _row[1]*_y + _row[2]*_z + _row[3]);             \
            dst[k] = _cast_macro2_(_acc);                           \
        }                                                           \
        src += 3;                                                   \
        dst += dst_cn;                                              \
    }                                                               \
}
13836acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
13846acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
/* Row kernel of the diagonal transform for 3-channel data.
   Only the diagonal and offset entries of the 3x4 coefficient layout are
   read: channel 0 uses mat[0]/mat[3], channel 1 uses mat[5]/mat[7] and
   channel 2 uses mat[10]/mat[11].  All three samples of a pixel are
   computed before any result is stored.
   The expansion site must provide src, dst, mat, size and the scratch
   int i; src and dst are not advanced.  arrtype is not used here. */
#define  ICV_DEF_DIAG_TRANSFORM_CASE_C3( arrtype, temptype, _ld_,   \
                                  _cast_macro1_, _cast_macro2_ )    \
    for( i = 0; i < size.width*3; i += 3 )                          \
    {                                                               \
        const double _f0 = mat[0]*_ld_(src[i]) + mat[3];            \
        const double _f1 = mat[5]*_ld_(src[i+1]) + mat[7];          \
        const double _f2 = mat[10]*_ld_(src[i+2]) + mat[11];        \
        const temptype _r0 = _cast_macro1_(_f0);                    \
        const temptype _r1 = _cast_macro1_(_f1);                    \
        const temptype _r2 = _cast_macro1_(_f2);                    \
        dst[i] = _cast_macro2_(_r0);                                \
        dst[i+1] = _cast_macro2_(_r1);                              \
        dst[i+2] = _cast_macro2_(_r2);                              \
    }
14016acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
14026acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
/* Row kernel for a 4-channel source (single generic path for every dst_cn).
   mat is read as dst_cn rows of {c0, c1, c2, c3, offset}; output channel k
   of a pixel is _cast_macro2_(_cast_macro1_( c0*ch0 + c1*ch1 + c2*ch2 +
   c3*ch3 + offset )) with ch0..ch3 loaded through _ld_ before any output
   of that pixel is written.
   src advances by 4 and dst by dst_cn per pixel.  The expansion site must
   provide src, dst, mat, size, dst_cn and the scratch ints i and k.
   arrtype is not used here. */
#define  ICV_DEF_TRANSFORM_CASE_C4( arrtype, temptype, _ld_,        \
                                  _cast_macro1_, _cast_macro2_ )    \
for( i = 0; i < size.width; i++ )                                   \
{                                                                   \
    const double _x = _ld_(src[0]);                                 \
    const double _y = _ld_(src[1]);                                 \
    const double _z = _ld_(src[2]);                                 \
    const double _w = _ld_(src[3]);                                 \
    for( k = 0; k < dst_cn; k++ )                                   \
    {                                                               \
        const double* _row = mat + k*5;                             \
        temptype _acc =_cast_macro1_(_row[0]*_x+_row[1]*_y+         \
                                     _row[2]*_z+_row[3]*_w+_row[4]);\
        dst[k] = _cast_macro2_(_acc);                               \
    }                                                               \
    src += 4;                                                       \
    dst += dst_cn;                                                  \
}
14176acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
14186acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
/* Row kernel of the diagonal transform for 4-channel data.
   Only the diagonal and offset entries of the 4x5 coefficient layout are
   read: channel 0 uses mat[0]/mat[4], channel 1 mat[6]/mat[9], channel 2
   mat[12]/mat[14] and channel 3 mat[18]/mat[19].
   Each pixel is handled as two pairs: channels 0-1 are computed and
   stored, then channels 2-3 are computed and stored.
   The expansion site must provide src, dst, mat, size and the scratch
   int i; src and dst are not advanced.  arrtype is not used here. */
#define  ICV_DEF_DIAG_TRANSFORM_CASE_C4( arrtype, temptype, _ld_,   \
                                  _cast_macro1_, _cast_macro2_ )    \
    for( i = 0; i < size.width*4; i += 4 )                          \
    {                                                               \
        {                                                           \
            const double _f0 = mat[0]*_ld_(src[i]) + mat[4];        \
            const double _f1 = mat[6]*_ld_(src[i+1]) + mat[9];      \
            const temptype _r0 = _cast_macro1_(_f0);                \
            const temptype _r1 = _cast_macro1_(_f1);                \
            dst[i] = _cast_macro2_(_r0);                            \
            dst[i+1] = _cast_macro2_(_r1);                          \
        }                                                           \
        {                                                           \
            const double _f2 = mat[12]*_ld_(src[i+2]) + mat[14];    \
            const double _f3 = mat[18]*_ld_(src[i+3]) + mat[19];    \
            const temptype _r2 = _cast_macro1_(_f2);                \
            const temptype _r3 = _cast_macro1_(_f3);                \
            dst[i+2] = _cast_macro2_(_r2);                          \
            dst[i+3] = _cast_macro2_(_r3);                          \
        }                                                           \
    }
14386acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
14396acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
14406acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
/* Generates the static driver icvTransform_<flavor>, which applies the
   ICV_DEF_TRANSFORM_CASE_C<cn> row kernel to size.height rows.
     src, srcstep  - source data and its row stride (divided by the element
                     size below, so presumably given in bytes)
     dst, dststep  - destination data and its row stride, same convention
     size          - size.width pixels per row, size.height rows
     mat           - coefficient array consumed by the row kernel
     dst_cn        - number of destination channels
   The row kernels leave src and dst just past the pixels they processed,
   so both steps are first reduced to the remaining element gap up to the
   start of the next row.  The generated function always returns CV_OK. */
#define  ICV_DEF_TRANSFORM_FUNC( flavor, arrtype, temptype, _ld_,   \
                                 _cast_macro1_, _cast_macro2_, cn  )\
static CvStatus CV_STDCALL                                          \
icvTransform_##flavor( const arrtype* src, int srcstep,             \
                       arrtype* dst, int dststep, CvSize size,      \
                       const double* mat, int dst_cn )              \
{                                                                   \
    srcstep = srcstep/sizeof(src[0]) - size.width*cn;               \
    dststep = dststep/sizeof(dst[0]) - size.width*dst_cn;           \
                                                                    \
    while( size.height-- )                                          \
    {                                                               \
        int i, k;                                                   \
        ICV_DEF_TRANSFORM_CASE_C##cn( arrtype, temptype, _ld_,      \
                                     _cast_macro1_, _cast_macro2_ ) \
        src += srcstep;                                             \
        dst += dststep;                                             \
    }                                                               \
                                                                    \
    return CV_OK;                                                   \
}
14596acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
14606acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
/* Generates the static driver icvDiagTransform_<flavor>, which applies the
   ICV_DEF_DIAG_TRANSFORM_CASE_C<cn> row kernel to size.height rows.
     src, srcstep  - source data and its row stride (divided by the element
                     size below, so presumably given in bytes)
     dst, dststep  - destination data and its row stride, same convention
     size          - size.width pixels per row, size.height rows
     mat           - coefficient array consumed by the row kernel
   The diagonal row kernels index from the row start and do not move src
   or dst, so each pointer is advanced by a whole row stride per row.
   The generated function always returns CV_OK. */
#define  ICV_DEF_DIAG_TRANSFORM_FUNC( flavor, arrtype, temptype, _ld_, \
                                 _cast_macro1_, _cast_macro2_, cn  )\
static CvStatus CV_STDCALL                                          \
icvDiagTransform_##flavor( const arrtype* src, int srcstep,         \
                       arrtype* dst, int dststep, CvSize size,      \
                       const double* mat )                          \
{                                                                   \
    srcstep /= sizeof(src[0]);                                      \
    dststep /= sizeof(dst[0]);                                      \
                                                                    \
    while( size.height-- )                                          \
    {                                                               \
        int i;                                                      \
        ICV_DEF_DIAG_TRANSFORM_CASE_C##cn( arrtype, temptype, _ld_, \
                                     _cast_macro1_, _cast_macro2_ ) \
        src += srcstep;                                             \
        dst += dststep;                                             \
    }                                                               \
                                                                    \
    return CV_OK;                                                   \
}
14796acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
14806acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
14816acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_TRANSFORM_FUNC( 8u_C1R, uchar, int, CV_8TO32F, cvRound, CV_CAST_8U, 1 )
14826acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_TRANSFORM_FUNC( 8u_C2R, uchar, int, CV_8TO32F, cvRound, CV_CAST_8U, 2 )
14836acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_TRANSFORM_FUNC( 8u_C3R, uchar, int, CV_8TO32F, cvRound, CV_CAST_8U, 3 )
14846acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_TRANSFORM_FUNC( 8u_C4R, uchar, int, CV_8TO32F, cvRound, CV_CAST_8U, 4 )
14856acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
14866acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_TRANSFORM_FUNC( 16u_C1R, ushort, int, CV_NOP, cvRound, CV_CAST_16U, 1 )
14876acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_TRANSFORM_FUNC( 16u_C2R, ushort, int, CV_NOP, cvRound, CV_CAST_16U, 2 )
14886acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_TRANSFORM_FUNC( 16u_C3R, ushort, int, CV_NOP, cvRound, CV_CAST_16U, 3 )
14896acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_TRANSFORM_FUNC( 16u_C4R, ushort, int, CV_NOP, cvRound, CV_CAST_16U, 4 )
14906acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
14916acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_TRANSFORM_FUNC( 16s_C1R, short, int, CV_NOP, cvRound, CV_CAST_16S, 1 )
14926acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_TRANSFORM_FUNC( 16s_C2R, short, int, CV_NOP, cvRound, CV_CAST_16S, 2 )
14936acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_TRANSFORM_FUNC( 16s_C3R, short, int, CV_NOP, cvRound, CV_CAST_16S, 3 )
14946acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_TRANSFORM_FUNC( 16s_C4R, short, int, CV_NOP, cvRound, CV_CAST_16S, 4 )
14956acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
14966acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_TRANSFORM_FUNC( 32s_C1R, int, int, CV_NOP, cvRound, CV_NOP, 1 )
14976acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_TRANSFORM_FUNC( 32s_C2R, int, int, CV_NOP, cvRound, CV_NOP, 2 )
14986acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_TRANSFORM_FUNC( 32s_C3R, int, int, CV_NOP, cvRound, CV_NOP, 3 )
14996acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_TRANSFORM_FUNC( 32s_C4R, int, int, CV_NOP, cvRound, CV_NOP, 4 )
15006acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
15016acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_TRANSFORM_FUNC( 32f_C1R, float, double, CV_NOP, CV_NOP, CV_CAST_32F, 1 )
15026acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_TRANSFORM_FUNC( 32f_C2R, float, double, CV_NOP, CV_NOP, CV_CAST_32F, 2 )
15036acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_TRANSFORM_FUNC( 32f_C3R, float, double, CV_NOP, CV_NOP, CV_CAST_32F, 3 )
15046acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_TRANSFORM_FUNC( 32f_C4R, float, double, CV_NOP, CV_NOP, CV_CAST_32F, 4 )
15056acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
15066acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_TRANSFORM_FUNC( 64f_C1R, double, double, CV_NOP, CV_NOP, CV_CAST_64F, 1 )
15076acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_TRANSFORM_FUNC( 64f_C2R, double, double, CV_NOP, CV_NOP, CV_CAST_64F, 2 )
15086acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_TRANSFORM_FUNC( 64f_C3R, double, double, CV_NOP, CV_NOP, CV_CAST_64F, 3 )
15096acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_TRANSFORM_FUNC( 64f_C4R, double, double, CV_NOP, CV_NOP, CV_CAST_64F, 4 )
15106acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
// ICV_DEF_DIAG_TRANSFORM_FUNC instantiations for the 16u, 16s, 32s, 32f and 64f
// depths, 1..4 channels each. The argument list has the same shape as the
// ICV_DEF_TRANSFORM_FUNC lines above:
//   ( flavor suffix, type, type, CV_NOP, cvRound | CV_NOP, cast macro | CV_NOP, channels ).
// No 8u or 8s flavor is instantiated here; those names are #defined to 0 further down.
// NOTE(review): presumably each line expands to an icvDiagTransform_<flavor> kernel
// used when the transformation matrix is diagonal (cvTransform looks these up through
// diag_transform_tab) -- the macro body is not visible here, confirm.

// 16-bit unsigned flavors.
15116acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_DIAG_TRANSFORM_FUNC( 16u_C1R, ushort, int, CV_NOP, cvRound, CV_CAST_16U, 1 )
15126acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_DIAG_TRANSFORM_FUNC( 16u_C2R, ushort, int, CV_NOP, cvRound, CV_CAST_16U, 2 )
15136acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_DIAG_TRANSFORM_FUNC( 16u_C3R, ushort, int, CV_NOP, cvRound, CV_CAST_16U, 3 )
15146acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_DIAG_TRANSFORM_FUNC( 16u_C4R, ushort, int, CV_NOP, cvRound, CV_CAST_16U, 4 )
15156acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
// 16-bit signed flavors.
15166acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_DIAG_TRANSFORM_FUNC( 16s_C1R, short, int, CV_NOP, cvRound, CV_CAST_16S, 1 )
15176acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_DIAG_TRANSFORM_FUNC( 16s_C2R, short, int, CV_NOP, cvRound, CV_CAST_16S, 2 )
15186acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_DIAG_TRANSFORM_FUNC( 16s_C3R, short, int, CV_NOP, cvRound, CV_CAST_16S, 3 )
15196acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_DIAG_TRANSFORM_FUNC( 16s_C4R, short, int, CV_NOP, cvRound, CV_CAST_16S, 4 )
15206acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
// 32-bit signed integer flavors (CV_NOP is passed where other depths pass CV_CAST_*).
15216acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_DIAG_TRANSFORM_FUNC( 32s_C1R, int, int, CV_NOP, cvRound, CV_NOP, 1 )
15226acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_DIAG_TRANSFORM_FUNC( 32s_C2R, int, int, CV_NOP, cvRound, CV_NOP, 2 )
15236acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_DIAG_TRANSFORM_FUNC( 32s_C3R, int, int, CV_NOP, cvRound, CV_NOP, 3 )
15246acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_DIAG_TRANSFORM_FUNC( 32s_C4R, int, int, CV_NOP, cvRound, CV_NOP, 4 )
15256acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
// Single-precision floating-point flavors.
15266acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_DIAG_TRANSFORM_FUNC( 32f_C1R, float, double, CV_NOP, CV_NOP, CV_CAST_32F, 1 )
15276acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_DIAG_TRANSFORM_FUNC( 32f_C2R, float, double, CV_NOP, CV_NOP, CV_CAST_32F, 2 )
15286acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_DIAG_TRANSFORM_FUNC( 32f_C3R, float, double, CV_NOP, CV_NOP, CV_CAST_32F, 3 )
15296acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_DIAG_TRANSFORM_FUNC( 32f_C4R, float, double, CV_NOP, CV_NOP, CV_CAST_32F, 4 )
15306acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
// Double-precision floating-point flavors.
15316acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_DIAG_TRANSFORM_FUNC( 64f_C1R, double, double, CV_NOP, CV_NOP, CV_CAST_64F, 1 )
15326acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_DIAG_TRANSFORM_FUNC( 64f_C2R, double, double, CV_NOP, CV_NOP, CV_CAST_64F, 2 )
15336acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_DIAG_TRANSFORM_FUNC( 64f_C3R, double, double, CV_NOP, CV_NOP, CV_CAST_64F, 3 )
15346acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_DIAG_TRANSFORM_FUNC( 64f_C4R, double, double, CV_NOP, CV_NOP, CV_CAST_64F, 4 )
15356acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
// Flavors with no kernel are defined as the literal 0 so that the names still exist.
// NOTE(review): presumably CV_DEF_INIT_BIG_FUNC_TAB_2D references every
// icv<Name>_<depth>_C<n>R identifier, so these end up as null table entries --
// the macro body is not visible here, confirm.
//
// Signed 8-bit: no full-matrix transform. cvTransform raises CV_StsUnsupportedFormat
// when the entry it fetches from transform_tab is null.
15366acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn#define icvTransform_8s_C1R 0
15376acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn#define icvTransform_8s_C2R 0
15386acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn#define icvTransform_8s_C3R 0
15396acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn#define icvTransform_8s_C4R 0
15406acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
// Signed 8-bit: no diagonal transform either.
15416acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn#define icvDiagTransform_8s_C1R 0
15426acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn#define icvDiagTransform_8s_C2R 0
15436acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn#define icvDiagTransform_8s_C3R 0
15446acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn#define icvDiagTransform_8s_C4R 0
15456acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
// Unsigned 8-bit: no diagonal kernel. For CV_8U depth cvTransform builds a
// 256-entry lookup table and uses an icvLUT_Transform8u_8u_C<n>R function
// instead of consulting diag_transform_tab.
15466acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn#define icvDiagTransform_8u_C1R 0
15476acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn#define icvDiagTransform_8u_C2R 0
15486acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn#define icvDiagTransform_8u_C3R 0
15496acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn#define icvDiagTransform_8u_C4R 0
15506acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
// Table initialisers for the two kernel families.
// NOTE(review): presumably these generate icvInitTransformRTable() and
// icvInitDiagTransformRTable(), which cvTransform calls once to fill its static
// CvBigFuncTable objects -- confirm against the macro definition.
15516acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennCV_DEF_INIT_BIG_FUNC_TAB_2D( Transform, R )
15526acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennCV_DEF_INIT_BIG_FUNC_TAB_2D( DiagTransform, R )
15536acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
// Function-pointer type that cvTransform casts transform_tab.fn_2d[type] entries to.
// Parameters: source pointer and step, destination pointer and step, the CvSize to
// process, an opaque pointer to the matrix data, and the destination channel count.
// Returns a CvStatus.
// NOTE(review): steps are presumably in bytes and `mat` presumably points at the
// packed double coefficient buffer cvTransform assembles -- confirm at the call site.
15546acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renntypedef CvStatus (CV_STDCALL * CvTransformFunc)(
15556acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                       const void* src, int srcstep,
15566acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                       void* dst, int dststep, CvSize size,
15576acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                       const void* mat, int dst_cn );
15586acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
// Function-pointer type that cvTransform casts diag_transform_tab.fn_2d[type] entries
// to. Same parameter list as CvTransformFunc minus the trailing dst_cn argument.
15596acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renntypedef CvStatus (CV_STDCALL * CvDiagTransformFunc)(
15606acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                       const void* src, int srcstep,
15616acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                       void* dst, int dststep, CvSize size,
15626acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                       const void* mat );
15636acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
// NOTE(review): this typedef repeats the CvDiagTransformFunc declaration directly
// above token for token; it looks redundant and is a candidate for removal.
15646acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renntypedef CvStatus (CV_STDCALL * CvDiagTransformFunc)(
15656acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                       const void* src, int srcstep,
15666acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                       void* dst, int dststep, CvSize size,
15676acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                       const void* mat );
15686acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
15696acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn///////////////////// IPP transform functions //////////////////
15706acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
// Global function pointers for optional accelerated kernels, all initialised to 0.
// Their pointer types (icv..._t) are declared outside this excerpt. cvTransform picks
// one of them according to the array type and only calls it when it is non-null.
// NOTE(review): presumably these are filled in at runtime when the IPP libraries are
// loaded -- the code that assigns them is not visible here, confirm.
//
// Color-twist kernels: candidates when the source and destination channel counts match.
15716acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennicvColorTwist_8u_C3R_t icvColorTwist_8u_C3R_p = 0;
15726acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennicvColorTwist_16u_C3R_t icvColorTwist_16u_C3R_p = 0;
15736acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennicvColorTwist_16s_C3R_t icvColorTwist_16s_C3R_p = 0;
15746acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennicvColorTwist_32f_C3R_t icvColorTwist_32f_C3R_p = 0;
15756acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennicvColorTwist_32f_C4R_t icvColorTwist_32f_C4R_p = 0;
15766acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
// Color-to-gray kernels: candidates for 3-channel source, 1-channel destination.
15776acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennicvColorToGray_8u_C3C1R_t icvColorToGray_8u_C3C1R_p = 0;
15786acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennicvColorToGray_16u_C3C1R_t icvColorToGray_16u_C3C1R_p = 0;
15796acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennicvColorToGray_16s_C3C1R_t icvColorToGray_16s_C3C1R_p = 0;
15806acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennicvColorToGray_32f_C3C1R_t icvColorToGray_32f_C3C1R_p = 0;
15816acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
// Color-to-gray kernels: candidates for 4-channel source, 1-channel destination.
15826acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennicvColorToGray_8u_AC4C1R_t icvColorToGray_8u_AC4C1R_p = 0;
15836acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennicvColorToGray_16u_AC4C1R_t icvColorToGray_16u_AC4C1R_p = 0;
15846acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennicvColorToGray_16s_AC4C1R_t icvColorToGray_16s_AC4C1R_p = 0;
15856acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennicvColorToGray_32f_AC4C1R_t icvColorToGray_32f_AC4C1R_p = 0;
15866acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
// Common function-pointer type cvTransform uses for whichever pointer above it
// selected. Parameters: source pointer and step, destination pointer and step,
// the CvSize to process, and a float coefficient array. Returns a CvStatus.
15876acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renntypedef CvStatus (CV_STDCALL * CvColorTwistIPPFunc)( const void* src, int srcstep,
15886acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        void* dst, int dststep, CvSize size, const float* coeffs );
15896acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
15906acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn////////////////////////////////////////////////////////////////
15916acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
15926acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennCV_IMPL void
15936acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RenncvTransform( const CvArr* srcarr, CvArr* dstarr,
15946acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn             const CvMat* transmat, const CvMat* shiftvec )
15956acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn{
15966acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    static CvBigFuncTable transform_tab, diag_transform_tab;
15976acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    static int inittab = 0;
15986acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvMat* lut = 0;
15996acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
16006acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CV_FUNCNAME( "cvTransform" );
16016acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
16026acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    __BEGIN__;
16036acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
16046acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvMat srcstub, *src = (CvMat*)srcarr;
16056acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvMat dststub, *dst = (CvMat*)dstarr;
16066acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvMat rotstub, *rot = (CvMat*)transmat;
16076acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvMat shiftstub, *shift = (CvMat*)shiftvec;
16086acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvSeq *src_seq = 0, *dst_seq = 0;
16096acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvSeq hdr; // need only one copy of stub header & seqblock (either for src or dst)
16106acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvSeqBlock block_hdr;
16116acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    int i, j, type, cn, dst_cn;
16126acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    int coi = 0, coi2 = 0;
16136acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    double* buffer = (double*)cvStackAlloc( CV_CN_MAX*(CV_CN_MAX+1)*sizeof(buffer[0]) );
16146acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
16156acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !inittab )
16166acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
16176acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        icvInitTransformRTable( &transform_tab );
16186acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        icvInitDiagTransformRTable( &diag_transform_tab );
16196acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        inittab = 1;
16206acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
16216acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
16226acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( CV_IS_SEQ( src ))
16236acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
16246acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        src_seq = (CvSeq*)src;
16256acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( CV_ELEM_SIZE(src_seq->flags) != src_seq->elem_size )
16266acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_ERROR( CV_StsUnsupportedFormat, "Unsupported type of sequence elements" );
16276acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
16286acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    else
16296acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_CALL( src = cvGetMat( src, &srcstub, &coi ));
16306acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
16316acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( CV_IS_SEQ( dst ))
16326acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
16336acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        dst_seq = (CvSeq*)dst;
16346acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( CV_ELEM_SIZE(dst_seq->flags) != dst_seq->elem_size )
16356acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_ERROR( CV_StsUnsupportedFormat, "Unsupported type of sequence elements" );
16366acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
16376acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    else
16386acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_CALL( dst = cvGetMat( dst, &dststub, &coi2 ));
16396acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
16406acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( coi != 0 || coi2 != 0 )
16416acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_ERROR( CV_BadCOI, "" );
16426acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
16436acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !CV_ARE_DEPTHS_EQ(src, dst) )
16446acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_ERROR( CV_StsUnmatchedFormats, "" );
16456acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
16466acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( src_seq || dst_seq )
16476acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
16486acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( !src_seq )
16496acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
16506acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( CV_IS_MAT_CONT(src->type) || (src->rows != 1 && src->cols != 1) )
16516acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                CV_ERROR( CV_StsBadSize, "if eigher the source or destination is a sequence, "
16526acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                "the other array must be also a sequence of continous 1d vector" );
16536acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            src_seq = cvMakeSeqHeaderForArray( CV_MAT_TYPE(src->type), sizeof(hdr),
16546acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                       CV_ELEM_SIZE(src->type), src->data.ptr,
16556acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                       src->rows + src->cols + 1, &hdr, &block_hdr );
16566acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
16576acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
16586acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( !dst_seq )
16596acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
16606acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( CV_IS_MAT_CONT(dst->type) || (dst->rows != 1 && dst->cols != 1) )
16616acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                CV_ERROR( CV_StsBadSize, "if eigher the source or destination is a sequence, "
16626acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                "the other array must be also a sequence of continous 1d vector" );
16636acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( dst->rows + dst->cols - 1 != src_seq->total )
16646acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                CV_ERROR( CV_StsUnmatchedFormats,
16656acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                "source sequence and destination vector have different sizes" );
16666acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            dst_seq = cvMakeSeqHeaderForArray( CV_MAT_TYPE(dst->type), sizeof(hdr),
16676acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                           CV_ELEM_SIZE(dst->type), dst->data.ptr,
16686acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                           dst->rows + dst->cols + 1, &hdr, &block_hdr );
16696acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
16706acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        else if( dst_seq->total != src_seq->total )
16716acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
16726acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( dst_seq->total > src_seq->total )
16736acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                cvSeqPopMulti( dst_seq, 0, dst_seq->total - src_seq->total );
16746acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            else
16756acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                cvSeqPushMulti( dst_seq, 0, src_seq->total - dst_seq->total );
16766acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
16776acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
16786acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    else if( !CV_ARE_SIZES_EQ( src, dst ))
16796acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_ERROR( CV_StsUnmatchedSizes, "" );
16806acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
16816acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    type = CV_MAT_TYPE( src->type );
16826acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    cn = CV_MAT_CN( type );
16836acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    dst_cn = CV_MAT_CN( dst->type );
16846acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
16856acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( cn > 4 || dst_cn > 4 )
16866acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_ERROR( CV_StsOutOfRange, "Both input and output array must have at most 4 channels" );
16876acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
16886acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !CV_IS_MAT( rot ))
16896acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_CALL( rot = cvGetMat( rot, &rotstub, &coi ));
16906acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
16916acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( rot->rows != dst_cn )
16926acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_ERROR( CV_StsBadSize,
16936acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        "The height of transmat matrix must be equal to number of channels" );
16946acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
16956acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( rot->cols == cn + 1 || rot->cols == cn )
16966acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
16976acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( CV_MAT_TYPE( rot->type ) == CV_64FC1 )
16986acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
16996acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            for( i = 0; i < dst_cn; i++ )
17006acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {
17016acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                buffer[i*(cn+1) + cn] = 0;
17026acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                for( j = 0; j < rot->cols; j++ )
17036acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    buffer[i*(cn+1) + j] = ((double*)(rot->data.ptr + rot->step*i))[j];
17046acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }
17056acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
17066acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        else if( CV_MAT_TYPE( rot->type ) == CV_32FC1 )
17076acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
17086acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            for( i = 0; i < dst_cn; i++ )
17096acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {
17106acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                buffer[i*(cn+1) + cn] = 0;
17116acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                for( j = 0; j < rot->cols; j++ )
17126acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    buffer[i*(cn+1) + j] = ((float*)(rot->data.ptr + rot->step*i))[j];
17136acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }
17146acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
17156acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        else
17166acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_ERROR( CV_StsUnsupportedFormat, "Rotation matrix must be 32fC1 or 64fC1" );
17176acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
17186acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    else
17196acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_ERROR( CV_StsUnmatchedSizes, "If the source array has <cn> channels, "
17206acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn           "the transformation matrix must have <cn> x <cn>+1 or <cn> x <cn> size" );
17216acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
17226acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( shift )
17236acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
17246acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( !CV_IS_MAT( shift ))
17256acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_CALL( shift = cvGetMat( shift, &shiftstub, &coi ));
17266acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
17276acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( CV_MAT_CN( shift->type ) * shift->cols * shift->rows == dst_cn &&
17286acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            (shift->rows == 1 || shift->cols == 1) )
17296acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
17306acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( CV_MAT_DEPTH( shift->type ) == CV_64F )
17316acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {
17326acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                int step = shift->step ? shift->step/sizeof(double) : 1;
17336acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                for( i = 0; i < dst_cn; i++ )
17346acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    buffer[i*(cn+1) + cn] += shift->data.db[i*step];
17356acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }
17366acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            else if( CV_MAT_DEPTH( shift->type ) == CV_32F )
17376acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {
17386acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                int step = shift->step ? shift->step/sizeof(float) : 1;
17396acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                for( i = 0; i < dst_cn; i++ )
17406acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    buffer[i*(cn+1) + cn] += shift->data.fl[i*step];
17416acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }
17426acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            else
17436acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                CV_ERROR( CV_StsUnsupportedFormat, "Shift vector must be 32f or 64f" );
17446acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
17456acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        else
17466acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
17476acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_ERROR( CV_StsUnmatchedSizes,
17486acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                "Shift (if present) must be 1 dimensional vector with the number "
17496acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                "of elements equal to number of channels in the processed array" );
17506acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
17516acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
17526acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
17536acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( coi != 0 || coi2 != 0 )
17546acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_ERROR( CV_BadCOI, "" );
17556acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
17566acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
17576acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CvTransformFunc func = (CvTransformFunc)(transform_tab.fn_2d[type]);
17586acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CvDiagTransformFunc diag_func = 0;
17596acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CvLUT_TransformFunc lut_func = 0;
17606acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        int diag_transform = 0;
17616acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CvColorTwistIPPFunc ipp_func = 0;
17626acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CvSize size;
17636acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        float* ipp_coeffs = (float*)cvStackAlloc( 16*sizeof(ipp_coeffs[0]) );
17646acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
17656acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( !func )
17666acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_ERROR( CV_StsUnsupportedFormat, "" );
17676acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
17686acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( cn == dst_cn )
17696acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            ipp_func = type == CV_8UC3 ? icvColorTwist_8u_C3R_p :
17706acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                       type == CV_16UC3 ? icvColorTwist_16u_C3R_p :
17716acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                       type == CV_16SC3 ? icvColorTwist_16s_C3R_p :
17726acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                       type == CV_32FC3 ? icvColorTwist_32f_C3R_p :
17736acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                       type == CV_32FC4 && fabs(buffer[4]) < DBL_EPSILON &&
17746acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                       fabs(buffer[9]) < DBL_EPSILON && fabs(buffer[14]) < DBL_EPSILON &&
17756acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                       fabs(buffer[19]) < DBL_EPSILON ? icvColorTwist_32f_C4R_p : 0;
17766acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        else if( dst_cn == 1 && (cn == 3 || cn == 4) &&
17776acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                 buffer[0] >= 0 && buffer[1] >= 0 && buffer[2] >= 0 &&
17786acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                 buffer[0] + buffer[1] + buffer[2] <= 1.01 &&
17796acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                 fabs(buffer[3]) < DBL_EPSILON && (cn == 3 || fabs(buffer[4]) < DBL_EPSILON) )
17806acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
17816acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( cn == 3 )
17826acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                ipp_func = type == CV_8UC3 ? icvColorToGray_8u_C3C1R_p :
17836acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                           type == CV_16UC3 ? icvColorToGray_16u_C3C1R_p :
17846acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                           type == CV_16SC3 ? icvColorToGray_16s_C3C1R_p :
17856acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                           type == CV_32FC3 ? icvColorToGray_32f_C3C1R_p : 0;
17866acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            else
17876acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                ipp_func = type == CV_8UC4 ? icvColorToGray_8u_AC4C1R_p :
17886acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                           type == CV_16UC4 ? icvColorToGray_16u_AC4C1R_p :
17896acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                           type == CV_16SC4 ? icvColorToGray_16s_AC4C1R_p :
17906acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                           type == CV_32FC4 ? icvColorToGray_32f_AC4C1R_p : 0;
17916acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
17926acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
17936acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( dst_cn == cn )
17946acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
17956acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            diag_transform = 1;
17966acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            for( i = 0; i < dst_cn; i++ )
17976acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                for( j = 0; j < cn; j++ )
17986acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {
17996acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    if( i != j && fabs(buffer[i*(cn+1) + j]) > DBL_EPSILON )
18006acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    {
18016acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        diag_transform = 0;
18026acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        break;
18036acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    }
18046acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }
18056acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
18066acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( diag_transform )
18076acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {
18086acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                if( CV_MAT_DEPTH(type) == CV_8U )
18096acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {
18106acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    CV_CALL( lut = cvCreateMat( 1, 256, type ));
18116acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    for( i = 0; i < cn; i++ )
18126acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    {
18136acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        double a = buffer[i*(cn+1) + i], b = buffer[i*(cn+1) + cn];
18146acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        uchar* ltab = lut->data.ptr;
18156acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        for( j = 0; j < 256; j++ )
18166acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        {
18176acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                            int t = cvRound(a*j + b);
18186acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                            ltab[j*cn + i] = CV_CAST_8U(t);
18196acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        }
18206acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    }
18216acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    lut_func = cn == 1 ? (CvLUT_TransformFunc)icvLUT_Transform8u_8u_C1R :
18226acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                               cn == 2 ? (CvLUT_TransformFunc)icvLUT_Transform8u_8u_C2R :
18236acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                               cn == 3 ? (CvLUT_TransformFunc)icvLUT_Transform8u_8u_C3R :
18246acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                               (CvLUT_TransformFunc)icvLUT_Transform8u_8u_C4R;
18256acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }
18266acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                else
18276acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    diag_func = (CvDiagTransformFunc)(diag_transform_tab.fn_2d[type]);
18286acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }
18296acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
18306acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
18316acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( ipp_func )
18326acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
18336acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            const double* ptr = buffer;
18346acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
18356acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            // fill cn x 4 ipp_coeffs array
18366acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            for( i = 0; i < cn*4; i += 4, ptr += cn+1 )
18376acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {
18386acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                float t0 = (float)ptr[0];
18396acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                float t1 = (float)ptr[1];
18406acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                ipp_coeffs[i] = t0;
18416acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                ipp_coeffs[i+1] = t1;
18426acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                t0 = (float)ptr[2];
18436acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                t1 = (float)ptr[3];
18446acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                ipp_coeffs[i+2] = t0;
18456acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                ipp_coeffs[i+3] = t1;
18466acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }
18476acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
18486acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
18496acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( !src_seq )
18506acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
18516acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            int srcstep = src->step;
18526acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            int dststep = dst->step;
18536acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            size = cvGetMatSize( src );
18546acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
18556acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( CV_IS_MAT_CONT( src->type & dst->type ))
18566acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {
18576acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                size.width *= size.height;
18586acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                size.height = 1;
18596acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                srcstep = dststep = CV_STUB_STEP;
18606acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }
18616acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
18626acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( lut_func )
18636acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                lut_func( src->data.ptr, src->step, dst->data.ptr,
18646acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                          dst->step, size, lut->data.ptr );
18656acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            else if( ipp_func )
18666acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {
18676acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                IPPI_CALL( ipp_func( src->data.ptr, srcstep, dst->data.ptr,
18686acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                     dststep, size, ipp_coeffs ));
18696acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }
18706acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            else if( diag_transform )
18716acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                diag_func( src->data.ptr, src->step, dst->data.ptr,
18726acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                           dst->step, size, buffer );
18736acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            else
18746acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                func( src->data.ptr, src->step, dst->data.ptr,
18756acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                      dst->step, size, buffer, dst_cn );
18766acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
18776acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        else
18786acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
18796acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CvSeqBlock* src_block = src_seq->first;
18806acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CvSeqBlock* dst_block = dst_seq->first;
18816acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            int src_idx = 0, dst_idx = 0;
18826acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            int src_elem_size = CV_ELEM_SIZE(src_seq->flags);
18836acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            int dst_elem_size = CV_ELEM_SIZE(dst_seq->flags);
18846acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
18856acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            for( i = src_seq->total; i > 0; )
18866acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {
18876acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                int src_len = src_block->count - src_idx;
18886acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                int dst_len = dst_block->count - dst_idx;
18896acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                const void* srcptr = src_block->data + src_idx*src_elem_size;
18906acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                void* dstptr = dst_block->data + dst_idx*dst_elem_size;
18916acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                src_len = MIN(src_len, dst_len);
18926acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
18936acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                if( lut_func )
18946acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    lut_func( srcptr, CV_STUB_STEP, dstptr, CV_STUB_STEP,
18956acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                              cvSize( src_len, 1 ), lut->data.ptr );
18966acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                else if( ipp_func )
18976acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {
18986acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    IPPI_CALL( ipp_func( srcptr, CV_STUB_STEP, dstptr, CV_STUB_STEP,
18996acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                         cvSize( src_len, 1 ), ipp_coeffs ));
19006acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }
19016acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                else if( diag_transform )
19026acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    diag_func( srcptr, CV_STUB_STEP, dstptr, CV_STUB_STEP,
19036acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                               cvSize( src_len, 1 ), buffer );
19046acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                else
19056acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    func( srcptr, CV_STUB_STEP, dstptr, CV_STUB_STEP,
19066acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                          cvSize( src_len, 1 ), buffer, dst_cn );
19076acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
19086acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                if( (src_idx += src_len) == src_block->count )
19096acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    src_block = src_block->next, src_idx = 0;
19106acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                if( (dst_idx += src_len) == dst_block->count )
19116acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    dst_block = dst_block->next, dst_idx = 0;
19126acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                i -= src_len;
19136acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }
19146acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
19156acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
19166acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
19176acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    __END__;
19186acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
19196acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    cvReleaseMat( &lut );
19206acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn}
19216acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
19226acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
19236acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn/****************************************************************************************\
19246acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn*                                        cvPerspectiveTransform                          *
19256acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn\****************************************************************************************/
19266acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
/*
 * Generates icvPerspectiveTransform_<flavor>_C2R, the perspective-transform
 * kernel for 2-channel (x, y) data whose scalars have type `arrtype`.
 *
 * `mat` is read as nine consecutive doubles (a row-major 3x3 matrix).
 * For every point:
 *     w  = x*mat[6] + y*mat[7] + mat[8]
 *     x' = (x*mat[0] + y*mat[1] + mat[2]) * (1/w)
 *     y' = (x*mat[3] + y*mat[4] + mat[5]) * (1/w)
 * Whenever fabs(w) > FLT_EPSILON does not hold, the output point is (0, 0).
 *
 * srcstep and dststep arrive in bytes and are converted to element units;
 * size.width is the number of points per row, size.height the number of rows.
 * Both coordinates of a point are loaded before its result is stored.
 * The generated function always returns CV_OK.
 */
#define ICV_PERSPECTIVE_TRANSFORM_FUNC_2( flavor, arrtype )                             \
static CvStatus CV_STDCALL                                                              \
icvPerspectiveTransform_##flavor##_C2R( const arrtype* src, int srcstep,                \
                                        arrtype* dst, int dststep,                      \
                                        CvSize size, const double* mat )                \
{                                                                                       \
    const int row_len = size.width * 2; /* scalars per row */                           \
    int k;                                                                              \
                                                                                        \
    srcstep /= sizeof(src[0]);                                                          \
    dststep /= sizeof(dst[0]);                                                          \
                                                                                        \
    while( size.height-- )                                                              \
    {                                                                                   \
        for( k = 0; k < row_len; k += 2 )                                               \
        {                                                                               \
            const arrtype x = src[k], y = src[k + 1];                                   \
            const double w = x*mat[6] + y*mat[7] + mat[8];                              \
                                                                                        \
            /* written as a negated '>' so that a NaN denominator */                    \
            /* also ends up in the zero-output branch             */                    \
            if( !(fabs(w) > FLT_EPSILON) )                                              \
            {                                                                           \
                dst[k] = (arrtype)0;                                                    \
                dst[k+1] = (arrtype)0;                                                  \
            }                                                                           \
            else                                                                        \
            {                                                                           \
                const double inv_w = 1./w;                                              \
                dst[k] = (arrtype)((x*mat[0] + y*mat[1] + mat[2]) * inv_w);             \
                dst[k+1] = (arrtype)((x*mat[3] + y*mat[4] + mat[5]) * inv_w);           \
            }                                                                           \
        }                                                                               \
                                                                                        \
        src += srcstep;                                                                 \
        dst += dststep;                                                                 \
    }                                                                                   \
                                                                                        \
    return CV_OK;                                                                       \
}
19606acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
19616acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
/*
 * Generates icvPerspectiveTransform_<flavor>_C3R, the perspective-transform
 * kernel for 3-channel (x, y, z) data whose scalars have type `arrtype`.
 *
 * `mat` is read as sixteen consecutive doubles (a row-major 4x4 matrix).
 * For every point:
 *     w  = x*mat[12] + y*mat[13] + z*mat[14] + mat[15]
 *     x' = (x*mat[0] + y*mat[1] + z*mat[2]  + mat[3])  * (1/w)
 *     y' = (x*mat[4] + y*mat[5] + z*mat[6]  + mat[7])  * (1/w)
 *     z' = (x*mat[8] + y*mat[9] + z*mat[10] + mat[11]) * (1/w)
 * Whenever fabs(w) > FLT_EPSILON does not hold, the output point is (0, 0, 0).
 *
 * srcstep and dststep arrive in bytes and are converted to element units;
 * size.width is the number of points per row, size.height the number of rows.
 * All three coordinates of a point are loaded before its result is stored.
 * The generated function always returns CV_OK.
 */
#define ICV_PERSPECTIVE_TRANSFORM_FUNC_3( flavor, arrtype )                             \
static CvStatus CV_STDCALL                                                              \
icvPerspectiveTransform_##flavor##_C3R( const arrtype* src, int srcstep,                \
                                             arrtype* dst, int dststep,                 \
                                             CvSize size, const double* mat )           \
{                                                                                       \
    const int row_len = size.width * 3; /* scalars per row */                           \
    int k;                                                                              \
                                                                                        \
    srcstep /= sizeof(src[0]);                                                          \
    dststep /= sizeof(dst[0]);                                                          \
                                                                                        \
    while( size.height-- )                                                              \
    {                                                                                   \
        for( k = 0; k < row_len; k += 3 )                                               \
        {                                                                               \
            const arrtype x = src[k], y = src[k + 1], z = src[k + 2];                   \
            const double w = x*mat[12] + y*mat[13] + z*mat[14] + mat[15];               \
                                                                                        \
            /* written as a negated '>' so that a NaN denominator */                    \
            /* also ends up in the zero-output branch             */                    \
            if( !(fabs(w) > FLT_EPSILON) )                                              \
            {                                                                           \
                dst[k] = (arrtype)0;                                                    \
                dst[k+1] = (arrtype)0;                                                  \
                dst[k+2] = (arrtype)0;                                                  \
            }                                                                           \
            else                                                                        \
            {                                                                           \
                const double inv_w = 1./w;                                              \
                dst[k] = (arrtype)((x*mat[0] + y*mat[1] + z*mat[2] + mat[3]) * inv_w);  \
                dst[k+1] = (arrtype)((x*mat[4] + y*mat[5] + z*mat[6] + mat[7]) * inv_w);\
                dst[k+2] = (arrtype)((x*mat[8] + y*mat[9] + z*mat[10] + mat[11]) * inv_w);\
            }                                                                           \
        }                                                                               \
                                                                                        \
        src += srcstep;                                                                 \
        dst += dststep;                                                                 \
    }                                                                                   \
                                                                                        \
    return CV_OK;                                                                       \
}
19976acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
19986acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_PERSPECTIVE_TRANSFORM_FUNC_2( 32f, float )
19996acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_PERSPECTIVE_TRANSFORM_FUNC_2( 64f, double )
20006acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_PERSPECTIVE_TRANSFORM_FUNC_3( 32f, float )
20016acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_PERSPECTIVE_TRANSFORM_FUNC_3( 64f, double )
20026acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
20036acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Rennstatic void icvInitPerspectiveTransformTable( CvFuncTable* tab2, CvFuncTable* tab3 )\
20046acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn{                                                                                   \
20056acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tab2->fn_2d[CV_32F] = (void*)icvPerspectiveTransform_32f_C2R;                   \
20066acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tab2->fn_2d[CV_64F] = (void*)icvPerspectiveTransform_64f_C2R;                   \
20076acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tab3->fn_2d[CV_32F] = (void*)icvPerspectiveTransform_32f_C3R;                   \
20086acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tab3->fn_2d[CV_64F] = (void*)icvPerspectiveTransform_64f_C3R;                   \
20096acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn}
20106acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
20116acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
20126acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennCV_IMPL void
20136acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RenncvPerspectiveTransform( const CvArr* srcarr, CvArr* dstarr, const CvMat* mat )
20146acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn{
20156acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    static CvFuncTable tab[2];
20166acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    static int inittab = 0;
20176acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    double buffer[16];
20186acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
20196acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CV_FUNCNAME( "cvPerspectiveProject" );
20206acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
20216acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    __BEGIN__;
20226acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
20236acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvMat sstub, *src = (CvMat*)srcarr;
20246acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvMat dstub, *dst = (CvMat*)dstarr;
20256acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    int i, j, type, cn;
20266acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvFunc2D_2A1P func = 0;
20276acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvSize size;
20286acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
20296acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !inittab )
20306acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
20316acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        icvInitPerspectiveTransformTable( &tab[0], &tab[1] );
20326acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        inittab = 1;
20336acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
20346acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
20356acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !CV_IS_MAT( src ))
20366acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
20376acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        int coi = 0;
20386acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_CALL( src = cvGetMat( src, &sstub, &coi ));
20396acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
20406acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( coi != 0 )
20416acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_ERROR( CV_BadCOI, "" );
20426acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
20436acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
20446acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !CV_IS_MAT( dst ))
20456acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
20466acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        int coi = 0;
20476acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_CALL( dst = cvGetMat( dst, &dstub, &coi ));
20486acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
20496acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( coi != 0 )
20506acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_ERROR( CV_BadCOI, "" );
20516acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
20526acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
20536acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !CV_ARE_TYPES_EQ( src, dst ))
20546acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_ERROR( CV_StsUnmatchedFormats, "" );
20556acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
20566acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !CV_ARE_SIZES_EQ( src, dst ))
20576acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_ERROR( CV_StsUnmatchedSizes, "" );
20586acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
20596acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    type = CV_MAT_TYPE( src->type );
20606acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    cn = CV_MAT_CN( type );
20616acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
20626acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( cn != 2 && cn != 3 )
20636acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_ERROR( CV_BadNumChannels, cvUnsupportedFormat );
20646acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
20656acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !CV_IS_MAT( mat ))
20666acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_ERROR( CV_StsBadArg, "Invalid transformation matrix" );
20676acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
20686acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( mat->rows != cn + 1 && mat->cols != mat->rows )
20696acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_ERROR( CV_StsBadSize,
20706acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        "The size of transform matrix must be equal to number of channels" );
20716acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
20726acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( CV_MAT_TYPE( mat->type ) == CV_64FC1 )
20736acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
20746acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        for( i = 0; i <= cn; i++ )
20756acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
20766acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            for( j = 0; j <= cn; j++ )
20776acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                buffer[i*(cn+1) + j] = ((double*)(mat->data.ptr + mat->step*i))[j];
20786acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
20796acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
20806acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    else if( CV_MAT_TYPE( mat->type ) == CV_32FC1 )
20816acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
20826acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        for( i = 0; i <= cn; i++ )
20836acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
20846acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            for( j = 0; j <= cn; j++ )
20856acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                buffer[i*(cn+1) + j] = ((float*)(mat->data.ptr + mat->step*i))[j];
20866acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
20876acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
20886acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    else
20896acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
20906acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_ERROR( CV_StsUnsupportedFormat, "Rotation matrix must be 32fC1 or 64fC1" );
20916acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
20926acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
20936acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    func = (CvFunc2D_2A1P)tab[cn == 3].fn_2d[CV_MAT_DEPTH(type)];
20946acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
20956acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !func )
20966acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_ERROR( CV_StsUnsupportedFormat, "" );
20976acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
20986acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    size = cvGetMatSize( src );
20996acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
21006acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( CV_IS_MAT_CONT( src->type & dst->type ))
21016acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
21026acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        size.width *= size.height;
21036acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        size.height = 1;
21046acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
21056acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
21066acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    IPPI_CALL( func( src->data.ptr, src->step, dst->data.ptr, dst->step, size, buffer));
21076acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
21086acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CV_CHECK_NANS( dst );
21096acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
21106acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    __END__;
21116acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn}
21126acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
21136acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
21146acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn/****************************************************************************************\
21156acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn*                                       cvScaleAdd                                       *
21166acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn\****************************************************************************************/
21176acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
/*
 * Statement block computing, for every i in [0, len):
 *     dst[i] = (arrtype)(temptype)( src1[i]*s0 + src2[i] )
 *
 * `s0` is not a macro parameter: it is picked up from the scope in which
 * the macro is expanded.  The bulk of the range is processed four elements
 * per pass (as two pairs); the remaining elements are handled one by one.
 * The expansion is a complete `{ ... }` block and needs no trailing semicolon.
 */
#define  ICV_DEF_MULADDC_CASE_C1( arrtype, temptype, src1, src2, dst, len )     \
{                                                                               \
    int i = 0;                                                                  \
                                                                                \
    while( i <= (len) - 4 )                                                     \
    {                                                                           \
        /* first pair */                                                        \
        temptype t0 = (src1)[i]*s0 + (src2)[i];                                 \
        temptype t1 = (src1)[i+1]*s0 + (src2)[i+1];                             \
        (dst)[i] = (arrtype)t0;                                                 \
        (dst)[i+1] = (arrtype)t1;                                               \
                                                                                \
        /* second pair */                                                       \
        t0 = (src1)[i+2]*s0 + (src2)[i+2];                                      \
        t1 = (src1)[i+3]*s0 + (src2)[i+3];                                      \
        (dst)[i+2] = (arrtype)t0;                                               \
        (dst)[i+3] = (arrtype)t1;                                               \
                                                                                \
        i += 4;                                                                 \
    }                                                                           \
                                                                                \
    /* tail: fewer than four elements left */                                   \
    while( i < (len) )                                                          \
    {                                                                           \
        temptype t0 = (src1)[i]*s0 + (src2)[i];                                 \
        (dst)[i] = (arrtype)t0;                                                 \
        i++;                                                                    \
    }                                                                           \
}
21436acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
21446acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
/* Complex multiply-add over one row of 2-channel data.
 *
 * (src1), (src2) and (dst) are indexed as interleaved (re, im) pairs and
 * (len) counts scalars, not pairs.  For every pair the macro evaluates
 *
 *     dst = src1 * (s0 + i*s1) + src2
 *
 * in temptype arithmetic and stores the result converted to arrtype.
 * s0 and s1 are not macro arguments: they must already be declared in the
 * scope where the macro is expanded.  Every step touches element k+1, so
 * (len) is expected to be even.
 */
#define  ICV_DEF_MULADDC_CASE_C2( arrtype, temptype, src1, src2, dst, len )     \
{                                                                               \
    int k = 0;                                                                  \
                                                                                \
    /* unrolled part: two complex values (four scalars) per pass */             \
    while( k <= (len) - 4 )                                                     \
    {                                                                           \
        temptype re = (src1)[k]*s0 - (src1)[k+1]*s1 + (src2)[k];                \
        temptype im = (src1)[k]*s1 + (src1)[k+1]*s0 + (src2)[k+1];              \
                                                                                \
        (dst)[k] = (arrtype)re;                                                 \
        (dst)[k+1] = (arrtype)im;                                               \
                                                                                \
        re = (src1)[k+2]*s0 - (src1)[k+3]*s1 + (src2)[k+2];                     \
        im = (src1)[k+2]*s1 + (src1)[k+3]*s0 + (src2)[k+3];                     \
                                                                                \
        (dst)[k+2] = (arrtype)re;                                               \
        (dst)[k+3] = (arrtype)im;                                               \
                                                                                \
        k += 4;                                                                 \
    }                                                                           \
                                                                                \
    /* whatever is left, one complex value at a time */                         \
    while( k < (len) )                                                          \
    {                                                                           \
        temptype re = (src1)[k]*s0 - (src1)[k+1]*s1 + (src2)[k];                \
        temptype im = (src1)[k]*s1 + (src1)[k+1]*s0 + (src2)[k+1];              \
                                                                                \
        (dst)[k] = (arrtype)re;                                                 \
        (dst)[k+1] = (arrtype)im;                                               \
                                                                                \
        k += 2;                                                                 \
    }                                                                           \
}
21736acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
21746acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
/* Defines the static function icvMulAddC_<flavor>(), which walks a 2D array
 * row by row and expands ICV_DEF_MULADDC_CASE_C<cn> on every row.
 *
 *   flavor      suffix of the generated function name
 *   arrtype     element type of src1, src2 and dst
 *   scalartype  element type of scalar[] and of the intermediate results
 *   entry       macro expanded as entry(scalartype) at the top of the body;
 *               presumably it declares s0 (and s1) from scalar[] for the row
 *               macros -- its definition is not in this part of the file
 *   cn          channel count: selects the row macro and scales size.width
 *
 * srcstep1, srcstep2 and dststep arrive in bytes and are converted to element
 * units before use.  The generated function always returns CV_OK.
 */
#define  ICV_DEF_MULADDS_FUNC( flavor, arrtype, scalartype, entry, cn )     \
static CvStatus CV_STDCALL                                                  \
icvMulAddC_##flavor( const arrtype* src1, int srcstep1,                     \
                      const arrtype* src2, int srcstep2,                    \
                      arrtype* dst, int dststep, CvSize size,               \
                      const scalartype* scalar )                            \
{                                                                           \
    entry(scalartype);                                                      \
                                                                            \
    /* a row holds width*cn scalars */                                      \
    size.width *= (cn);                                                     \
                                                                            \
    /* byte steps -> element steps */                                       \
    srcstep1 /= sizeof(*src1);                                              \
    srcstep2 /= sizeof(*src2);                                              \
    dststep /= sizeof(*dst);                                                \
                                                                            \
    while( size.height-- )                                                  \
    {                                                                       \
        ICV_DEF_MULADDC_CASE_C##cn( arrtype, scalartype, src1, src2,        \
                                    dst, size.width )                       \
        src1 += srcstep1;                                                   \
        src2 += srcstep2;                                                   \
        dst += dststep;                                                     \
    }                                                                       \
                                                                            \
    return CV_OK;                                                           \
}
21956acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
21966acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
21976acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_MULADDS_FUNC( 32f_C1R, float, double, CV_UN_ENTRY_C1, 1 )
21986acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_MULADDS_FUNC( 32f_C2R, float, double, CV_UN_ENTRY_C2, 2 )
21996acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_MULADDS_FUNC( 64f_C1R, double, double, CV_UN_ENTRY_C1, 1 )
22006acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_MULADDS_FUNC( 64f_C2R, double, double, CV_UN_ENTRY_C2, 2 )
22016acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
22026acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
22036acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Rennstatic void
22046acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennicvInitMulAddCTable( CvBigFuncTable* tab )
22056acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn{
22066acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tab->fn_2d[CV_32FC1] = (void*)icvMulAddC_32f_C1R;
22076acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tab->fn_2d[CV_32FC2] = (void*)icvMulAddC_32f_C2R;
22086acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tab->fn_2d[CV_64FC1] = (void*)icvMulAddC_64f_C1R;
22096acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tab->fn_2d[CV_64FC2] = (void*)icvMulAddC_64f_C2R;
22106acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn}
22116acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
22126acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
22136acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennCV_IMPL void
22146acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RenncvScaleAdd( const CvArr* srcarr1, CvScalar scale,
22156acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            const CvArr* srcarr2, CvArr* dstarr )
22166acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn{
22176acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    static CvBigFuncTable muladds_tab;
22186acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    static int inittab = 0;
22196acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
22206acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CV_FUNCNAME( "cvScaleAdd" );
22216acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
22226acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    __BEGIN__;
22236acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
22246acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvMat stub1, *src1 = (CvMat*)srcarr1;
22256acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvMat stub2, *src2 = (CvMat*)srcarr2;
22266acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvMat stub, *dst = (CvMat*)dstarr;
22276acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvSize size;
22286acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    int type;
22296acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
22306acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !CV_IS_MAT( src1 ) || !CV_IS_MAT(src2) || !CV_IS_MAT(dst))
22316acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
22326acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        int coi1 = 0, coi2 = 0, coi3 = 0;
22336acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_CALL( src1 = cvGetMat( src1, &stub1, &coi1 ));
22346acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_CALL( src2 = cvGetMat( src2, &stub2, &coi2 ));
22356acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_CALL( dst = cvGetMat( dst, &stub, &coi3 ));
22366acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
22376acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( coi1 + coi2 + coi3 != 0 )
22386acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_ERROR( CV_BadCOI, "" );
22396acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
22406acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
22416acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !CV_ARE_TYPES_EQ( src1, dst ) || !CV_ARE_TYPES_EQ( src2, dst ))
22426acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_ERROR( CV_StsUnmatchedFormats, "" );
22436acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
22446acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !CV_ARE_SIZES_EQ( src1, dst ) || !CV_ARE_SIZES_EQ( src2, dst ))
22456acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_ERROR( CV_StsUnmatchedSizes, "" );
22466acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
22476acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    type = CV_MAT_TYPE( src1->type );
22486acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    size = cvGetMatSize( src1 );
22496acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
22506acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( CV_IS_MAT_CONT( src1->type & src2->type & dst->type ))
22516acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
22526acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        size.width *= size.height;
22536acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
22546acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( size.width <= CV_MAX_INLINE_MAT_OP_SIZE )
22556acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
22566acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( type == CV_32FC1 )
22576acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {
22586acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                float* mA = src1->data.fl;
22596acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                float* mB = src2->data.fl;
22606acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                float* mC = dst->data.fl;
22616acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
22626acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                do
22636acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {
22646acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    mC[size.width - 1] = (float)(mA[size.width - 1]*scale.val[0] +
22656acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                         mB[size.width - 1]);
22666acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }
22676acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                while( --size.width );
22686acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
22696acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                EXIT;
22706acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }
22716acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
22726acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( type == CV_64FC1 )
22736acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {
22746acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                double* mA = src1->data.db;
22756acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                double* mB = src2->data.db;
22766acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                double* mC = dst->data.db;
22776acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
22786acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                do
22796acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {
22806acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    mC[size.width - 1] = mA[size.width - 1]*scale.val[0] +
22816acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                         mB[size.width - 1];
22826acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }
22836acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                while( --size.width );
22846acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
22856acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                EXIT;
22866acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }
22876acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
22886acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
22896acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        size.height = 1;
22906acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
22916acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
22926acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !inittab )
22936acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
22946acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        icvInitMulAddCTable( &muladds_tab );
22956acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        inittab = 1;
22966acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
22976acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
22986acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( CV_MAT_CN(type) > 2 )
22996acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_ERROR( CV_StsOutOfRange, "The function only supports 1- and 2-channel arrays" );
23006acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
23016acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
23026acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CvFunc2D_3A1P func = (CvFunc2D_3A1P)(muladds_tab.fn_2d[type]);
23036acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
23046acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( !func )
23056acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_ERROR( CV_StsUnsupportedFormat, "" );
23066acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
23076acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        IPPI_CALL( func( src1->data.ptr, src1->step, src2->data.ptr, src2->step,
23086acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                         dst->data.ptr, dst->step, size, scale.val ));
23096acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
23106acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
23116acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CV_CHECK_NANS( dst );
23126acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
23136acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    __END__;
23146acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn}
23156acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
23166acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
23176acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn/****************************************************************************************\
23186acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn*                                    cvCalcCovarMatrix                                   *
23196acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn\****************************************************************************************/
23206acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
/* Defines icvDotProductShifted_<flavor>_C1R(): the sum over all elements of
 * (load(vec1) - avg) * (load(vec2) - avg), accumulated in a double and
 * written to *_result.
 *
 *   flavor      part of the generated function name
 *   srctype     element type of vec1 and vec2
 *   avgtype     element type of avg
 *   load_macro  applied to every vec1/vec2 element before avg is subtracted
 *
 * vecstep1, vecstep2 and avgstep arrive in bytes and are converted to element
 * units.  The generated function always returns CV_OK.
 */
#define ICV_DOT_PRODUCT_CASE( flavor, srctype, avgtype, load_macro )                    \
static CvStatus CV_STDCALL                                                              \
icvDotProductShifted_##flavor##_C1R( const srctype* vec1, int vecstep1,                 \
                                     const srctype* vec2, int vecstep2,                 \
                                     const avgtype* avg, int avgstep,                   \
                                     CvSize size, double* _result )                     \
{                                                                                       \
    double acc = 0;                                                                     \
                                                                                        \
    /* byte steps -> element steps */                                                   \
    vecstep1 /= sizeof(*vec1);                                                          \
    vecstep2 /= sizeof(*vec2);                                                          \
    avgstep /= sizeof(*avg);                                                            \
                                                                                        \
    while( size.height-- )                                                              \
    {                                                                                   \
        int col = 0;                                                                    \
                                                                                        \
        /* four products are summed first, then added to the accumulator */             \
        while( col <= size.width - 4 )                                                  \
        {                                                                               \
            acc += (load_macro(vec1[col]) - avg[col])*                                  \
                       (load_macro(vec2[col]) - avg[col]) +                             \
                   (load_macro(vec1[col+1]) - avg[col+1])*                              \
                       (load_macro(vec2[col+1]) - avg[col+1]) +                         \
                   (load_macro(vec1[col+2]) - avg[col+2])*                              \
                       (load_macro(vec2[col+2]) - avg[col+2]) +                         \
                   (load_macro(vec1[col+3]) - avg[col+3])*                              \
                       (load_macro(vec2[col+3]) - avg[col+3]);                          \
            col += 4;                                                                   \
        }                                                                               \
                                                                                        \
        while( col < size.width )                                                       \
        {                                                                               \
            acc += (load_macro(vec1[col]) - avg[col])*                                  \
                       (load_macro(vec2[col]) - avg[col]);                              \
            col++;                                                                      \
        }                                                                               \
                                                                                        \
        vec1 += vecstep1;                                                               \
        vec2 += vecstep2;                                                               \
        avg += avgstep;                                                                 \
    }                                                                                   \
                                                                                        \
    *_result = acc;                                                                     \
    return CV_OK;                                                                       \
}
23466acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
23476acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
23486acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DOT_PRODUCT_CASE( 8u32f, uchar, float, CV_8TO32F )
23496acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DOT_PRODUCT_CASE( 8u64f, uchar, double, CV_8TO32F )
23506acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DOT_PRODUCT_CASE( 16u32f, ushort, float, CV_NOP )
23516acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DOT_PRODUCT_CASE( 16u64f, ushort, double, CV_NOP )
23526acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DOT_PRODUCT_CASE( 16s32f, short, float, CV_NOP )
23536acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DOT_PRODUCT_CASE( 16s64f, short, double, CV_NOP )
23546acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DOT_PRODUCT_CASE( 32f, float, float, CV_NOP )
23556acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DOT_PRODUCT_CASE( 32f64f, float, double, CV_NOP )
23566acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DOT_PRODUCT_CASE( 64f, double, double, CV_NOP )
23576acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
23586acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Rennstatic void  icvInitDotProductShiftedTable( CvFuncTable* tabfl, CvFuncTable* tabdb )
23596acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn{
23606acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tabfl->fn_2d[CV_8U] = (void*)icvDotProductShifted_8u32f_C1R;
23616acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tabfl->fn_2d[CV_8S] = 0;
23626acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tabfl->fn_2d[CV_16U] = (void*)icvDotProductShifted_16u32f_C1R;
23636acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tabfl->fn_2d[CV_16S] = (void*)icvDotProductShifted_16s32f_C1R;
23646acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tabfl->fn_2d[CV_32S] = 0;
23656acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tabfl->fn_2d[CV_32F] = (void*)icvDotProductShifted_32f_C1R;
23666acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tabfl->fn_2d[CV_64F] = 0;
23676acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
23686acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tabdb->fn_2d[CV_8U] = (void*)icvDotProductShifted_8u64f_C1R;
23696acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tabdb->fn_2d[CV_8S] = 0;
23706acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tabdb->fn_2d[CV_16U] = (void*)icvDotProductShifted_16u64f_C1R;
23716acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tabdb->fn_2d[CV_16S] = (void*)icvDotProductShifted_16s64f_C1R;
23726acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tabdb->fn_2d[CV_32S] = 0;
23736acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tabdb->fn_2d[CV_32F] = (void*)icvDotProductShifted_32f64f_C1R;
23746acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tabdb->fn_2d[CV_64F] = (void*)icvDotProductShifted_64f_C1R;
23756acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn}
23766acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
/* Defines icvExtProductShifted_<flavor>_C1R().
 *
 * Pass 1 stores d = load(vec) - avg for every element into tempbuf, rows
 * packed back to back (size.width*size.height values).
 * Pass 2 adds d[row]*d[col] to dst[row][col] for every col <= row, i.e. it
 * accumulates into the lower-left triangle of dst; dst is updated, not
 * initialised, by this function.
 *
 *   flavor      part of the generated function name
 *   srctype     element type of vec
 *   avgtype     element type of avg, dst and tempbuf
 *   load_macro  applied to every vec element before avg is subtracted
 *
 * vecstep, avgstep and dststep arrive in bytes and are converted to element
 * units.  The generated function always returns CV_OK.
 */
#define ICV_EXT_PRODUCT_CASE( flavor, srctype, avgtype, load_macro )                    \
static CvStatus CV_STDCALL                                                              \
icvExtProductShifted_##flavor##_C1R( const srctype* vec, int vecstep,                   \
                                     const avgtype* avg, int avgstep,                   \
                                     avgtype* dst, int dststep,                         \
                                     CvSize size, avgtype* tempbuf )                    \
{                                                                                       \
    int row, col, total = size.width * size.height;                                     \
                                                                                        \
    /* pass 1: shifted copy of the input into tempbuf */                                \
    vecstep /= sizeof(*vec);                                                            \
    avgstep /= sizeof(*avg);                                                            \
    for( row = 0; row < size.height; row++ )                                            \
    {                                                                                   \
        for( col = 0; col < size.width; col++ )                                         \
            *tempbuf++ = load_macro(vec[col]) - avg[col];                               \
        vec += vecstep;                                                                 \
        avg += avgstep;                                                                 \
    }                                                                                   \
    tempbuf -= total;   /* step back over the values just written */                    \
                                                                                        \
    /* pass 2: accumulate the outer product, columns 0..row of each row */              \
    dststep /= sizeof(*dst);                                                            \
    for( row = 0; row < total; row++ )                                                  \
    {                                                                                   \
        double w = tempbuf[row];                                                        \
                                                                                        \
        col = 0;                                                                        \
        while( col <= row - 3 )                                                         \
        {                                                                               \
            double u0 = dst[col] + w*tempbuf[col];                                      \
            double u1 = dst[col+1] + w*tempbuf[col+1];                                  \
            dst[col] = (avgtype)u0;                                                     \
            dst[col+1] = (avgtype)u1;                                                   \
            u0 = dst[col+2] + w*tempbuf[col+2];                                         \
            u1 = dst[col+3] + w*tempbuf[col+3];                                         \
            dst[col+2] = (avgtype)u0;                                                   \
            dst[col+3] = (avgtype)u1;                                                   \
            col += 4;                                                                   \
        }                                                                               \
        while( col <= row )                                                             \
        {                                                                               \
            dst[col] = (avgtype)(dst[col] + w*tempbuf[col]);                            \
            col++;                                                                      \
        }                                                                               \
                                                                                        \
        dst += dststep;                                                                 \
    }                                                                                   \
                                                                                        \
    return CV_OK;                                                                       \
}
24136acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
24146acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_EXT_PRODUCT_CASE( 8u32f, uchar, float, CV_8TO32F )
24156acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_EXT_PRODUCT_CASE( 8u64f, uchar, double, CV_8TO32F )
24166acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_EXT_PRODUCT_CASE( 16u32f, ushort, float, CV_NOP )
24176acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_EXT_PRODUCT_CASE( 16u64f, ushort, double, CV_NOP )
24186acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_EXT_PRODUCT_CASE( 16s32f, short, float, CV_NOP )
24196acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_EXT_PRODUCT_CASE( 16s64f, short, double, CV_NOP )
24206acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_EXT_PRODUCT_CASE( 32f, float, float, CV_NOP )
24216acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_EXT_PRODUCT_CASE( 32f64f, float, double, CV_NOP )
24226acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_EXT_PRODUCT_CASE( 64f, double, double, CV_NOP )
24236acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
24246acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
24256acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Rennstatic void  icvInitExtProductShiftedTable( CvFuncTable* tabfl, CvFuncTable* tabdb )
24266acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn{
24276acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tabfl->fn_2d[CV_8U] = (void*)icvExtProductShifted_8u32f_C1R;
24286acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tabfl->fn_2d[CV_8S] = 0;
24296acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tabfl->fn_2d[CV_16U] = (void*)icvExtProductShifted_16u32f_C1R;
24306acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tabfl->fn_2d[CV_16S] = (void*)icvExtProductShifted_16s32f_C1R;
24316acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tabfl->fn_2d[CV_32S] = 0;
24326acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tabfl->fn_2d[CV_32F] = (void*)icvExtProductShifted_32f_C1R;
24336acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tabfl->fn_2d[CV_64F] = 0;
24346acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
24356acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tabdb->fn_2d[CV_8U] = (void*)icvExtProductShifted_8u64f_C1R;
24366acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tabdb->fn_2d[CV_8S] = 0;
24376acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tabdb->fn_2d[CV_16U] = (void*)icvExtProductShifted_16u64f_C1R;
24386acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tabdb->fn_2d[CV_16S] = (void*)icvExtProductShifted_16s64f_C1R;
24396acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tabdb->fn_2d[CV_32S] = 0;
24406acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tabdb->fn_2d[CV_32F] = (void*)icvExtProductShifted_32f64f_C1R;
24416acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tabdb->fn_2d[CV_64F] = (void*)icvExtProductShifted_64f_C1R;
24426acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn}
24436acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
24446acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
/* A pointer paired with an integer step.
   NOTE(review): presumably the data pointer and row step of one input
   vector of cvCalcCovarMatrix -- the code that fills it in lies outside
   this part of the file; confirm. */
typedef struct vec_data
{
    void* ptr;
    int step;
} vec_data;
24516acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
24526acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennCV_IMPL void
24536acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RenncvCalcCovarMatrix( const CvArr** vecarr, int count,
24546acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                   CvArr* covarr, CvArr* avgarr, int flags )
24556acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn{
24566acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    static CvFuncTable dot_tab[2];
24576acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    static CvFuncTable ext_tab[2];
24586acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    static int inittab = 0;
24596acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    vec_data* vecdata = 0;
24606acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvMat *tempvec = 0;
24616acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
24626acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CV_FUNCNAME( "cvCalcCovarMatrix" );
24636acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
24646acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    __BEGIN__;
24656acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
24666acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvMat covstub, *cov = (CvMat*)covarr;
24676acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvMat avgstub, *avg = (CvMat*)avgarr;
24686acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvMat vecstub0, *vecmat = 0;
24696acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvSize srcsize, contsize;
24706acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    int srctype = 0, dsttype = 0;
24716acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    int i, j;
24726acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    int cont_flag, vec_delta = 0, vec_step = 0;
24736acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    int is_covar_normal = (flags & CV_COVAR_NORMAL) != 0;
24746acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    double scale;
24756acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
24766acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !inittab )
24776acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
24786acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        icvInitDotProductShiftedTable( dot_tab + 0, dot_tab + 1 );
24796acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        icvInitExtProductShiftedTable( ext_tab + 0, ext_tab + 1 );
24806acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        inittab = 1;
24816acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
24826acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
24836acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !vecarr )
24846acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_ERROR( CV_StsNullPtr, "NULL vec pointer" );
24856acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
24866acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CV_CALL( cov = cvGetMat( cov, &covstub ));
24876acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CV_CALL( avg = cvGetMat( avg, &avgstub ));
24886acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
24896acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !CV_ARE_TYPES_EQ( cov, avg ))
24906acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_ERROR( CV_StsUnmatchedFormats,
24916acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        "Covariation matrix and average vector should have the same types" );
24926acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
24936acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    dsttype = CV_MAT_TYPE( cov->type );
24946acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( dsttype != CV_32FC1 && dsttype != CV_64FC1 )
24956acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_ERROR( CV_StsUnsupportedFormat, "Covariation matrix must be 32fC1 or 64fC1" );
24966acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
24976acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( cov->rows != cov->cols )
24986acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_ERROR( CV_StsBadSize, "Covariation matrix must be square" );
24996acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
25006acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    srcsize = cvGetMatSize( avg );
25016acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    contsize.width = srcsize.width * srcsize.height;
25026acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    contsize.height = 1;
25036acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    cont_flag = avg->type;
25046acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
25056acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( flags & (CV_COVAR_ROWS|CV_COVAR_COLS) )
25066acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
25076acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_CALL( vecmat = cvGetMat( vecarr[0], &vecstub0 ));
25086acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        srctype = CV_MAT_TYPE(vecmat->type);
25096acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( flags & CV_COVAR_COLS )
25106acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
25116acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            count = vecmat->cols;
25126acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( avg->cols != 1 || avg->rows != vecmat->rows )
25136acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                CV_ERROR( CV_StsUnmatchedSizes,
25146acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                "The number of input vectors does not match to avg vector size" );
25156acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            cont_flag = 0;
25166acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            vec_delta = CV_ELEM_SIZE(vecmat->type);
25176acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            vec_step = vecmat->step;
25186acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
25196acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        else
25206acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
25216acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            count = vecmat->rows;
25226acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( avg->rows != 1 || avg->cols != vecmat->cols )
25236acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                CV_ERROR( CV_StsUnmatchedSizes,
25246acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                "The number of input vectors does not match to avg vector size" );
25256acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            vec_delta = vecmat->step;
25266acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            vec_step = CV_STUB_STEP;
25276acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
25286acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
25296acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( !(flags & CV_COVAR_USE_AVG) )
25306acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_CALL( cvReduce( vecmat, avg, -1, CV_REDUCE_AVG ));
25316acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
25326acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        scale = !(flags & CV_COVAR_SCALE) ? 1. : 1./count;
25336acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
25346acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        cvMulTransposed( vecmat, cov, ((flags & CV_COVAR_ROWS)!=0) ^ ((flags & CV_COVAR_NORMAL)==0), avg, scale );
25356acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        EXIT;
25366acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
25376acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
25386acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    scale = !(flags & CV_COVAR_SCALE) ? 1. : 1./count;
25396acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
25406acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( is_covar_normal )
25416acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
25426acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( count <= 0 )
25436acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_ERROR( CV_StsBadSize,
25446acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            "The number of vectors is zero or negative" );
25456acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( cov->rows != contsize.width )
25466acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_ERROR( CV_StsUnmatchedSizes,
25476acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            "The size of input vectors does not match with the size of covariation matrix" );
25486acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
25496acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_CALL( tempvec = cvCreateMat( avg->rows, avg->cols, dsttype ));
25506acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
25516acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    else if( count != cov->rows )
25526acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_ERROR( CV_StsUnmatchedSizes,
25536acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        "The vector count and covariance matrix size do not match" );
25546acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
25556acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !(flags & (CV_COVAR_ROWS|CV_COVAR_COLS)) )
25566acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
25576acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( !(flags & CV_COVAR_USE_AVG) )
25586acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            cvZero( avg );
25596acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
25606acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_CALL( vecdata = (vec_data*)cvAlloc( count*sizeof(vecdata[0])));
25616acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
25626acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        for( i = 0; i < count; i++ )
25636acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
25646acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CvMat vecstub, *vec = (CvMat*)vecarr[i];
25656acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CvMat* temp;
25666acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
25676acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( !CV_IS_MAT(vec) )
25686acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                CV_CALL( vec = cvGetMat( vec, &vecstub ));
25696acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
25706acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( !CV_ARE_SIZES_EQ( vec, avg ))
25716acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                CV_ERROR( CV_StsUnmatchedSizes,
25726acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                "All input vectors and average vector must have the same size" );
25736acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
25746acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            vecdata[i].ptr = vec->data.ptr;
25756acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            vecdata[i].step = vec->step;
25766acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            cont_flag &= vec->type;
25776acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            temp = vec;
25786acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( i == 0 )
25796acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {
25806acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                srctype = CV_MAT_TYPE( vec->type );
25816acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                if( CV_MAT_CN( srctype ) != 1 )
25826acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    CV_ERROR( CV_BadNumChannels, "All vectors must have a single channel" );
25836acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                if( srctype != dsttype && !tempvec && !(flags & CV_COVAR_USE_AVG))
25846acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    CV_CALL( tempvec = cvCreateMat( vec->rows, vec->cols, dsttype ));
25856acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }
25866acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            else if( CV_MAT_TYPE(vec->type) != srctype )
25876acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                CV_ERROR( CV_StsUnmatchedFormats,
25886acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                "All input vectors must have the same type" );
25896acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
25906acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( !(flags & CV_COVAR_USE_AVG) )
25916acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {
25926acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                if( tempvec )
25936acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {
25946acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    temp = tempvec;
25956acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    cvConvert( vec, temp );
25966acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }
25976acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                cvAdd( temp, avg, avg );
25986acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }
25996acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
26006acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
26016acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( !(flags & CV_COVAR_USE_AVG) )
26026acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            cvScale( avg, avg, 1./count );
26036acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
26046acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
26056acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    cont_flag = CV_IS_MAT_CONT( cont_flag );
26066acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( cont_flag )
26076acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        srcsize = contsize;
26086acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
26096acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !is_covar_normal )
26106acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
26116acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CvFunc2D_3A1P dot_func =
26126acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            (CvFunc2D_3A1P)dot_tab[dsttype == CV_64FC1].fn_2d[CV_MAT_DEPTH(srctype)];
26136acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
26146acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( !dot_func )
26156acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_ERROR( CV_StsUnsupportedFormat,
26166acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            "The format of input vectors is not supported" );
26176acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
26186acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        for( i = 0; i < count; i++ )
26196acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
26206acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            int a, b, delta;
26216acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( !(i & 1) )
26226acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                a = 0, b = i+1, delta = 1;
26236acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            else
26246acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                a = i, b = -1, delta = -1;
26256acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
26266acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            for( j = a; j != b; j += delta )
26276acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {
26286acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                double result = 0;
26296acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                void *v_i, *v_j;
26306acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                int step_i, step_j;
26316acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
26326acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                if( !vecmat )
26336acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {
26346acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    v_i = vecdata[i].ptr;
26356acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    v_j = vecdata[j].ptr;
26366acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    step_i = vecdata[i].step;
26376acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    step_j = vecdata[j].step;
26386acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }
26396acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                else
26406acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {
26416acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    v_i = vecmat->data.ptr + vec_delta*i;
26426acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    v_j = vecmat->data.ptr + vec_delta*j;
26436acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    step_i = step_j = vec_step;
26446acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }
26456acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
26466acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                dot_func( v_i, step_i, v_j, step_j, avg->data.ptr, avg->step, srcsize, &result );
26476acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
26486acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                if( dsttype == CV_64FC1 )
26496acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {
26506acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    ((double*)(cov->data.ptr + i*cov->step))[j] =
26516acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    ((double*)(cov->data.ptr + j*cov->step))[i] = result*scale;
26526acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }
26536acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                else
26546acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {
26556acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    ((float*)(cov->data.ptr + i*cov->step))[j] =
26566acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    ((float*)(cov->data.ptr + j*cov->step))[i] = (float)(result*scale);
26576acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }
26586acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }
26596acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
26606acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
26616acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    else
26626acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
26636acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        uchar* cov_ptr = cov->data.ptr;
26646acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        int cov_step = cov->step;
26656acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        int cov_size = cov->rows;
26666acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CvFunc2D_3A1P ext_func =
26676acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            (CvFunc2D_3A1P)ext_tab[dsttype == CV_64FC1].fn_2d[CV_MAT_DEPTH(srctype)];
26686acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( !ext_func )
26696acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_ERROR( CV_StsUnsupportedFormat,
26706acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            "The format of input vectors is not supported" );
26716acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
26726acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        cvZero( cov );
26736acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
26746acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        for( i = 0; i < count; i++ )
26756acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
26766acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            void* v;
26776acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            int vstep;
26786acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( !vecmat )
26796acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {
26806acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                v = vecdata[i].ptr;
26816acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                vstep = vecdata[i].step;
26826acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }
26836acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            else
26846acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {
26856acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                v = vecmat->data.ptr + vec_delta*i;
26866acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                vstep = vec_step;
26876acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }
26886acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
26896acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            ext_func( v, vstep, avg->data.ptr, avg->step,
26906acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                      cov_ptr, cov_step, srcsize, tempvec->data.ptr );
26916acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
26926acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
26936acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( dsttype == CV_64FC1 )
26946acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            for( i = 0; i < cov_size; i++ )
26956acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                for( j = 0; j <= i; j++ )
26966acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {
26976acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    double* cov1 = ((double*)(cov_ptr + i*cov_step)) + j;
26986acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    double* cov2 = ((double*)(cov_ptr + j*cov_step)) + i;
26996acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
27006acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    if( flags & CV_COVAR_SCALE )
27016acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        *cov1 = *cov2 = *cov1*scale;
27026acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    else
27036acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        *cov2 = *cov1;
27046acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }
27056acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        else
27066acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            for( i = 0; i < cov_size; i++ )
27076acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                for( j = 0; j <= i; j++ )
27086acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {
27096acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    float* cov1 = ((float*)(cov_ptr + i*cov_step)) + j;
27106acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    float* cov2 = ((float*)(cov_ptr + j*cov_step)) + i;
27116acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
27126acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    if( flags & CV_COVAR_SCALE )
27136acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        *cov1 = *cov2 = (float)(*cov1*scale);
27146acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    else
27156acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        *cov2 = *cov1;
27166acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }
27176acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
27186acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
27196acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    __END__;
27206acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
27216acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    cvFree( &vecdata );
27226acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    cvReleaseMat( &tempvec );
27236acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn}
27246acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
27256acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn/****************************************************************************************\
27266acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn*                                        cvMahalanobis                                   *
27276acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn\****************************************************************************************/
27286acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
27296acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn#define ICV_MAHALANOBIS( flavor, arrtype )                                              \
27306acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Rennstatic CvStatus CV_STDCALL                                                              \
27316acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennicvMahalanobis_##flavor##_C1R( const arrtype* mat, int matstep,                         \
27326acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                               const arrtype* vec, int len, double* _result )           \
27336acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn{                                                                                       \
27346acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    int i, j;                                                                           \
27356acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    double result = 0;                                                                  \
27366acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                                        \
27376acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    matstep /= sizeof(mat[0]);                                                          \
27386acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    for( i = 0; i < len; i++, mat += matstep )                                          \
27396acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {                                                                                   \
27406acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        double row_sum = 0;                                                             \
27416acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        for( j = 0; j <= len - 4; j += 4 )                                              \
27426acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            row_sum += vec[j]*mat[j] + vec[j+1]*mat[j+1] +                              \
27436acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                       vec[j+2]*mat[j+2] + vec[j+3]*mat[j+3];                           \
27446acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        for( ; j < len; j++ )                                                           \
27456acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            row_sum += vec[j]*mat[j];                                                   \
27466acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        result += row_sum * vec[i];                                                     \
27476acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }                                                                                   \
27486acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    *_result = result;                                                                  \
27496acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                                        \
27506acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    return CV_OK;                                                                       \
27516acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn}
27526acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
27536acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_MAHALANOBIS( 32f, float )
27546acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_MAHALANOBIS( 64f, double )
27556acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
27566acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Rennstatic void  icvInitMahalanobisTable( CvFuncTable* tab )
27576acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn{
27586acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tab->fn_2d[CV_32F] = (void*)icvMahalanobis_32f_C1R;
27596acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    tab->fn_2d[CV_64F] = (void*)icvMahalanobis_64f_C1R;
27606acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn}
27616acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
27626acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renntypedef CvStatus (CV_STDCALL * CvMahalanobisFunc)( const void* mat, int matstep,
27636acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                   const void* vec, int len, double* _result );
27646acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
27656acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennCV_IMPL double
27666acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RenncvMahalanobis( const CvArr* srcAarr, const CvArr* srcBarr, CvArr* matarr )
27676acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn{
27686acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    static CvFuncTable mahal_tab;
27696acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    static int inittab = 0;
27706acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    uchar* buffer = 0;
27716acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    int local_alloc = 0;
27726acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    double dist = 0;
27736acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
27746acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CV_FUNCNAME( "cvMahalanobis" );
27756acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
27766acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    __BEGIN__;
27776acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
27786acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    int buf_size, elem_size, len;
27796acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvMat stubA, *srcA = (CvMat*)srcAarr;
27806acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvMat stubB, *srcB = (CvMat*)srcBarr;
27816acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvMat stub, *mat = (CvMat*)matarr;
27826acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvMat temp;
27836acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvMahalanobisFunc func;
27846acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
27856acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !inittab )
27866acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
27876acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        icvInitMahalanobisTable( &mahal_tab );
27886acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        inittab = 1;
27896acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
27906acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
27916acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !CV_IS_MAT(srcA) )
27926acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_CALL( srcA = cvGetMat( srcA, &stubA ));
27936acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
27946acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !CV_IS_MAT(srcB) )
27956acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_CALL( srcB = cvGetMat( srcB, &stubB ));
27966acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
27976acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !CV_IS_MAT(mat) )
27986acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_CALL( mat = cvGetMat( mat, &stub ));
27996acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
28006acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( srcA->rows != 1 && srcA->cols != 1 )
28016acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_ERROR( CV_StsBadSize, "Input matrices must be 1-d vectors" );
28026acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
28036acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    len = srcA->rows + srcA->cols - 1;
28046acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
28056acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !CV_ARE_SIZES_EQ(srcA,srcB) )
28066acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_ERROR( CV_StsUnmatchedSizes, "Input vectors have different sizes" );
28076acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
28086acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( mat->rows != len || mat->cols != len )
28096acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_ERROR( CV_StsUnmatchedSizes, "Input vectors and covariation matrix have different sizes" );
28106acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
28116acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    func = (CvMahalanobisFunc)mahal_tab.fn_2d[CV_MAT_DEPTH(srcA->type)];
28126acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
28136acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( CV_MAT_CN(srcA->type) > 1 || !func )
28146acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_ERROR( CV_StsUnsupportedFormat,
28156acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        "Only single-channel floating-point vectors are supported" );
28166acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
28176acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !CV_ARE_TYPES_EQ(srcA,srcB) || !CV_ARE_TYPES_EQ(srcA,mat) )
28186acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_ERROR( CV_StsUnmatchedSizes, "Input vectors have different sizes" );
28196acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
28206acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    elem_size = CV_ELEM_SIZE(srcA->type);
28216acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    buf_size = len*elem_size;
28226acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
28236acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( buf_size <= CV_MAX_LOCAL_SIZE )
28246acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
28256acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        buffer = (uchar*)cvStackAlloc( buf_size );
28266acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        local_alloc = 1;
28276acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
28286acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    else
28296acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
28306acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_CALL( buffer = (uchar*)cvAlloc( buf_size ));
28316acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
28326acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
28336acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    temp = cvMat( srcA->rows, srcA->cols, srcA->type, buffer );
28346acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CV_CALL( cvSub( srcA, srcB, &temp ));
28356acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
28366acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    IPPI_CALL( func( mat->data.ptr, mat->step, temp.data.ptr, len, &dist ));
28376acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    dist = sqrt(dist);
28386acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
28396acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    __END__;
28406acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
28416acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( buffer && !local_alloc )
28426acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        cvFree( &buffer );
28436acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
28446acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    return  dist;
28456acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn}
28466acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
28476acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
28486acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn/****************************************************************************************\
28496acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn*                                        cvMulTransposed                                 *
28506acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn\****************************************************************************************/
28516acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
/*
 * ICV_DEF_MULTRANS_R_FUNC( flavor, srctype, dsttype, load_macro )
 *
 * Generates icvMulTransposedR_<flavor>(), which computes
 *
 *     dst = scale * (src - delta)^T * (src - delta)
 *
 * for a source of size.height rows by size.width columns; dst is written as a
 * size.width x size.width matrix.  Only the upper triangle is computed, the
 * lower triangle is then mirrored from it.
 *
 * Macro arguments:
 *   flavor     - suffix appended to the generated function name.
 *   srctype    - element type of src.
 *   dsttype    - element type of dst, delta and the scratch buffers.
 *   load_macro - applied to every source element as it is read.
 *                NOTE(review): the instantiations are not visible here;
 *                presumably this is a value-preserving conversion - confirm.
 *
 * Generated function parameters:
 *   src, srcstep     - source data and its row step in bytes.
 *   dst, dststep     - destination data and its row step in bytes.
 *   delta, deltastep - optional (may be 0) matrix subtracted from src, and its
 *                      row step in bytes.
 *   size             - dimensions of src.
 *   delta_cols       - number of columns in delta.  When it is smaller than
 *                      size.width it must be 1 (asserted) and that single
 *                      column is applied to every column of src.
 *   scale            - factor applied to every accumulated sum.
 *
 * Returns CV_NO_ERR, or CV_OUTOFMEM_ERR when the heap scratch buffer cannot be
 * allocated.
 *
 * All sums are accumulated in double and every product is formed in double,
 * in the unrolled loops as well as in the scalar tail loops, so that every
 * element of dst is computed with the same precision.
 */
#define ICV_DEF_MULTRANS_R_FUNC( flavor, srctype, dsttype, load_macro )         \
static CvStatus CV_STDCALL                                                      \
icvMulTransposedR_##flavor( const srctype* src, int srcstep,                    \
                       dsttype* dst, int dststep,                               \
                       const dsttype* delta, int deltastep,                     \
                       CvSize size, int delta_cols, double scale )              \
{                                                                               \
    int i, j, k;                                                                \
    dsttype* tdst = dst;                                                        \
    dsttype* col_buf = 0;                                                       \
    dsttype* delta_buf = 0;                                                     \
    int local_alloc = 0;                                                        \
    /* col_buf holds one (delta-corrected) source column */                     \
    int buf_size = size.height*sizeof(dsttype);                                 \
                                                                                \
    if( delta && delta_cols < size.width )                                      \
    {                                                                           \
        assert( delta_cols == 1 );                                              \
        /* room for delta_buf: 4 more elements per source row */                \
        buf_size += 4*buf_size;                                                 \
    }                                                                           \
                                                                                \
    /* small scratch buffers live on the stack, large ones on the heap */       \
    if( buf_size <= CV_MAX_LOCAL_SIZE )                                         \
    {                                                                           \
        col_buf = (dsttype*)cvStackAlloc( buf_size );                           \
        local_alloc = 1;                                                        \
    }                                                                           \
    else                                                                        \
    {                                                                           \
        col_buf = (dsttype*)cvAlloc( buf_size );                                \
        if( !col_buf )                                                          \
            return CV_OUTOFMEM_ERR;                                             \
    }                                                                           \
                                                                                \
    /* convert the byte steps into element steps */                             \
    srcstep /= sizeof(src[0]); dststep /= sizeof(dst[0]);                       \
    deltastep /= sizeof(delta[0]);                                              \
                                                                                \
    if( delta && delta_cols < size.width )                                      \
    {                                                                           \
        /* replicate each delta element 4 times so that the 4-way unrolled */   \
        /* loop below can read d[0..3] exactly as it does for a full delta */   \
        delta_buf = col_buf + size.height;                                      \
        for( i = 0; i < size.height; i++ )                                      \
            delta_buf[i*4] = delta_buf[i*4+1] =                                 \
                delta_buf[i*4+2] = delta_buf[i*4+3] = delta[i*deltastep];       \
        delta = delta_buf;                                                      \
        /* rows of delta_buf are 4 elements apart; a zero step stays zero */    \
        deltastep = deltastep ? 4 : 0;                                          \
    }                                                                           \
                                                                                \
    if( !delta )                                                                \
    {                                                                           \
        for( i = 0; i < size.width; i++, tdst += dststep )                      \
        {                                                                       \
            /* cache column i of src */                                         \
            for( k = 0; k < size.height; k++ )                                  \
                col_buf[k] = load_macro(src[k*srcstep+i]);                      \
                                                                                \
            /* dot products of column i with columns j..j+3 */                  \
            for( j = i; j <= size.width - 4; j += 4 )                           \
            {                                                                   \
                double s0 = 0, s1 = 0, s2 = 0, s3 = 0;                          \
                const srctype *tsrc = src + j;                                  \
                                                                                \
                for( k = 0; k < size.height; k++, tsrc += srcstep )             \
                {                                                               \
                    double a = col_buf[k];                                      \
                    s0 += a * load_macro(tsrc[0]);                              \
                    s1 += a * load_macro(tsrc[1]);                              \
                    s2 += a * load_macro(tsrc[2]);                              \
                    s3 += a * load_macro(tsrc[3]);                              \
                }                                                               \
                                                                                \
                tdst[j] = (dsttype)(s0*scale);                                  \
                tdst[j+1] = (dsttype)(s1*scale);                                \
                tdst[j+2] = (dsttype)(s2*scale);                                \
                tdst[j+3] = (dsttype)(s3*scale);                                \
            }                                                                   \
                                                                                \
            /* remaining columns, one at a time */                              \
            for( ; j < size.width; j++ )                                        \
            {                                                                   \
                double s0 = 0;                                                  \
                const srctype *tsrc = src + j;                                  \
                                                                                \
                /* promote before multiplying, as the unrolled loop does */     \
                for( k = 0; k < size.height; k++, tsrc += srcstep )             \
                    s0 += (double)col_buf[k] * load_macro(tsrc[0]);             \
                                                                                \
                tdst[j] = (dsttype)(s0*scale);                                  \
            }                                                                   \
        }                                                                       \
    }                                                                           \
    else                                                                        \
    {                                                                           \
        for( i = 0; i < size.width; i++, tdst += dststep )                      \
        {                                                                       \
            /* cache column i of (src - delta) */                               \
            if( !delta_buf )                                                    \
                for( k = 0; k < size.height; k++ )                              \
                    col_buf[k] = load_macro(src[k*srcstep+i]) - delta[k*deltastep+i]; \
            else                                                                \
                for( k = 0; k < size.height; k++ )                              \
                    col_buf[k] = load_macro(src[k*srcstep+i]) - delta_buf[k*deltastep]; \
                                                                                \
            for( j = i; j <= size.width - 4; j += 4 )                           \
            {                                                                   \
                double s0 = 0, s1 = 0, s2 = 0, s3 = 0;                          \
                const srctype *tsrc = src + j;                                  \
                /* replicated delta is column-independent, full delta is not */ \
                const dsttype *d = delta_buf ? delta_buf : delta + j;           \
                                                                                \
                for( k = 0; k < size.height; k++, tsrc+=srcstep, d+=deltastep ) \
                {                                                               \
                    double a = col_buf[k];                                      \
                    s0 += a * (load_macro(tsrc[0]) - d[0]);                     \
                    s1 += a * (load_macro(tsrc[1]) - d[1]);                     \
                    s2 += a * (load_macro(tsrc[2]) - d[2]);                     \
                    s3 += a * (load_macro(tsrc[3]) - d[3]);                     \
                }                                                               \
                                                                                \
                tdst[j] = (dsttype)(s0*scale);                                  \
                tdst[j+1] = (dsttype)(s1*scale);                                \
                tdst[j+2] = (dsttype)(s2*scale);                                \
                tdst[j+3] = (dsttype)(s3*scale);                                \
            }                                                                   \
                                                                                \
            for( ; j < size.width; j++ )                                        \
            {                                                                   \
                double s0 = 0;                                                  \
                const srctype *tsrc = src + j;                                  \
                const dsttype *d = delta_buf ? delta_buf : delta + j;           \
                                                                                \
                /* promote before multiplying, as the unrolled loop does */     \
                for( k = 0; k < size.height; k++, tsrc+=srcstep, d+=deltastep ) \
                    s0 += (double)col_buf[k] * (load_macro(tsrc[0]) - d[0]);    \
                                                                                \
                tdst[j] = (dsttype)(s0*scale);                                  \
            }                                                                   \
        }                                                                       \
    }                                                                           \
                                                                                \
    /* fill the lower part of the destination matrix */                         \
    for( i = 1; i < size.width; i++ )                                           \
        for( j = 0; j < i; j++ )                                                \
            dst[dststep*i + j] = dst[dststep*j + i];                            \
                                                                                \
    /* only a heap-allocated scratch buffer needs releasing */                  \
    if( col_buf && !local_alloc )                                               \
        cvFree( &col_buf );                                                     \
                                                                                \
    return CV_NO_ERR;                                                           \
}
29886acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
29896acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
29906acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn#define ICV_DEF_MULTRANS_L_FUNC( flavor, srctype, dsttype, load_macro )         \
29916acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Rennstatic CvStatus CV_STDCALL                                                      \
29926acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennicvMulTransposedL_##flavor( const srctype* src, int srcstep,                    \
29936acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                            dsttype* dst, int dststep,                          \
29946acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                            dsttype* delta, int deltastep,                      \
29956acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                            CvSize size, int delta_cols, double scale )         \
29966acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn{                                                                               \
29976acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    int i, j, k;                                                                \
29986acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    dsttype* tdst = dst;                                                        \
29996acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                                \
30006acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    srcstep /= sizeof(src[0]); dststep /= sizeof(dst[0]);                       \
30016acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    deltastep /= sizeof(delta[0]);                                              \
30026acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                                \
30036acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !delta )                                                                \
30046acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        for( i = 0; i < size.height; i++, tdst += dststep )                     \
30056acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            for( j = i; j < size.height; j++ )                                  \
30066acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {                                                                   \
30076acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                double s = 0;                                                   \
30086acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                const srctype *tsrc1 = src + i*srcstep;                         \
30096acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                const srctype *tsrc2 = src + j*srcstep;                         \
30106acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                                \
30116acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                for( k = 0; k <= size.width - 4; k += 4 )                       \
30126acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    s += tsrc1[k]*tsrc2[k] + tsrc1[k+1]*tsrc2[k+1] +            \
30136acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                         tsrc1[k+2]*tsrc2[k+2] + tsrc1[k+3]*tsrc2[k+3];         \
30146acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                for( ; k < size.width; k++ )                                    \
30156acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    s += tsrc1[k] * tsrc2[k];                                   \
30166acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                tdst[j] = (dsttype)(s*scale);                                   \
30176acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }                                                                   \
30186acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    else                                                                        \
30196acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {                                                                           \
30206acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        dsttype* row_buf = 0;                                                   \
30216acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        int local_alloc = 0;                                                    \
30226acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        int buf_size = size.width*sizeof(dsttype);                              \
30236acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        dsttype delta_buf[4];                                                   \
30246acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        int delta_shift = delta_cols == size.width ? 4 : 0;                     \
30256acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                                \
30266acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( buf_size <= CV_MAX_LOCAL_SIZE )                                     \
30276acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {                                                                       \
30286acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            row_buf = (dsttype*)cvStackAlloc( buf_size );                       \
30296acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            local_alloc = 1;                                                    \
30306acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }                                                                       \
30316acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        else                                                                    \
30326acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {                                                                       \
30336acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            row_buf = (dsttype*)cvAlloc( buf_size );                            \
30346acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( !row_buf )                                                      \
30356acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                return CV_OUTOFMEM_ERR;                                         \
30366acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }                                                                       \
30376acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                                \
30386acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        for( i = 0; i < size.height; i++, tdst += dststep )                     \
30396acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {                                                                       \
30406acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            const srctype *tsrc1 = src + i*srcstep;                             \
30416acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            const dsttype *tdelta1 = delta + i*deltastep;                       \
30426acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                                \
30436acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( delta_cols < size.width )                                       \
30446acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                for( k = 0; k < size.width; k++ )                               \
30456acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    row_buf[k] = tsrc1[k] - tdelta1[0];                         \
30466acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            else                                                                \
30476acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                for( k = 0; k < size.width; k++ )                               \
30486acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    row_buf[k] = tsrc1[k] - tdelta1[k];                         \
30496acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                                \
30506acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            for( j = i; j < size.height; j++ )                                  \
30516acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {                                                                   \
30526acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                double s = 0;                                                   \
30536acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                const srctype *tsrc2 = src + j*srcstep;                         \
30546acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                const dsttype *tdelta2 = delta + j*deltastep;                   \
30556acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                if( delta_cols < size.width )                                   \
30566acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                {                                                               \
30576acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    delta_buf[0] = delta_buf[1] =                               \
30586acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                        delta_buf[2] = delta_buf[3] = tdelta2[0];               \
30596acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    tdelta2 = delta_buf;                                        \
30606acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                }                                                               \
30616acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                for( k = 0; k <= size.width-4; k += 4, tdelta2 += delta_shift ) \
30626acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    s += row_buf[k]*(load_macro(tsrc2[k]) - tdelta2[0]) +       \
30636acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                         row_buf[k+1]*(load_macro(tsrc2[k+1]) - tdelta2[1]) +   \
30646acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                         row_buf[k+2]*(load_macro(tsrc2[k+2]) - tdelta2[2]) +   \
30656acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                         row_buf[k+3]*(load_macro(tsrc2[k+3]) - tdelta2[3]);    \
30666acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                for( ; k < size.width; k++, tdelta2++ )                         \
30676acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    s += row_buf[k]*(load_macro(tsrc2[k]) - tdelta2[0]);        \
30686acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                tdst[j] = (dsttype)(s*scale);                                   \
30696acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }                                                                   \
30706acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }                                                                       \
30716acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                                \
30726acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( row_buf && !local_alloc )                                           \
30736acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            cvFree( &row_buf );                                                 \
30746acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }                                                                           \
30756acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                                \
30766acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    /* fill the lower part of the destination matrix */                         \
30776acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    for( j = 0; j < size.height - 1; j++ )                                      \
30786acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        for( i = j; i < size.height; i++ )                                      \
30796acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            dst[dststep*i + j] = dst[dststep*j + i];                            \
30806acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                                                                                \
30816acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    return CV_NO_ERR;                                                           \
30826acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn}
30836acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
30846acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
30856acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_MULTRANS_R_FUNC( 8u32f, uchar, float, CV_8TO32F )
30866acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_MULTRANS_R_FUNC( 8u64f, uchar, double, CV_8TO32F )
30876acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_MULTRANS_R_FUNC( 32f, float, float, CV_NOP )
30886acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_MULTRANS_R_FUNC( 32f64f, float, double, CV_NOP )
30896acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_MULTRANS_R_FUNC( 64f, double, double, CV_NOP )
30906acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_MULTRANS_R_FUNC( 16u32f, ushort, float, CV_NOP )
30916acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_MULTRANS_R_FUNC( 16u64f, ushort, double, CV_NOP )
30926acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_MULTRANS_R_FUNC( 16s32f, short, float, CV_NOP )
30936acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_MULTRANS_R_FUNC( 16s64f, short, double, CV_NOP )
30946acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
30956acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_MULTRANS_L_FUNC( 8u32f, uchar, float, CV_8TO32F )
30966acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_MULTRANS_L_FUNC( 8u64f, uchar, double, CV_8TO32F )
30976acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_MULTRANS_L_FUNC( 32f, float, float, CV_NOP )
30986acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_MULTRANS_L_FUNC( 32f64f, float, double, CV_NOP )
30996acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_MULTRANS_L_FUNC( 64f, double, double, CV_NOP )
31006acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_MULTRANS_L_FUNC( 16u32f, ushort, float, CV_NOP )
31016acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_MULTRANS_L_FUNC( 16u64f, ushort, double, CV_NOP )
31026acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_MULTRANS_L_FUNC( 16s32f, short, float, CV_NOP )
31036acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_MULTRANS_L_FUNC( 16s64f, short, double, CV_NOP )
31046acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
31056acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
31066acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renntypedef CvStatus (CV_STDCALL * CvMulTransposedFunc)
31076acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    ( const void* src, int srcstep,
31086acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn      void* dst, int dststep, const void* delta,
31096acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn      int deltastep, CvSize size, int delta_cols, double scale );
31106acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
31116acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennCV_IMPL void
31126acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RenncvMulTransposed( const CvArr* srcarr, CvArr* dstarr,
31136acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                 int order, const CvArr* deltaarr, double scale )
31146acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn{
31156acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    const int gemm_level = 100; // boundary above which GEMM is faster.
31166acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvMat* src2 = 0;
31176acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
31186acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CV_FUNCNAME( "cvMulTransposed" );
31196acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
31206acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    __BEGIN__;
31216acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
31226acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvMat sstub, *src = (CvMat*)srcarr;
31236acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvMat dstub, *dst = (CvMat*)dstarr;
31246acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvMat deltastub, *delta = (CvMat*)deltaarr;
31256acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    int stype, dtype;
31266acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
31276acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !CV_IS_MAT( src ))
31286acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_CALL( src = cvGetMat( src, &sstub ));
31296acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
31306acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !CV_IS_MAT( dst ))
31316acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_CALL( dst = cvGetMat( dst, &dstub ));
31326acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
31336acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( delta )
31346acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
31356acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( !CV_IS_MAT( delta ))
31366acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_CALL( delta = cvGetMat( delta, &deltastub ));
31376acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
31386acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( !CV_ARE_TYPES_EQ( dst, delta ))
31396acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_ERROR( CV_StsUnmatchedFormats, "" );
31406acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
31416acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( (delta->rows != src->rows && delta->rows != 1) ||
31426acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            (delta->cols != src->cols && delta->cols != 1) )
31436acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_ERROR( CV_StsUnmatchedSizes, "" );
31446acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
31456acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    else
31466acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
31476acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        delta = &deltastub;
31486acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        delta->data.ptr = 0;
31496acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        delta->step = 0;
31506acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        delta->rows = delta->cols = 0;
31516acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
31526acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
31536acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    stype = CV_MAT_TYPE( src->type );
31546acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    dtype = CV_MAT_TYPE( dst->type );
31556acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
31566acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( dst->rows != dst->cols )
31576acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_ERROR( CV_StsBadSize, "The destination matrix must be square" );
31586acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
31596acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( (order != 0 && src->cols != dst->cols) ||
31606acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        (order == 0 && src->rows != dst->rows))
31616acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_ERROR( CV_StsUnmatchedSizes, "" );
31626acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
31636acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( src->data.ptr == dst->data.ptr || (stype == dtype &&
31646acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        (dst->cols >= gemm_level && dst->rows >= gemm_level &&
31656acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn         src->cols >= gemm_level && src->rows >= gemm_level)))
31666acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
31676acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( deltaarr )
31686acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
31696acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_CALL( src2 = cvCreateMat( src->rows, src->cols, src->type ));
31706acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            cvRepeat( delta, src2 );
31716acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            cvSub( src, src2, src2 );
31726acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            src = src2;
31736acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
31746acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        cvGEMM( src, src, scale, 0, 0, dst, order == 0 ? CV_GEMM_B_T : CV_GEMM_A_T );
31756acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
31766acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    else
31776acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
31786acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CvMulTransposedFunc func =
31796acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            stype == CV_8U && dtype == CV_32F ?
31806acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            (order ? (CvMulTransposedFunc)icvMulTransposedR_8u32f :
31816acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    (CvMulTransposedFunc)icvMulTransposedL_8u32f) :
31826acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            stype == CV_8U && dtype == CV_64F ?
31836acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            (order ? (CvMulTransposedFunc)icvMulTransposedR_8u64f :
31846acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    (CvMulTransposedFunc)icvMulTransposedL_8u64f) :
31856acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            stype == CV_16U && dtype == CV_32F ?
31866acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            (order ? (CvMulTransposedFunc)icvMulTransposedR_16u32f :
31876acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    (CvMulTransposedFunc)icvMulTransposedL_16u32f) :
31886acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            stype == CV_16U && dtype == CV_64F ?
31896acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            (order ? (CvMulTransposedFunc)icvMulTransposedR_16u64f :
31906acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    (CvMulTransposedFunc)icvMulTransposedL_16u64f) :
31916acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            stype == CV_16S && dtype == CV_32F ?
31926acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            (order ? (CvMulTransposedFunc)icvMulTransposedR_16s32f :
31936acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    (CvMulTransposedFunc)icvMulTransposedL_16s32f) :
31946acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            stype == CV_16S && dtype == CV_64F ?
31956acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            (order ? (CvMulTransposedFunc)icvMulTransposedR_16s64f :
31966acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    (CvMulTransposedFunc)icvMulTransposedL_16s64f) :
31976acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            stype == CV_32F && dtype == CV_32F ?
31986acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            (order ? (CvMulTransposedFunc)icvMulTransposedR_32f :
31996acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    (CvMulTransposedFunc)icvMulTransposedL_32f) :
32006acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            stype == CV_32F && dtype == CV_64F ?
32016acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            (order ? (CvMulTransposedFunc)icvMulTransposedR_32f64f :
32026acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    (CvMulTransposedFunc)icvMulTransposedL_32f64f) :
32036acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            stype == CV_64F && dtype == CV_64F ?
32046acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            (order ? (CvMulTransposedFunc)icvMulTransposedR_64f :
32056acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    (CvMulTransposedFunc)icvMulTransposedL_64f) : 0;
32066acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
32076acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( !func )
32086acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_ERROR( CV_StsUnsupportedFormat, "" );
32096acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
32106acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        IPPI_CALL( func( src->data.ptr, src->step, dst->data.ptr, dst->step,
32116acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                         delta->data.ptr, delta->step, cvGetMatSize( src ),
32126acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                         delta->cols, scale ));
32136acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
32146acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
32156acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    __END__;
32166acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
32176acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( src2 )
32186acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        cvReleaseMat( &src2 );
32196acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn}
32206acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
32216acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
32226acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn/****************************************************************************************\
32236acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn*                                        cvDotProduct                                    *
32246acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn\****************************************************************************************/
32256acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
/*
   Defines icvDotProduct_<flavor>_C1R: the sum of src1[i]*src2[i] over all
   elements of two 2D arrays of the same size.

   flavor   - suffix used in the generated function name.
   arrtype  - element type of both arrays.
   temptype - type in which a single product (and the partial sums of one
              group of four elements) is computed.
   sumtype  - type of the accumulated result written to *_sum.

   step1 and step2 are the row steps in bytes; size is the array size in
   elements. The generated function always returns CV_OK.
*/
#define ICV_DEF_DOT_PROD_FUNC_2D( flavor, arrtype, temptype, sumtype )  \
static CvStatus CV_STDCALL                                              \
icvDotProduct_##flavor##_C1R( const arrtype* src1, int step1,           \
                              const arrtype* src2, int step2,           \
                              CvSize size, sumtype* _sum )              \
{                                                                       \
    sumtype total = 0;                                                  \
                                                                        \
    /* convert the byte steps into element steps */                     \
    step1 /= sizeof(src1[0]);                                           \
    step2 /= sizeof(src2[0]);                                           \
                                                                        \
    while( size.height-- )                                              \
    {                                                                   \
        int x = 0;                                                      \
                                                                        \
        /* main part of the row, four elements per iteration, */        \
        /* accumulated in two interleaved partial sums         */        \
        while( x <= size.width - 4 )                                    \
        {                                                               \
            temptype even = (temptype)src1[x]*src2[x];                  \
            temptype odd = (temptype)src1[x+1]*src2[x+1];               \
            even += (temptype)src1[x+2]*src2[x+2];                      \
            odd += (temptype)src1[x+3]*src2[x+3];                       \
            total += even + odd;                                        \
            x += 4;                                                     \
        }                                                               \
                                                                        \
        /* remaining 0..3 elements of the row */                        \
        while( x < size.width )                                         \
        {                                                               \
            total += (temptype)src1[x]*src2[x];                         \
            x++;                                                        \
        }                                                               \
                                                                        \
        src1 += step1;                                                  \
        src2 += step2;                                                  \
    }                                                                   \
                                                                        \
    *_sum = total;                                                      \
    return CV_OK;                                                       \
}
32576acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
32586acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
32596acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_DOT_PROD_FUNC_2D( 8u, uchar, int, int64 )
32606acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_DOT_PROD_FUNC_2D( 16u, ushort, int64, int64 )
32616acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_DOT_PROD_FUNC_2D( 16s, short, int64, int64 )
32626acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_DOT_PROD_FUNC_2D( 32s, int, double, double )
32636acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_DOT_PROD_FUNC_2D( 32f, float, double, double )
32646acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennICV_DEF_DOT_PROD_FUNC_2D( 64f, double, double, double )
32656acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
32666acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn#define icvDotProduct_8s_C1R 0
32676acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
32686acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennCV_DEF_INIT_FUNC_TAB_2D( DotProduct, C1R )
32696acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
32706acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RennCV_IMPL double
32716acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius RenncvDotProduct( const CvArr* srcAarr, const CvArr* srcBarr )
32726acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn{
32736acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    static CvFuncTable tab_2d;
32746acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    static int inittab = 0;
32756acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
32766acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    Cv64suf result;
32776acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    result.f = 0;
32786acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
32796acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CV_FUNCNAME( "cvDotProduct" );
32806acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
32816acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    __BEGIN__;
32826acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
32836acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvMat stubA, *srcA = (CvMat*)srcAarr;
32846acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvMat stubB, *srcB = (CvMat*)srcBarr;
32856acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvSize size;
32866acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    int type, depth;
32876acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    CvFunc2D_2A1P func;
32886acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
32896acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !inittab )
32906acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
32916acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        icvInitDotProductC1RTable( &tab_2d );
32926acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        inittab = 1;
32936acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
32946acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
32956acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !CV_IS_MAT( srcA ))
32966acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
32976acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        int coi = 0;
32986acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_CALL( srcA = cvGetMat( srcA, &stubA, &coi ));
32996acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( coi != 0 )
33006acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_ERROR( CV_BadCOI, "coi is not supported" );
33016acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
33026acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
33036acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( srcBarr == srcAarr )
33046acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        srcB = srcA;
33056acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    else
33066acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
33076acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( !CV_IS_MAT( srcB ))
33086acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
33096acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            int coi = 0;
33106acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_CALL( srcB = cvGetMat( srcB, &stubB, &coi ));
33116acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
33126acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( coi != 0 )
33136acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                CV_ERROR( CV_BadCOI, "coi is not supported" );
33146acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
33156acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
33166acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( !CV_ARE_TYPES_EQ( srcA, srcB ))
33176acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_ERROR( CV_StsUnmatchedFormats, "" );
33186acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
33196acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( !CV_ARE_SIZES_EQ( srcA, srcB ))
33206acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            CV_ERROR( CV_StsUnmatchedSizes, "" );
33216acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
33226acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
33236acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    type = CV_MAT_TYPE( srcA->type );
33246acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    size = cvGetMatSize( srcA );
33256acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
33266acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    size.width *= CV_MAT_CN( type );
33276acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    depth = CV_MAT_DEPTH( type );
33286acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
33296acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( CV_IS_MAT_CONT( srcA->type & srcB->type ))
33306acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    {
33316acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        size.width *= size.height;
33326acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
33336acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        if( size.width <= CV_MAX_INLINE_MAT_OP_SIZE )
33346acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        {
33356acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( depth == CV_32F )
33366acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {
33376acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                float* mA = srcA->data.fl;
33386acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                float* mB = srcB->data.fl;
33396acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                double sum = 0;
33406acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                do
33416acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    sum += (double)mA[size.width - 1]*mB[size.width - 1];
33426acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                while( --size.width );
33436acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                result.f = sum;
33446acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                EXIT;
33456acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }
33466acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
33476acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            if( depth == CV_64F )
33486acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            {
33496acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                double* mA = srcA->data.db;
33506acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                double* mB = srcB->data.db;
33516acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                double sum = 0;
33526acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                do
33536acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                    sum += mA[size.width - 1]*mB[size.width - 1];
33546acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                while( --size.width );
33556acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                result.f = sum;
33566acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                EXIT;
33576acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn            }
33586acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        }
33596acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        size.height = 1;
33606acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    }
33616acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
33626acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    func = (CvFunc2D_2A1P)(tab_2d.fn_2d[depth]);
33636acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( !func )
33646acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        CV_ERROR( CV_StsUnsupportedFormat, "" );
33656acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
33666acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    IPPI_CALL( func( srcA->data.ptr, srcA->step,
33676acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                     srcB->data.ptr, srcB->step,
33686acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn                     size, &result ));
33696acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
33706acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    if( depth < CV_32S )
33716acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn        result.f = (double)result.i;
33726acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
33736acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    __END__;
33746acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
33756acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn    return result.f;
33766acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn}
33776acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn
33786acb9a7ea3d7564944e12cbc73a857b88c1301eeMarius Renn/* End of file. */
3379