1793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler/*M///////////////////////////////////////////////////////////////////////////////////////
2793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
3793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
5793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//  By downloading, copying, installing or using the software you agree to this license.
6793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//  If you do not agree to this license, do not download, install,
7793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//  copy or use the software.
8793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
9793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
10793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//                        Intel License Agreement
11793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//                For Open Source Computer Vision Library
12793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
13793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
15793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
16793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// Redistribution and use in source and binary forms, with or without modification,
17793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// are permitted provided that the following conditions are met:
18793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
19793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//   * Redistribution's of source code must retain the above copyright notice,
20793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//     this list of conditions and the following disclaimer.
21793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
22793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//   * Redistribution's in binary form must reproduce the above copyright notice,
23793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//     this list of conditions and the following disclaimer in the documentation
24793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//     and/or other materials provided with the distribution.
25793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
26793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//   * The name of Intel Corporation may not be used to endorse or promote products
27793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//     derived from this software without specific prior written permission.
28793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
29793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// This software is provided by the copyright holders and contributors "as is" and
30793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// any express or implied warranties, including, but not limited to, the implied
31793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// warranties of merchantability and fitness for a particular purpose are disclaimed.
32793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// In no event shall the Intel Corporation or contributors be liable for any direct,
33793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// indirect, incidental, special, exemplary, or consequential damages
34793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// (including, but not limited to, procurement of substitute goods or services;
35793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// loss of use, data, or profits; or business interruption) however caused
36793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// and on any theory of liability, whether in contract, strict liability,
37793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// or tort (including negligence or otherwise) arising in any way out of
38793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler// the use of this software, even if advised of the possibility of such damage.
39793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//
40793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler//M*/
41793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
42793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#ifndef __OPENCV_FAST_NLMEANS_DENOISING_INVOKER_HPP__
43793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#define __OPENCV_FAST_NLMEANS_DENOISING_INVOKER_HPP__
44793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
45793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#include "precomp.hpp"
46793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#include <limits>
47793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
48793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#include "fast_nlmeans_denoising_invoker_commons.hpp"
49793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#include "arrays.hpp"
50793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
51793ee12c6df9cad3806238d32528c49a3ff9331dNoah Preslerusing namespace cv;
52793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
53793ee12c6df9cad3806238d32528c49a3ff9331dNoah Preslertemplate <typename T, typename IT, typename UIT, typename D, typename WT>
54793ee12c6df9cad3806238d32528c49a3ff9331dNoah Preslerstruct FastNlMeansDenoisingInvoker :
55793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        public ParallelLoopBody
56793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler{
57793ee12c6df9cad3806238d32528c49a3ff9331dNoah Preslerpublic:
58793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    FastNlMeansDenoisingInvoker(const Mat& src, Mat& dst,
59793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        int template_window_size, int search_window_size, const float *h);
60793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
61793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    void operator() (const Range& range) const;
62793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
63793ee12c6df9cad3806238d32528c49a3ff9331dNoah Preslerprivate:
64793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    void operator= (const FastNlMeansDenoisingInvoker&);
65793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
66793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    const Mat& src_;
67793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    Mat& dst_;
68793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
69793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    Mat extended_src_;
70793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    int border_size_;
71793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
72793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    int template_window_size_;
73793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    int search_window_size_;
74793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
75793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    int template_window_half_size_;
76793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    int search_window_half_size_;
77793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
78793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    typename pixelInfo<WT>::sampleType fixed_point_mult_;
79793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    int almost_template_window_size_sq_bin_shift_;
80793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    std::vector<WT> almost_dist2weight_;
81793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
82793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    void calcDistSumsForFirstElementInRow(
83793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        int i, Array2d<int>& dist_sums,
84793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        Array3d<int>& col_dist_sums,
85793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        Array3d<int>& up_col_dist_sums) const;
86793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
87793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    void calcDistSumsForElementInFirstRow(
88793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        int i, int j, int first_col_num,
89793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        Array2d<int>& dist_sums,
90793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        Array3d<int>& col_dist_sums,
91793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        Array3d<int>& up_col_dist_sums) const;
92793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler};
93793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
// Returns the smallest exponent p such that (1 << p) >= value, i.e. the base-2
// logarithm of value rounded up. Note that the exponent is returned, not the power.
// Any value <= 1 (including zero and negative numbers) yields 0.
inline int getNearestPowerOf2(int value)
{
    if (value <= 1)
        return 0;

    // Shift an unsigned one: for value > 2^30 the loop has to test 1 << 31, which
    // overflows a signed int and is undefined behaviour.
    const unsigned int target = static_cast<unsigned int>(value);

    int p = 0;
    while ((1u << p) < target)
        ++p;
    return p;
}
101793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
102793ee12c6df9cad3806238d32528c49a3ff9331dNoah Preslertemplate <typename T, typename IT, typename UIT, typename D, typename WT>
103793ee12c6df9cad3806238d32528c49a3ff9331dNoah PreslerFastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::FastNlMeansDenoisingInvoker(
104793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    const Mat& src, Mat& dst,
105793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    int template_window_size,
106793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    int search_window_size,
107793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    const float *h) :
108793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    src_(src), dst_(dst)
109793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler{
110793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    CV_Assert(src.channels() == pixelInfo<T>::channels);
111793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
112793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    template_window_half_size_ = template_window_size / 2;
113793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    search_window_half_size_   = search_window_size   / 2;
114793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    template_window_size_      = template_window_half_size_ * 2 + 1;
115793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    search_window_size_        = search_window_half_size_   * 2 + 1;
116793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
117793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    border_size_ = search_window_half_size_ + template_window_half_size_;
118793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    copyMakeBorder(src_, extended_src_, border_size_, border_size_, border_size_, border_size_, BORDER_DEFAULT);
119793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
120793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    const IT max_estimate_sum_value =
121793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        (IT)search_window_size_ * (IT)search_window_size_ * (IT)pixelInfo<T>::sampleMax();
122793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    fixed_point_mult_ = (int)std::min<IT>(std::numeric_limits<IT>::max() / max_estimate_sum_value,
123793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                                          pixelInfo<WT>::sampleMax());
124793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
125793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    // precalc weight for every possible l2 dist between blocks
126793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    // additional optimization of precalced weights to replace division(averaging) by binary shift
127793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    CV_Assert(template_window_size_ <= 46340); // sqrt(INT_MAX)
128793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    int template_window_size_sq = template_window_size_ * template_window_size_;
129793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    almost_template_window_size_sq_bin_shift_ = getNearestPowerOf2(template_window_size_sq);
130793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    double almost_dist2actual_dist_multiplier = ((double)(1 << almost_template_window_size_sq_bin_shift_)) / template_window_size_sq;
131793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
132793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    int max_dist = D::template maxDist<T>();
133793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    int almost_max_dist = (int)(max_dist / almost_dist2actual_dist_multiplier + 1);
134793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    almost_dist2weight_.resize(almost_max_dist);
135793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
136793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++)
137793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    {
138793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        double dist = almost_dist * almost_dist2actual_dist_multiplier;
139793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        almost_dist2weight_[almost_dist] =
140793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            D::template calcWeight<T, WT>(dist, h, fixed_point_mult_);
141793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    }
142793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
143793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    // additional optimization init end
144793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    if (dst_.empty())
145793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        dst_ = Mat::zeros(src_.size(), src_.type());
146793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler}
147793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
148793ee12c6df9cad3806238d32528c49a3ff9331dNoah Preslertemplate <typename T, typename IT, typename UIT, typename D, typename WT>
149793ee12c6df9cad3806238d32528c49a3ff9331dNoah Preslervoid FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::operator() (const Range& range) const
150793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler{
151793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    int row_from = range.start;
152793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    int row_to = range.end - 1;
153793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
154793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    // sums of cols anf rows for current pixel p
155793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    Array2d<int> dist_sums(search_window_size_, search_window_size_);
156793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
157793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    // for lazy calc optimization (sum of cols for current pixel)
158793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    Array3d<int> col_dist_sums(template_window_size_, search_window_size_, search_window_size_);
159793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
160793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    int first_col_num = -1;
161793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    // last elements of column sum (for each element in row)
162793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    Array3d<int> up_col_dist_sums(src_.cols, search_window_size_, search_window_size_);
163793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
164793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    for (int i = row_from; i <= row_to; i++)
165793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    {
166793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        for (int j = 0; j < src_.cols; j++)
167793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        {
168793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            int search_window_y = i - search_window_half_size_;
169793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            int search_window_x = j - search_window_half_size_;
170793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
171793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            // calc dist_sums
172793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            if (j == 0)
173793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            {
174793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                calcDistSumsForFirstElementInRow(i, dist_sums, col_dist_sums, up_col_dist_sums);
175793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                first_col_num = 0;
176793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            }
177793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            else
178793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            {
179793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                // calc cur dist_sums using previous dist_sums
180793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                if (i == row_from)
181793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                {
182793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                    calcDistSumsForElementInFirstRow(i, j, first_col_num,
183793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                        dist_sums, col_dist_sums, up_col_dist_sums);
184793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                }
185793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                else
186793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                {
187793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                    int ay = border_size_ + i;
188793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                    int ax = border_size_ + j + template_window_half_size_;
189793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
190793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                    int start_by = border_size_ + i - search_window_half_size_;
191793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                    int start_bx = border_size_ + j - search_window_half_size_ + template_window_half_size_;
192793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
193793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                    T a_up = extended_src_.at<T>(ay - template_window_half_size_ - 1, ax);
194793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                    T a_down = extended_src_.at<T>(ay + template_window_half_size_, ax);
195793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
196793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                    // copy class member to local variable for optimization
197793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                    int search_window_size = search_window_size_;
198793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
199793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                    for (int y = 0; y < search_window_size; y++)
200793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                    {
201793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                        int * dist_sums_row = dist_sums.row_ptr(y);
202793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                        int * col_dist_sums_row = col_dist_sums.row_ptr(first_col_num, y);
203793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                        int * up_col_dist_sums_row = up_col_dist_sums.row_ptr(j, y);
204793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
205793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                        const T * b_up_ptr = extended_src_.ptr<T>(start_by - template_window_half_size_ - 1 + y);
206793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                        const T * b_down_ptr = extended_src_.ptr<T>(start_by + template_window_half_size_ + y);
207793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
208793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                        for (int x = 0; x < search_window_size; x++)
209793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                        {
210793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                            // remove from current pixel sum column sum with index "first_col_num"
211793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                            dist_sums_row[x] -= col_dist_sums_row[x];
212793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
213793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                            int bx = start_bx + x;
214793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                            col_dist_sums_row[x] = up_col_dist_sums_row[x] + D::template calcUpDownDist<T>(a_up, a_down, b_up_ptr[bx], b_down_ptr[bx]);
215793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
216793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                            dist_sums_row[x] += col_dist_sums_row[x];
217793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                            up_col_dist_sums_row[x] = col_dist_sums_row[x];
218793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                        }
219793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                    }
220793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                }
221793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
222793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                first_col_num = (first_col_num + 1) % template_window_size_;
223793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            }
224793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
225793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            // calc weights
226793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            IT estimation[pixelInfo<T>::channels], weights_sum[pixelInfo<WT>::channels];
227793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            for (int channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
228793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                estimation[channel_num] = 0;
229793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            for (int channel_num = 0; channel_num < pixelInfo<WT>::channels; channel_num++)
230793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                weights_sum[channel_num] = 0;
231793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
232793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            for (int y = 0; y < search_window_size_; y++)
233793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            {
234793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                const T* cur_row_ptr = extended_src_.ptr<T>(border_size_ + search_window_y + y);
235793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                int* dist_sums_row = dist_sums.row_ptr(y);
236793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                for (int x = 0; x < search_window_size_; x++)
237793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                {
238793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                    int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift_;
239793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                    WT weight = almost_dist2weight_[almostAvgDist];
240793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                    T p = cur_row_ptr[border_size_ + search_window_x + x];
241793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                    incWithWeight<T, IT, WT>(estimation, weights_sum, weight, p);
242793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                }
243793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            }
244793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
245793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            divByWeightsSum<IT, UIT, pixelInfo<T>::channels, pixelInfo<WT>::channels>(estimation,
246793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                                                                                      weights_sum);
247793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            dst_.at<T>(i,j) = saturateCastFromArray<T, IT>(estimation);
248793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        }
249793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    }
250793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler}
251793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
252793ee12c6df9cad3806238d32528c49a3ff9331dNoah Preslertemplate <typename T, typename IT, typename UIT, typename D, typename WT>
253793ee12c6df9cad3806238d32528c49a3ff9331dNoah Preslerinline void FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::calcDistSumsForFirstElementInRow(
254793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    int i,
255793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    Array2d<int>& dist_sums,
256793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    Array3d<int>& col_dist_sums,
257793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    Array3d<int>& up_col_dist_sums) const
258793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler{
259793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    int j = 0;
260793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
261793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    for (int y = 0; y < search_window_size_; y++)
262793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        for (int x = 0; x < search_window_size_; x++)
263793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        {
264793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            dist_sums[y][x] = 0;
265793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            for (int tx = 0; tx < template_window_size_; tx++)
266793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                col_dist_sums[tx][y][x] = 0;
267793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
268793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            int start_y = i + y - search_window_half_size_;
269793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            int start_x = j + x - search_window_half_size_;
270793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
271793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
272793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                for (int tx = -template_window_half_size_; tx <= template_window_half_size_; tx++)
273793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                {
274793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                    int dist = D::template calcDist<T>(extended_src_,
275793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                        border_size_ + i + ty, border_size_ + j + tx,
276793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                        border_size_ + start_y + ty, border_size_ + start_x + tx);
277793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
278793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                    dist_sums[y][x] += dist;
279793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                    col_dist_sums[tx + template_window_half_size_][y][x] += dist;
280793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                }
281793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
282793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            up_col_dist_sums[j][y][x] = col_dist_sums[template_window_size_ - 1][y][x];
283793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        }
284793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler}
285793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
286793ee12c6df9cad3806238d32528c49a3ff9331dNoah Preslertemplate <typename T, typename IT, typename UIT, typename D, typename WT>
287793ee12c6df9cad3806238d32528c49a3ff9331dNoah Preslerinline void FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::calcDistSumsForElementInFirstRow(
288793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    int i, int j, int first_col_num,
289793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    Array2d<int>& dist_sums,
290793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    Array3d<int>& col_dist_sums,
291793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    Array3d<int>& up_col_dist_sums) const
292793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler{
293793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    int ay = border_size_ + i;
294793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    int ax = border_size_ + j + template_window_half_size_;
295793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
296793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    int start_by = border_size_ + i - search_window_half_size_;
297793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    int start_bx = border_size_ + j - search_window_half_size_ + template_window_half_size_;
298793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
299793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    int new_last_col_num = first_col_num;
300793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
301793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler    for (int y = 0; y < search_window_size_; y++)
302793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        for (int x = 0; x < search_window_size_; x++)
303793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        {
304793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            dist_sums[y][x] -= col_dist_sums[first_col_num][y][x];
305793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
306793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            col_dist_sums[new_last_col_num][y][x] = 0;
307793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            int by = start_by + y;
308793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            int bx = start_bx + x;
309793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
310793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler                col_dist_sums[new_last_col_num][y][x] += D::template calcDist<T>(extended_src_, ay + ty, ax, by + ty, bx);
311793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
312793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            dist_sums[y][x] += col_dist_sums[new_last_col_num][y][x];
313793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler            up_col_dist_sums[j][y][x] = col_dist_sums[new_last_col_num][y][x];
314793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler        }
315793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler}
316793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler
317793ee12c6df9cad3806238d32528c49a3ff9331dNoah Presler#endif
318