/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
105c1d3b27608a3f3f6028c069b9bf066a4de474b6hclam@chromium.org
#include "vp8/encoder/denoising.h"
#include "vp8/common/reconinter.h"
#include "vpx/vpx_integer.h"
#include "vpx_mem/vpx_mem.h"
#include "vp8_rtcd.h"

#include <stdlib.h> /* abs() */
#include <emmintrin.h>
#include "vpx_ports/emmintrin_compat.h"
195c1d3b27608a3f3f6028c069b9bf066a4de474b6hclam@chromium.org
/* Return the absolute value of the sum of the 16 signed 8-bit lanes of
 * acc_diff.
 *
 * acc_diff: sixteen signed byte accumulators.
 *
 * Returns |lane0 + lane1 + ... + lane15|. Each lane is in [-128, 127], so
 * the sum lies in [-2048, 2032]: the 16-bit intermediate additions cannot
 * overflow and abs() is never handed INT_MIN.
 */
static INLINE unsigned int abs_sum_diff_16x1(__m128i acc_diff) {
  const __m128i k_1 = _mm_set1_epi16(1);
  /* Sign-extend bytes to 16 bits: interleaving the register with itself
   * places every byte in the high half of a word, and the arithmetic right
   * shift by 8 then brings it down with its sign replicated. */
  const __m128i acc_diff_lo = _mm_srai_epi16(
      _mm_unpacklo_epi8(acc_diff, acc_diff), 8);
  const __m128i acc_diff_hi = _mm_srai_epi16(
      _mm_unpackhi_epi8(acc_diff, acc_diff), 8);
  /* Eight 16-bit partial sums (lane i + lane i+8). */
  const __m128i acc_diff_16 = _mm_add_epi16(acc_diff_lo, acc_diff_hi);
  /* Multiplying by 1 and adding adjacent pairs widens to four 32-bit sums. */
  const __m128i hg_fe_dc_ba = _mm_madd_epi16(acc_diff_16, k_1);
  /* Horizontal reduction: fold the upper 64 bits onto the lower 64 bits,
   * then fold the second dword onto the first. */
  const __m128i hgfe_dcba = _mm_add_epi32(hg_fe_dc_ba,
                                          _mm_srli_si128(hg_fe_dc_ba, 8));
  const __m128i hgfedcba = _mm_add_epi32(hgfe_dcba,
                                         _mm_srli_si128(hgfe_dcba, 4));

  /* The total is in the lowest 32-bit element. */
  return (unsigned int)abs(_mm_cvtsi128_si32(hgfedcba));
}
375c1d3b27608a3f3f6028c069b9bf066a4de474b6hclam@chromium.org
387765c078fa920ba6c949c15f16b6cc979d8bb95bjohannkoenig@chromium.orgint vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y,
397765c078fa920ba6c949c15f16b6cc979d8bb95bjohannkoenig@chromium.org                             int mc_avg_y_stride,
407765c078fa920ba6c949c15f16b6cc979d8bb95bjohannkoenig@chromium.org                             unsigned char *running_avg_y, int avg_y_stride,
417765c078fa920ba6c949c15f16b6cc979d8bb95bjohannkoenig@chromium.org                             unsigned char *sig, int sig_stride,
42118f379ec73bf762ee63784bc5f41ffd41107470johannkoenig@chromium.org                             unsigned int motion_magnitude,
43118f379ec73bf762ee63784bc5f41ffd41107470johannkoenig@chromium.org                             int increase_denoising)
445c1d3b27608a3f3f6028c069b9bf066a4de474b6hclam@chromium.org{
457765c078fa920ba6c949c15f16b6cc979d8bb95bjohannkoenig@chromium.org    unsigned char *running_avg_y_start = running_avg_y;
467765c078fa920ba6c949c15f16b6cc979d8bb95bjohannkoenig@chromium.org    unsigned char *sig_start = sig;
47e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org    unsigned int sum_diff_thresh;
48ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org    int r;
49118f379ec73bf762ee63784bc5f41ffd41107470johannkoenig@chromium.org    int shift_inc  = (increase_denoising &&
50118f379ec73bf762ee63784bc5f41ffd41107470johannkoenig@chromium.org        motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 1 : 0;
51ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org    __m128i acc_diff = _mm_setzero_si128();
52ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org    const __m128i k_0 = _mm_setzero_si128();
53118f379ec73bf762ee63784bc5f41ffd41107470johannkoenig@chromium.org    const __m128i k_4 = _mm_set1_epi8(4 + shift_inc);
54ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org    const __m128i k_8 = _mm_set1_epi8(8);
55ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org    const __m128i k_16 = _mm_set1_epi8(16);
56ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org    /* Modify each level's adjustment according to motion_magnitude. */
57ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org    const __m128i l3 = _mm_set1_epi8(
58118f379ec73bf762ee63784bc5f41ffd41107470johannkoenig@chromium.org                       (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ?
59118f379ec73bf762ee63784bc5f41ffd41107470johannkoenig@chromium.org                        7 + shift_inc : 6);
60ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org    /* Difference between level 3 and level 2 is 2. */
61ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org    const __m128i l32 = _mm_set1_epi8(2);
62ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org    /* Difference between level 2 and level 1 is 1. */
63ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org    const __m128i l21 = _mm_set1_epi8(1);
645c1d3b27608a3f3f6028c069b9bf066a4de474b6hclam@chromium.org
655c1d3b27608a3f3f6028c069b9bf066a4de474b6hclam@chromium.org    for (r = 0; r < 16; ++r)
665c1d3b27608a3f3f6028c069b9bf066a4de474b6hclam@chromium.org    {
67ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        /* Calculate differences */
68ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        const __m128i v_sig = _mm_loadu_si128((__m128i *)(&sig[0]));
69ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        const __m128i v_mc_running_avg_y = _mm_loadu_si128(
70ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org                                           (__m128i *)(&mc_running_avg_y[0]));
715c1d3b27608a3f3f6028c069b9bf066a4de474b6hclam@chromium.org        __m128i v_running_avg_y;
72ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig);
73ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y);
74ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        /* Obtain the sign. FF if diff is negative. */
75ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0);
76ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        /* Clamp absolute difference to 16 to be used to get mask. Doing this
77ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org         * allows us to use _mm_cmpgt_epi8, which operates on signed byte. */
78ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        const __m128i clamped_absdiff = _mm_min_epu8(
79ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org                                        _mm_or_si128(pdiff, ndiff), k_16);
80ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        /* Get masks for l2 l1 and l0 adjustments */
81ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        const __m128i mask2 = _mm_cmpgt_epi8(k_16, clamped_absdiff);
82ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        const __m128i mask1 = _mm_cmpgt_epi8(k_8, clamped_absdiff);
83ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        const __m128i mask0 = _mm_cmpgt_epi8(k_4, clamped_absdiff);
84ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        /* Get adjustments for l2, l1, and l0 */
85ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        __m128i adj2 = _mm_and_si128(mask2, l32);
86ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        const __m128i adj1 = _mm_and_si128(mask1, l21);
87ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        const __m128i adj0 = _mm_and_si128(mask0, clamped_absdiff);
88ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        __m128i adj,  padj, nadj;
895c1d3b27608a3f3f6028c069b9bf066a4de474b6hclam@chromium.org
90ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        /* Combine the adjustments and get absolute adjustments. */
91ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        adj2 = _mm_add_epi8(adj2, adj1);
92ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        adj = _mm_sub_epi8(l3, adj2);
93ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        adj = _mm_andnot_si128(mask0, adj);
94ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        adj = _mm_or_si128(adj, adj0);
955c1d3b27608a3f3f6028c069b9bf066a4de474b6hclam@chromium.org
96ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        /* Restore the sign and get positive and negative adjustments. */
97ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        padj = _mm_andnot_si128(diff_sign, adj);
98ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        nadj = _mm_and_si128(diff_sign, adj);
995c1d3b27608a3f3f6028c069b9bf066a4de474b6hclam@chromium.org
100ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        /* Calculate filtered value. */
101ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        v_running_avg_y = _mm_adds_epu8(v_sig, padj);
102ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        v_running_avg_y = _mm_subs_epu8(v_running_avg_y, nadj);
103ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        _mm_storeu_si128((__m128i *)running_avg_y, v_running_avg_y);
1045c1d3b27608a3f3f6028c069b9bf066a4de474b6hclam@chromium.org
105ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        /* Adjustments <=7, and each element in acc_diff can fit in signed
106ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org         * char.
107ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org         */
108ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        acc_diff = _mm_adds_epi8(acc_diff, padj);
109ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        acc_diff = _mm_subs_epi8(acc_diff, nadj);
1105c1d3b27608a3f3f6028c069b9bf066a4de474b6hclam@chromium.org
111ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        /* Update pointers for next iteration. */
1125c1d3b27608a3f3f6028c069b9bf066a4de474b6hclam@chromium.org        sig += sig_stride;
1135c1d3b27608a3f3f6028c069b9bf066a4de474b6hclam@chromium.org        mc_running_avg_y += mc_avg_y_stride;
1145c1d3b27608a3f3f6028c069b9bf066a4de474b6hclam@chromium.org        running_avg_y += avg_y_stride;
1155c1d3b27608a3f3f6028c069b9bf066a4de474b6hclam@chromium.org    }
116ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org
1175c1d3b27608a3f3f6028c069b9bf066a4de474b6hclam@chromium.org    {
118ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org        /* Compute the sum of all pixel differences of this MB. */
119e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        unsigned int abs_sum_diff = abs_sum_diff_16x1(acc_diff);
120118f379ec73bf762ee63784bc5f41ffd41107470johannkoenig@chromium.org        sum_diff_thresh = SUM_DIFF_THRESHOLD;
121118f379ec73bf762ee63784bc5f41ffd41107470johannkoenig@chromium.org        if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH;
122e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        if (abs_sum_diff > sum_diff_thresh) {
12388b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org          // Before returning to copy the block (i.e., apply no denoising),
12488b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org          // checK if we can still apply some (weaker) temporal filtering to
12588b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org          // this block, that would otherwise not be denoised at all. Simplest
12688b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org          // is to apply an additional adjustment to running_avg_y to bring it
12788b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org          // closer to sig. The adjustment is capped by a maximum delta, and
12888b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org          // chosen such that in most cases the resulting sum_diff will be
12988b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org          // within the accceptable range given by sum_diff_thresh.
13088b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org
13188b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org          // The delta is set by the excess of absolute pixel diff over the
13288b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org          // threshold.
133e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org          int delta = ((abs_sum_diff - sum_diff_thresh) >> 8) + 1;
13488b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org          // Only apply the adjustment for max delta up to 3.
13588b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org          if (delta < 4) {
13688b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org            const __m128i k_delta = _mm_set1_epi8(delta);
13788b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org            sig -= sig_stride * 16;
13888b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org            mc_running_avg_y -= mc_avg_y_stride * 16;
13988b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org            running_avg_y -= avg_y_stride * 16;
14088b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org            for (r = 0; r < 16; ++r) {
14188b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org              __m128i v_running_avg_y =
14288b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org                  _mm_loadu_si128((__m128i *)(&running_avg_y[0]));
14388b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org              // Calculate differences.
14488b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org              const __m128i v_sig = _mm_loadu_si128((__m128i *)(&sig[0]));
14588b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org              const __m128i v_mc_running_avg_y =
14688b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org                  _mm_loadu_si128((__m128i *)(&mc_running_avg_y[0]));
14788b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org              const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig);
14888b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org              const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y);
14988b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org              // Obtain the sign. FF if diff is negative.
15088b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org              const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0);
15188b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org              // Clamp absolute difference to delta to get the adjustment.
15288b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org              const __m128i adj =
15388b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org                  _mm_min_epu8(_mm_or_si128(pdiff, ndiff), k_delta);
15488b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org              // Restore the sign and get positive and negative adjustments.
15588b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org              __m128i padj, nadj;
15688b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org              padj = _mm_andnot_si128(diff_sign, adj);
15788b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org              nadj = _mm_and_si128(diff_sign, adj);
15888b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org              // Calculate filtered value.
15988b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org              v_running_avg_y = _mm_subs_epu8(v_running_avg_y, padj);
16088b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org              v_running_avg_y = _mm_adds_epu8(v_running_avg_y, nadj);
16188b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org             _mm_storeu_si128((__m128i *)running_avg_y, v_running_avg_y);
16288b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org
16388b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org             // Accumulate the adjustments.
16488b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org             acc_diff = _mm_subs_epi8(acc_diff, padj);
16588b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org             acc_diff = _mm_adds_epi8(acc_diff, nadj);
16688b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org
16788b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org             // Update pointers for next iteration.
16888b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org             sig += sig_stride;
16988b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org             mc_running_avg_y += mc_avg_y_stride;
17088b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org             running_avg_y += avg_y_stride;
17188b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org            }
172e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org            abs_sum_diff = abs_sum_diff_16x1(acc_diff);
173e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org            if (abs_sum_diff > sum_diff_thresh) {
174e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org              return COPY_BLOCK;
17588b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org            }
17688b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org          } else {
1775c1d3b27608a3f3f6028c069b9bf066a4de474b6hclam@chromium.org            return COPY_BLOCK;
17888b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org          }
1795c1d3b27608a3f3f6028c069b9bf066a4de474b6hclam@chromium.org        }
1805c1d3b27608a3f3f6028c069b9bf066a4de474b6hclam@chromium.org    }
181ed759d81a39febed3a8a395386639d54307504aagrunell@chromium.org
1827765c078fa920ba6c949c15f16b6cc979d8bb95bjohannkoenig@chromium.org    vp8_copy_mem16x16(running_avg_y_start, avg_y_stride, sig_start, sig_stride);
1835c1d3b27608a3f3f6028c069b9bf066a4de474b6hclam@chromium.org    return FILTER_BLOCK;
1845c1d3b27608a3f3f6028c069b9bf066a4de474b6hclam@chromium.org}
185e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org
186e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.orgint vp8_denoiser_filter_uv_sse2(unsigned char *mc_running_avg,
187e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org                             int mc_avg_stride,
188e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org                             unsigned char *running_avg, int avg_stride,
189e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org                             unsigned char *sig, int sig_stride,
190e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org                             unsigned int motion_magnitude,
191e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org                             int increase_denoising) {
192e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org    unsigned char *running_avg_start = running_avg;
193e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org    unsigned char *sig_start = sig;
194e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org    unsigned int sum_diff_thresh;
195e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org    int r;
196e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org    int shift_inc  = (increase_denoising &&
197e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD_UV) ? 1 : 0;
198e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org    __m128i acc_diff = _mm_setzero_si128();
199e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org    const __m128i k_0 = _mm_setzero_si128();
200e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org    const __m128i k_4 = _mm_set1_epi8(4 + shift_inc);
201e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org    const __m128i k_8 = _mm_set1_epi8(8);
202e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org    const __m128i k_16 = _mm_set1_epi8(16);
203e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org    /* Modify each level's adjustment according to motion_magnitude. */
204e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org    const __m128i l3 = _mm_set1_epi8(
205e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org                       (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD_UV) ?
206e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org                        7 + shift_inc : 6);
207e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org    /* Difference between level 3 and level 2 is 2. */
208e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org    const __m128i l32 = _mm_set1_epi8(2);
209e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org    /* Difference between level 2 and level 1 is 1. */
210e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org    const __m128i l21 = _mm_set1_epi8(1);
211e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org
212e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org    {
213e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org      const __m128i k_1 = _mm_set1_epi16(1);
214e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org      __m128i vec_sum_block = _mm_setzero_si128();
215e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org
216e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org      // Avoid denoising color signal if its close to average level.
217e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org      for (r = 0; r < 8; ++r) {
218e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        const __m128i v_sig = _mm_loadl_epi64((__m128i *)(&sig[0]));
219e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        const __m128i v_sig_unpack = _mm_unpacklo_epi8(v_sig, k_0);
220e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        vec_sum_block = _mm_add_epi16(vec_sum_block, v_sig_unpack);
221e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        sig += sig_stride;
222e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org      }
223e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org      sig -= sig_stride * 8;
224e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org      {
225e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        const __m128i hg_fe_dc_ba = _mm_madd_epi16(vec_sum_block, k_1);
226e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        const __m128i hgfe_dcba = _mm_add_epi32(hg_fe_dc_ba,
227e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org                                                _mm_srli_si128(hg_fe_dc_ba, 8));
228e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        const __m128i hgfedcba = _mm_add_epi32(hgfe_dcba,
229e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org                                               _mm_srli_si128(hgfe_dcba, 4));
230e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        const int sum_block = _mm_cvtsi128_si32(hgfedcba);
231e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        if (abs(sum_block - (128 * 8 * 8)) < SUM_DIFF_FROM_AVG_THRESH_UV) {
232e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org          return COPY_BLOCK;
233e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        }
234e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org      }
235e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org    }
236e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org
237e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org    for (r = 0; r < 4; ++r) {
238e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        /* Calculate differences */
239e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        const __m128i v_sig_low = _mm_castpd_si128(
240e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org            _mm_load_sd((double *)(&sig[0])));
241e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        const __m128i v_sig = _mm_castpd_si128(
242e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org            _mm_loadh_pd(_mm_castsi128_pd(v_sig_low),
243e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org                         (double *)(&sig[sig_stride])));
244e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        const __m128i v_mc_running_avg_low = _mm_castpd_si128(
245e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org            _mm_load_sd((double *)(&mc_running_avg[0])));
246e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        const __m128i v_mc_running_avg = _mm_castpd_si128(
247e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org            _mm_loadh_pd(_mm_castsi128_pd(v_mc_running_avg_low),
248e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org                         (double *)(&mc_running_avg[mc_avg_stride])));
249e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg, v_sig);
250e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg);
251e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        /* Obtain the sign. FF if diff is negative. */
252e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0);
253e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        /* Clamp absolute difference to 16 to be used to get mask. Doing this
254e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org         * allows us to use _mm_cmpgt_epi8, which operates on signed byte. */
255e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        const __m128i clamped_absdiff = _mm_min_epu8(
256e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org                                        _mm_or_si128(pdiff, ndiff), k_16);
257e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        /* Get masks for l2 l1 and l0 adjustments */
258e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        const __m128i mask2 = _mm_cmpgt_epi8(k_16, clamped_absdiff);
259e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        const __m128i mask1 = _mm_cmpgt_epi8(k_8, clamped_absdiff);
260e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        const __m128i mask0 = _mm_cmpgt_epi8(k_4, clamped_absdiff);
261e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        /* Get adjustments for l2, l1, and l0 */
262e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        __m128i adj2 = _mm_and_si128(mask2, l32);
263e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        const __m128i adj1 = _mm_and_si128(mask1, l21);
264e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        const __m128i adj0 = _mm_and_si128(mask0, clamped_absdiff);
265e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        __m128i adj,  padj, nadj;
266e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        __m128i v_running_avg;
267e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org
268e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        /* Combine the adjustments and get absolute adjustments. */
269e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        adj2 = _mm_add_epi8(adj2, adj1);
270e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        adj = _mm_sub_epi8(l3, adj2);
271e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        adj = _mm_andnot_si128(mask0, adj);
272e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        adj = _mm_or_si128(adj, adj0);
273e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org
274e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        /* Restore the sign and get positive and negative adjustments. */
275e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        padj = _mm_andnot_si128(diff_sign, adj);
276e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        nadj = _mm_and_si128(diff_sign, adj);
277e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org
278e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        /* Calculate filtered value. */
279e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        v_running_avg = _mm_adds_epu8(v_sig, padj);
280e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        v_running_avg = _mm_subs_epu8(v_running_avg, nadj);
281e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org
282e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        _mm_storel_pd((double *)&running_avg[0],
283e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org                      _mm_castsi128_pd(v_running_avg));
284e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        _mm_storeh_pd((double *)&running_avg[avg_stride],
285e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org                      _mm_castsi128_pd(v_running_avg));
286e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org
287e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        /* Adjustments <=7, and each element in acc_diff can fit in signed
288e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org         * char.
289e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org         */
290e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        acc_diff = _mm_adds_epi8(acc_diff, padj);
291e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        acc_diff = _mm_subs_epi8(acc_diff, nadj);
292e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org
293e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        /* Update pointers for next iteration. */
294e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        sig += sig_stride * 2;
295e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        mc_running_avg += mc_avg_stride * 2;
296e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        running_avg += avg_stride * 2;
297e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org    }
298e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org
299e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org    {
300e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        unsigned int abs_sum_diff = abs_sum_diff_16x1(acc_diff);
301e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        sum_diff_thresh = SUM_DIFF_THRESHOLD_UV;
302e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH_UV;
303e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        if (abs_sum_diff > sum_diff_thresh) {
304e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org          // Before returning to copy the block (i.e., apply no denoising),
305e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org          // check if we can still apply some (weaker) temporal filtering to
306e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org          // this block, that would otherwise not be denoised at all. Simplest
307e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org          // is to apply an additional adjustment to running_avg to bring it
308e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org          // closer to sig. The adjustment is capped by a maximum delta, and
309e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org          // chosen such that in most cases the resulting sum_diff will be
310e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org          // within the acceptable range given by sum_diff_thresh.
311e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org
312e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org          // The delta is set by the excess of absolute pixel diff over the
313e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org          // threshold.
314e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org          int delta = ((abs_sum_diff - sum_diff_thresh) >> 8) + 1;
315e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org          // Only apply the adjustment for max delta up to 3.
316e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org          if (delta < 4) {
317e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org            const __m128i k_delta = _mm_set1_epi8(delta);
318e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org            sig -= sig_stride * 8;
319e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org            mc_running_avg -= mc_avg_stride * 8;
320e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org            running_avg -= avg_stride * 8;
321e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org            for (r = 0; r < 4; ++r) {
322e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org              // Calculate differences.
323e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org              const __m128i v_sig_low = _mm_castpd_si128(
324e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org                  _mm_load_sd((double *)(&sig[0])));
325e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org              const __m128i v_sig = _mm_castpd_si128(
326e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org                  _mm_loadh_pd(_mm_castsi128_pd(v_sig_low),
327e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org                               (double *)(&sig[sig_stride])));
328e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org              const __m128i v_mc_running_avg_low = _mm_castpd_si128(
329e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org                  _mm_load_sd((double *)(&mc_running_avg[0])));
330e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org              const __m128i v_mc_running_avg = _mm_castpd_si128(
331e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org                  _mm_loadh_pd(_mm_castsi128_pd(v_mc_running_avg_low),
332e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org                               (double *)(&mc_running_avg[mc_avg_stride])));
333e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org              const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg, v_sig);
334e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org              const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg);
335e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org              // Obtain the sign. FF if diff is negative.
336e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org              const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0);
337e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org              // Clamp absolute difference to delta to get the adjustment.
338e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org              const __m128i adj =
339e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org                  _mm_min_epu8(_mm_or_si128(pdiff, ndiff), k_delta);
340e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org              // Restore the sign and get positive and negative adjustments.
341e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org              __m128i padj, nadj;
342e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org              const __m128i v_running_avg_low = _mm_castpd_si128(
343e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org                  _mm_load_sd((double *)(&running_avg[0])));
344e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org              __m128i v_running_avg = _mm_castpd_si128(
345e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org                  _mm_loadh_pd(_mm_castsi128_pd(v_running_avg_low),
346e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org                               (double *)(&running_avg[avg_stride])));
347e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org              padj = _mm_andnot_si128(diff_sign, adj);
348e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org              nadj = _mm_and_si128(diff_sign, adj);
349e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org              // Calculate filtered value.
350e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org              v_running_avg = _mm_subs_epu8(v_running_avg, padj);
351e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org              v_running_avg = _mm_adds_epu8(v_running_avg, nadj);
352e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org
353e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org              _mm_storel_pd((double *)&running_avg[0],
354e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org                            _mm_castsi128_pd(v_running_avg));
355e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org              _mm_storeh_pd((double *)&running_avg[avg_stride],
356e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org                            _mm_castsi128_pd(v_running_avg));
357e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org
358e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org             // Accumulate the adjustments.
359e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org             acc_diff = _mm_subs_epi8(acc_diff, padj);
360e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org             acc_diff = _mm_adds_epi8(acc_diff, nadj);
361e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org
362e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org             // Update pointers for next iteration.
363e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org             sig += sig_stride * 2;
364e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org             mc_running_avg += mc_avg_stride * 2;
365e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org             running_avg += avg_stride * 2;
366e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org            }
367e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org            abs_sum_diff = abs_sum_diff_16x1(acc_diff);
368e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org            if (abs_sum_diff > sum_diff_thresh) {
369e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org              return COPY_BLOCK;
370e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org            }
371e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org          } else {
372e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org            return COPY_BLOCK;
373e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org          }
374e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org        }
375e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org    }
376e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org
377e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org    vp8_copy_mem8x8(running_avg_start, avg_stride, sig_start, sig_stride);
378e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org    return FILTER_BLOCK;
379e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org}
380