190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber/*
2f71323e297a928af368937089d3ed71239786f86Andreas Huber *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber *
4f71323e297a928af368937089d3ed71239786f86Andreas Huber *  Use of this source code is governed by a BSD-style license
5f71323e297a928af368937089d3ed71239786f86Andreas Huber *  that can be found in the LICENSE file in the root of the source
6f71323e297a928af368937089d3ed71239786f86Andreas Huber *  tree. An additional intellectual property rights grant can be found
7f71323e297a928af368937089d3ed71239786f86Andreas Huber *  in the file PATENTS.  All contributing project authors may
8f71323e297a928af368937089d3ed71239786f86Andreas Huber *  be found in the AUTHORS file in the root of the source tree.
990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber */
1090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
111b362b15af34006e6a11974088a46d42b903418eJohann#include "vpx_config.h"
121b362b15af34006e6a11974088a46d42b903418eJohann#include "vp8/common/variance.h"
1379f15823c34ae1e423108295e416213200bb280fAndreas Huber#include "vp8/common/pragmas.h"
1490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#include "vpx_ports/mem.h"
151b362b15af34006e6a11974088a46d42b903418eJohann#include "vp8/common/x86/filter_x86.h"
1690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/*
 * 1-D filter kernels that are defined outside this file (presumably in the
 * x86 assembly sources -- confirm). They are only declared here; none of the
 * wrappers in the reviewed portion of this file calls them.
 */
extern void filter_block1d_h6_mmx(const unsigned char *src_ptr,
                                  unsigned short *output_ptr,
                                  unsigned int src_pixels_per_line,
                                  unsigned int pixel_step,
                                  unsigned int output_height,
                                  unsigned int output_width,
                                  short *filter);
extern void filter_block1d_v6_mmx(const short *src_ptr,
                                  unsigned char *output_ptr,
                                  unsigned int pixels_per_line,
                                  unsigned int pixel_step,
                                  unsigned int output_height,
                                  unsigned int output_width,
                                  short *filter);
extern void filter_block1d8_h6_sse2(const unsigned char *src_ptr,
                                    unsigned short *output_ptr,
                                    unsigned int src_pixels_per_line,
                                    unsigned int pixel_step,
                                    unsigned int output_height,
                                    unsigned int output_width,
                                    short *filter);
extern void filter_block1d8_v6_sse2(const short *src_ptr,
                                    unsigned char *output_ptr,
                                    unsigned int pixels_per_line,
                                    unsigned int pixel_step,
                                    unsigned int output_height,
                                    unsigned int output_width,
                                    short *filter);
2190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/*
 * MMX kernels used by the 4x4 wrappers below; both are defined outside this
 * file.
 *
 * vp8_filter_block2d_bil4x4_var_mmx is called with one filter row per
 * direction (HFilter, VFilter) and reports its results through *sum and
 * *sumsquared.
 */
extern void vp8_filter_block2d_bil4x4_var_mmx(const unsigned char *ref_ptr,
                                              int ref_pixels_per_line,
                                              const unsigned char *src_ptr,
                                              int src_pixels_per_line,
                                              const short *HFilter,
                                              const short *VFilter,
                                              int *sum,
                                              unsigned int *sumsquared);

/*
 * Reports its results through *SSE and *Sum. The wrapper in this file
 * ignores the return value.
 */
extern unsigned int vp8_get4x4var_mmx(const unsigned char *src_ptr,
                                      int source_stride,
                                      const unsigned char *ref_ptr,
                                      int recon_stride,
                                      unsigned int *SSE,
                                      int *Sum);
4390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/*
 * SSE2 kernels defined outside this file.
 *
 * The two "var" kernels report their results through *SSE and *Sum; the
 * wrappers in this file ignore their return values.
 */
unsigned int vp8_get_mb_ss_sse2(const short *src_ptr);

unsigned int vp8_get16x16var_sse2(const unsigned char *src_ptr,
                                  int source_stride,
                                  const unsigned char *ref_ptr,
                                  int recon_stride,
                                  unsigned int *SSE,
                                  int *Sum);

unsigned int vp8_get8x8var_sse2(const unsigned char *src_ptr,
                                int source_stride,
                                const unsigned char *ref_ptr,
                                int recon_stride,
                                unsigned int *SSE,
                                int *Sum);
/*
 * Sub-pixel SSE2 kernels defined outside this file. Each one takes a row
 * count (Height) and reports its results through *sum and *sumsquared.
 *
 * The general kernel takes explicit x/y offsets. The wrappers below call it
 * once for 8-wide blocks and twice (columns 0 and 8) for 16-wide blocks,
 * which suggests it processes 8 columns per call -- confirm against the
 * implementation.
 */
void vp8_filter_block2d_bil_var_sse2(const unsigned char *ref_ptr,
                                     int ref_pixels_per_line,
                                     const unsigned char *src_ptr,
                                     int src_pixels_per_line,
                                     unsigned int Height,
                                     int xoffset,
                                     int yoffset,
                                     int *sum,
                                     unsigned int *sumsquared);

/*
 * Dedicated kernels that the wrappers select when an offset equals 4
 * (horizontal + vertical, horizontal only, vertical only), each in an
 * "8x" and a "16x" flavour.
 */
void vp8_half_horiz_vert_variance8x_h_sse2(const unsigned char *ref_ptr,
                                           int ref_pixels_per_line,
                                           const unsigned char *src_ptr,
                                           int src_pixels_per_line,
                                           unsigned int Height,
                                           int *sum,
                                           unsigned int *sumsquared);

void vp8_half_horiz_vert_variance16x_h_sse2(const unsigned char *ref_ptr,
                                            int ref_pixels_per_line,
                                            const unsigned char *src_ptr,
                                            int src_pixels_per_line,
                                            unsigned int Height,
                                            int *sum,
                                            unsigned int *sumsquared);

void vp8_half_horiz_variance8x_h_sse2(const unsigned char *ref_ptr,
                                      int ref_pixels_per_line,
                                      const unsigned char *src_ptr,
                                      int src_pixels_per_line,
                                      unsigned int Height,
                                      int *sum,
                                      unsigned int *sumsquared);

void vp8_half_horiz_variance16x_h_sse2(const unsigned char *ref_ptr,
                                       int ref_pixels_per_line,
                                       const unsigned char *src_ptr,
                                       int src_pixels_per_line,
                                       unsigned int Height,
                                       int *sum,
                                       unsigned int *sumsquared);

void vp8_half_vert_variance8x_h_sse2(const unsigned char *ref_ptr,
                                     int ref_pixels_per_line,
                                     const unsigned char *src_ptr,
                                     int src_pixels_per_line,
                                     unsigned int Height,
                                     int *sum,
                                     unsigned int *sumsquared);

void vp8_half_vert_variance16x_h_sse2(const unsigned char *ref_ptr,
                                      int ref_pixels_per_line,
                                      const unsigned char *src_ptr,
                                      int src_pixels_per_line,
                                      unsigned int Height,
                                      int *sum,
                                      unsigned int *sumsquared);
13890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/*
 * 4x4 variance wrapper.
 *
 * Runs vp8_get4x4var_mmx on the two blocks, stores the squared-error total
 * it reports in *sse, and returns that total minus (sum * sum) / 16.
 */
unsigned int vp8_variance4x4_wmt(const unsigned char *src_ptr,
                                 int source_stride,
                                 const unsigned char *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse)
{
    unsigned int sq_err;
    unsigned int sum_sq;
    int diff_sum;

    vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride,
                      &sq_err, &diff_sum);
    *sse = sq_err;

    /* The cast keeps the squaring in unsigned arithmetic. */
    sum_sq = (unsigned int)diff_sum * diff_sum;

    /* >> 4 divides by 16, the number of pixels in a 4x4 block. */
    return sq_err - (sum_sq >> 4);
}
15490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/*
 * 8x8 variance wrapper.
 *
 * Runs vp8_get8x8var_sse2 on the two blocks, stores the squared-error total
 * it reports in *sse, and returns that total minus (sum * sum) / 64.
 */
unsigned int vp8_variance8x8_wmt(const unsigned char *src_ptr,
                                 int source_stride,
                                 const unsigned char *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse)
{
    unsigned int sq_err;
    unsigned int sum_sq;
    int diff_sum;

    vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride,
                       &sq_err, &diff_sum);
    *sse = sq_err;

    /* The cast keeps the squaring in unsigned arithmetic. */
    sum_sq = (unsigned int)diff_sum * diff_sum;

    /* >> 6 divides by 64, the number of pixels in an 8x8 block. */
    return sq_err - (sum_sq >> 6);
}
17190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
17290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/*
 * 16x16 variance wrapper.
 *
 * Runs vp8_get16x16var_sse2 on the two blocks, stores the squared-error
 * total it reports in *sse, and returns that total minus (sum * sum) / 256.
 */
unsigned int vp8_variance16x16_wmt(const unsigned char *src_ptr,
                                   int source_stride,
                                   const unsigned char *ref_ptr,
                                   int recon_stride,
                                   unsigned int *sse)
{
    unsigned int sq_err;
    unsigned int sum_sq;
    int diff_sum;

    vp8_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride,
                         &sq_err, &diff_sum);
    *sse = sq_err;

    /*
     * The cast keeps the squaring in unsigned arithmetic, so a large sum
     * is squared as unsigned int rather than as a signed int.
     */
    sum_sq = (unsigned int)diff_sum * diff_sum;

    /* >> 8 divides by 256, the number of pixels in a 16x16 block. */
    return sq_err - (sum_sq >> 8);
}
/*
 * 16x16 squared-error wrapper.
 *
 * Runs vp8_get16x16var_sse2 on the two blocks, stores the squared-error
 * total it reports in *sse, and returns that same value. The sum reported
 * by the kernel is not used.
 */
unsigned int vp8_mse16x16_wmt(const unsigned char *src_ptr,
                              int source_stride,
                              const unsigned char *ref_ptr,
                              int recon_stride,
                              unsigned int *sse)
{
    unsigned int sq_err;
    int unused_sum; /* the kernel requires a destination for the sum */

    vp8_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride,
                         &sq_err, &unused_sum);

    *sse = sq_err;
    return sq_err;
}
20490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
20590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/*
 * 16x8 variance wrapper built from two 8x8 kernel calls.
 *
 * The first call covers columns 0-7 and the second starts 8 bytes further
 * along each row. The two squared-error totals and the two sums are added,
 * the combined squared error is stored in *sse, and the function returns
 * it minus (sum * sum) / 128.
 */
unsigned int vp8_variance16x8_wmt(const unsigned char *src_ptr,
                                  int source_stride,
                                  const unsigned char *ref_ptr,
                                  int recon_stride,
                                  unsigned int *sse)
{
    unsigned int left_sse, right_sse, total_sse;
    int left_sum, right_sum, total_sum;

    vp8_get8x8var_sse2(src_ptr, source_stride,
                       ref_ptr, recon_stride,
                       &left_sse, &left_sum);
    vp8_get8x8var_sse2(src_ptr + 8, source_stride,
                       ref_ptr + 8, recon_stride,
                       &right_sse, &right_sum);

    total_sse = left_sse + right_sse;
    total_sum = left_sum + right_sum;
    *sse = total_sse;

    /*
     * Square in unsigned arithmetic; >> 7 divides by 128, the number of
     * pixels in a 16x8 block.
     */
    return total_sse - (((unsigned int)total_sum * total_sum) >> 7);
}
22690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/*
 * 8x16 variance wrapper built from two 8x8 kernel calls.
 *
 * The first call covers rows 0-7 and the second starts 8 rows further down
 * (8 * stride bytes into each buffer). The two squared-error totals and the
 * two sums are added, the combined squared error is stored in *sse, and the
 * function returns it minus (sum * sum) / 128.
 */
unsigned int vp8_variance8x16_wmt(const unsigned char *src_ptr,
                                  int source_stride,
                                  const unsigned char *ref_ptr,
                                  int recon_stride,
                                  unsigned int *sse)
{
    unsigned int top_sse, bottom_sse, total_sse;
    int top_sum, bottom_sum, total_sum;

    vp8_get8x8var_sse2(src_ptr, source_stride,
                       ref_ptr, recon_stride,
                       &top_sse, &top_sum);
    vp8_get8x8var_sse2(src_ptr + 8 * source_stride, source_stride,
                       ref_ptr + 8 * recon_stride, recon_stride,
                       &bottom_sse, &bottom_sum);

    total_sse = top_sse + bottom_sse;
    total_sum = top_sum + bottom_sum;
    *sse = total_sse;

    /*
     * Square in unsigned arithmetic; >> 7 divides by 128, the number of
     * pixels in an 8x16 block.
     */
    return total_sse - (((unsigned int)total_sum * total_sum) >> 7);
}
24790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
24890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_sub_pixel_variance4x4_wmt
24990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber(
250538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber    const unsigned char  *src_ptr,
25190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    int  src_pixels_per_line,
25290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    int  xoffset,
25390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    int  yoffset,
254538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber    const unsigned char *dst_ptr,
25590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    int dst_pixels_per_line,
25690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    unsigned int *sse
25790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber)
25890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{
25990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    int xsum;
26090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    unsigned int xxsum;
26190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vp8_filter_block2d_bil4x4_var_mmx(
26290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        src_ptr, src_pixels_per_line,
26390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        dst_ptr, dst_pixels_per_line,
2641b362b15af34006e6a11974088a46d42b903418eJohann        vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
26590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        &xsum, &xxsum
26690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    );
26790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    *sse = xxsum;
268ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    return (xxsum - (((unsigned int)xsum * xsum) >> 4));
26990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber}
27090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
27190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/*
 * 8x8 sub-pixel variance wrapper.
 *
 * Kernel selection by (xoffset, yoffset):
 *   (4, 0) -> vp8_half_horiz_variance8x_h_sse2
 *   (0, 4) -> vp8_half_vert_variance8x_h_sse2
 *   (4, 4) -> vp8_half_horiz_vert_variance8x_h_sse2
 *   other  -> vp8_filter_block2d_bil_var_sse2 with the offsets passed on
 * Every kernel is run over 8 rows.
 *
 * Stores the squared-error total reported by the kernel in *sse and returns
 * it minus (sum * sum) / 64.
 */
unsigned int vp8_sub_pixel_variance8x8_wmt(const unsigned char *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const unsigned char *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse)
{
    const int x_is_half = (xoffset == 4);
    const int y_is_half = (yoffset == 4);
    int diff_sum;
    unsigned int sq_err;

    if (x_is_half && y_is_half)
    {
        vp8_half_horiz_vert_variance8x_h_sse2(src_ptr, src_pixels_per_line,
                                              dst_ptr, dst_pixels_per_line,
                                              8, &diff_sum, &sq_err);
    }
    else if (x_is_half && yoffset == 0)
    {
        vp8_half_horiz_variance8x_h_sse2(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         8, &diff_sum, &sq_err);
    }
    else if (y_is_half && xoffset == 0)
    {
        vp8_half_vert_variance8x_h_sse2(src_ptr, src_pixels_per_line,
                                        dst_ptr, dst_pixels_per_line,
                                        8, &diff_sum, &sq_err);
    }
    else
    {
        /* Every other offset pair goes through the general kernel. */
        vp8_filter_block2d_bil_var_sse2(src_ptr, src_pixels_per_line,
                                        dst_ptr, dst_pixels_per_line,
                                        8, xoffset, yoffset,
                                        &diff_sum, &sq_err);
    }

    *sse = sq_err;

    /* Square in unsigned arithmetic; >> 6 divides by the 64 pixels. */
    return sq_err - (((unsigned int)diff_sum * diff_sum) >> 6);
}
31990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/*
 * 16x16 sub-pixel variance wrapper.
 *
 * Kernel selection by (xoffset, yoffset):
 *   (4, 0) -> vp8_half_horiz_variance16x_h_sse2
 *   (0, 4) -> vp8_half_vert_variance16x_h_sse2
 *   (4, 4) -> vp8_half_horiz_vert_variance16x_h_sse2
 *   other  -> vp8_filter_block2d_bil_var_sse2, called twice (at column 0 and
 *             at column 8) with the two results added together
 * Every kernel is run over 16 rows.
 *
 * Stores the squared-error total in *sse and returns it minus
 * (sum * sum) / 256.
 *
 * The branching here could be avoided if callers invoked the appropriate
 * kernel themselves.
 */
unsigned int vp8_sub_pixel_variance16x16_wmt(const unsigned char *src_ptr,
                                             int src_pixels_per_line,
                                             int xoffset,
                                             int yoffset,
                                             const unsigned char *dst_ptr,
                                             int dst_pixels_per_line,
                                             unsigned int *sse)
{
    const int x_is_half = (xoffset == 4);
    const int y_is_half = (yoffset == 4);
    int diff_sum, right_sum;
    unsigned int sq_err, right_sq_err;

    if (x_is_half && y_is_half)
    {
        vp8_half_horiz_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                               dst_ptr, dst_pixels_per_line,
                                               16, &diff_sum, &sq_err);
    }
    else if (x_is_half && yoffset == 0)
    {
        vp8_half_horiz_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                          dst_ptr, dst_pixels_per_line,
                                          16, &diff_sum, &sq_err);
    }
    else if (y_is_half && xoffset == 0)
    {
        vp8_half_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         16, &diff_sum, &sq_err);
    }
    else
    {
        /* General case: first call starts at column 0 ... */
        vp8_filter_block2d_bil_var_sse2(src_ptr, src_pixels_per_line,
                                        dst_ptr, dst_pixels_per_line,
                                        16, xoffset, yoffset,
                                        &diff_sum, &sq_err);

        /* ... second call starts at column 8. */
        vp8_filter_block2d_bil_var_sse2(src_ptr + 8, src_pixels_per_line,
                                        dst_ptr + 8, dst_pixels_per_line,
                                        16, xoffset, yoffset,
                                        &right_sum, &right_sq_err);

        diff_sum += right_sum;
        sq_err += right_sq_err;
    }

    *sse = sq_err;

    /* Square in unsigned arithmetic; >> 8 divides by the 256 pixels. */
    return sq_err - (((unsigned int)diff_sum * diff_sum) >> 8);
}
38190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/*
 * 16x16 sub-pixel squared-error wrapper.
 *
 * Delegates to vp8_sub_pixel_variance16x16_wmt, which fills in *sse, then
 * discards the value that function returns and returns *sse instead.
 */
unsigned int vp8_sub_pixel_mse16x16_wmt(const unsigned char *src_ptr,
                                        int src_pixels_per_line,
                                        int xoffset,
                                        int yoffset,
                                        const unsigned char *dst_ptr,
                                        int dst_pixels_per_line,
                                        unsigned int *sse)
{
    /* Only the *sse side effect is wanted from this call. */
    (void)vp8_sub_pixel_variance16x16_wmt(src_ptr, src_pixels_per_line,
                                          xoffset, yoffset,
                                          dst_ptr, dst_pixels_per_line,
                                          sse);

    return *sse;
}
39590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/*
 * Sub-pixel variance for a 16x8 block.
 *
 * The kernel is selected from (xoffset, yoffset):
 *   (4, 0) -> vp8_half_horiz_variance16x_h_sse2
 *   (0, 4) -> vp8_half_vert_variance16x_h_sse2
 *   (4, 4) -> vp8_half_horiz_vert_variance16x_h_sse2
 *   other  -> vp8_filter_block2d_bil_var_sse2, called twice (at byte
 *             offsets 0 and 8 into both buffers) with the two pairs of
 *             outputs added together.
 * Every kernel is invoked with a height argument of 8.
 *
 * Stores the kernel's second output (presumably the sum of squared
 * differences -- confirm against the assembly) in *sse and returns
 * that value minus ((sum * sum) >> 7); 2^7 = 128 = 16 * 8.
 */
unsigned int vp8_sub_pixel_variance16x8_wmt
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    const int x_is_half = (xoffset == 4);
    const int y_is_half = (yoffset == 4);
    int sum;
    unsigned int sq_sum;
    unsigned int sum_u;

    if (x_is_half && y_is_half)
    {
        vp8_half_horiz_vert_variance16x_h_sse2(
            src_ptr, src_pixels_per_line,
            dst_ptr, dst_pixels_per_line, 8,
            &sum, &sq_sum);
    }
    else if (x_is_half && yoffset == 0)
    {
        vp8_half_horiz_variance16x_h_sse2(
            src_ptr, src_pixels_per_line,
            dst_ptr, dst_pixels_per_line, 8,
            &sum, &sq_sum);
    }
    else if (xoffset == 0 && y_is_half)
    {
        vp8_half_vert_variance16x_h_sse2(
            src_ptr, src_pixels_per_line,
            dst_ptr, dst_pixels_per_line, 8,
            &sum, &sq_sum);
    }
    else
    {
        int right_sum;
        unsigned int right_sq_sum;

        /* Left part: starts at the block origin. */
        vp8_filter_block2d_bil_var_sse2(
            src_ptr, src_pixels_per_line,
            dst_ptr, dst_pixels_per_line, 8,
            xoffset, yoffset,
            &sum, &sq_sum);

        /* Right part: starts 8 bytes further along each row. */
        vp8_filter_block2d_bil_var_sse2(
            src_ptr + 8, src_pixels_per_line,
            dst_ptr + 8, dst_pixels_per_line, 8,
            xoffset, yoffset,
            &right_sum, &right_sq_sum);

        sum += right_sum;
        sq_sum += right_sq_sum;
    }

    *sse = sq_sum;

    /* Square in unsigned arithmetic so the product wraps instead of
     * overflowing a signed int. */
    sum_u = (unsigned int)sum;
    return sq_sum - ((sum_u * sum_u) >> 7);
}
45290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/*
 * Sub-pixel variance for an 8x16 block.
 *
 * The kernel is selected from (xoffset, yoffset):
 *   (4, 0) -> vp8_half_horiz_variance8x_h_sse2
 *   (0, 4) -> vp8_half_vert_variance8x_h_sse2
 *   (4, 4) -> vp8_half_horiz_vert_variance8x_h_sse2
 *   other  -> vp8_filter_block2d_bil_var_sse2 (single call)
 * Every kernel is invoked with a height argument of 16.
 *
 * Stores the kernel's second output (presumably the sum of squared
 * differences -- confirm against the assembly) in *sse and returns
 * that value minus ((sum * sum) >> 7); 2^7 = 128 = 8 * 16.
 */
unsigned int vp8_sub_pixel_variance8x16_wmt
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    const int x_is_half = (xoffset == 4);
    const int y_is_half = (yoffset == 4);
    int sum;
    unsigned int sq_sum;
    unsigned int sum_u;

    if (x_is_half && y_is_half)
    {
        vp8_half_horiz_vert_variance8x_h_sse2(
            src_ptr, src_pixels_per_line,
            dst_ptr, dst_pixels_per_line, 16,
            &sum, &sq_sum);
    }
    else if (x_is_half && yoffset == 0)
    {
        vp8_half_horiz_variance8x_h_sse2(
            src_ptr, src_pixels_per_line,
            dst_ptr, dst_pixels_per_line, 16,
            &sum, &sq_sum);
    }
    else if (xoffset == 0 && y_is_half)
    {
        vp8_half_vert_variance8x_h_sse2(
            src_ptr, src_pixels_per_line,
            dst_ptr, dst_pixels_per_line, 16,
            &sum, &sq_sum);
    }
    else
    {
        /* Any other offset pair goes through the general filter. */
        vp8_filter_block2d_bil_var_sse2(
            src_ptr, src_pixels_per_line,
            dst_ptr, dst_pixels_per_line, 16,
            xoffset, yoffset,
            &sum, &sq_sum);
    }

    *sse = sq_sum;

    /* Square in unsigned arithmetic so the product wraps instead of
     * overflowing a signed int. */
    sum_u = (unsigned int)sum;
    return sq_sum - ((sum_u * sum_u) >> 7);
}
50090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
501538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber
/*
 * 16x16 variance using vp8_half_horiz_variance16x_h_sse2 with a height
 * argument of 16.
 *
 * Stores the kernel's second output in *sse and returns that value minus
 * ((sum * sum) >> 8); 2^8 = 256 = 16 * 16.
 */
unsigned int vp8_variance_halfpixvar16x16_h_wmt(
    const unsigned char *src_ptr,
    int  src_pixels_per_line,
    const unsigned char *dst_ptr,
    int  dst_pixels_per_line,
    unsigned int *sse)
{
    int sum;
    unsigned int sq_sum;
    unsigned int sum_u;

    vp8_half_horiz_variance16x_h_sse2(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 16,
        &sum, &sq_sum);

    /* Square in unsigned arithmetic so the product wraps instead of
     * overflowing a signed int. */
    sum_u = (unsigned int)sum;
    *sse = sq_sum;

    return sq_sum - ((sum_u * sum_u) >> 8);
}
520538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber
521538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber
/*
 * 16x16 variance using vp8_half_vert_variance16x_h_sse2 with a height
 * argument of 16.
 *
 * Stores the kernel's second output in *sse and returns that value minus
 * ((sum * sum) >> 8); 2^8 = 256 = 16 * 16.
 */
unsigned int vp8_variance_halfpixvar16x16_v_wmt(
    const unsigned char *src_ptr,
    int  src_pixels_per_line,
    const unsigned char *dst_ptr,
    int  dst_pixels_per_line,
    unsigned int *sse)
{
    int sum;
    unsigned int sq_sum;
    unsigned int sum_u;

    vp8_half_vert_variance16x_h_sse2(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 16,
        &sum, &sq_sum);

    /* Square in unsigned arithmetic so the product wraps instead of
     * overflowing a signed int. */
    sum_u = (unsigned int)sum;
    *sse = sq_sum;

    return sq_sum - ((sum_u * sum_u) >> 8);
}
539538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber
540538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber
/*
 * 16x16 variance using vp8_half_horiz_vert_variance16x_h_sse2 with a
 * height argument of 16.
 *
 * Stores the kernel's second output in *sse and returns that value minus
 * ((sum * sum) >> 8); 2^8 = 256 = 16 * 16.
 */
unsigned int vp8_variance_halfpixvar16x16_hv_wmt(
    const unsigned char *src_ptr,
    int  src_pixels_per_line,
    const unsigned char *dst_ptr,
    int  dst_pixels_per_line,
    unsigned int *sse)
{
    int sum;
    unsigned int sq_sum;
    unsigned int sum_u;

    vp8_half_horiz_vert_variance16x_h_sse2(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 16,
        &sum, &sq_sum);

    /* Square in unsigned arithmetic so the product wraps instead of
     * overflowing a signed int. */
    sum_u = (unsigned int)sum;
    *sse = sq_sum;

    return sq_sum - ((sum_u * sum_u) >> 8);
}
559