/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
1090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
111b362b15af34006e6a11974088a46d42b903418eJohann#include "vpx_config.h"
121b362b15af34006e6a11974088a46d42b903418eJohann#include "vp8/common/variance.h"
1379f15823c34ae1e423108295e416213200bb280fAndreas Huber#include "vp8/common/pragmas.h"
1490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#include "vpx_ports/mem.h"
151b362b15af34006e6a11974088a46d42b903418eJohann#include "vp8/common/x86/filter_x86.h"
1690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/*
 * Prototype for a routine implemented outside this file.  Nothing in the
 * visible part of this file defines or calls it.
 * NOTE(review): the name suggests a horizontal 6-tap 1-D filter pass for
 * MMX - confirm against the external implementation.
 */
extern void filter_block1d_h6_mmx
(
    const unsigned char *src_ptr,
    unsigned short *output_ptr,
    unsigned int src_pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    short *filter
);
/*
 * Prototype for a routine implemented outside this file.  Nothing in the
 * visible part of this file defines or calls it.
 * NOTE(review): the name suggests a vertical 6-tap 1-D filter pass for
 * MMX - confirm against the external implementation.
 */
extern void filter_block1d_v6_mmx
(
    const short *src_ptr,
    unsigned char *output_ptr,
    unsigned int pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    short *filter
);
3790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/*
 * Prototype for a routine implemented outside this file; it is not called
 * in the visible part of this file.
 * NOTE(review): presumably returns a sum of squares over src_ptr - confirm
 * against the external implementation.
 */
extern unsigned int vp8_get_mb_ss_mmx(const short *src_ptr);
/*
 * Prototype for a routine implemented outside this file.
 *
 * Callers in this file pass the addresses of locals for SSE and Sum, read
 * both after the call, and ignore the return value.
 * NOTE(review): presumably writes the sum of squared differences to *SSE
 * and the sum of differences to *Sum for one 8x8 block - confirm against
 * the external implementation.
 */
extern unsigned int vp8_get8x8var_mmx
(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *SSE,
    int *Sum
);
/*
 * Prototype for a routine implemented outside this file.
 *
 * The caller in this file passes the addresses of locals for SSE and Sum,
 * reads both after the call, and ignores the return value.
 * NOTE(review): presumably writes the sum of squared differences to *SSE
 * and the sum of differences to *Sum for one 4x4 block - confirm against
 * the external implementation.
 */
extern unsigned int vp8_get4x4var_mmx
(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *SSE,
    int *Sum
);
/*
 * Prototype for a routine implemented outside this file.
 *
 * The caller in this file passes one filter row selected by the x offset
 * as HFilter, one selected by the y offset as VFilter, and reads *sum and
 * *sumsquared after the call.
 * NOTE(review): presumably applies a 2-D bilinear filter to a 4x4 block
 * and accumulates the difference statistics - confirm against the
 * external implementation.
 */
extern void vp8_filter_block2d_bil4x4_var_mmx
(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    const short *HFilter,
    const short *VFilter,
    int *sum,
    unsigned int *sumsquared
);
/*
 * Prototype for a routine implemented outside this file.
 *
 * Callers in this file pass 8 or 16 as Height, one filter row selected by
 * the x offset as HFilter, one selected by the y offset as VFilter, and
 * read *sum and *sumsquared after the call.  The 16-wide callers invoke it
 * twice, the second time with both pointers advanced by 8.
 * NOTE(review): presumably processes an 8-pixel-wide column of Height rows
 * per call - confirm against the external implementation.
 */
extern void vp8_filter_block2d_bil_var_mmx
(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    unsigned int Height,
    const short *HFilter,
    const short *VFilter,
    int *sum,
    unsigned int *sumsquared
);
8090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
8190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/*
 * Variance of a 4x4 block.
 *
 * Calls vp8_get4x4var_mmx() to obtain the SSE and the sum, stores the SSE
 * in *sse and returns SSE - ((sum * sum) >> 4).  The shift divides by 16.
 *
 * src_ptr / source_stride : first block and its stride
 * ref_ptr / recon_stride  : second block and its stride
 * sse                     : receives the SSE value
 */
unsigned int vp8_variance4x4_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int var;
    int sum;

    vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &sum);
    *sse = var;

    /* The cast makes the multiplication unsigned rather than signed. */
    return (var - (((unsigned int)sum * sum) >> 4));
}
9790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/*
 * Variance of an 8x8 block.
 *
 * Calls vp8_get8x8var_mmx() to obtain the SSE and the sum, stores the SSE
 * in *sse and returns SSE - ((sum * sum) >> 6).  The shift divides by 64.
 *
 * src_ptr / source_stride : first block and its stride
 * ref_ptr / recon_stride  : second block and its stride
 * sse                     : receives the SSE value
 */
unsigned int vp8_variance8x8_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int var;
    int sum;

    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &sum);
    *sse = var;

    /* The cast makes the multiplication unsigned rather than signed. */
    return (var - (((unsigned int)sum * sum) >> 6));
}
11490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/*
 * SSE of a 16x16 block, built from four 8x8 calls.
 *
 * vp8_get8x8var_mmx() is invoked on the top-left, top-right (+8 columns),
 * bottom-left (+8 rows) and bottom-right quadrants.  The four SSE values
 * are added, stored in *sse and also returned.  The per-quadrant sums are
 * collected only because the helper requires the output pointer; they are
 * not used.
 */
unsigned int vp8_mse16x16_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sse0, sse1, sse2, sse3, var;
    int sum0, sum1, sum2, sum3;

    vp8_get8x8var_mmx(src_ptr, source_stride,
                      ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride,
                      ref_ptr + 8, recon_stride, &sse1, &sum1);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride,
                      ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride,
                      ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);

    var = sse0 + sse1 + sse2 + sse3;
    *sse = var;
    return var;
}
13590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
13690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/*
 * Variance of a 16x16 block, built from four 8x8 calls.
 *
 * vp8_get8x8var_mmx() is invoked on the top-left, top-right (+8 columns),
 * bottom-left (+8 rows) and bottom-right quadrants.  The SSE values and
 * the sums are each added up; the total SSE is stored in *sse and the
 * function returns SSE - ((sum * sum) >> 8).  The shift divides by 256.
 */
unsigned int vp8_variance16x16_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sse0, sse1, sse2, sse3, var;
    int sum0, sum1, sum2, sum3, sum;

    vp8_get8x8var_mmx(src_ptr, source_stride,
                      ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride,
                      ref_ptr + 8, recon_stride, &sse1, &sum1);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride,
                      ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride,
                      ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);

    var = sse0 + sse1 + sse2 + sse3;
    sum = sum0 + sum1 + sum2 + sum3;
    *sse = var;

    /* The cast makes the multiplication unsigned rather than signed. */
    return (var - (((unsigned int)sum * sum) >> 8));
}
15890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/*
 * Variance of a 16-wide, 8-high block, built from two 8x8 calls.
 *
 * vp8_get8x8var_mmx() is invoked on the left half and on the right half
 * (+8 columns).  The SSE values and the sums are each added up; the total
 * SSE is stored in *sse and the function returns
 * SSE - ((sum * sum) >> 7).  The shift divides by 128.
 */
unsigned int vp8_variance16x8_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sse0, sse1, var;
    int sum0, sum1, sum;

    vp8_get8x8var_mmx(src_ptr, source_stride,
                      ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride,
                      ref_ptr + 8, recon_stride, &sse1, &sum1);

    var = sse0 + sse1;
    sum = sum0 + sum1;
    *sse = var;

    /* The cast makes the multiplication unsigned rather than signed. */
    return (var - (((unsigned int)sum * sum) >> 7));
}
17890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
17990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/*
 * Variance of an 8-wide, 16-high block, built from two 8x8 calls.
 *
 * vp8_get8x8var_mmx() is invoked on the top half and on the bottom half
 * (+8 rows).  The SSE values and the sums are each added up; the total
 * SSE is stored in *sse and the function returns
 * SSE - ((sum * sum) >> 7).  The shift divides by 128.
 */
unsigned int vp8_variance8x16_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sse0, sse1, var;
    int sum0, sum1, sum;

    vp8_get8x8var_mmx(src_ptr, source_stride,
                      ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride,
                      ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1);

    var = sse0 + sse1;
    sum = sum0 + sum1;
    *sse = var;

    /* The cast makes the multiplication unsigned rather than signed. */
    return (var - (((unsigned int)sum * sum) >> 7));
}
20090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
20190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
20290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_sub_pixel_variance4x4_mmx
20390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber(
204538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber    const unsigned char  *src_ptr,
20590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    int  src_pixels_per_line,
20690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    int  xoffset,
20790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    int  yoffset,
208538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber    const unsigned char *dst_ptr,
20990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    int dst_pixels_per_line,
21090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    unsigned int *sse)
21190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
21290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{
21390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    int xsum;
21490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    unsigned int xxsum;
21590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vp8_filter_block2d_bil4x4_var_mmx(
21690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        src_ptr, src_pixels_per_line,
21790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        dst_ptr, dst_pixels_per_line,
2181b362b15af34006e6a11974088a46d42b903418eJohann        vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
21990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        &xsum, &xxsum
22090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    );
22190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    *sse = xxsum;
222ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    return (xxsum - (((unsigned int)xsum * xsum) >> 4));
22390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber}
22490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
22590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
22690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_sub_pixel_variance8x8_mmx
22790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber(
228538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber    const unsigned char  *src_ptr,
22990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    int  src_pixels_per_line,
23090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    int  xoffset,
23190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    int  yoffset,
232538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber    const unsigned char *dst_ptr,
23390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    int dst_pixels_per_line,
23490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    unsigned int *sse
23590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber)
23690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{
23790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
23890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    int xsum;
23990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    unsigned int xxsum;
24090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vp8_filter_block2d_bil_var_mmx(
24190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        src_ptr, src_pixels_per_line,
24290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        dst_ptr, dst_pixels_per_line, 8,
2431b362b15af34006e6a11974088a46d42b903418eJohann        vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
24490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        &xsum, &xxsum
24590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    );
24690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    *sse = xxsum;
247ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    return (xxsum - (((unsigned int)xsum * xsum) >> 6));
24890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber}
24990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
25090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_sub_pixel_variance16x16_mmx
25190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber(
252538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber    const unsigned char  *src_ptr,
25390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    int  src_pixels_per_line,
25490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    int  xoffset,
25590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    int  yoffset,
256538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber    const unsigned char *dst_ptr,
25790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    int dst_pixels_per_line,
25890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    unsigned int *sse
25990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber)
26090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{
26190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
26290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    int xsum0, xsum1;
26390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    unsigned int xxsum0, xxsum1;
26490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
26590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
26690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vp8_filter_block2d_bil_var_mmx(
26790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        src_ptr, src_pixels_per_line,
26890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        dst_ptr, dst_pixels_per_line, 16,
2691b362b15af34006e6a11974088a46d42b903418eJohann        vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
27090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        &xsum0, &xxsum0
27190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    );
27290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
27390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
27490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vp8_filter_block2d_bil_var_mmx(
27590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        src_ptr + 8, src_pixels_per_line,
27690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        dst_ptr + 8, dst_pixels_per_line, 16,
2771b362b15af34006e6a11974088a46d42b903418eJohann        vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
27890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        &xsum1, &xxsum1
27990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    );
28090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
28190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    xsum0 += xsum1;
28290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    xxsum0 += xxsum1;
28390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
28490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    *sse = xxsum0;
285ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
28690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
28790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
28890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber}
28990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/*
 * Sub-pixel SSE of a 16x16 block.
 *
 * Delegates to vp8_sub_pixel_variance16x16_mmx() so that it fills in
 * *sse, discards the variance that call returns, and returns *sse.
 */
unsigned int vp8_sub_pixel_mse16x16_mmx(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    vp8_sub_pixel_variance16x16_mmx(src_ptr, src_pixels_per_line,
                                    xoffset, yoffset,
                                    dst_ptr, dst_pixels_per_line, sse);
    return *sse;
}
30390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
30490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_sub_pixel_variance16x8_mmx
30590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber(
306538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber    const unsigned char  *src_ptr,
30790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    int  src_pixels_per_line,
30890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    int  xoffset,
30990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    int  yoffset,
310538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber    const unsigned char *dst_ptr,
31190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    int dst_pixels_per_line,
31290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    unsigned int *sse
31390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber)
31490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{
31590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    int xsum0, xsum1;
31690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    unsigned int xxsum0, xxsum1;
31790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
31890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
31990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vp8_filter_block2d_bil_var_mmx(
32090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        src_ptr, src_pixels_per_line,
32190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        dst_ptr, dst_pixels_per_line, 8,
3221b362b15af34006e6a11974088a46d42b903418eJohann        vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
32390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        &xsum0, &xxsum0
32490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    );
32590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
32690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
32790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vp8_filter_block2d_bil_var_mmx(
32890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        src_ptr + 8, src_pixels_per_line,
32990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        dst_ptr + 8, dst_pixels_per_line, 8,
3301b362b15af34006e6a11974088a46d42b903418eJohann        vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
33190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        &xsum1, &xxsum1
33290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    );
33390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
33490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    xsum0 += xsum1;
33590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    xxsum0 += xxsum1;
33690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
33790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    *sse = xxsum0;
338ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 7));
33990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber}
34090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
34190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_sub_pixel_variance8x16_mmx
34290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber(
343538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber    const unsigned char  *src_ptr,
34490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    int  src_pixels_per_line,
34590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    int  xoffset,
34690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    int  yoffset,
347538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber    const unsigned char *dst_ptr,
34890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    int dst_pixels_per_line,
3491b362b15af34006e6a11974088a46d42b903418eJohann    unsigned int *sse
35090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber)
35190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{
35290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    int xsum;
35390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    unsigned int xxsum;
35490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vp8_filter_block2d_bil_var_mmx(
35590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        src_ptr, src_pixels_per_line,
35690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        dst_ptr, dst_pixels_per_line, 16,
3571b362b15af34006e6a11974088a46d42b903418eJohann        vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
35890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        &xsum, &xxsum
35990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    );
36090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    *sse = xxsum;
361ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    return (xxsum - (((unsigned int)xsum * xsum) >> 7));
36290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber}
363538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber
364538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber
/*
 * 16x16 variance with a fixed horizontal offset.
 *
 * Forwards to vp8_sub_pixel_variance16x16_mmx() with xoffset = 4 and
 * yoffset = 0, returning its result; *sse is filled in by that call.
 * NOTE(review): offset 4 is presumably the half-pixel position, as the
 * function name suggests - confirm against the filter table.
 */
unsigned int vp8_variance_halfpixvar16x16_h_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 0,
                                           ref_ptr, recon_stride, sse);
}
375538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber
376538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber
/*
 * 16x16 variance with a fixed vertical offset.
 *
 * Forwards to vp8_sub_pixel_variance16x16_mmx() with xoffset = 0 and
 * yoffset = 4, returning its result; *sse is filled in by that call.
 * NOTE(review): offset 4 is presumably the half-pixel position, as the
 * function name suggests - confirm against the filter table.
 */
unsigned int vp8_variance_halfpixvar16x16_v_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 0, 4,
                                           ref_ptr, recon_stride, sse);
}
387538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber
388538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber
/*
 * 16x16 variance with fixed horizontal and vertical offsets.
 *
 * Forwards to vp8_sub_pixel_variance16x16_mmx() with xoffset = 4 and
 * yoffset = 4, returning its result; *sse is filled in by that call.
 * NOTE(review): offset 4 is presumably the half-pixel position, as the
 * function name suggests - confirm against the filter table.
 */
unsigned int vp8_variance_halfpixvar16x16_hv_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 4,
                                           ref_ptr, recon_stride, sse);
}
399