/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
1079f15823c34ae1e423108295e416213200bb280fAndreas Huber
111b362b15af34006e6a11974088a46d42b903418eJohann#include "vpx_config.h"
121b362b15af34006e6a11974088a46d42b903418eJohann#include "vp8/common/variance.h"
1379f15823c34ae1e423108295e416213200bb280fAndreas Huber#include "vp8/common/pragmas.h"
1479f15823c34ae1e423108295e416213200bb280fAndreas Huber#include "vpx_ports/mem.h"
1579f15823c34ae1e423108295e416213200bb280fAndreas Huber
/*
 * Prototypes for kernels that are defined outside this file.
 * All of them report their results through the trailing output pointers.
 */

/* NOTE(review): not called by the wrappers visible in this part of the
 * file - confirm whether the declaration is still required.
 */
extern unsigned int vp8_get16x16var_sse2(const unsigned char *src_ptr,
                                         int source_stride,
                                         const unsigned char *ref_ptr,
                                         int recon_stride,
                                         unsigned int *SSE,
                                         int *Sum);

/* Used by the wrappers below when xoffset == 4 and yoffset == 4. */
extern void vp8_half_horiz_vert_variance16x_h_sse2(const unsigned char *ref_ptr,
                                                   int ref_pixels_per_line,
                                                   const unsigned char *src_ptr,
                                                   int src_pixels_per_line,
                                                   unsigned int Height,
                                                   int *sum,
                                                   unsigned int *sumsquared);

/* Used by the wrappers below when xoffset == 4 and yoffset == 0. */
extern void vp8_half_horiz_variance16x_h_sse2(const unsigned char *ref_ptr,
                                              int ref_pixels_per_line,
                                              const unsigned char *src_ptr,
                                              int src_pixels_per_line,
                                              unsigned int Height,
                                              int *sum,
                                              unsigned int *sumsquared);

/* Used by the wrappers below when xoffset == 0 and yoffset == 4. */
extern void vp8_half_vert_variance16x_h_sse2(const unsigned char *ref_ptr,
                                             int ref_pixels_per_line,
                                             const unsigned char *src_ptr,
                                             int src_pixels_per_line,
                                             unsigned int Height,
                                             int *sum,
                                             unsigned int *sumsquared);

/* General kernel: used for every other (xoffset, yoffset) combination and
 * therefore the only one that receives the offsets themselves.
 */
extern void vp8_filter_block2d_bil_var_ssse3(const unsigned char *ref_ptr,
                                             int ref_pixels_per_line,
                                             const unsigned char *src_ptr,
                                             int src_pixels_per_line,
                                             unsigned int Height,
                                             int xoffset,
                                             int yoffset,
                                             int *sum,
                                             unsigned int *sumsquared);
6779f15823c34ae1e423108295e416213200bb280fAndreas Huber
/*
 * Sub-pixel variance over a block of 16 rows (16x16 per the function name).
 *
 * Picks one of four externally defined kernels based on (xoffset, yoffset),
 * lets it accumulate the sum of differences and the sum of squared
 * differences, stores the latter in *sse and returns
 *     sse - ((sum * sum) >> 8).
 *
 * src_ptr / src_pixels_per_line : first block and its stride
 * xoffset / yoffset             : sub-pixel offsets; the value 4 selects the
 *                                 dedicated "half" kernels (presumably the
 *                                 half-pel position - confirm against the
 *                                 kernel implementations)
 * dst_ptr / dst_pixels_per_line : second block and its stride
 * sse                           : out, receives the sum of squares
 */
unsigned int vp8_sub_pixel_variance16x16_ssse3
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    const unsigned int rows = 16;
    const int half_x = (xoffset == 4);
    const int half_y = (yoffset == 4);
    int diff_sum;
    unsigned int diff_sum_sq;

    /* The caller could skip this dispatch by invoking the matching kernel
     * directly.  The three special cases are mutually exclusive, so their
     * order does not matter.
     */
    if (half_x && half_y)
    {
        vp8_half_horiz_vert_variance16x_h_sse2(
            src_ptr, src_pixels_per_line,
            dst_ptr, dst_pixels_per_line, rows,
            &diff_sum, &diff_sum_sq);
    }
    else if (half_x && yoffset == 0)
    {
        vp8_half_horiz_variance16x_h_sse2(
            src_ptr, src_pixels_per_line,
            dst_ptr, dst_pixels_per_line, rows,
            &diff_sum, &diff_sum_sq);
    }
    else if (half_y && xoffset == 0)
    {
        vp8_half_vert_variance16x_h_sse2(
            src_ptr, src_pixels_per_line,
            dst_ptr, dst_pixels_per_line, rows,
            &diff_sum, &diff_sum_sq);
    }
    else
    {
        /* Every remaining offset pair goes through the general kernel. */
        vp8_filter_block2d_bil_var_ssse3(
            src_ptr, src_pixels_per_line,
            dst_ptr, dst_pixels_per_line, rows,
            xoffset, yoffset,
            &diff_sum, &diff_sum_sq);
    }

    *sse = diff_sum_sq;

    /* The cast makes the multiplication unsigned, so squaring the sum cannot
     * trigger signed overflow.  >> 8 divides by 256 (16 * 16).
     */
    return diff_sum_sq - (((unsigned int)diff_sum * diff_sum) >> 8);
}
11879f15823c34ae1e423108295e416213200bb280fAndreas Huber
/*
 * Sub-pixel variance over a block of 8 rows (16x8 per the function name).
 *
 * Picks one of four externally defined kernels based on (xoffset, yoffset),
 * lets it accumulate the sum of differences and the sum of squared
 * differences, stores the latter in *sse and returns
 *     sse - ((sum * sum) >> 7).
 *
 * src_ptr / src_pixels_per_line : first block and its stride
 * xoffset / yoffset             : sub-pixel offsets; the value 4 selects the
 *                                 dedicated "half" kernels (presumably the
 *                                 half-pel position - confirm against the
 *                                 kernel implementations)
 * dst_ptr / dst_pixels_per_line : second block and its stride
 * sse                           : out, receives the sum of squares
 */
unsigned int vp8_sub_pixel_variance16x8_ssse3
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    const unsigned int rows = 8;
    int diff_sum;
    unsigned int diff_sum_sq;

    /* The three special cases are mutually exclusive, so testing the
     * combined case first selects the same kernel as any other order.
     */
    if (xoffset == 4 && yoffset == 4)
    {
        vp8_half_horiz_vert_variance16x_h_sse2(
            src_ptr, src_pixels_per_line,
            dst_ptr, dst_pixels_per_line, rows,
            &diff_sum, &diff_sum_sq);
    }
    else if (xoffset == 4 && yoffset == 0)
    {
        vp8_half_horiz_variance16x_h_sse2(
            src_ptr, src_pixels_per_line,
            dst_ptr, dst_pixels_per_line, rows,
            &diff_sum, &diff_sum_sq);
    }
    else if (xoffset == 0 && yoffset == 4)
    {
        vp8_half_vert_variance16x_h_sse2(
            src_ptr, src_pixels_per_line,
            dst_ptr, dst_pixels_per_line, rows,
            &diff_sum, &diff_sum_sq);
    }
    else
    {
        /* Every remaining offset pair goes through the general kernel. */
        vp8_filter_block2d_bil_var_ssse3(
            src_ptr, src_pixels_per_line,
            dst_ptr, dst_pixels_per_line, rows,
            xoffset, yoffset,
            &diff_sum, &diff_sum_sq);
    }

    *sse = diff_sum_sq;

    /* The cast makes the multiplication unsigned, so squaring the sum cannot
     * trigger signed overflow.  >> 7 divides by 128 (16 * 8).
     */
    return diff_sum_sq - (((unsigned int)diff_sum * diff_sum) >> 7);
}
167