/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "./vpx_config.h"
12ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "vp9/encoder/vp9_variance.h"
13ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "vp9/common/vp9_pragmas.h"
14ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "vpx_ports/mem.h"
15ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
/*
 * Helper defined outside this file. Callers in this file pass the addresses
 * of uninitialised locals for SSE and Sum and read them back afterwards, so
 * the helper is expected to fill both; its return value is not used here.
 * Presumably operates on an 8x8 pixel block (from the name) -- confirm
 * against the implementation.
 */
extern unsigned int vp9_get8x8var_mmx(const unsigned char *src_ptr,
                                      int source_stride,
                                      const unsigned char *ref_ptr,
                                      int recon_stride,
                                      unsigned int *SSE,
                                      int *Sum);
/*
 * Helper defined outside this file. The caller in this file passes the
 * addresses of uninitialised locals for SSE and Sum and reads them back
 * afterwards, so the helper is expected to fill both; its return value is
 * not used here. Presumably operates on a 4x4 pixel block (from the name)
 * -- confirm against the implementation.
 */
extern unsigned int vp9_get4x4var_mmx(const unsigned char *src_ptr,
                                      int source_stride,
                                      const unsigned char *ref_ptr,
                                      int recon_stride,
                                      unsigned int *SSE,
                                      int *Sum);
34ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
/*
 * Variance-style metric built on vp9_get4x4var_mmx().
 *
 * The helper fills two out-parameters (named SSE and Sum in its prototype).
 * The SSE value is stored through `sse`. The return value is that SSE minus
 * the squared Sum shifted right by 4, i.e. divided by 16 -- presumably the
 * pixel count of a 4x4 block.
 */
unsigned int vp9_variance4x4_mmx(const unsigned char *src_ptr,
                                 int source_stride,
                                 const unsigned char *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  unsigned int diff_sum_sq;

  vp9_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride,
                    &sq_err, &diff_sum);
  *sse = sq_err;

  /* The cast forces the multiplication to be carried out in unsigned int. */
  diff_sum_sq = (unsigned int)diff_sum * diff_sum;
  return sq_err - (diff_sum_sq >> 4);
}
48ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
/*
 * Variance-style metric built on a single vp9_get8x8var_mmx() call.
 *
 * Stores the helper's SSE output through `sse` and returns that SSE minus
 * the squared Sum output shifted right by 6 (divided by 64 -- presumably
 * the pixel count of an 8x8 block).
 */
unsigned int vp9_variance8x8_mmx(const unsigned char *src_ptr,
                                 int source_stride,
                                 const unsigned char *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse) {
  unsigned int block_sse;
  int block_sum;
  unsigned int mean_term;

  vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride,
                    &block_sse, &block_sum);

  /* The cast forces the multiplication to be carried out in unsigned int. */
  mean_term = ((unsigned int)block_sum * block_sum) >> 6;

  *sse = block_sse;
  return block_sse - mean_term;
}
63ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
/*
 * Adds up the SSE outputs of four vp9_get8x8var_mmx() calls, one per 8x8
 * quadrant of a 16x16 area (offsets 0 and 8 in each direction), visiting
 * them in the order top-left, top-right, bottom-left, bottom-right.
 *
 * The total is both stored through `sse` and returned. The helper's Sum
 * output is required by its signature but is not used here.
 */
unsigned int vp9_mse16x16_mmx(const unsigned char *src_ptr,
                              int source_stride,
                              const unsigned char *ref_ptr,
                              int recon_stride,
                              unsigned int *sse) {
  unsigned int sse_total = 0;
  int row, col;

  for (row = 0; row < 16; row += 8) {
    for (col = 0; col < 16; col += 8) {
      unsigned int tile_sse;
      int tile_sum;  /* filled by the helper, intentionally ignored */

      vp9_get8x8var_mmx(src_ptr + row * source_stride + col, source_stride,
                        ref_ptr + row * recon_stride + col, recon_stride,
                        &tile_sse, &tile_sum);
      sse_total += tile_sse;
    }
  }

  *sse = sse_total;
  return sse_total;
}
87ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
88ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
/*
 * Variance-style metric over a 16x16 area, assembled from four
 * vp9_get8x8var_mmx() calls (one per 8x8 quadrant, in the order top-left,
 * top-right, bottom-left, bottom-right).
 *
 * The four SSE outputs are summed and stored through `sse`; the four Sum
 * outputs are summed as well. The return value is the SSE total minus the
 * squared Sum total shifted right by 8 (divided by 256).
 */
unsigned int vp9_variance16x16_mmx(const unsigned char *src_ptr,
                                   int source_stride,
                                   const unsigned char *ref_ptr,
                                   int recon_stride,
                                   unsigned int *sse) {
  unsigned int sse_total = 0;
  int sum_total = 0;
  int row, col;

  for (row = 0; row < 16; row += 8) {
    for (col = 0; col < 16; col += 8) {
      unsigned int tile_sse;
      int tile_sum;

      vp9_get8x8var_mmx(src_ptr + row * source_stride + col, source_stride,
                        ref_ptr + row * recon_stride + col, recon_stride,
                        &tile_sse, &tile_sum);
      sse_total += tile_sse;
      sum_total += tile_sum;
    }
  }

  *sse = sse_total;

  /* The cast forces the multiplication to be carried out in unsigned int. */
  return sse_total - (((unsigned int)sum_total * sum_total) >> 8);
}
112ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
/*
 * Variance-style metric assembled from two vp9_get8x8var_mmx() calls placed
 * side by side: one at the given pointers and one 8 bytes further along.
 *
 * The two SSE outputs are summed and stored through `sse`. The return value
 * is that total minus the squared sum of the two Sum outputs shifted right
 * by 7 (divided by 128).
 */
unsigned int vp9_variance16x8_mmx(const unsigned char *src_ptr,
                                  int source_stride,
                                  const unsigned char *ref_ptr,
                                  int recon_stride,
                                  unsigned int *sse) {
  unsigned int sse_total = 0;
  int sum_total = 0;
  int col;

  for (col = 0; col < 16; col += 8) {
    unsigned int half_sse;
    int half_sum;

    vp9_get8x8var_mmx(src_ptr + col, source_stride,
                      ref_ptr + col, recon_stride,
                      &half_sse, &half_sum);
    sse_total += half_sse;
    sum_total += half_sum;
  }

  *sse = sse_total;

  /* The cast forces the multiplication to be carried out in unsigned int. */
  return sse_total - (((unsigned int)sum_total * sum_total) >> 7);
}
132ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
133ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
/*
 * Variance-style metric assembled from two vp9_get8x8var_mmx() calls stacked
 * vertically: one at the given pointers and one 8 strides further down.
 *
 * The two SSE outputs are summed and stored through `sse`. The return value
 * is that total minus the squared sum of the two Sum outputs shifted right
 * by 7 (divided by 128).
 */
unsigned int vp9_variance8x16_mmx(const unsigned char *src_ptr,
                                  int source_stride,
                                  const unsigned char *ref_ptr,
                                  int recon_stride,
                                  unsigned int *sse) {
  unsigned int sse_total = 0;
  int sum_total = 0;
  int row;

  for (row = 0; row < 16; row += 8) {
    unsigned int half_sse;
    int half_sum;

    vp9_get8x8var_mmx(src_ptr + row * source_stride, source_stride,
                      ref_ptr + row * recon_stride, recon_stride,
                      &half_sse, &half_sum);
    sse_total += half_sse;
    sum_total += half_sum;
  }

  *sse = sse_total;

  /* The cast forces the multiplication to be carried out in unsigned int. */
  return sse_total - (((unsigned int)sum_total * sum_total) >> 7);
}
154