/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10233d2500723e5594f3e7c70896ffeeef32b9c950ywan
11233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "./vpx_config.h"
12233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vp9/encoder/vp9_variance.h"
13233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vp9/common/vp9_pragmas.h"
14233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vpx_ports/mem.h"
15233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/*
 * Block statistics helpers defined outside this file.
 *
 * Each takes a source block and a reference block (pointer plus row stride)
 * and writes two results through SSE and Sum. The wrappers in this file use
 * *SSE as a sum of squared differences and *Sum as a signed sum of
 * differences -- NOTE(review): confirm against the helper implementation.
 * The return value is not used by any caller in this file.
 */
extern unsigned int vp9_get8x8var_mmx(const unsigned char *src_ptr,
                                      int source_stride,
                                      const unsigned char *ref_ptr,
                                      int recon_stride,
                                      unsigned int *SSE,
                                      int *Sum);

extern unsigned int vp9_get4x4var_mmx(const unsigned char *src_ptr,
                                      int source_stride,
                                      const unsigned char *ref_ptr,
                                      int recon_stride,
                                      unsigned int *SSE,
                                      int *Sum);
34233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/*
 * Variance of a 4x4 block relative to a reference block.
 *
 * Calls vp9_get4x4var_mmx once, stores the SSE value it produced in *sse,
 * and returns SSE - (sum * sum) / 16, where 16 is the pixel count of a
 * 4x4 block.
 *
 *   src_ptr, source_stride  source block and its row stride
 *   ref_ptr, recon_stride   reference block and its row stride
 *   sse                     out: SSE value reported by the helper
 */
unsigned int vp9_variance4x4_mmx(const unsigned char *src_ptr,
                                 int source_stride,
                                 const unsigned char *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  unsigned int sum_sq;

  vp9_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride,
                    &sq_err, &diff_sum);

  *sse = sq_err;

  /* Square in unsigned arithmetic so the product cannot overflow an int. */
  sum_sq = (unsigned int)diff_sum * diff_sum;
  return sq_err - (sum_sq >> 4);
}
48233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/*
 * Variance of an 8x8 block relative to a reference block.
 *
 * Calls vp9_get8x8var_mmx once, stores the SSE value it produced in *sse,
 * and returns SSE - (sum * sum) / 64, where 64 is the pixel count of an
 * 8x8 block.
 *
 *   src_ptr, source_stride  source block and its row stride
 *   ref_ptr, recon_stride   reference block and its row stride
 *   sse                     out: SSE value reported by the helper
 */
unsigned int vp9_variance8x8_mmx(const unsigned char *src_ptr,
                                 int source_stride,
                                 const unsigned char *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  unsigned int sum_sq;

  vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride,
                    &sq_err, &diff_sum);

  *sse = sq_err;

  /* Square in unsigned arithmetic so the product cannot overflow an int. */
  sum_sq = (unsigned int)diff_sum * diff_sum;
  return sq_err - (sum_sq >> 6);
}
63233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/*
 * Summed SSE of a 16x16 block relative to a reference block.
 *
 * The block is processed as four 8x8 quadrants (top-left, top-right,
 * bottom-left, bottom-right), each handled by vp9_get8x8var_mmx. The four
 * SSE values are added; the per-quadrant sums are requested only because
 * the helper needs somewhere to write them and are otherwise discarded.
 *
 * The total is both stored in *sse and returned; no mean term is
 * subtracted and no division by the pixel count is performed here.
 */
unsigned int vp9_mse16x16_mmx(const unsigned char *src_ptr,
                              int source_stride,
                              const unsigned char *ref_ptr,
                              int recon_stride,
                              unsigned int *sse) {
  unsigned int total = 0;
  int row, col;

  for (row = 0; row < 16; row += 8) {
    for (col = 0; col < 16; col += 8) {
      unsigned int quad_sse;
      int quad_sum;  /* written by the helper, intentionally ignored */

      vp9_get8x8var_mmx(src_ptr + row * source_stride + col, source_stride,
                        ref_ptr + row * recon_stride + col, recon_stride,
                        &quad_sse, &quad_sum);
      total += quad_sse;
    }
  }

  *sse = total;
  return total;
}
87233d2500723e5594f3e7c70896ffeeef32b9c950ywan
88233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/*
 * Variance of a 16x16 block relative to a reference block.
 *
 * The block is processed as four 8x8 quadrants (top-left, top-right,
 * bottom-left, bottom-right), each handled by vp9_get8x8var_mmx. The
 * per-quadrant SSE and sum values are accumulated; the accumulated SSE is
 * stored in *sse and the function returns SSE - (sum * sum) / 256, where
 * 256 is the pixel count of a 16x16 block.
 */
unsigned int vp9_variance16x16_mmx(const unsigned char *src_ptr,
                                   int source_stride,
                                   const unsigned char *ref_ptr,
                                   int recon_stride,
                                   unsigned int *sse) {
  unsigned int sse_total = 0;
  int sum_total = 0;
  unsigned int sum_sq;
  int row, col;

  for (row = 0; row < 16; row += 8) {
    for (col = 0; col < 16; col += 8) {
      unsigned int quad_sse;
      int quad_sum;

      vp9_get8x8var_mmx(src_ptr + row * source_stride + col, source_stride,
                        ref_ptr + row * recon_stride + col, recon_stride,
                        &quad_sse, &quad_sum);
      sse_total += quad_sse;
      sum_total += quad_sum;
    }
  }

  *sse = sse_total;

  /* Square in unsigned arithmetic so the product cannot overflow an int. */
  sum_sq = (unsigned int)sum_total * sum_total;
  return sse_total - (sum_sq >> 8);
}
112233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/*
 * Variance of a 16-wide, 8-high block relative to a reference block.
 *
 * The block is processed as a left and a right 8x8 half, each handled by
 * vp9_get8x8var_mmx. The accumulated SSE is stored in *sse and the
 * function returns SSE - (sum * sum) / 128, where 128 is the pixel count
 * of a 16x8 block.
 */
unsigned int vp9_variance16x8_mmx(const unsigned char *src_ptr,
                                  int source_stride,
                                  const unsigned char *ref_ptr,
                                  int recon_stride,
                                  unsigned int *sse) {
  unsigned int sse_total = 0;
  int sum_total = 0;
  unsigned int sum_sq;
  int col;

  for (col = 0; col < 16; col += 8) {
    unsigned int half_sse;
    int half_sum;

    vp9_get8x8var_mmx(src_ptr + col, source_stride,
                      ref_ptr + col, recon_stride,
                      &half_sse, &half_sum);
    sse_total += half_sse;
    sum_total += half_sum;
  }

  *sse = sse_total;

  /* Square in unsigned arithmetic so the product cannot overflow an int. */
  sum_sq = (unsigned int)sum_total * sum_total;
  return sse_total - (sum_sq >> 7);
}
132233d2500723e5594f3e7c70896ffeeef32b9c950ywan
133233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/*
 * Variance of an 8-wide, 16-high block relative to a reference block.
 *
 * The block is processed as an upper and a lower 8x8 half, each handled by
 * vp9_get8x8var_mmx. The accumulated SSE is stored in *sse and the
 * function returns SSE - (sum * sum) / 128, where 128 is the pixel count
 * of an 8x16 block.
 */
unsigned int vp9_variance8x16_mmx(const unsigned char *src_ptr,
                                  int source_stride,
                                  const unsigned char *ref_ptr,
                                  int recon_stride,
                                  unsigned int *sse) {
  /* The lower half starts eight rows below the upper half. */
  const unsigned char *src_lower = src_ptr + 8 * source_stride;
  const unsigned char *ref_lower = ref_ptr + 8 * recon_stride;
  unsigned int upper_sse, lower_sse, sse_total;
  int upper_sum, lower_sum, sum_total;
  unsigned int sum_sq;

  vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride,
                    &upper_sse, &upper_sum);
  vp9_get8x8var_mmx(src_lower, source_stride, ref_lower, recon_stride,
                    &lower_sse, &lower_sum);

  sse_total = upper_sse + lower_sse;
  sum_total = upper_sum + lower_sum;
  *sse = sse_total;

  /* Square in unsigned arithmetic so the product cannot overflow an int. */
  sum_sq = (unsigned int)sum_total * sum_total;
  return sse_total - (sum_sq >> 7);
}
154