1/*
2 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "./vpx_config.h"
12#include "vp9/encoder/vp9_variance.h"
13#include "vpx_ports/mem.h"
14
// Block-statistics kernels that are only declared here; their definitions are
// not part of this file.
// NOTE(review): presumably these are the MMX assembly routines -- confirm
// against the build files.
//
// Each takes a source block and a reference block (pointer + row stride) and
// stores one value through `sse` and one through `sum`. The wrappers in this
// file combine those two outputs as `sse - (sum * sum) / pixel_count`.
unsigned int vp9_get8x8var_mmx(const uint8_t *src, int src_stride,
                               const uint8_t *ref, int ref_stride,
                               unsigned int *sse, int *sum);

// 4x4 counterpart of vp9_get8x8var_mmx; same parameter layout.
unsigned int vp9_get4x4var_mmx(const uint8_t *src, int src_stride,
                               const uint8_t *ref, int ref_stride,
                               unsigned int *sse, int *sum);
22
// Variance-style metric for a 4x4 block.
//
// Delegates to vp9_get4x4var_mmx, which fills *sse and a local sum, then
// returns *sse - (sum * sum) / 16 (the shift by 4 divides by the 16 pixels of
// a 4x4 block). *sse is left holding the value written by the helper.
unsigned int vp9_variance4x4_mmx(const uint8_t *src, int src_stride,
                                 const uint8_t *ref, int ref_stride,
                                 unsigned int *sse) {
  int diff_sum;
  unsigned int sum_sq;

  vp9_get4x4var_mmx(src, src_stride, ref, ref_stride, sse, &diff_sum);

  // The cast forces the product to be evaluated in unsigned arithmetic.
  sum_sq = (unsigned int)diff_sum * diff_sum;
  return *sse - (sum_sq >> 4);
}
30
// Variance-style metric for an 8x8 block.
//
// Delegates to vp9_get8x8var_mmx, which fills *sse and a local sum, then
// returns *sse - (sum * sum) / 64 (the shift by 6 divides by the 64 pixels of
// an 8x8 block). *sse is left holding the value written by the helper.
unsigned int vp9_variance8x8_mmx(const uint8_t *src, int src_stride,
                                 const uint8_t *ref, int ref_stride,
                                 unsigned int *sse) {
  int diff_sum;
  unsigned int sum_sq;

  vp9_get8x8var_mmx(src, src_stride, ref, ref_stride, sse, &diff_sum);

  // The cast forces the product to be evaluated in unsigned arithmetic.
  sum_sq = (unsigned int)diff_sum * diff_sum;
  return *sse - (sum_sq >> 6);
}
38
// Accumulates the `sse` output of vp9_get8x8var_mmx over the four 8x8
// quadrants of a 16x16 block (top-left, top-right, bottom-left, bottom-right,
// in that order). The total is stored in *sse and also returned.
unsigned int vp9_mse16x16_mmx(const uint8_t *src, int src_stride,
                              const uint8_t *ref, int ref_stride,
                              unsigned int *sse) {
  unsigned int total_sse = 0;
  int row, col;

  for (row = 0; row < 16; row += 8) {
    for (col = 0; col < 16; col += 8) {
      unsigned int quad_sse;
      int quad_sum;  // Required by the helper's signature; not used here.

      vp9_get8x8var_mmx(src + row * src_stride + col, src_stride,
                        ref + row * ref_stride + col, ref_stride,
                        &quad_sse, &quad_sum);
      total_sse += quad_sse;
    }
  }

  *sse = total_sse;
  return total_sse;
}
55
56
// Variance-style metric for a 16x16 block, built from four 8x8 quadrants.
//
// Calls vp9_get8x8var_mmx on the top-left, top-right, bottom-left and
// bottom-right quadrants (in that order) and adds up both of its outputs.
// The accumulated sse is stored in *sse; the return value is
// *sse - (sum * sum) / 256 (the shift by 8 divides by the 256 pixels).
unsigned int vp9_variance16x16_mmx(const uint8_t *src, int src_stride,
                                   const uint8_t *ref, int ref_stride,
                                   unsigned int *sse) {
  unsigned int total_sse = 0;
  int total_sum = 0;
  int row, col;

  for (row = 0; row < 16; row += 8) {
    for (col = 0; col < 16; col += 8) {
      unsigned int quad_sse;
      int quad_sum;

      vp9_get8x8var_mmx(src + row * src_stride + col, src_stride,
                        ref + row * ref_stride + col, ref_stride,
                        &quad_sse, &quad_sum);
      total_sse += quad_sse;
      total_sum += quad_sum;
    }
  }

  *sse = total_sse;
  // The cast forces the product to be evaluated in unsigned arithmetic.
  return total_sse - (((unsigned int)total_sum * total_sum) >> 8);
}
74
// Variance-style metric for a block 16 pixels wide and 8 pixels high.
//
// Calls vp9_get8x8var_mmx on the left and then the right 8x8 half and adds up
// both of its outputs. The accumulated sse is stored in *sse; the return
// value is *sse - (sum * sum) / 128 (the shift by 7 divides by the 128
// pixels).
unsigned int vp9_variance16x8_mmx(const uint8_t *src, int src_stride,
                                  const uint8_t *ref, int ref_stride,
                                  unsigned int *sse) {
  unsigned int total_sse = 0;
  int total_sum = 0;
  int col;

  for (col = 0; col < 16; col += 8) {
    unsigned int half_sse;
    int half_sum;

    vp9_get8x8var_mmx(src + col, src_stride, ref + col, ref_stride,
                      &half_sse, &half_sum);
    total_sse += half_sse;
    total_sum += half_sum;
  }

  *sse = total_sse;
  // The cast forces the product to be evaluated in unsigned arithmetic.
  return total_sse - (((unsigned int)total_sum * total_sum) >> 7);
}
88
89
// Variance-style metric for a block 8 pixels wide and 16 pixels high.
//
// Calls vp9_get8x8var_mmx on the upper and then the lower 8x8 half and adds
// up both of its outputs. The accumulated sse is stored in *sse; the return
// value is *sse - (sum * sum) / 128 (the shift by 7 divides by the 128
// pixels).
unsigned int vp9_variance8x16_mmx(const uint8_t *src, int src_stride,
                                  const uint8_t *ref, int ref_stride,
                                  unsigned int *sse) {
  unsigned int total_sse = 0;
  int total_sum = 0;
  int row;

  for (row = 0; row < 16; row += 8) {
    unsigned int half_sse;
    int half_sum;

    vp9_get8x8var_mmx(src + row * src_stride, src_stride,
                      ref + row * ref_stride, ref_stride,
                      &half_sse, &half_sum);
    total_sse += half_sse;
    total_sum += half_sum;
  }

  *sse = total_sse;
  // The cast forces the product to be evaluated in unsigned arithmetic.
  return total_sse - (((unsigned int)total_sum * total_sum) >> 7);
}
104