1/*
2 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "./vp9_rtcd.h"
12#include "vpx_dsp/mips/macros_msa.h"
13
/*
 * BLOCK_ERROR_BLOCKSIZE_MSA(BSize) defines the file-local function
 *
 *   static int64_t block_error_<BSize>size_msa(const int16_t *coeff_ptr,
 *                                              const int16_t *dq_coeff_ptr,
 *                                              int64_t *ssz);
 *
 * The function walks BSize 16-bit entries of both arrays, 16 at a time (two
 * 8-lane vector loads per array): one group is handled up front to seed the
 * accumulators and the remaining (BSize >> 4) - 1 groups are handled in the
 * loop. BSize must therefore be a multiple of 16 and at least 16; a smaller
 * value would make the unsigned trip count wrap around.
 *
 * Two pairs of 64-bit accumulators are maintained:
 *   - one fed from the widened coeff lanes, whose total is stored in *ssz;
 *   - one fed from the HSUB_UH2_SW result of the interleaved coeff/dq_coeff
 *     lanes, whose total is the return value.
 * NOTE(review): the helper macros live in macros_msa.h, which is not visible
 * here. Presumably *ssz receives the sum of squared coefficients and the
 * return value the sum of squared (coeff - dq_coeff) differences, matching
 * vp9_block_error_c -- confirm against the macro definitions.
 */
#define BLOCK_ERROR_BLOCKSIZE_MSA(BSize)                                     \
  static int64_t block_error_##BSize##size_msa(                              \
      const int16_t *coeff_ptr, const int16_t *dq_coeff_ptr, int64_t *ssz) { \
    int64_t total_err;                                                       \
    uint32_t groups_left = (BSize >> 4) - 1;                                 \
    v8i16 src, deq, mix_r, mix_l;                                            \
    v4i32 src_w_r, src_w_l, delta_r, delta_l;                                \
    v2i64 ssz_acc_r, ssz_acc_l, err_acc_r, err_acc_l;                        \
    v2i64 fold_r, fold_l;                                                    \
                                                                             \
    /* First 8 entries: plain dot products initialise the accumulators. */   \
    src = LD_SH(coeff_ptr);                                                  \
    deq = LD_SH(dq_coeff_ptr);                                               \
    ILVRL_H2_SH(src, deq, mix_r, mix_l);                                     \
    HSUB_UH2_SW(mix_r, mix_l, delta_r, delta_l);                             \
    UNPCK_SH_SW(src, src_w_r, src_w_l);                                      \
    DOTP_SW2_SD(delta_r, delta_l, delta_r, delta_l, err_acc_r, err_acc_l);   \
    DOTP_SW2_SD(src_w_r, src_w_l, src_w_r, src_w_l, ssz_acc_r, ssz_acc_l);   \
                                                                             \
    /* Next 8 entries: from here on everything is accumulated. */            \
    src = LD_SH(coeff_ptr + 8);                                              \
    deq = LD_SH(dq_coeff_ptr + 8);                                           \
    ILVRL_H2_SH(src, deq, mix_r, mix_l);                                     \
    HSUB_UH2_SW(mix_r, mix_l, delta_r, delta_l);                             \
    UNPCK_SH_SW(src, src_w_r, src_w_l);                                      \
    DPADD_SD2_SD(delta_r, delta_l, err_acc_r, err_acc_l);                    \
    DPADD_SD2_SD(src_w_r, src_w_l, ssz_acc_r, ssz_acc_l);                    \
                                                                             \
    coeff_ptr += 16;                                                         \
    dq_coeff_ptr += 16;                                                      \
                                                                             \
    /* Remaining groups of 16 entries. */                                    \
    while (groups_left--) {                                                  \
      src = LD_SH(coeff_ptr);                                                \
      deq = LD_SH(dq_coeff_ptr);                                             \
      ILVRL_H2_SH(src, deq, mix_r, mix_l);                                   \
      HSUB_UH2_SW(mix_r, mix_l, delta_r, delta_l);                           \
      UNPCK_SH_SW(src, src_w_r, src_w_l);                                    \
      DPADD_SD2_SD(delta_r, delta_l, err_acc_r, err_acc_l);                  \
      DPADD_SD2_SD(src_w_r, src_w_l, ssz_acc_r, ssz_acc_l);                  \
                                                                             \
      src = LD_SH(coeff_ptr + 8);                                            \
      deq = LD_SH(dq_coeff_ptr + 8);                                         \
      ILVRL_H2_SH(src, deq, mix_r, mix_l);                                   \
      HSUB_UH2_SW(mix_r, mix_l, delta_r, delta_l);                           \
      UNPCK_SH_SW(src, src_w_r, src_w_l);                                    \
      DPADD_SD2_SD(delta_r, delta_l, err_acc_r, err_acc_l);                  \
      DPADD_SD2_SD(src_w_r, src_w_l, ssz_acc_r, ssz_acc_l);                  \
                                                                             \
      coeff_ptr += 16;                                                       \
      dq_coeff_ptr += 16;                                                    \
    }                                                                        \
                                                                             \
    /* Add element 1 onto element 0 of each accumulator, then combine the */ \
    /* element-0 values of the two accumulators into a scalar.            */ \
    fold_r = __msa_splati_d(err_acc_r, 1);                                   \
    fold_l = __msa_splati_d(err_acc_l, 1);                                   \
    err_acc_r = err_acc_r + fold_r;                                          \
    err_acc_l = err_acc_l + fold_l;                                          \
    total_err = __msa_copy_s_d(err_acc_r, 0);                                \
    total_err += __msa_copy_s_d(err_acc_l, 0);                               \
                                                                             \
    fold_r = __msa_splati_d(ssz_acc_r, 1);                                   \
    fold_l = __msa_splati_d(ssz_acc_l, 1);                                   \
    ssz_acc_r = ssz_acc_r + fold_r;                                          \
    ssz_acc_l = ssz_acc_l + fold_l;                                          \
    *ssz = __msa_copy_s_d(ssz_acc_r, 0);                                     \
    *ssz += __msa_copy_s_d(ssz_acc_l, 0);                                    \
                                                                             \
    return total_err;                                                        \
  }
82
83BLOCK_ERROR_BLOCKSIZE_MSA(16);
84BLOCK_ERROR_BLOCKSIZE_MSA(64);
85BLOCK_ERROR_BLOCKSIZE_MSA(256);
86BLOCK_ERROR_BLOCKSIZE_MSA(1024);
87
88int64_t vp9_block_error_msa(const tran_low_t *coeff_ptr,
89                            const tran_low_t *dq_coeff_ptr,
90                            intptr_t blk_size, int64_t *ssz) {
91  int64_t err;
92  const int16_t *coeff = (const int16_t *)coeff_ptr;
93  const int16_t *dq_coeff = (const int16_t *)dq_coeff_ptr;
94
95  switch (blk_size) {
96    case 16:
97      err = block_error_16size_msa(coeff, dq_coeff, ssz);
98      break;
99    case 64:
100      err = block_error_64size_msa(coeff, dq_coeff, ssz);
101      break;
102    case 256:
103      err = block_error_256size_msa(coeff, dq_coeff, ssz);
104      break;
105    case 1024:
106      err = block_error_1024size_msa(coeff, dq_coeff, ssz);
107      break;
108    default:
109      err = vp9_block_error_c(coeff_ptr, dq_coeff_ptr, blk_size, ssz);
110      break;
111  }
112
113  return err;
114}
115