1/*
2 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include <assert.h>
12
13#include "vp9/common/vp9_enums.h"
14#include "vpx_dsp/mips/inv_txfm_msa.h"
15
16void vp9_iht8x8_64_add_msa(const int16_t *input, uint8_t *dst,
17                           int32_t dst_stride, int32_t tx_type) {
18  v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
19
20  /* load vector elements of 8x8 block */
21  LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7);
22
23  TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
24                     in4, in5, in6, in7);
25
26  switch (tx_type) {
27    case DCT_DCT:
28      /* DCT in horizontal */
29      VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
30                     in4, in5, in6, in7);
31      /* DCT in vertical */
32      TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
33                         in3, in4, in5, in6, in7);
34      VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
35                     in4, in5, in6, in7);
36      break;
37    case ADST_DCT:
38      /* DCT in horizontal */
39      VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
40                     in4, in5, in6, in7);
41      /* ADST in vertical */
42      TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
43                         in3, in4, in5, in6, in7);
44      VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
45                in5, in6, in7);
46      break;
47    case DCT_ADST:
48      /* ADST in horizontal */
49      VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
50                in5, in6, in7);
51      /* DCT in vertical */
52      TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
53                         in3, in4, in5, in6, in7);
54      VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
55                     in4, in5, in6, in7);
56      break;
57    case ADST_ADST:
58      /* ADST in horizontal */
59      VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
60                in5, in6, in7);
61      /* ADST in vertical */
62      TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
63                         in3, in4, in5, in6, in7);
64      VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
65                in5, in6, in7);
66      break;
67    default: assert(0); break;
68  }
69
70  /* final rounding (add 2^4, divide by 2^5) and shift */
71  SRARI_H4_SH(in0, in1, in2, in3, 5);
72  SRARI_H4_SH(in4, in5, in6, in7, 5);
73
74  /* add block and store 8x8 */
75  VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3);
76  dst += (4 * dst_stride);
77  VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7);
78}
79