1/*
2 *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include <assert.h>
12#include <stdio.h>
13
14#include "./vpx_config.h"
15#include "./vp9_rtcd.h"
16#include "vp9/common/vp9_common.h"
17#include "vp9/common/vp9_blockd.h"
18#include "vp9/common/vp9_idct.h"
19#include "vpx_dsp/mips/inv_txfm_dspr2.h"
20#include "vpx_dsp/txfm_common.h"
21#include "vpx_ports/mem.h"
22
23#if HAVE_DSPR2
24void vp9_iht16x16_256_add_dspr2(const int16_t *input, uint8_t *dest, int pitch,
25                                int tx_type) {
26  int i, j;
27  DECLARE_ALIGNED(32, int16_t, out[16 * 16]);
28  int16_t *outptr = out;
29  int16_t temp_out[16];
30  uint32_t pos = 45;
31
32  /* bit positon for extract from acc */
33  __asm__ __volatile__("wrdsp    %[pos],    1    \n\t" : : [pos] "r"(pos));
34
35  switch (tx_type) {
36    case DCT_DCT:  // DCT in both horizontal and vertical
37      idct16_rows_dspr2(input, outptr, 16);
38      idct16_cols_add_blk_dspr2(out, dest, pitch);
39      break;
40    case ADST_DCT:  // ADST in vertical, DCT in horizontal
41      idct16_rows_dspr2(input, outptr, 16);
42
43      outptr = out;
44
45      for (i = 0; i < 16; ++i) {
46        iadst16_dspr2(outptr, temp_out);
47
48        for (j = 0; j < 16; ++j)
49          dest[j * pitch + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) +
50                                           dest[j * pitch + i]);
51        outptr += 16;
52      }
53      break;
54    case DCT_ADST:  // DCT in vertical, ADST in horizontal
55    {
56      int16_t temp_in[16 * 16];
57
58      for (i = 0; i < 16; ++i) {
59        /* prefetch row */
60        prefetch_load((const uint8_t *)(input + 16));
61
62        iadst16_dspr2(input, outptr);
63        input += 16;
64        outptr += 16;
65      }
66
67      for (i = 0; i < 16; ++i)
68        for (j = 0; j < 16; ++j) temp_in[j * 16 + i] = out[i * 16 + j];
69
70      idct16_cols_add_blk_dspr2(temp_in, dest, pitch);
71      break;
72    }
73    case ADST_ADST:  // ADST in both directions
74    {
75      int16_t temp_in[16];
76
77      for (i = 0; i < 16; ++i) {
78        /* prefetch row */
79        prefetch_load((const uint8_t *)(input + 16));
80
81        iadst16_dspr2(input, outptr);
82        input += 16;
83        outptr += 16;
84      }
85
86      for (i = 0; i < 16; ++i) {
87        for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];
88        iadst16_dspr2(temp_in, temp_out);
89        for (j = 0; j < 16; ++j)
90          dest[j * pitch + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) +
91                                           dest[j * pitch + i]);
92      }
93      break;
94    }
95    default: printf("vp9_short_iht16x16_add_dspr2 : Invalid tx_type\n"); break;
96  }
97}
98#endif  // #if HAVE_DSPR2
99