1/*
2 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#ifndef VP9_COMMON_VP9_IDCT_H_
12#define VP9_COMMON_VP9_IDCT_H_
13
14#include <assert.h>
15
16#include "./vpx_config.h"
17#include "vpx/vpx_integer.h"
18#include "vp9/common/vp9_common.h"
19#include "vp9/common/vp9_enums.h"
20
21#ifdef __cplusplus
22extern "C" {
23#endif
24
25
// Constants and macros shared by the idct/dct functions.

// Number of fractional bits in the fixed-point trigonometric constants of this
// header (they are scaled by 16384 == 1 << 14).
#define DCT_CONST_BITS 14
// Half of (1 << DCT_CONST_BITS), i.e. 8192.
#define DCT_CONST_ROUNDING (1 << (DCT_CONST_BITS - 1))

// UNIT_QUANT_FACTOR is 1 << UNIT_QUANT_SHIFT, i.e. 4.
#define UNIT_QUANT_SHIFT 2
#define UNIT_QUANT_FACTOR (1 << UNIT_QUANT_SHIFT)

// SSE2 helpers built on _mm_set_epi16, whose arguments run from the highest
// 16-bit lane down to the lowest.
// NOTE(review): none of the includes visible in this header provides
// _mm_set_epi16; users of these macros presumably include <emmintrin.h>
// themselves - confirm.

// Alternates the two values across the eight lanes: a in lanes 0, 2, 4, 6 and
// b in lanes 1, 3, 5, 7.
#define pair_set_epi16(a, b) _mm_set_epi16(b, a, b, a, b, a, b, a)

// Fills the four low lanes (0..3) with a and the four high lanes (4..7) with b.
#define dual_set_epi16(a, b) _mm_set_epi16(b, b, b, b, a, a, a, a)
38
// Transform value types:
// - tran_low_t  holds final transform coefficients.
// - tran_high_t holds values of intermediate transform stages.
// Both are twice as wide when CONFIG_VP9_HIGHBITDEPTH is enabled
// (32/64 bit instead of 16/32 bit).
#if CONFIG_VP9_HIGHBITDEPTH
typedef int32_t tran_low_t;
typedef int64_t tran_high_t;
#else
typedef int16_t tran_low_t;
typedef int32_t tran_high_t;
#endif  // CONFIG_VP9_HIGHBITDEPTH
49
50// Constants:
51//  for (int i = 1; i< 32; ++i)
52//    printf("static const int cospi_%d_64 = %.0f;\n", i,
53//           round(16384 * cos(i*M_PI/64)));
54// Note: sin(k*Pi/64) = cos((32-k)*Pi/64)
55static const tran_high_t cospi_1_64  = 16364;
56static const tran_high_t cospi_2_64  = 16305;
57static const tran_high_t cospi_3_64  = 16207;
58static const tran_high_t cospi_4_64  = 16069;
59static const tran_high_t cospi_5_64  = 15893;
60static const tran_high_t cospi_6_64  = 15679;
61static const tran_high_t cospi_7_64  = 15426;
62static const tran_high_t cospi_8_64  = 15137;
63static const tran_high_t cospi_9_64  = 14811;
64static const tran_high_t cospi_10_64 = 14449;
65static const tran_high_t cospi_11_64 = 14053;
66static const tran_high_t cospi_12_64 = 13623;
67static const tran_high_t cospi_13_64 = 13160;
68static const tran_high_t cospi_14_64 = 12665;
69static const tran_high_t cospi_15_64 = 12140;
70static const tran_high_t cospi_16_64 = 11585;
71static const tran_high_t cospi_17_64 = 11003;
72static const tran_high_t cospi_18_64 = 10394;
73static const tran_high_t cospi_19_64 = 9760;
74static const tran_high_t cospi_20_64 = 9102;
75static const tran_high_t cospi_21_64 = 8423;
76static const tran_high_t cospi_22_64 = 7723;
77static const tran_high_t cospi_23_64 = 7005;
78static const tran_high_t cospi_24_64 = 6270;
79static const tran_high_t cospi_25_64 = 5520;
80static const tran_high_t cospi_26_64 = 4756;
81static const tran_high_t cospi_27_64 = 3981;
82static const tran_high_t cospi_28_64 = 3196;
83static const tran_high_t cospi_29_64 = 2404;
84static const tran_high_t cospi_30_64 = 1606;
85static const tran_high_t cospi_31_64 = 804;
86
87//  16384 * sqrt(2) * sin(kPi/9) * 2 / 3
88static const tran_high_t sinpi_1_9 = 5283;
89static const tran_high_t sinpi_2_9 = 9929;
90static const tran_high_t sinpi_3_9 = 13377;
91static const tran_high_t sinpi_4_9 = 15212;
92
93static INLINE tran_low_t dct_const_round_shift(tran_high_t input) {
94  tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
95#if CONFIG_VP9_HIGHBITDEPTH
96  // For valid highbitdepth VP9 streams, intermediate stage coefficients will
97  // stay within the ranges:
98  // - 8 bit: signed 16 bit integer
99  // - 10 bit: signed 18 bit integer
100  // - 12 bit: signed 20 bit integer
101#elif CONFIG_COEFFICIENT_RANGE_CHECKING
102  // For valid VP9 input streams, intermediate stage coefficients should always
103  // stay within the range of a signed 16 bit integer. Coefficients can go out
104  // of this range for invalid/corrupt VP9 streams. However, strictly checking
105  // this range for every intermediate coefficient can burdensome for a decoder,
106  // therefore the following assertion is only enabled when configured with
107  // --enable-coefficient-range-checking.
108  assert(INT16_MIN <= rv);
109  assert(rv <= INT16_MAX);
110#endif
111  return (tran_low_t)rv;
112}
113
114typedef void (*transform_1d)(const tran_low_t*, tran_low_t*);
115
116typedef struct {
117  transform_1d cols, rows;  // vertical and horizontal
118} transform_2d;
119
#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth counterpart of transform_1d: same two buffers plus an extra
// int argument, bd.
// NOTE(review): bd is presumably the bit depth (8, 10 or 12) - confirm
// against the implementations.
typedef void (*high_transform_1d)(const tran_low_t *, tran_low_t *, int bd);

// Bundles the two high-bitdepth 1-D routines that make up a 2-D transform.
typedef struct {
  high_transform_1d cols;  // vertical
  high_transform_1d rows;  // horizontal
} high_transform_2d;
#endif  // CONFIG_VP9_HIGHBITDEPTH
127
128void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
129                     int eob);
130void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
131                     int eob);
132void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
133                     int eob);
134void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, int
135                       eob);
136void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
137                       int eob);
138
139void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
140                    int stride, int eob);
141void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
142                    int stride, int eob);
143void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
144                      int stride, int eob);
145
#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth entry points, declared here and defined elsewhere. Their
// parameter lists match the vp9_*_add declarations of this header with one
// extra trailing int, bd.
// NOTE(review): bd is presumably the bit depth of the stream, and dest is
// declared as uint8_t * even for these variants - confirm how the
// implementations interpret both.
void vp9_high_iwht4x4_add(const tran_low_t *input, uint8_t *dest,
                          int stride, int eob, int bd);
void vp9_high_idct4x4_add(const tran_low_t *input, uint8_t *dest,
                          int stride, int eob, int bd);
void vp9_high_idct8x8_add(const tran_low_t *input, uint8_t *dest,
                          int stride, int eob, int bd);
void vp9_high_idct16x16_add(const tran_low_t *input, uint8_t *dest,
                            int stride, int eob, int bd);
void vp9_high_idct32x32_add(const tran_low_t *input, uint8_t *dest,
                            int stride, int eob, int bd);

void vp9_high_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input,
                         uint8_t *dest, int stride, int eob, int bd);
void vp9_high_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input,
                         uint8_t *dest, int stride, int eob, int bd);
void vp9_high_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input,
                           uint8_t *dest, int stride, int eob, int bd);
#endif  // CONFIG_VP9_HIGHBITDEPTH
164#ifdef __cplusplus
165}  // extern "C"
166#endif
167
168#endif  // VP9_COMMON_VP9_IDCT_H_
169