1/******************************************************************************
2*
3* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4*
5* Licensed under the Apache License, Version 2.0 (the "License");
6* you may not use this file except in compliance with the License.
7* You may obtain a copy of the License at:
8*
9* http://www.apache.org/licenses/LICENSE-2.0
10*
11* Unless required by applicable law or agreed to in writing, software
12* distributed under the License is distributed on an "AS IS" BASIS,
13* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14* See the License for the specific language governing permissions and
15* limitations under the License.
16*
17******************************************************************************/
18/**
19*******************************************************************************
20* @file
21*  ihevc_trans_macros.h
22*
23* @brief
24*  Macros used in the forward transform and inverse transform functions
25*
26* @author
27*  Ittiam
28*
29* @remarks
30*  None
31*
32*******************************************************************************
33*/
34#ifndef IHEVC_TRANS_MACROS_H_
35#define IHEVC_TRANS_MACROS_H_
36
/*
 * QUANT: quantizes one coefficient with the bit depth fixed at 8.
 *
 *   out = sign(inp) * ((|inp| * quant_coeff
 *                       + (q_add << (q_bits - QUANT_ROUND_FACTOR_Q))) >> q_bits)
 *   q_bits = QUANT_SHIFT + qp_div + transform_shift + SCALING_Q_SHIFT - 4
 *   transform_shift = MAX_TR_DYNAMIC_RANGE - 8 - log2_trans_size
 *
 * Parameters:
 *   out             - lvalue that receives the result, saturated with CLIP_S16
 *                     and cast to WORD16
 *   inp             - input coefficient; converted to WORD32 and evaluated
 *                     exactly once
 *   quant_coeff     - multiplier applied to |inp| in 64-bit arithmetic
 *   qp_div          - added to the right-shift amount q_bits
 *   log2_trans_size - subtracted when forming transform_shift
 *   q_add           - rounding offset, shifted left by
 *                     (q_bits - QUANT_ROUND_FACTOR_Q) before being added
 *
 * Expands to a braced block, so it must be used where a statement is allowed.
 * abs(), LWORD64/WORD32/WORD16, CLIP_S16 and the *_SHIFT / *_Q constants must
 * be visible at the point of use; this header does not provide them.
 *
 * NOTE(review): the rounding shift (q_bits - QUANT_ROUND_FACTOR_Q) is assumed
 * to be non-negative; a negative shift count is undefined behaviour in C.
 * Confirm against the qp_div / log2_trans_size ranges used by callers.
 */
#define QUANT(out, inp, quant_coeff, qp_div, log2_trans_size, q_add) \
{ \
    /* Read inp once so that side effects in the argument are not repeated */ \
    WORD32 quant_inp_ = (WORD32)(inp); \
    WORD32 quant_sign_ = (quant_inp_ < 0) ? -1 : 1; \
    /* q_bits calculation */ \
    /* To be moved outside in neon. To be computed once per transform call */ \
    WORD32 quant_bit_depth_ = 8; \
    WORD32 quant_tr_shift_ = MAX_TR_DYNAMIC_RANGE - quant_bit_depth_ - (log2_trans_size); \
    /* quant_coeff are multiplied by 16; instead of multiplying, q_bits is reduced by 4 */ \
    WORD32 quant_multiplier_ = 4; \
    WORD32 quant_q_bits_ = QUANT_SHIFT + (qp_div) + quant_tr_shift_ + SCALING_Q_SHIFT - quant_multiplier_; \
    LWORD64 quant_acc_ = (LWORD64)abs(quant_inp_); \
 \
    quant_acc_ = quant_acc_ * (quant_coeff); \
    quant_acc_ = quant_acc_ + (((LWORD64)(q_add)) << (quant_q_bits_ - QUANT_ROUND_FACTOR_Q)); \
    quant_acc_ = quant_acc_ >> quant_q_bits_; \
 \
    /* Restore the sign after the magnitude has been scaled and rounded */ \
    quant_acc_ = quant_acc_ * quant_sign_; \
    (out) = (WORD16) CLIP_S16(quant_acc_); \
}
61
/*
 * QUANT_HBD: same computation as the 8-bit quantizer, but with the bit depth
 * supplied by the caller.
 *
 *   out = sign(inp) * ((|inp| * quant_coeff
 *                       + (q_add << (q_bits - QUANT_ROUND_FACTOR_Q))) >> q_bits)
 *   q_bits = QUANT_SHIFT + qp_div + transform_shift + SCALING_Q_SHIFT - 4
 *   transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - log2_trans_size
 *
 * Parameters:
 *   out             - lvalue that receives the result, saturated with CLIP_S16
 *                     and cast to WORD16
 *   inp             - input coefficient; converted to WORD32 and evaluated
 *                     exactly once
 *   quant_coeff     - multiplier applied to |inp| in 64-bit arithmetic
 *   qp_div          - added to the right-shift amount q_bits
 *   log2_trans_size - subtracted when forming transform_shift
 *   q_add           - rounding offset, shifted left by
 *                     (q_bits - QUANT_ROUND_FACTOR_Q) before being added
 *   bit_depth       - subtracted when forming transform_shift
 *
 * Expands to a braced block, so it must be used where a statement is allowed.
 * abs(), LWORD64/WORD32/WORD16, CLIP_S16 and the *_SHIFT / *_Q constants must
 * be visible at the point of use; this header does not provide them.
 *
 * NOTE(review): the rounding shift (q_bits - QUANT_ROUND_FACTOR_Q) is assumed
 * to be non-negative; a negative shift count is undefined behaviour in C.
 * Confirm against the bit_depth / qp_div / log2_trans_size ranges in callers.
 */
#define QUANT_HBD(out, inp, quant_coeff, qp_div, log2_trans_size, q_add, bit_depth) \
{ \
    /* Read inp once so that side effects in the argument are not repeated */ \
    WORD32 quant_inp_ = (WORD32)(inp); \
    WORD32 quant_sign_ = (quant_inp_ < 0) ? -1 : 1; \
    /* q_bits calculation */ \
    /* To be moved outside in neon. To be computed once per transform call */ \
    WORD32 quant_tr_shift_ = MAX_TR_DYNAMIC_RANGE - (bit_depth) - (log2_trans_size); \
    /* quant_coeff are multiplied by 16; instead of multiplying, q_bits is reduced by 4 */ \
    WORD32 quant_multiplier_ = 4; \
    WORD32 quant_q_bits_ = QUANT_SHIFT + (qp_div) + quant_tr_shift_ + SCALING_Q_SHIFT - quant_multiplier_; \
    LWORD64 quant_acc_ = (LWORD64)abs(quant_inp_); \
 \
    quant_acc_ = quant_acc_ * (quant_coeff); \
    quant_acc_ = quant_acc_ + (((LWORD64)(q_add)) << (quant_q_bits_ - QUANT_ROUND_FACTOR_Q)); \
    quant_acc_ = quant_acc_ >> quant_q_bits_; \
 \
    /* Restore the sign after the magnitude has been scaled and rounded */ \
    quant_acc_ = quant_acc_ * quant_sign_; \
    (out) = (WORD16) CLIP_S16(quant_acc_); \
}
86/* added by 100028 */
/*
 * QUANT_NO_WEIGHTMAT: quantizes one coefficient with the bit depth fixed at 8,
 * using a shift that is additionally reduced by FLAT_RESCALE_MAT_Q_SHIFT.
 *
 *   out = sign(inp) * ((|inp| * quant_coeff
 *                       + (q_add << (q_bits - QUANT_ROUND_FACTOR_Q))) >> q_bits)
 *   q_bits = QUANT_SHIFT + qp_div + transform_shift + SCALING_Q_SHIFT - 4
 *            - FLAT_RESCALE_MAT_Q_SHIFT
 *   transform_shift = MAX_TR_DYNAMIC_RANGE - 8 - log2_trans_size
 *
 * Parameters:
 *   out             - lvalue that receives the result, saturated with CLIP_S16
 *                     and cast to WORD16
 *   inp             - input coefficient; converted to WORD32 and evaluated
 *                     exactly once
 *   quant_coeff     - multiplier applied to |inp|
 *   qp_div          - added to the right-shift amount q_bits
 *   log2_trans_size - subtracted when forming transform_shift
 *   q_add           - rounding offset, shifted left by
 *                     (q_bits - QUANT_ROUND_FACTOR_Q) before being added
 *
 * Expands to a braced block, so it must be used where a statement is allowed.
 * abs(), WORD32/WORD16, CLIP_S16 and the *_SHIFT / *_Q constants must be
 * visible at the point of use; this header does not provide them.
 *
 * NOTE(review): the accumulator is WORD32 here (QUANT uses LWORD64), so
 * |inp| * quant_coeff plus the rounding term must fit in 32 bits - confirm
 * against the coefficient ranges used by callers.
 * NOTE(review): the rounding shift (q_bits - QUANT_ROUND_FACTOR_Q) is assumed
 * to be non-negative; a negative shift count is undefined behaviour in C.
 */
#define QUANT_NO_WEIGHTMAT(out, inp, quant_coeff, qp_div, log2_trans_size, q_add) \
{ \
    /* Read inp once so that side effects in the argument are not repeated */ \
    WORD32 quant_inp_ = (WORD32)(inp); \
    WORD32 quant_sign_ = (quant_inp_ < 0) ? -1 : 1; \
    /* q_bits calculation */ \
    /* To be moved outside in neon. To be computed once per transform call */ \
    WORD32 quant_bit_depth_ = 8; \
    WORD32 quant_tr_shift_ = MAX_TR_DYNAMIC_RANGE - quant_bit_depth_ - (log2_trans_size); \
    /* quant_coeff are multiplied by 16; instead of multiplying, q_bits is reduced by 4 */ \
    WORD32 quant_multiplier_ = 4; \
    WORD32 quant_q_bits_ = QUANT_SHIFT + (qp_div) + quant_tr_shift_ + SCALING_Q_SHIFT \
                           - quant_multiplier_ - FLAT_RESCALE_MAT_Q_SHIFT /* 2048 */; \
    WORD32 quant_acc_ = (WORD32)abs(quant_inp_); \
 \
    quant_acc_ = quant_acc_ * (quant_coeff); \
    quant_acc_ = quant_acc_ + (((WORD32)(q_add)) << (quant_q_bits_ - QUANT_ROUND_FACTOR_Q)); \
    quant_acc_ = quant_acc_ >> quant_q_bits_; \
 \
    /* Restore the sign after the magnitude has been scaled and rounded */ \
    quant_acc_ = quant_acc_ * quant_sign_; \
    (out) = (WORD16) CLIP_S16(quant_acc_); \
}
111
/*
 * QUANT_NO_WEIGHTMAT_HBD: quantizes one coefficient with a caller-supplied bit
 * depth, using a shift that is additionally reduced by
 * FLAT_RESCALE_MAT_Q_SHIFT.
 *
 *   out = sign(inp) * ((|inp| * quant_coeff
 *                       + (q_add << (q_bits - QUANT_ROUND_FACTOR_Q))) >> q_bits)
 *   q_bits = QUANT_SHIFT + qp_div + transform_shift + SCALING_Q_SHIFT - 4
 *            - FLAT_RESCALE_MAT_Q_SHIFT
 *   transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - log2_trans_size
 *
 * Parameters:
 *   out             - lvalue that receives the result, saturated with CLIP_S16
 *                     and cast to WORD16
 *   inp             - input coefficient; converted to WORD32 and evaluated
 *                     exactly once
 *   quant_coeff     - multiplier applied to |inp|
 *   qp_div          - added to the right-shift amount q_bits
 *   log2_trans_size - subtracted when forming transform_shift
 *   q_add           - rounding offset, shifted left by
 *                     (q_bits - QUANT_ROUND_FACTOR_Q) before being added
 *   bit_depth       - subtracted when forming transform_shift
 *
 * Expands to a braced block, so it must be used where a statement is allowed.
 * abs(), WORD32/WORD16, CLIP_S16 and the *_SHIFT / *_Q constants must be
 * visible at the point of use; this header does not provide them.
 *
 * NOTE(review): the accumulator is WORD32 here (QUANT_HBD uses LWORD64), so
 * |inp| * quant_coeff plus the rounding term must fit in 32 bits - confirm
 * against the coefficient ranges used by callers.
 * NOTE(review): the rounding shift (q_bits - QUANT_ROUND_FACTOR_Q) is assumed
 * to be non-negative; a negative shift count is undefined behaviour in C.
 */
#define QUANT_NO_WEIGHTMAT_HBD(out, inp, quant_coeff, qp_div, log2_trans_size, q_add, bit_depth) \
{ \
    /* Read inp once so that side effects in the argument are not repeated */ \
    WORD32 quant_inp_ = (WORD32)(inp); \
    WORD32 quant_sign_ = (quant_inp_ < 0) ? -1 : 1; \
    /* q_bits calculation */ \
    /* To be moved outside in neon. To be computed once per transform call */ \
    WORD32 quant_tr_shift_ = MAX_TR_DYNAMIC_RANGE - (bit_depth) - (log2_trans_size); \
    /* quant_coeff are multiplied by 16; instead of multiplying, q_bits is reduced by 4 */ \
    WORD32 quant_multiplier_ = 4; \
    WORD32 quant_q_bits_ = QUANT_SHIFT + (qp_div) + quant_tr_shift_ + SCALING_Q_SHIFT \
                           - quant_multiplier_ - FLAT_RESCALE_MAT_Q_SHIFT /* 2048 */; \
    WORD32 quant_acc_ = (WORD32)abs(quant_inp_); \
 \
    quant_acc_ = quant_acc_ * (quant_coeff); \
    quant_acc_ = quant_acc_ + (((WORD32)(q_add)) << (quant_q_bits_ - QUANT_ROUND_FACTOR_Q)); \
    quant_acc_ = quant_acc_ >> quant_q_bits_; \
 \
    /* Restore the sign after the magnitude has been scaled and rounded */ \
    quant_acc_ = quant_acc_ * quant_sign_; \
    (out) = (WORD16) CLIP_S16(quant_acc_); \
}
/* Reference inverse quantization: "pi2_src" (coefficients) will be clipped to 15 or 14 bits when (qp_div > shift_iq). The spec does not mention any such clip. */
137
138/* Inverse quantization other than 4x4 */
139/* No clipping is needed for "pi2_src"(coefficients) */
/*
 * IQUANT: inverse-quantizes one coefficient without clipping the input.
 *
 *   shift = shift_iq - qp_div
 *   res   = CLIP_S16(SHR_NEG(coeff * dequant_coeff + SHL_NEG(1, shift - 1), shift))
 *
 * Parameters:
 *   res           - lvalue that receives the CLIP_S16-saturated result
 *   coeff         - coefficient to scale (annotated in the original as
 *                   pi2_src[index*src_strd])
 *   dequant_coeff - scale factor (annotated in the original as
 *                   pi2_dequant_coeff[index*trans_size] *
 *                   g_ihevc_iquant_scales[qp_rem])
 *   shift_iq      - base shift amount
 *   qp_div        - subtracted from shift_iq to form the net shift
 *
 * Every argument is evaluated exactly once and is parenthesized, so expression
 * arguments keep their own precedence. The product is formed in WORD32.
 *
 * Expands to a braced block, so it must be used where a statement is allowed.
 * WORD32, SHL_NEG, SHR_NEG and CLIP_S16 must be visible at the point of use.
 * NOTE(review): SHL_NEG / SHR_NEG are defined outside this file; presumably
 * they reverse the shift direction for a negative count - confirm.
 */
#define IQUANT(res, coeff, dequant_coeff, shift_iq, qp_div) \
{ \
    /* Net shift, computed once from single reads of shift_iq and qp_div */ \
    WORD32 iq_shift_ = (shift_iq) - (qp_div); \
    /* To be moved outside in neon. To be computed once per transform call */ \
    WORD32 iq_round_ = SHL_NEG(1, (iq_shift_ - 1)); \
    WORD32 iq_acc_ = (coeff) * (dequant_coeff); \
 \
    iq_acc_ = iq_acc_ + iq_round_; \
    iq_acc_ = SHR_NEG(iq_acc_, (iq_shift_)); \
 \
    (res) = CLIP_S16(iq_acc_); \
}
152
153/* 4x4 inverse quantization */
154/* Options : */
/* 1. Clip "pi2_src" (coefficients) to 10 bits if "(qp_div > shift_iq)", or to 16 bits if "(qp_div <= shift_iq)"; this is the option IQUANT_4x4 implements */
156/* 2. Increasing precision of "pi2_src"(coefficients) to 64 bits */
157
/*
 * IQUANT_4x4: inverse-quantizes one coefficient, clipping the input first.
 *
 *   bits  = (qp_div > shift_iq) ? 10 : 16
 *   c     = CLIP3(coeff, -(1 << (bits - 1)), (1 << (bits - 1)) - 1)
 *   shift = shift_iq - qp_div
 *   res   = CLIP_S16(SHR_NEG(c * dequant_coeff + SHL_NEG(1, shift - 1), shift))
 *
 * Parameters:
 *   res           - lvalue that receives the CLIP_S16-saturated result
 *   coeff         - coefficient to scale (annotated in the original as
 *                   pi2_src[index*src_strd]); converted to WORD32
 *   dequant_coeff - scale factor (annotated in the original as
 *                   pi2_dequant_coeff[index*trans_size] *
 *                   g_ihevc_iquant_scales[qp_rem])
 *   shift_iq      - base shift amount
 *   qp_div        - subtracted from shift_iq to form the net shift
 *
 * Every argument is evaluated exactly once and is parenthesized, so expression
 * arguments keep their own precedence. The product is formed in WORD32.
 *
 * Expands to a braced block, so it must be used where a statement is allowed.
 * WORD32, SHL_NEG, SHR_NEG, CLIP3 and CLIP_S16 must be visible at the point of
 * use.
 * NOTE(review): SHL_NEG / SHR_NEG are defined outside this file; presumably
 * they reverse the shift direction for a negative count - confirm.
 */
#define IQUANT_4x4(res, coeff, dequant_coeff, shift_iq, qp_div) \
{ \
    /* Single reads of the shift arguments; they are needed more than once */ \
    WORD32 iq_shift_iq_ = (shift_iq); \
    WORD32 iq_qp_div_ = (qp_div); \
    WORD32 iq_shift_ = iq_shift_iq_ - iq_qp_div_; \
    /* To be moved outside in neon. To be computed once per transform call */ \
    WORD32 iq_round_ = SHL_NEG(1, (iq_shift_ - 1)); \
    /* Narrower input range when qp_div exceeds shift_iq */ \
    WORD32 iq_bits_ = (iq_qp_div_ > iq_shift_iq_) ? 10 : 16; \
    WORD32 iq_min_ = -(1 << (iq_bits_ - 1)); \
    WORD32 iq_max_ = (1 << (iq_bits_ - 1)) - 1; \
    WORD32 iq_coeff_ = (coeff); \
    WORD32 iq_acc_; \
 \
    iq_coeff_ = CLIP3(iq_coeff_, iq_min_, iq_max_); \
 \
    iq_acc_ = iq_coeff_ * (dequant_coeff); \
    iq_acc_ = iq_acc_ + iq_round_; \
    iq_acc_ = SHR_NEG(iq_acc_, (iq_shift_)); \
 \
    (res) = CLIP_S16(iq_acc_); \
}
181
182#endif /* IHEVC_TRANS_MACROS_H_ */
183