/******************************************************************************
*
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
/**
*******************************************************************************
* @file
*  ihevc_trans_macros.h
*
* @brief
*  Macros used in the forward transform and inverse transform functions
*
* @author
*  Ittiam
*
* @remarks
*  None
*
*******************************************************************************
*/
#ifndef IHEVC_TRANS_MACROS_H_
#define IHEVC_TRANS_MACROS_H_

/*
 * Notes common to every macro in this file:
 *
 * - Nothing is defined or included here besides the macros themselves. The
 *   expansions use WORD16, WORD32, LWORD64, abs(), CLIP3, CLIP_S16, SHL_NEG,
 *   SHR_NEG, MAX_TR_DYNAMIC_RANGE, QUANT_SHIFT, SCALING_Q_SHIFT,
 *   QUANT_ROUND_FACTOR_Q and FLAT_RESCALE_MAT_Q_SHIFT, so those names must be
 *   visible wherever a macro is expanded.
 * - Each macro expands to a brace-enclosed block that declares its own
 *   temporaries (tmp, sign, q_bits, add_iq, ...). Do not pass argument
 *   expressions that refer to caller variables with those names.
 * - Some arguments are substituted more than once (for example inp, shift_iq
 *   and qp_div), so arguments should be free of side effects.
 * - Every macro parameter is parenthesized in the expansion, so compound
 *   expressions such as "a + b" can be passed safely.
 * - NOTE(review): in the QUANT* macros (q_bits - QUANT_ROUND_FACTOR_Q) is used
 *   directly as a left-shift count. It has to be non-negative for the shift
 *   to be well defined; confirm this against the constant values and the
 *   qp_div / log2_trans_size ranges used by the callers.
 */

/**
 * QUANT: quantize one value with the bit depth fixed at 8.
 *
 * out             - lvalue receiving the result; the value is clipped with
 *                   CLIP_S16 and cast to WORD16
 * inp             - value to quantize; abs(inp) is scaled and the sign of inp
 *                   is re-applied at the end
 * quant_coeff     - multiplier applied to abs(inp)
 * qp_div          - added to the right-shift amount q_bits
 * log2_trans_size - subtracted when deriving transform_shift
 * q_add           - rounding term, shifted left by
 *                   (q_bits - QUANT_ROUND_FACTOR_Q) before being added
 *
 * The intermediate product is held in an LWORD64.
 */
#define QUANT(out, inp, quant_coeff, qp_div, log2_trans_size, q_add) \
{ \
    LWORD64 tmp; \
    WORD32 sign; \
    WORD32 bit_depth, transform_shift; \
    WORD32 q_bits, quant_multiplier; \
    \
    /* q_bits and q_add calculation */ \
    /* To be moved outside in neon. To be computed once per transform call */ \
    bit_depth = 8; \
    transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - (log2_trans_size); \
    /* because quant_coeff are multiplied by 16. Instead of multiplying, */ \
    /* we can reduce the division factor q_bits by 4 */ \
    quant_multiplier = 4; \
    q_bits = QUANT_SHIFT + (qp_div) + transform_shift + SCALING_Q_SHIFT - quant_multiplier; \
    \
    sign = ((inp) < 0) ? -1 : 1; \
    \
    tmp = (LWORD64)(abs(inp)); \
    tmp = tmp * (quant_coeff); \
    tmp = tmp + (((LWORD64)(q_add)) << (q_bits - QUANT_ROUND_FACTOR_Q)); \
    tmp = tmp >> q_bits; \
    \
    tmp = tmp * sign; \
    (out) = (WORD16) CLIP_S16(tmp); \
}

/**
 * QUANT_HBD: same computation as QUANT, but the bit depth is supplied by the
 * caller through the extra bit_depth parameter instead of being fixed at 8.
 *
 * out, inp, quant_coeff, qp_div, log2_trans_size, q_add - as for QUANT
 * bit_depth - subtracted (together with log2_trans_size) from
 *             MAX_TR_DYNAMIC_RANGE to derive transform_shift
 */
#define QUANT_HBD(out, inp, quant_coeff, qp_div, log2_trans_size, q_add, bit_depth) \
{ \
    LWORD64 tmp; \
    WORD32 sign; \
    WORD32 transform_shift; \
    WORD32 q_bits, quant_multiplier; \
    \
    /* q_bits and q_add calculation */ \
    /* To be moved outside in neon. To be computed once per transform call */ \
    \
    transform_shift = MAX_TR_DYNAMIC_RANGE - (bit_depth) - (log2_trans_size); \
    /* because quant_coeff are multiplied by 16. Instead of multiplying, */ \
    /* we can reduce the division factor q_bits by 4 */ \
    quant_multiplier = 4; \
    q_bits = QUANT_SHIFT + (qp_div) + transform_shift + SCALING_Q_SHIFT - quant_multiplier; \
    \
    sign = ((inp) < 0) ? -1 : 1; \
    \
    tmp = (LWORD64)(abs(inp)); \
    tmp = tmp * (quant_coeff); \
    tmp = tmp + (((LWORD64)(q_add)) << (q_bits - QUANT_ROUND_FACTOR_Q)); \
    tmp = tmp >> q_bits; \
    \
    tmp = tmp * sign; \
    (out) = (WORD16) CLIP_S16(tmp); \
}

/* added by 100028 */
/**
 * QUANT_NO_WEIGHTMAT: variant of QUANT (bit depth fixed at 8) in which
 * q_bits is additionally reduced by FLAT_RESCALE_MAT_Q_SHIFT and the
 * intermediate product is held in a WORD32 rather than an LWORD64.
 *
 * Parameters are the same as for QUANT.
 */
#define QUANT_NO_WEIGHTMAT(out, inp, quant_coeff, qp_div, log2_trans_size, q_add) \
{ \
    WORD32 tmp; \
    WORD32 sign; \
    WORD32 bit_depth, transform_shift; \
    WORD32 q_bits, quant_multiplier; \
    \
    /* q_bits and q_add calculation */ \
    /* To be moved outside in neon. To be computed once per transform call */ \
    bit_depth = 8; \
    transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - (log2_trans_size); \
    /* because quant_coeff are multiplied by 16. Instead of multiplying, */ \
    /* we can reduce the division factor q_bits by 4 */ \
    quant_multiplier = 4; \
    q_bits = QUANT_SHIFT + (qp_div) + transform_shift + SCALING_Q_SHIFT - quant_multiplier - FLAT_RESCALE_MAT_Q_SHIFT /* 2048 */; \
    \
    sign = ((inp) < 0) ? -1 : 1; \
    \
    tmp = (WORD32)(abs(inp)); \
    tmp = tmp * (quant_coeff); \
    tmp = tmp + (((WORD32)(q_add)) << (q_bits - QUANT_ROUND_FACTOR_Q)); \
    tmp = tmp >> q_bits; \
    \
    tmp = tmp * sign; \
    (out) = (WORD16) CLIP_S16(tmp); \
}

/**
 * QUANT_NO_WEIGHTMAT_HBD: same computation as QUANT_NO_WEIGHTMAT, but the bit
 * depth is supplied by the caller through the extra bit_depth parameter.
 *
 * out, inp, quant_coeff, qp_div, log2_trans_size, q_add - as for QUANT
 * bit_depth - subtracted (together with log2_trans_size) from
 *             MAX_TR_DYNAMIC_RANGE to derive transform_shift
 */
#define QUANT_NO_WEIGHTMAT_HBD(out, inp, quant_coeff, qp_div, log2_trans_size, q_add, bit_depth) \
{ \
    WORD32 tmp; \
    WORD32 sign; \
    WORD32 transform_shift; \
    WORD32 q_bits, quant_multiplier; \
    \
    /* q_bits and q_add calculation */ \
    /* To be moved outside in neon. To be computed once per transform call */ \
    \
    transform_shift = MAX_TR_DYNAMIC_RANGE - (bit_depth) - (log2_trans_size); \
    /* because quant_coeff are multiplied by 16. Instead of multiplying, */ \
    /* we can reduce the division factor q_bits by 4 */ \
    quant_multiplier = 4; \
    q_bits = QUANT_SHIFT + (qp_div) + transform_shift + SCALING_Q_SHIFT - quant_multiplier - FLAT_RESCALE_MAT_Q_SHIFT /* 2048 */; \
    \
    sign = ((inp) < 0) ? -1 : 1; \
    \
    tmp = (WORD32)(abs(inp)); \
    tmp = tmp * (quant_coeff); \
    tmp = tmp + (((WORD32)(q_add)) << (q_bits - QUANT_ROUND_FACTOR_Q)); \
    tmp = tmp >> q_bits; \
    \
    tmp = tmp * sign; \
    (out) = (WORD16) CLIP_S16(tmp); \
}

/* Reference Inverse Quantization: "pi2_src"(Coefficients) will be clipped to 15 or 14 bits when (qp_div > shift_iq). Spec doesn't have any clip mentioned */

/* Inverse quantization other than 4x4 */
/* No clipping is needed for "pi2_src"(coefficients) */
/**
 * IQUANT: inverse-quantize one coefficient.
 *
 * res           - lvalue receiving the result, clipped with CLIP_S16
 * coeff         - coefficient to scale (pi2_src[index*src_strd] at the call
 *                 site, per the original annotation)
 * dequant_coeff - multiplier applied to coeff
 *                 (pi2_dequant_coeff[index*trans_size] *
 *                 g_ihevc_iquant_scales[qp_rem], per the original annotation)
 * shift_iq      - base shift amount
 * qp_div        - subtracted from shift_iq to obtain the effective shift
 *
 * Computes (coeff * dequant_coeff + add_iq) shifted by (shift_iq - qp_div)
 * through SHR_NEG, where add_iq is SHL_NEG(1, shift_iq - qp_div - 1).
 */
#define IQUANT(res, coeff, dequant_coeff, shift_iq, qp_div) \
{ \
    WORD32 tmp, add_iq; \
    \
    /* To be moved outside in neon. To be computed once per transform call */ \
    add_iq = SHL_NEG(1, ((shift_iq) - (qp_div) - 1)); \
    \
    tmp = (coeff) * (dequant_coeff); \
    tmp = tmp + add_iq; \
    tmp = SHR_NEG(tmp, ((shift_iq) - (qp_div))); \
    \
    (res) = CLIP_S16(tmp); \
}

/* 4x4 inverse quantization */
/* Options : */
/* 1. Clip "pi2_src"(coefficients) to 10 bits if "(qp_div > shift_iq)" or 16 bits otherwise */
/* 2. Increasing precision of "pi2_src"(coefficients) to 64 bits */
/**
 * IQUANT_4x4: inverse-quantize one coefficient, clipping the coefficient
 * before it is multiplied (option 1 above).
 *
 * Parameters are the same as for IQUANT. The coefficient is first limited
 * with CLIP3 to a signed 10-bit range when qp_div is strictly greater than
 * shift_iq, and to a signed 16-bit range otherwise; the rest of the
 * computation matches IQUANT.
 */
#define IQUANT_4x4(res, coeff, dequant_coeff, shift_iq, qp_div) \
{ \
    WORD32 clip_coeff, tmp; \
    WORD32 coeff_min, coeff_max; \
    WORD32 coeff_bit_range; \
    WORD32 add_iq; \
    \
    /* To be moved outside in neon. To be computed once per transform call */ \
    add_iq = SHL_NEG(1, ((shift_iq) - (qp_div) - 1)); \
    \
    coeff_bit_range = 16; \
    if((qp_div) > (shift_iq)) \
    { \
        coeff_bit_range = 10; \
    } \
    \
    coeff_min = -(1 << (coeff_bit_range - 1)); \
    coeff_max = (1 << (coeff_bit_range - 1)) - 1; \
    \
    clip_coeff = CLIP3((coeff), coeff_min, coeff_max); \
    \
    tmp = clip_coeff * (dequant_coeff); \
    tmp = tmp + add_iq; \
    tmp = SHR_NEG(tmp, ((shift_iq) - (qp_div))); \
    \
    (res) = CLIP_S16(tmp); \
}

#endif /* IHEVC_TRANS_MACROS_H_ */