ihevc_chroma_itrans_recon_8x8.c revision 0d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098
1/****************************************************************************** 2* 3* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore 4* 5* Licensed under the Apache License, Version 2.0 (the "License"); 6* you may not use this file except in compliance with the License. 7* You may obtain a copy of the License at: 8* 9* http://www.apache.org/licenses/LICENSE-2.0 10* 11* Unless required by applicable law or agreed to in writing, software 12* distributed under the License is distributed on an "AS IS" BASIS, 13* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14* See the License for the specific language governing permissions and 15* limitations under the License. 16* 17******************************************************************************/ 18/** 19 ******************************************************************************* 20 * @file 21 * ihevc_chroma_itrans_recon_8x8.c 22 * 23 * @brief 24 * Contains function definitions for 8x8 inverse transform and reconstruction 25 * of chroma interleaved data. 26 * 27 * @author 28 * 100470 29 * 30 * @par List of Functions: 31 * - ihevc_chroma_itrans_recon_8x8() 32 * 33 * @remarks 34 * None 35 * 36 ******************************************************************************* 37 */ 38 39#include <stdio.h> 40#include <string.h> 41#include "ihevc_typedefs.h" 42#include "ihevc_macros.h" 43#include "ihevc_platform_macros.h" 44#include "ihevc_defs.h" 45#include "ihevc_trans_tables.h" 46#include "ihevc_chroma_itrans_recon.h" 47#include "ihevc_func_selector.h" 48#include "ihevc_trans_macros.h" 49 50/* All the functions work one component(U or V) of interleaved data depending upon pointers passed to it */ 51/* Data visualization */ 52/* U V U V U V U V */ 53/* U V U V U V U V */ 54/* U V U V U V U V */ 55/* U V U V U V U V */ 56/* If the pointer points to first byte of above stream (U) , functions will operate on U component */ 57/* If the pointer points to second byte of above stream (V) , functions will operate on V component */ 58 59/** 60 ******************************************************************************* 61 * 62 * @brief 63 * This function performs Inverse transform and reconstruction for 8x8 64 * input block 65 * 66 * @par Description: 67 * Performs inverse transform and adds the prediction data and clips output 68 * to 8 bit 69 * 70 * @param[in] pi2_src 71 * Input 8x8 coefficients 72 * 73 * @param[in] pi2_tmp 74 * Temporary 8x8 buffer for storing inverse transform 75 * 1st stage output 76 * 77 * @param[in] pu1_pred 78 * Prediction 8x8 block 79 * 80 * @param[out] pu1_dst 81 * Output 8x8 block 82 * 83 * @param[in] src_strd 84 * Input stride 85 * 86 * @param[in] pred_strd 87 * Prediction stride 88 * 89 * @param[in] dst_strd 90 * Output Stride 91 * 92 * @param[in] shift 93 * Output shift 94 * 95 * @param[in] zero_cols 96 * Zero columns in pi2_src 97 * 98 * @returns Void 99 * 100 * @remarks 101 * None 102 * 103 ******************************************************************************* 104 */ 105 106 107void ihevc_chroma_itrans_recon_8x8(WORD16 *pi2_src, 108 WORD16 *pi2_tmp, 109 UWORD8 *pu1_pred, 110 UWORD8 *pu1_dst, 111 WORD32 src_strd, 112 WORD32 pred_strd, 113 WORD32 dst_strd, 114 WORD32 zero_cols, 115 WORD32 zero_rows) 116{ 117 WORD32 j, k; 118 WORD32 e[4], o[4]; 119 WORD32 ee[2], eo[2]; 120 WORD32 add; 121 WORD32 shift; 122 WORD16 *pi2_tmp_orig; 123 WORD32 trans_size; 124 WORD32 zero_rows_2nd_stage = zero_cols; 125 WORD32 row_limit_2nd_stage; 126 UNUSED(zero_rows); 127 trans_size = TRANS_SIZE_8; 128 129 pi2_tmp_orig = pi2_tmp; 130 131 if((zero_cols & 0xF0) == 0xF0) 132 row_limit_2nd_stage = 4; 133 else 134 row_limit_2nd_stage = TRANS_SIZE_8; 135 136 /* Inverse Transform 1st stage */ 137 shift = IT_SHIFT_STAGE_1; 138 add = 1 << (shift - 1); 139 { 140 /************************************************************************************************/ 141 /**********************************START - IT_RECON_8x8******************************************/ 142 /************************************************************************************************/ 143 144 for(j = 0; j < row_limit_2nd_stage; j++) 145 { 146 /* Checking for Zero Cols */ 147 if((zero_cols & 1) == 1) 148 { 149 memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); 150 } 151 else 152 { 153 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 154 for(k = 0; k < 4; k++) 155 { 156 o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_src[src_strd] 157 + g_ai2_ihevc_trans_8[3][k] 158 * pi2_src[3 * src_strd] 159 + g_ai2_ihevc_trans_8[5][k] 160 * pi2_src[5 * src_strd] 161 + g_ai2_ihevc_trans_8[7][k] 162 * pi2_src[7 * src_strd]; 163 } 164 165 eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_src[2 * src_strd] 166 + g_ai2_ihevc_trans_8[6][0] * pi2_src[6 * src_strd]; 167 eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_src[2 * src_strd] 168 + g_ai2_ihevc_trans_8[6][1] * pi2_src[6 * src_strd]; 169 ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_src[0] 170 + g_ai2_ihevc_trans_8[4][0] * pi2_src[4 * src_strd]; 171 ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_src[0] 172 + g_ai2_ihevc_trans_8[4][1] * pi2_src[4 * src_strd]; 173 174 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 175 e[0] = ee[0] + eo[0]; 176 e[3] = ee[0] - eo[0]; 177 e[1] = ee[1] + eo[1]; 178 e[2] = ee[1] - eo[1]; 179 for(k = 0; k < 4; k++) 180 { 181 pi2_tmp[k] = 182 CLIP_S16(((e[k] + o[k] + add) >> shift)); 183 pi2_tmp[k + 4] = 184 CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift)); 185 } 186 } 187 pi2_src++; 188 pi2_tmp += trans_size; 189 zero_cols = zero_cols >> 1; 190 } 191 192 pi2_tmp = pi2_tmp_orig; 193 194 /* Inverse Transform 2nd stage */ 195 shift = IT_SHIFT_STAGE_2; 196 add = 1 << (shift - 1); 197 198 if((zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */ 199 { 200 for(j = 0; j < trans_size; j++) 201 { 202 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 203 for(k = 0; k < 4; k++) 204 { 205 o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size] 206 + g_ai2_ihevc_trans_8[3][k] 207 * pi2_tmp[3 * trans_size]; 208 } 209 eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size]; 210 eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size]; 211 ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0]; 212 ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0]; 213 214 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 215 e[0] = ee[0] + eo[0]; 216 e[3] = ee[0] - eo[0]; 217 e[1] = ee[1] + eo[1]; 218 e[2] = ee[1] - eo[1]; 219 for(k = 0; k < 4; k++) 220 { 221 WORD32 itrans_out; 222 itrans_out = 223 CLIP_S16(((e[k] + o[k] + add) >> shift)); 224 pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); 225 itrans_out = 226 CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift)); 227 pu1_dst[(k + 4) * 2] = 228 CLIP_U8((itrans_out + pu1_pred[(k + 4) * 2])); 229 } 230 pi2_tmp++; 231 pu1_pred += pred_strd; 232 pu1_dst += dst_strd; 233 } 234 } 235 else /* All rows of output of 1st stage are non-zero */ 236 { 237 for(j = 0; j < trans_size; j++) 238 { 239 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 240 for(k = 0; k < 4; k++) 241 { 242 o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size] 243 + g_ai2_ihevc_trans_8[3][k] 244 * pi2_tmp[3 * trans_size] 245 + g_ai2_ihevc_trans_8[5][k] 246 * pi2_tmp[5 * trans_size] 247 + g_ai2_ihevc_trans_8[7][k] 248 * pi2_tmp[7 * trans_size]; 249 } 250 251 eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size] 252 + g_ai2_ihevc_trans_8[6][0] * pi2_tmp[6 * trans_size]; 253 eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size] 254 + g_ai2_ihevc_trans_8[6][1] * pi2_tmp[6 * trans_size]; 255 ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0] 256 + g_ai2_ihevc_trans_8[4][0] * pi2_tmp[4 * trans_size]; 257 ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0] 258 + g_ai2_ihevc_trans_8[4][1] * pi2_tmp[4 * trans_size]; 259 260 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 261 e[0] = ee[0] + eo[0]; 262 e[3] = ee[0] - eo[0]; 263 e[1] = ee[1] + eo[1]; 264 e[2] = ee[1] - eo[1]; 265 for(k = 0; k < 4; k++) 266 { 267 WORD32 itrans_out; 268 itrans_out = 269 CLIP_S16(((e[k] + o[k] + add) >> shift)); 270 pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); 271 itrans_out = 272 CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift)); 273 pu1_dst[(k + 4) * 2] = 274 CLIP_U8((itrans_out + pu1_pred[(k + 4) * 2])); 275 } 276 pi2_tmp++; 277 pu1_pred += pred_strd; 278 pu1_dst += dst_strd; 279 } 280 } 281 /************************************************************************************************/ 282 /************************************END - IT_RECON_8x8******************************************/ 283 /************************************************************************************************/ 284 } 285} 286