1/******************************************************************************
2*
3* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4*
5* Licensed under the Apache License, Version 2.0 (the "License");
6* you may not use this file except in compliance with the License.
7* You may obtain a copy of the License at:
8*
9* http://www.apache.org/licenses/LICENSE-2.0
10*
11* Unless required by applicable law or agreed to in writing, software
12* distributed under the License is distributed on an "AS IS" BASIS,
13* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14* See the License for the specific language governing permissions and
15* limitations under the License.
16*
17******************************************************************************/
18/**
19 *******************************************************************************
20 * @file
21 *  ihevc_iquant_itrans_recon.c
22 *
23 * @brief
24 *  Contains function definitions for inverse  quantization, inverse
25 * transform and reconstruction
26 *
27 * @author
28 *  100470
29 *
30 * @par List of Functions:
31 *  - ihevc_iquant_itrans_recon_4x4_ttype1()
32 *  - ihevc_iquant_itrans_recon_4x4()
33 *
34 * @remarks
35 *  None
36 *
37 *******************************************************************************
38 */
39#include <stdio.h>
40#include <string.h>
41#include "ihevc_typedefs.h"
42#include "ihevc_macros.h"
43#include "ihevc_platform_macros.h"
44#include "ihevc_defs.h"
45#include "ihevc_trans_tables.h"
46#include "ihevc_iquant_itrans_recon.h"
47#include "ihevc_func_selector.h"
48#include "ihevc_trans_macros.h"
49
50/* All the functions here are replicated from ihevc_itrans.c and modified to */
51/* include reconstruction */
52
53/**
54 *******************************************************************************
55 *
56 * @brief
57 *  This function performs inverse quantization, inverse  transform
58 * type1(DST) and reconstruction for 4x4  input block
59 *
60 * @par Description:
61 *  Performs inverse quantization , inverse transform type 1  and adds
62 * prediction data and clips output to 8 bit
63 *
64 * @param[in] pi2_src
65 *  Input 4x4 coefficients
66 *
67 * @param[in] pi2_tmp
68 *  Temporary 4x4 buffer for storing inverse
69 *  transform 1st stage output
70 *
71 * @param[in] pu1_pred
72 *  Prediction 4x4 block
73 *
74 * @param[in] pi2_dequant_coeff
75 *  Dequant Coeffs
76 *
77 * @param[out] pu1_dst
78 *  Output 4x4 block
79 *
80 * @param[in] qp_div
81 *  Quantization parameter / 6
82 *
83 * @param[in] qp_rem
84 *  Quantization parameter % 6
85 *
86 * @param[in] src_strd
87 *  Input stride
88 *
89 * @param[in] pred_strd
90 *  Prediction stride
91 *
92 * @param[in] dst_strd
93 *  Output Stride
94 *
95 * @param[in] zero_cols
96 *  Zero columns in pi2_src
97 *
98 * @param[in] zero_rows
99 *  Zero Rows in pi2_src
100 *
101 * @returns  Void
102 *
103 * @remarks
104 *  None
105 *
106 *******************************************************************************
107 */
108
109void ihevc_iquant_itrans_recon_4x4_ttype1(WORD16 *pi2_src,
110                                          WORD16 *pi2_tmp,
111                                          UWORD8 *pu1_pred,
112                                          WORD16 *pi2_dequant_coeff,
113                                          UWORD8 *pu1_dst,
114                                          WORD32 qp_div, /* qpscaled / 6 */
115                                          WORD32 qp_rem, /* qpscaled % 6 */
116                                          WORD32 src_strd,
117                                          WORD32 pred_strd,
118                                          WORD32 dst_strd,
119                                          WORD32 zero_cols,
120                                          WORD32 zero_rows)
121{
122    UNUSED(zero_rows);
123    /* Inverse Quant and Inverse Transform and Reconstruction */
124    {
125        WORD32 i, c[4];
126        WORD32 add;
127        WORD32 shift;
128        WORD16 *pi2_tmp_orig;
129        WORD32 shift_iq;
130        WORD32 trans_size;
131        /* Inverse Quantization constants */
132        {
133            WORD32 log2_trans_size, bit_depth;
134
135            log2_trans_size = 2;
136            bit_depth = 8 + 0;
137            shift_iq = bit_depth + log2_trans_size - 5;
138        }
139
140        trans_size = TRANS_SIZE_4;
141        pi2_tmp_orig = pi2_tmp;
142
143        /* Inverse Transform 1st stage */
144        shift = IT_SHIFT_STAGE_1;
145        add = 1 << (shift - 1);
146
147        for(i = 0; i < trans_size; i++)
148        {
149            /* Checking for Zero Cols */
150            if((zero_cols & 1) == 1)
151            {
152                memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
153            }
154            else
155            {
156                WORD32 iq_tmp_1, iq_tmp_2, iq_tmp_3;
157                // Intermediate Variables
158                IQUANT_4x4(iq_tmp_1,
159                           pi2_src[0 * src_strd],
160                           pi2_dequant_coeff[0 * trans_size] * g_ihevc_iquant_scales[qp_rem],
161                           shift_iq, qp_div);
162                IQUANT_4x4(iq_tmp_2,
163                           pi2_src[2 * src_strd],
164                           pi2_dequant_coeff[2 * trans_size] * g_ihevc_iquant_scales[qp_rem],
165                           shift_iq, qp_div);
166                c[0] = iq_tmp_1 + iq_tmp_2;
167
168                IQUANT_4x4(iq_tmp_1,
169                           pi2_src[2 * src_strd],
170                           pi2_dequant_coeff[2 * trans_size] * g_ihevc_iquant_scales[qp_rem],
171                           shift_iq, qp_div);
172                IQUANT_4x4(iq_tmp_2,
173                           pi2_src[3 * src_strd],
174                           pi2_dequant_coeff[3 * trans_size] * g_ihevc_iquant_scales[qp_rem],
175                           shift_iq, qp_div);
176                c[1] = iq_tmp_1 + iq_tmp_2;
177
178                IQUANT_4x4(iq_tmp_1,
179                           pi2_src[0 * src_strd],
180                           pi2_dequant_coeff[0 * trans_size] * g_ihevc_iquant_scales[qp_rem],
181                           shift_iq, qp_div);
182                IQUANT_4x4(iq_tmp_2,
183                           pi2_src[3 * src_strd],
184                           pi2_dequant_coeff[3 * trans_size] * g_ihevc_iquant_scales[qp_rem],
185                           shift_iq, qp_div);
186                c[2] = iq_tmp_1 - iq_tmp_2;
187
188                IQUANT_4x4(iq_tmp_1,
189                           pi2_src[1 * src_strd],
190                           pi2_dequant_coeff[1 * trans_size] * g_ihevc_iquant_scales[qp_rem],
191                           shift_iq, qp_div);
192                c[3] = 74 * iq_tmp_1;
193
194                pi2_tmp[0] =
195                                CLIP_S16((29 * c[0] + 55 * c[1] + c[3] + add) >> shift);
196                pi2_tmp[1] =
197                                CLIP_S16((55 * c[2] - 29 * c[1] + c[3] + add) >> shift);
198
199                IQUANT_4x4(iq_tmp_1,
200                           pi2_src[0 * src_strd],
201                           pi2_dequant_coeff[0 * trans_size] * g_ihevc_iquant_scales[qp_rem],
202                           shift_iq, qp_div);
203                IQUANT_4x4(iq_tmp_2,
204                           pi2_src[2 * src_strd],
205                           pi2_dequant_coeff[2 * trans_size] * g_ihevc_iquant_scales[qp_rem],
206                           shift_iq, qp_div);
207                IQUANT_4x4(iq_tmp_3,
208                           pi2_src[3 * src_strd],
209                           pi2_dequant_coeff[3 * trans_size] * g_ihevc_iquant_scales[qp_rem],
210                           shift_iq, qp_div);
211
212                pi2_tmp[2] =
213                                CLIP_S16((74 * (iq_tmp_1 - iq_tmp_2 + iq_tmp_3) + add) >> shift);
214                pi2_tmp[3] =
215                                CLIP_S16((55 * c[0] + 29 * c[2] - c[3] + add) >> shift);
216            }
217            pi2_src++;
218            pi2_dequant_coeff++;
219            pi2_tmp += trans_size;
220            zero_cols = zero_cols >> 1;
221        }
222
223        pi2_tmp = pi2_tmp_orig;
224
225        /* Inverse Transform 2nd stage */
226        shift = IT_SHIFT_STAGE_2;
227        add = 1 << (shift - 1);
228
229        for(i = 0; i < trans_size; i++)
230        {
231            WORD32 itrans_out;
232
233            // Intermediate Variables
234            c[0] = pi2_tmp[0] + pi2_tmp[2 * trans_size];
235            c[1] = pi2_tmp[2 * trans_size] + pi2_tmp[3 * trans_size];
236            c[2] = pi2_tmp[0] - pi2_tmp[3 * trans_size];
237            c[3] = 74 * pi2_tmp[trans_size];
238
239            itrans_out =
240                            CLIP_S16((29 * c[0] + 55 * c[1] + c[3] + add) >> shift);
241            pu1_dst[0] = CLIP_U8((itrans_out + pu1_pred[0]));
242
243            itrans_out =
244                            CLIP_S16((55 * c[2] - 29 * c[1] + c[3] + add) >> shift);
245            pu1_dst[1] = CLIP_U8((itrans_out + pu1_pred[1]));
246
247            itrans_out =
248                            CLIP_S16((74 * (pi2_tmp[0] - pi2_tmp[2 * trans_size] + pi2_tmp[3 * trans_size]) + add) >> shift);
249            pu1_dst[2] = CLIP_U8((itrans_out + pu1_pred[2]));
250
251            itrans_out =
252                            CLIP_S16((55 * c[0] + 29 * c[2] - c[3] + add) >> shift);
253            pu1_dst[3] = CLIP_U8((itrans_out + pu1_pred[3]));
254            pi2_tmp++;
255            pu1_pred += pred_strd;
256            pu1_dst += dst_strd;
257        }
258    }
259}
260
261/**
262 *******************************************************************************
263 *
264 * @brief
265 *  This function performs inverse quantization, inverse  transform and
266 * reconstruction for 4x4 input block
267 *
268 * @par Description:
269 *  Performs inverse quantization , inverse transform  and adds the
270 * prediction data and clips output to 8 bit
271 *
272 * @param[in] pi2_src
273 *  Input 4x4 coefficients
274 *
275 * @param[in] pi2_tmp
276 *  Temporary 4x4 buffer for storing inverse
277 *  transform 1st stage output
278 *
279 * @param[in] pu1_pred
280 *  Prediction 4x4 block
281 *
282 * @param[in] pi2_dequant_coeff
283 *  Dequant Coeffs
284 *
285 * @param[out] pu1_dst
286 *  Output 4x4 block
287 *
288 * @param[in] qp_div
289 *  Quantization parameter / 6
290 *
291 * @param[in] qp_rem
292 *  Quantization parameter % 6
293 *
294 * @param[in] src_strd
295 *  Input stride
296 *
297 * @param[in] pred_strd
298 *  Prediction stride
299 *
300 * @param[in] dst_strd
301 *  Output Stride
302 *
303 * @param[in] zero_cols
304 *  Zero columns in pi2_src
305 *
306 * @param[in] zero_rows
307 *  Zero Rows in pi2_src
308 *
309 * @returns  Void
310 *
311 * @remarks
312 *  None
313 *
314 *******************************************************************************
315 */
316
317void ihevc_iquant_itrans_recon_4x4(WORD16 *pi2_src,
318                                   WORD16 *pi2_tmp,
319                                   UWORD8 *pu1_pred,
320                                   WORD16 *pi2_dequant_coeff,
321                                   UWORD8 *pu1_dst,
322                                   WORD32 qp_div, /* qpscaled / 6 */
323                                   WORD32 qp_rem, /* qpscaled % 6 */
324                                   WORD32 src_strd,
325                                   WORD32 pred_strd,
326                                   WORD32 dst_strd,
327                                   WORD32 zero_cols,
328                                   WORD32 zero_rows)
329{
330    UNUSED(zero_rows);
331    /* Inverse Transform */
332    {
333        WORD32 j;
334        WORD32 e[2], o[2];
335        WORD32 add;
336        WORD32 shift;
337        WORD16 *pi2_tmp_orig;
338        WORD32 shift_iq;
339        WORD32 trans_size;
340        /* Inverse Quantization constants */
341        {
342            WORD32 log2_trans_size, bit_depth;
343
344            log2_trans_size = 2;
345            bit_depth = 8 + 0;
346            shift_iq = bit_depth + log2_trans_size - 5;
347        }
348
349        trans_size = TRANS_SIZE_4;
350        pi2_tmp_orig = pi2_tmp;
351
352        /* Inverse Transform 1st stage */
353        shift = IT_SHIFT_STAGE_1;
354        add = 1 << (shift - 1);
355
356        for(j = 0; j < trans_size; j++)
357        {
358            /* Checking for Zero Cols */
359            if((zero_cols & 1) == 1)
360            {
361                memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
362            }
363            else
364            {
365                WORD32 iq_tmp_1, iq_tmp_2;
366                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
367                IQUANT_4x4(iq_tmp_1,
368                           pi2_src[1 * src_strd],
369                           pi2_dequant_coeff[1 * trans_size] * g_ihevc_iquant_scales[qp_rem],
370                           shift_iq, qp_div);
371                IQUANT_4x4(iq_tmp_2,
372                           pi2_src[3 * src_strd],
373                           pi2_dequant_coeff[3 * trans_size] * g_ihevc_iquant_scales[qp_rem],
374                           shift_iq, qp_div);
375
376                o[0] = g_ai2_ihevc_trans_4[1][0] * iq_tmp_1
377                                + g_ai2_ihevc_trans_4[3][0] * iq_tmp_2;
378                o[1] = g_ai2_ihevc_trans_4[1][1] * iq_tmp_1
379                                + g_ai2_ihevc_trans_4[3][1] * iq_tmp_2;
380
381                IQUANT_4x4(iq_tmp_1,
382                           pi2_src[0 * src_strd],
383                           pi2_dequant_coeff[0 * trans_size] * g_ihevc_iquant_scales[qp_rem],
384                           shift_iq, qp_div);
385                IQUANT_4x4(iq_tmp_2,
386                           pi2_src[2 * src_strd],
387                           pi2_dequant_coeff[2 * trans_size] * g_ihevc_iquant_scales[qp_rem],
388                           shift_iq, qp_div);
389
390                e[0] = g_ai2_ihevc_trans_4[0][0] * iq_tmp_1
391                                + g_ai2_ihevc_trans_4[2][0] * iq_tmp_2;
392                e[1] = g_ai2_ihevc_trans_4[0][1] * iq_tmp_1
393                                + g_ai2_ihevc_trans_4[2][1] * iq_tmp_2;
394
395                pi2_tmp[0] =
396                                CLIP_S16(((e[0] + o[0] + add) >> shift));
397                pi2_tmp[1] =
398                                CLIP_S16(((e[1] + o[1] + add) >> shift));
399                pi2_tmp[2] =
400                                CLIP_S16(((e[1] - o[1] + add) >> shift));
401                pi2_tmp[3] =
402                                CLIP_S16(((e[0] - o[0] + add) >> shift));
403            }
404            pi2_src++;
405            pi2_dequant_coeff++;
406            pi2_tmp += trans_size;
407            zero_cols = zero_cols >> 1;
408        }
409
410        pi2_tmp = pi2_tmp_orig;
411
412        /* Inverse Transform 2nd stage */
413        shift = IT_SHIFT_STAGE_2;
414        add = 1 << (shift - 1);
415
416        for(j = 0; j < trans_size; j++)
417        {
418            WORD32 itrans_out;
419
420            /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
421            o[0] = g_ai2_ihevc_trans_4[1][0] * pi2_tmp[trans_size]
422                            + g_ai2_ihevc_trans_4[3][0]
423                                            * pi2_tmp[3 * trans_size];
424            o[1] = g_ai2_ihevc_trans_4[1][1] * pi2_tmp[trans_size]
425                            + g_ai2_ihevc_trans_4[3][1]
426                                            * pi2_tmp[3 * trans_size];
427            e[0] = g_ai2_ihevc_trans_4[0][0] * pi2_tmp[0]
428                            + g_ai2_ihevc_trans_4[2][0]
429                                            * pi2_tmp[2 * trans_size];
430            e[1] = g_ai2_ihevc_trans_4[0][1] * pi2_tmp[0]
431                            + g_ai2_ihevc_trans_4[2][1]
432                                            * pi2_tmp[2 * trans_size];
433
434            itrans_out =
435                            CLIP_S16(((e[0] + o[0] + add) >> shift));
436            pu1_dst[0] = CLIP_U8((itrans_out + pu1_pred[0]));
437
438            itrans_out =
439                            CLIP_S16(((e[1] + o[1] + add) >> shift));
440            pu1_dst[1] = CLIP_U8((itrans_out + pu1_pred[1]));
441
442            itrans_out =
443                            CLIP_S16(((e[1] - o[1] + add) >> shift));
444            pu1_dst[2] = CLIP_U8((itrans_out + pu1_pred[2]));
445
446            itrans_out =
447                            CLIP_S16(((e[0] - o[0] + add) >> shift));
448            pu1_dst[3] = CLIP_U8((itrans_out + pu1_pred[3]));
449
450            pi2_tmp++;
451            pu1_pred += pred_strd;
452            pu1_dst += dst_strd;
453
454        }
455    }
456}
457