1/******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*/
20/**
21 *******************************************************************************
22 * @file
23 *  ih264_iquant_itrans_recon.c
24 *
25 * @brief
26 *  Contains definition of functions for h264 inverse quantization inverse transformation and recon
27 *
28 * @author
29 *  Ittiam
30 *
31 *  @par List of Functions:
32 *  - ih264_iquant_itrans_recon_4x4()
33 *  - ih264_iquant_itrans_recon_8x8()
34 *  - ih264_iquant_itrans_recon_4x4_dc()
35 *  - ih264_iquant_itrans_recon_8x8_dc()
36 *  - ih264_iquant_itrans_recon_chroma_4x4()
 *  - ih264_iquant_itrans_recon_chroma_4x4_dc()
38 *
39 * @remarks
40 *
41 *******************************************************************************
42 */
43
44/*****************************************************************************/
45/* File Includes                                                             */
46/*****************************************************************************/
47
48/* User include files */
49#include "ih264_typedefs.h"
50#include "ih264_defs.h"
51#include "ih264_trans_macros.h"
52#include "ih264_macros.h"
53#include "ih264_platform_macros.h"
54#include "ih264_trans_data.h"
55#include "ih264_size_defs.h"
56#include "ih264_structs.h"
57#include "ih264_trans_quant_itrans_iquant.h"
58
59/*
60 ********************************************************************************
61 *
62 * @brief This function reconstructs a 4x4 sub block from quantized resiude and
63 * prediction buffer
64 *
65 * @par Description:
66 *  The quantized residue is first inverse quantized, then inverse transformed.
67 *  This inverse transformed content is added to the prediction buffer to recon-
68 *  struct the end output
69 *
70 * @param[in] pi2_src
71 *  quantized 4x4 block
72 *
73 * @param[in] pu1_pred
74 *  prediction 4x4 block
75 *
76 * @param[out] pu1_out
77 *  reconstructed 4x4 block
78 *
79 * @param[in] src_strd
80 *  quantization buffer stride
81 *
82 * @param[in] pred_strd,
83 *  Prediction buffer stride
84 *
85 * @param[in] out_strd
86 *  recon buffer Stride
87 *
88 * @param[in] pu2_scaling_list
89 *  pointer to scaling list
90 *
91 * @param[in] pu2_norm_adjust
92 *  pointer to inverse scale matrix
93 *
94 * @param[in] u4_qp_div_6
95 *  Floor (qp/6)
96 *
97 * @param[in] pi4_tmp
98 * temporary buffer of size 1*16
99 *
100 * @returns none
101 *
102 * @remarks none
103 *
104 *******************************************************************************
105 */
106void ih264_iquant_itrans_recon_4x4(WORD16 *pi2_src,
107                                   UWORD8 *pu1_pred,
108                                   UWORD8 *pu1_out,
109                                   WORD32 pred_strd,
110                                   WORD32 out_strd,
111                                   const UWORD16 *pu2_iscal_mat,
112                                   const UWORD16 *pu2_weigh_mat,
113                                   UWORD32 u4_qp_div_6,
114                                   WORD16 *pi2_tmp,
115                                   WORD32 iq_start_idx,
116                                   WORD16 *pi2_dc_ld_addr
117)
118{
119    WORD16 *pi2_src_ptr = pi2_src;
120    WORD16 *pi2_tmp_ptr = pi2_tmp;
121    UWORD8 *pu1_pred_ptr = pu1_pred;
122    UWORD8 *pu1_out_ptr = pu1_out;
123    WORD16 x0, x1, x2, x3, i;
124    WORD32 q0, q1, q2, q3;
125    WORD16 i_macro;
126    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
127
128    /* inverse quant */
129    /*horizontal inverse transform */
130    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
131    {
132        q0 = pi2_src_ptr[0];
133        INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact,
134                  4);
135        if (i==0 && iq_start_idx == 1)
136            q0 = pi2_dc_ld_addr[0];     // Restoring dc value for intra case
137
138        q2 = pi2_src_ptr[2];
139        INV_QUANT(q2, pu2_iscal_mat[2], pu2_weigh_mat[2], u4_qp_div_6, rnd_fact,
140                  4);
141
142        x0 = q0 + q2;
143        x1 = q0 - q2;
144
145        q1 = pi2_src_ptr[1];
146        INV_QUANT(q1, pu2_iscal_mat[1], pu2_weigh_mat[1], u4_qp_div_6, rnd_fact,
147                  4);
148
149        q3 = pi2_src_ptr[3];
150        INV_QUANT(q3, pu2_iscal_mat[3], pu2_weigh_mat[3], u4_qp_div_6, rnd_fact,
151                  4);
152
153        x2 = (q1 >> 1) - q3;
154        x3 = q1 + (q3 >> 1);
155
156        pi2_tmp_ptr[0] = x0 + x3;
157        pi2_tmp_ptr[1] = x1 + x2;
158        pi2_tmp_ptr[2] = x1 - x2;
159        pi2_tmp_ptr[3] = x0 - x3;
160
161        pi2_src_ptr += SUB_BLK_WIDTH_4x4;
162        pi2_tmp_ptr += SUB_BLK_WIDTH_4x4;
163        pu2_iscal_mat += SUB_BLK_WIDTH_4x4;
164        pu2_weigh_mat += SUB_BLK_WIDTH_4x4;
165    }
166
167    /* vertical inverse transform */
168    pi2_tmp_ptr = pi2_tmp;
169    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
170    {
171        pu1_pred_ptr = pu1_pred;
172        pu1_out = pu1_out_ptr;
173
174        x0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[8]);
175        x1 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[8]);
176        x2 = (pi2_tmp_ptr[4] >> 1) - pi2_tmp_ptr[12];
177        x3 = pi2_tmp_ptr[4] + (pi2_tmp_ptr[12] >> 1);
178
179        /* inverse prediction */
180        i_macro = x0 + x3;
181        i_macro = ((i_macro + 32) >> 6);
182        i_macro += *pu1_pred_ptr;
183        *pu1_out = CLIP_U8(i_macro);
184        pu1_pred_ptr += pred_strd;
185        pu1_out += out_strd;
186
187        i_macro = x1 + x2;
188        i_macro = ((i_macro + 32) >> 6);
189        i_macro += *pu1_pred_ptr;
190        *pu1_out = CLIP_U8(i_macro);
191        pu1_pred_ptr += pred_strd;
192        pu1_out += out_strd;
193
194        i_macro = x1 - x2;
195        i_macro = ((i_macro + 32) >> 6);
196        i_macro += *pu1_pred_ptr;
197        *pu1_out = CLIP_U8(i_macro);
198        pu1_pred_ptr += pred_strd;
199        pu1_out += out_strd;
200
201        i_macro = x0 - x3;
202        i_macro = ((i_macro + 32) >> 6);
203        i_macro += *pu1_pred_ptr;
204        *pu1_out = CLIP_U8(i_macro);
205
206        pi2_tmp_ptr++;
207        pu1_out_ptr++;
208        pu1_pred++;
209    }
210
211}
212
213void ih264_iquant_itrans_recon_4x4_dc(WORD16 *pi2_src,
214                                      UWORD8 *pu1_pred,
215                                      UWORD8 *pu1_out,
216                                      WORD32 pred_strd,
217                                      WORD32 out_strd,
218                                      const UWORD16 *pu2_iscal_mat,
219                                      const UWORD16 *pu2_weigh_mat,
220                                      UWORD32 u4_qp_div_6,
221                                      WORD16 *pi2_tmp,
222                                      WORD32 iq_start_idx,
223                                      WORD16 *pi2_dc_ld_addr)
224{
225    UWORD8 *pu1_pred_ptr = pu1_pred;
226    UWORD8 *pu1_out_ptr = pu1_out;
227    WORD32 q0;
228    WORD16 x, i_macro, i;
229    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
230    UNUSED(pi2_tmp);
231
232    if (iq_start_idx == 0)
233    {
234      q0 = pi2_src[0];
235      INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
236    }
237    else
238    {
239      q0 = pi2_dc_ld_addr[0];    // Restoring dc value for intra case3
240    }
241    i_macro = ((q0 + 32) >> 6);
242    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
243    {
244        pu1_pred_ptr = pu1_pred;
245        pu1_out = pu1_out_ptr;
246
247        /* inverse prediction */
248
249        x = i_macro + *pu1_pred_ptr;
250        *pu1_out = CLIP_U8(x);
251        pu1_pred_ptr += pred_strd;
252        pu1_out += out_strd;
253
254        x = i_macro + *pu1_pred_ptr;
255        *pu1_out = CLIP_U8(x);
256        pu1_pred_ptr += pred_strd;
257        pu1_out += out_strd;
258
259        x = i_macro + *pu1_pred_ptr;
260        *pu1_out = CLIP_U8(x);
261        pu1_pred_ptr += pred_strd;
262        pu1_out += out_strd;
263
264        x = i_macro + *pu1_pred_ptr;
265        *pu1_out = CLIP_U8(x);
266
267        pu1_out_ptr++;
268        pu1_pred++;
269    }
270}
271
272/**
273 *******************************************************************************
274 *
275 * @brief
276 *  This function performs inverse quant and Inverse transform type Ci4 for 8x8 block
277 *
278 * @par Description:
279 *  Performs inverse transform Ci8 and adds the residue to get the
280 *  reconstructed block
281 *
282 * @param[in] pi2_src
283 *  Input 8x8coefficients
284 *
285 * @param[in] pu1_pred
286 *  Prediction 8x8 block
287 *
288 * @param[out] pu1_recon
289 *  Output 8x8 block
290 *
291 * @param[in] q_div
292 *  QP/6
293 *
294 * @param[in] q_rem
295 *  QP%6
296 *
297 * @param[in] q_lev
298 *  Quantizer level
299 *
300 * @param[in] src_strd
301 *  Input stride
302 *
303 * @param[in] pred_strd,
304 *  Prediction stride
305 *
306 * @param[in] out_strd
307 *  Output Stride
308 *
309 * @param[in] pi4_tmp
310 *  temporary buffer of size 1*16 we dont need a bigger blcok since we reuse
311 *  the tmp for each block
312 *
313 * @param[in] pu4_iquant_mat
314 *  Pointer to the inverse quantization matrix
315 *
316 * @returns  Void
317 *
318 * @remarks
319 *  None
320 *
321 *******************************************************************************
322 */
323void ih264_iquant_itrans_recon_8x8(WORD16 *pi2_src,
324                                   UWORD8 *pu1_pred,
325                                   UWORD8 *pu1_out,
326                                   WORD32 pred_strd,
327                                   WORD32 out_strd,
328                                   const UWORD16 *pu2_iscale_mat,
329                                   const UWORD16 *pu2_weigh_mat,
330                                   UWORD32 qp_div,
331                                   WORD16 *pi2_tmp,
332                                   WORD32 iq_start_idx,
333                                   WORD16 *pi2_dc_ld_addr
334)
335{
336    WORD32 i;
337    WORD16 *pi2_tmp_ptr = pi2_tmp;
338    UWORD8 *pu1_pred_ptr = pu1_pred;
339    UWORD8 *pu1_out_ptr = pu1_out;
340    WORD16 i_z0, i_z1, i_z2, i_z3, i_z4, i_z5, i_z6, i_z7;
341    WORD16 i_y0, i_y1, i_y2, i_y3, i_y4, i_y5, i_y6, i_y7;
342    WORD16 i_macro;
343    WORD32 q;
344    WORD32 rnd_fact = (qp_div < 6) ? (1 << (5 - qp_div)) : 0;
345    UNUSED(iq_start_idx);
346    UNUSED(pi2_dc_ld_addr);
347    /*************************************************************/
348    /* De quantization of coefficients. Will be replaced by SIMD */
349    /* operations on platform. Note : DC coeff is not scaled     */
350    /*************************************************************/
351    for(i = 0; i < (SUB_BLK_WIDTH_8x8 * SUB_BLK_WIDTH_8x8); i++)
352    {
353        q = pi2_src[i];
354        INV_QUANT(q, pu2_iscale_mat[i], pu2_weigh_mat[i], qp_div, rnd_fact, 6);
355        pi2_tmp_ptr[i] = q;
356    }
357    /* Perform Inverse transform */
358    /*--------------------------------------------------------------------*/
359    /* IDCT [ Horizontal transformation ]                                 */
360    /*--------------------------------------------------------------------*/
361    for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
362    {
363        /*------------------------------------------------------------------*/
364        /* y0 = w0 + w4                                                     */
365        /* y1 = -w3 + w5 - w7 - (w7 >> 1)                                   */
366        /* y2 = w0 - w4                                                     */
367        /* y3 = w1 + w7 - w3 - (w3 >> 1)                                    */
368        /* y4 = (w2 >> 1) - w6                                              */
369        /* y5 = -w1 + w7 + w5 + (w5 >> 1)                                   */
370        /* y6 = w2 + (w6 >> 1)                                              */
371        /* y7 = w3 + w5 + w1 + (w1 >> 1)                                    */
372        /*------------------------------------------------------------------*/
373        i_y0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[4] );
374
375        i_y1 = ((WORD32)(-pi2_tmp_ptr[3]) + pi2_tmp_ptr[5] - pi2_tmp_ptr[7]
376                        - (pi2_tmp_ptr[7] >> 1));
377
378        i_y2 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[4] );
379
380        i_y3 = ((WORD32)pi2_tmp_ptr[1] + pi2_tmp_ptr[7] - pi2_tmp_ptr[3]
381                        - (pi2_tmp_ptr[3] >> 1));
382
383        i_y4 = ((pi2_tmp_ptr[2] >> 1) - pi2_tmp_ptr[6] );
384
385        i_y5 = ((WORD32)(-pi2_tmp_ptr[1]) + pi2_tmp_ptr[7] + pi2_tmp_ptr[5]
386                        + (pi2_tmp_ptr[5] >> 1));
387
388        i_y6 = (pi2_tmp_ptr[2] + (pi2_tmp_ptr[6] >> 1));
389
390        i_y7 = ((WORD32)pi2_tmp_ptr[3] + pi2_tmp_ptr[5] + pi2_tmp_ptr[1]
391                        + (pi2_tmp_ptr[1] >> 1));
392
393        /*------------------------------------------------------------------*/
394        /* z0 = y0 + y6                                                     */
395        /* z1 = y1 + (y7 >> 2)                                              */
396        /* z2 = y2 + y4                                                     */
397        /* z3 = y3 + (y5 >> 2)                                              */
398        /* z4 = y2 - y4                                                     */
399        /* z5 = (y3 >> 2) - y5                                              */
400        /* z6 = y0 - y6                                                     */
401        /* z7 = y7 - (y1 >> 2)                                              */
402        /*------------------------------------------------------------------*/
403        i_z0 = i_y0 + i_y6;
404        i_z1 = i_y1 + (i_y7 >> 2);
405        i_z2 = i_y2 + i_y4;
406        i_z3 = i_y3 + (i_y5 >> 2);
407        i_z4 = i_y2 - i_y4;
408        i_z5 = (i_y3 >> 2) - i_y5;
409        i_z6 = i_y0 - i_y6;
410        i_z7 = i_y7 - (i_y1 >> 2);
411
412        /*------------------------------------------------------------------*/
413        /* x0 = z0 + z7                                                     */
414        /* x1 = z2 + z5                                                     */
415        /* x2 = z4 + z3                                                     */
416        /* x3 = z6 + z1                                                     */
417        /* x4 = z6 - z1                                                     */
418        /* x5 = z4 - z3                                                     */
419        /* x6 = z2 - z5                                                     */
420        /* x7 = z0 - z7                                                     */
421        /*------------------------------------------------------------------*/
422        pi2_tmp_ptr[0] = i_z0 + i_z7;
423        pi2_tmp_ptr[1] = i_z2 + i_z5;
424        pi2_tmp_ptr[2] = i_z4 + i_z3;
425        pi2_tmp_ptr[3] = i_z6 + i_z1;
426        pi2_tmp_ptr[4] = i_z6 - i_z1;
427        pi2_tmp_ptr[5] = i_z4 - i_z3;
428        pi2_tmp_ptr[6] = i_z2 - i_z5;
429        pi2_tmp_ptr[7] = i_z0 - i_z7;
430
431        /* move to the next row */
432        //pi2_src_ptr += SUB_BLK_WIDTH_8x8;
433        pi2_tmp_ptr += SUB_BLK_WIDTH_8x8;
434    }
435    /*--------------------------------------------------------------------*/
436    /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6            */
437    /*                                                                    */
438    /* Add the prediction and store it back to reconstructed frame buffer */
439    /* [Prediction buffer itself in this case]                            */
440    /*--------------------------------------------------------------------*/
441
442    pi2_tmp_ptr = pi2_tmp;
443    for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
444    {
445        pu1_pred_ptr = pu1_pred;
446        pu1_out = pu1_out_ptr;
447        /*------------------------------------------------------------------*/
448        /* y0j = w0j + w4j                                                  */
449        /* y1j = -w3j + w5j -w7j -(w7j >> 1)                                */
450        /* y2j = w0j -w4j                                                   */
451        /* y3j = w1j + w7j -w3j -(w3j >> 1)                                 */
452        /* y4j = ( w2j >> 1 ) -w6j                                          */
453        /* y5j = -w1j + w7j + w5j + (w5j >> 1)                              */
454        /* y6j = w2j + ( w6j >> 1 )                                         */
455        /* y7j = w3j + w5j + w1j + (w1j >> 1)                               */
456        /*------------------------------------------------------------------*/
457        i_y0 = pi2_tmp_ptr[0] + pi2_tmp_ptr[32];
458
459        i_y1 = (WORD32)(-pi2_tmp_ptr[24]) + pi2_tmp_ptr[40] - pi2_tmp_ptr[56]
460                        - (pi2_tmp_ptr[56] >> 1);
461
462        i_y2 = pi2_tmp_ptr[0] - pi2_tmp_ptr[32];
463
464        i_y3 = (WORD32)pi2_tmp_ptr[8] + pi2_tmp_ptr[56] - pi2_tmp_ptr[24]
465                        - (pi2_tmp_ptr[24] >> 1);
466
467        i_y4 = (pi2_tmp_ptr[16] >> 1) - pi2_tmp_ptr[48];
468
469        i_y5 = (WORD32)(-pi2_tmp_ptr[8]) + pi2_tmp_ptr[56] + pi2_tmp_ptr[40]
470                        + (pi2_tmp_ptr[40] >> 1);
471
472        i_y6 = pi2_tmp_ptr[16] + (pi2_tmp_ptr[48] >> 1);
473
474        i_y7 = (WORD32)pi2_tmp_ptr[24] + pi2_tmp_ptr[40] + pi2_tmp_ptr[8]
475                        + (pi2_tmp_ptr[8] >> 1);
476
477        /*------------------------------------------------------------------*/
478        /* z0j = y0j + y6j                                                  */
479        /* z1j = y1j + (y7j >> 2)                                           */
480        /* z2j = y2j + y4j                                                  */
481        /* z3j = y3j + (y5j >> 2)                                           */
482        /* z4j = y2j -y4j                                                   */
483        /* z5j = (y3j >> 2) -y5j                                            */
484        /* z6j = y0j -y6j                                                   */
485        /* z7j = y7j -(y1j >> 2)                                            */
486        /*------------------------------------------------------------------*/
487        i_z0 = i_y0 + i_y6;
488        i_z1 = i_y1 + (i_y7 >> 2);
489        i_z2 = i_y2 + i_y4;
490        i_z3 = i_y3 + (i_y5 >> 2);
491        i_z4 = i_y2 - i_y4;
492        i_z5 = (i_y3 >> 2) - i_y5;
493        i_z6 = i_y0 - i_y6;
494        i_z7 = i_y7 - (i_y1 >> 2);
495
496        /*------------------------------------------------------------------*/
497        /* x0j = z0j + z7j                                                  */
498        /* x1j = z2j + z5j                                                  */
499        /* x2j = z4j + z3j                                                  */
500        /* x3j = z6j + z1j                                                  */
501        /* x4j = z6j -z1j                                                   */
502        /* x5j = z4j -z3j                                                   */
503        /* x6j = z2j -z5j                                                   */
504        /* x7j = z0j -z7j                                                   */
505        /*------------------------------------------------------------------*/
506        i_macro = ((i_z0 + i_z7 + 32) >> 6) + *pu1_pred_ptr;
507        *pu1_out = CLIP_U8(i_macro);
508        /* Change uc_recBuffer to Point to next element in the same column*/
509        pu1_pred_ptr += pred_strd;
510        pu1_out += out_strd;
511
512        i_macro = ((i_z2 + i_z5 + 32) >> 6) + *pu1_pred_ptr;
513        *pu1_out = CLIP_U8(i_macro);
514        pu1_pred_ptr += pred_strd;
515        pu1_out += out_strd;
516
517        i_macro = ((i_z4 + i_z3 + 32) >> 6) + *pu1_pred_ptr;
518        *pu1_out = CLIP_U8(i_macro);
519        pu1_pred_ptr += pred_strd;
520        pu1_out += out_strd;
521
522        i_macro = ((i_z6 + i_z1 + 32) >> 6) + *pu1_pred_ptr;
523        *pu1_out = CLIP_U8(i_macro);
524        pu1_pred_ptr += pred_strd;
525        pu1_out += out_strd;
526
527        i_macro = ((i_z6 - i_z1 + 32) >> 6) + *pu1_pred_ptr;
528        *pu1_out = CLIP_U8(i_macro);
529        pu1_pred_ptr += pred_strd;
530        pu1_out += out_strd;
531
532        i_macro = ((i_z4 - i_z3 + 32) >> 6) + *pu1_pred_ptr;
533        *pu1_out = CLIP_U8(i_macro);
534        pu1_pred_ptr += pred_strd;
535        pu1_out += out_strd;
536
537        i_macro = ((i_z2 - i_z5 + 32) >> 6) + *pu1_pred_ptr;
538        *pu1_out = CLIP_U8(i_macro);
539        pu1_pred_ptr += pred_strd;
540        pu1_out += out_strd;
541
542        i_macro = ((i_z0 - i_z7 + 32) >> 6) + *pu1_pred_ptr;
543        *pu1_out = CLIP_U8(i_macro);
544
545        pi2_tmp_ptr++;
546        pu1_out_ptr++;
547        pu1_pred++;
548    }
549}
550
551void ih264_iquant_itrans_recon_8x8_dc(WORD16 *pi2_src,
552                                      UWORD8 *pu1_pred,
553                                      UWORD8 *pu1_out,
554                                      WORD32 pred_strd,
555                                      WORD32 out_strd,
556                                      const UWORD16 *pu2_iscale_mat,
557                                      const UWORD16 *pu2_weigh_mat,
558                                      UWORD32 qp_div,
559                                      WORD16 *pi2_tmp,
560                                      WORD32 iq_start_idx,
561                                      WORD16 *pi2_dc_ld_addr)
562{
563    UWORD8 *pu1_pred_ptr = pu1_pred;
564    UWORD8 *pu1_out_ptr = pu1_out;
565    WORD16 x, i, i_macro;
566    WORD32 q;
567    WORD32 rnd_fact = (qp_div < 6) ? (1 << (5 - qp_div)) : 0;
568    UNUSED(pi2_tmp);
569    UNUSED(iq_start_idx);
570    UNUSED(pi2_dc_ld_addr);
571    /*************************************************************/
572    /* Dequantization of coefficients. Will be replaced by SIMD  */
573    /* operations on platform. Note : DC coeff is not scaled     */
574    /*************************************************************/
575    q = pi2_src[0];
576    INV_QUANT(q, pu2_iscale_mat[0], pu2_weigh_mat[0], qp_div, rnd_fact, 6);
577    i_macro = (q + 32) >> 6;
578    /* Perform Inverse transform */
579    /*--------------------------------------------------------------------*/
580    /* IDCT [ Horizontal transformation ]                                 */
581    /*--------------------------------------------------------------------*/
582    /*--------------------------------------------------------------------*/
583    /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6            */
584    /*                                                                    */
585    /* Add the prediction and store it back to reconstructed frame buffer */
586    /* [Prediction buffer itself in this case]                            */
587    /*--------------------------------------------------------------------*/
588    for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
589    {
590        pu1_pred_ptr = pu1_pred;
591        pu1_out = pu1_out_ptr;
592
593        x = i_macro + *pu1_pred_ptr;
594        *pu1_out = CLIP_U8(x);
595        /* Change uc_recBuffer to Point to next element in the same column*/
596        pu1_pred_ptr += pred_strd;
597        pu1_out += out_strd;
598
599        x = i_macro + *pu1_pred_ptr;
600        *pu1_out = CLIP_U8(x);
601        pu1_pred_ptr += pred_strd;
602        pu1_out += out_strd;
603
604        x = i_macro + *pu1_pred_ptr;
605        *pu1_out = CLIP_U8(x);
606        pu1_pred_ptr += pred_strd;
607        pu1_out += out_strd;
608
609        x = i_macro + *pu1_pred_ptr;
610        *pu1_out = CLIP_U8(x);
611        pu1_pred_ptr += pred_strd;
612        pu1_out += out_strd;
613
614        x = i_macro + *pu1_pred_ptr;
615        *pu1_out = CLIP_U8(x);
616        pu1_pred_ptr += pred_strd;
617        pu1_out += out_strd;
618
619        x = i_macro + *pu1_pred_ptr;
620        *pu1_out = CLIP_U8(x);
621        pu1_pred_ptr += pred_strd;
622        pu1_out += out_strd;
623
624        x = i_macro + *pu1_pred_ptr;
625        *pu1_out = CLIP_U8(x);
626        pu1_pred_ptr += pred_strd;
627        pu1_out += out_strd;
628
629        x = i_macro + *pu1_pred_ptr;
630        *pu1_out = CLIP_U8(x);
631
632        pu1_out_ptr++;
633        pu1_pred++;
634    }
635}
636
637/*
638 ********************************************************************************
639 *
640 * @brief This function reconstructs a 4x4 sub block from quantized resiude and
641 * prediction buffer
642 *
643 * @par Description:
644 *  The quantized residue is first inverse quantized, then inverse transformed.
645 *  This inverse transformed content is added to the prediction buffer to recon-
646 *  struct the end output
647 *
648 * @param[in] pi2_src
649 *  quantized 4x4 block
650 *
651 * @param[in] pu1_pred
652 *  prediction 4x4 block
653 *
654 * @param[out] pu1_out
655 *  reconstructed 4x4 block
656 *
657 * @param[in] src_strd
658 *  quantization buffer stride
659 *
660 * @param[in] pred_strd,
661 *  Prediction buffer stride
662 *
663 * @param[in] out_strd
664 *  recon buffer Stride
665 *
666 * @param[in] pu2_scaling_list
667 *  pointer to scaling list
668 *
669 * @param[in] pu2_norm_adjust
670 *  pointer to inverse scale matrix
671 *
672 * @param[in] u4_qp_div_6
673 *  Floor (qp/6)
674 *
675 * @param[in] pi4_tmp
676 * temporary buffer of size 1*16
677 *
678 * @returns none
679 *
680 * @remarks none
681 *
682 *******************************************************************************
683 */
684void ih264_iquant_itrans_recon_chroma_4x4(WORD16 *pi2_src,
685                                          UWORD8 *pu1_pred,
686                                          UWORD8 *pu1_out,
687                                          WORD32 pred_strd,
688                                          WORD32 out_strd,
689                                          const UWORD16 *pu2_iscal_mat,
690                                          const UWORD16 *pu2_weigh_mat,
691                                          UWORD32 u4_qp_div_6,
692                                          WORD16 *pi2_tmp,
693                                          WORD16 *pi2_dc_src)
694{
695    WORD16 *pi2_src_ptr = pi2_src;
696    WORD16 *pi2_tmp_ptr = pi2_tmp;
697    UWORD8 *pu1_pred_ptr = pu1_pred;
698    UWORD8 *pu1_out_ptr = pu1_out;
699    WORD16 x0, x1, x2, x3, i;
700    WORD32 q0, q1, q2, q3;
701    WORD16 i_macro;
702    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
703
704    /* inverse quant */
705    /*horizontal inverse transform */
706    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
707    {
708      if(i==0)
709      {
710        q0 = pi2_dc_src[0];
711      }
712      else
713      {
714        q0 = pi2_src_ptr[0];
715        INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
716      }
717
718      q2 = pi2_src_ptr[2];
719      INV_QUANT(q2, pu2_iscal_mat[2], pu2_weigh_mat[2], u4_qp_div_6, rnd_fact,
720                4);
721
722      x0 = q0 + q2;
723      x1 = q0 - q2;
724
725      q1 = pi2_src_ptr[1];
726      INV_QUANT(q1, pu2_iscal_mat[1], pu2_weigh_mat[1], u4_qp_div_6, rnd_fact,
727                4);
728
729      q3 = pi2_src_ptr[3];
730      INV_QUANT(q3, pu2_iscal_mat[3], pu2_weigh_mat[3], u4_qp_div_6, rnd_fact,
731                4);
732
733      x2 = (q1 >> 1) - q3;
734      x3 = q1 + (q3 >> 1);
735
736      pi2_tmp_ptr[0] = x0 + x3;
737      pi2_tmp_ptr[1] = x1 + x2;
738      pi2_tmp_ptr[2] = x1 - x2;
739      pi2_tmp_ptr[3] = x0 - x3;
740
741      pi2_src_ptr += SUB_BLK_WIDTH_4x4;
742      pi2_tmp_ptr += SUB_BLK_WIDTH_4x4;
743      pu2_iscal_mat += SUB_BLK_WIDTH_4x4;
744      pu2_weigh_mat += SUB_BLK_WIDTH_4x4;
745    }
746
747    /* vertical inverse transform */
748    pi2_tmp_ptr = pi2_tmp;
749    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
750    {
751        pu1_pred_ptr = pu1_pred;
752        pu1_out = pu1_out_ptr;
753
754        x0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[8]);
755        x1 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[8]);
756        x2 = (pi2_tmp_ptr[4] >> 1) - pi2_tmp_ptr[12];
757        x3 =  pi2_tmp_ptr[4] + (pi2_tmp_ptr[12] >> 1);
758
759        /* inverse prediction */
760        i_macro = x0 + x3;
761        i_macro = ((i_macro + 32) >> 6);
762        i_macro += *pu1_pred_ptr;
763        *pu1_out = CLIP_U8(i_macro);
764        pu1_pred_ptr += pred_strd;
765        pu1_out += out_strd;
766
767        i_macro = x1 + x2;
768        i_macro = ((i_macro + 32) >> 6);
769        i_macro += *pu1_pred_ptr;
770        *pu1_out = CLIP_U8(i_macro);
771        pu1_pred_ptr += pred_strd;
772        pu1_out += out_strd;
773
774        i_macro = x1 - x2;
775        i_macro = ((i_macro + 32) >> 6);
776        i_macro += *pu1_pred_ptr;
777        *pu1_out = CLIP_U8(i_macro);
778        pu1_pred_ptr += pred_strd;
779        pu1_out += out_strd;
780
781        i_macro = x0 - x3;
782        i_macro = ((i_macro + 32) >> 6);
783        i_macro += *pu1_pred_ptr;
784        *pu1_out = CLIP_U8(i_macro);
785
786        pi2_tmp_ptr++;
787        pu1_out_ptr+= 2;    //Interleaved store for output
788        pu1_pred+= 2;       //Interleaved load for pred buffer
789    }
790}
791
792/*
793 ********************************************************************************
794 *
795 * @brief This function reconstructs a 4x4 sub block from quantized resiude and
796 * prediction buffer if only dc value is present for residue
797 *
798 * @par Description:
799 *  The quantized residue is first inverse quantized,
800 *  This inverse quantized content is added to the prediction buffer to recon-
801 *  struct the end output
802 *
803 * @param[in] pi2_src
804 *  quantized dc coefficient
805 *
806 * @param[in] pu1_pred
807 *  prediction 4x4 block in interleaved format
808 *
809 * @param[in] pred_strd,
810 *  Prediction buffer stride in interleaved format
811 *
812 * @param[in] out_strd
813 *  recon buffer Stride
814 *
815 * @returns none
816 *
817 * @remarks none
818 *
819 *******************************************************************************
820 */
821
822void ih264_iquant_itrans_recon_chroma_4x4_dc(WORD16 *pi2_src,
823                                             UWORD8 *pu1_pred,
824                                             UWORD8 *pu1_out,
825                                             WORD32 pred_strd,
826                                             WORD32 out_strd,
827                                             const UWORD16 *pu2_iscal_mat,
828                                             const UWORD16 *pu2_weigh_mat,
829                                             UWORD32 u4_qp_div_6,
830                                             WORD16 *pi2_tmp,
831                                             WORD16 *pi2_dc_src)
832{
833    UWORD8 *pu1_pred_ptr = pu1_pred;
834    UWORD8 *pu1_out_ptr = pu1_out;
835    WORD32 q0;
836    WORD16 x, i_macro, i;
837    UNUSED(pi2_src);
838    UNUSED(pu2_iscal_mat);
839    UNUSED(pu2_weigh_mat);
840    UNUSED(u4_qp_div_6);
841    UNUSED(pi2_tmp);
842
843    q0 = pi2_dc_src[0];    // Restoring dc value for intra case3
844    i_macro = ((q0 + 32) >> 6);
845
846    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
847    {
848        pu1_pred_ptr = pu1_pred;
849        pu1_out = pu1_out_ptr;
850
851        /* inverse prediction */
852        x = i_macro + *pu1_pred_ptr;
853        *pu1_out =  CLIP_U8(x);
854        pu1_pred_ptr += pred_strd;
855        pu1_out += out_strd;
856
857        x = i_macro + *pu1_pred_ptr;
858        *pu1_out = CLIP_U8(x);
859        pu1_pred_ptr += pred_strd;
860        pu1_out += out_strd;
861
862        x = i_macro + *pu1_pred_ptr;
863        *pu1_out = CLIP_U8(x);
864        pu1_pred_ptr += pred_strd;
865        pu1_out += out_strd;
866
867        x = i_macro + *pu1_pred_ptr;
868        *pu1_out = CLIP_U8(x);
869
870        pu1_out_ptr+=2;
871        pu1_pred+=2;
872    }
873}
874