1/******************************************************************************
2*
3* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4*
5* Licensed under the Apache License, Version 2.0 (the "License");
6* you may not use this file except in compliance with the License.
7* You may obtain a copy of the License at:
8*
9* http://www.apache.org/licenses/LICENSE-2.0
10*
11* Unless required by applicable law or agreed to in writing, software
12* distributed under the License is distributed on an "AS IS" BASIS,
13* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14* See the License for the specific language governing permissions and
15* limitations under the License.
16*
17******************************************************************************/
18/**
19 *******************************************************************************
20 * @file
21 *  ihevc_itrans_recon_8x8.c
22 *
23 * @brief
24 *  Contains function definitions for inverse transform  and reconstruction 8x8
25 *
26 *
27 * @author
28 *  100470
29 *
30 * @par List of Functions:
31 *  - ihevc_itrans_recon_8x8()
32 *
33 * @remarks
34 *  None
35 *
36 *******************************************************************************
37 */
38#include <stdio.h>
39#include <string.h>
40#include "ihevc_typedefs.h"
41#include "ihevc_macros.h"
42#include "ihevc_platform_macros.h"
43#include "ihevc_defs.h"
44#include "ihevc_trans_tables.h"
45#include "ihevc_itrans_recon.h"
46#include "ihevc_func_selector.h"
47#include "ihevc_trans_macros.h"
48
49/**
50 *******************************************************************************
51 *
52 * @brief
53 *  This function performs Inverse transform  and reconstruction for 8x8
54 * input block
55 *
56 * @par Description:
57 *  Performs inverse transform and adds the prediction  data and clips output
58 * to 8 bit
59 *
60 * @param[in] pi2_src
61 *  Input 8x8 coefficients
62 *
63 * @param[in] pi2_tmp
64 *  Temporary 8x8 buffer for storing inverse
65 *
66 *  transform
67 *  1st stage output
68 *
69 * @param[in] pu1_pred
70 *  Prediction 8x8 block
71 *
72 * @param[out] pu1_dst
73 *  Output 8x8 block
74 *
75 * @param[in] src_strd
76 *  Input stride
77 *
78 * @param[in] pred_strd
79 *  Prediction stride
80 *
81 * @param[in] dst_strd
82 *  Output Stride
83 *
84 * @param[in] shift
85 *  Output shift
86 *
87 * @param[in] zero_cols
88 *  Zero columns in pi2_src
89 *
90 * @returns  Void
91 *
92 * @remarks
93 *  None
94 *
95 *******************************************************************************
96 */
97
98void ihevc_itrans_recon_8x8(WORD16 *pi2_src,
99                            WORD16 *pi2_tmp,
100                            UWORD8 *pu1_pred,
101                            UWORD8 *pu1_dst,
102                            WORD32 src_strd,
103                            WORD32 pred_strd,
104                            WORD32 dst_strd,
105                            WORD32 zero_cols,
106                            WORD32 zero_rows)
107{
108    WORD32 j, k;
109    WORD32 e[4], o[4];
110    WORD32 ee[2], eo[2];
111    WORD32 add;
112    WORD32 shift;
113    WORD16 *pi2_tmp_orig;
114    WORD32 trans_size;
115    WORD32 zero_rows_2nd_stage = zero_cols;
116    WORD32 row_limit_2nd_stage;
117
118    trans_size = TRANS_SIZE_8;
119
120    pi2_tmp_orig = pi2_tmp;
121
122    if((zero_cols & 0xF0) == 0xF0)
123        row_limit_2nd_stage = 4;
124    else
125        row_limit_2nd_stage = TRANS_SIZE_8;
126
127
128    if((zero_rows & 0xF0) == 0xF0) /* First 4 rows of input are non-zero */
129    {
130        /************************************************************************************************/
131        /**********************************START - IT_RECON_8x8******************************************/
132        /************************************************************************************************/
133
134        /* Inverse Transform 1st stage */
135        shift = IT_SHIFT_STAGE_1;
136        add = 1 << (shift - 1);
137
138        for(j = 0; j < row_limit_2nd_stage; j++)
139        {
140            /* Checking for Zero Cols */
141            if((zero_cols & 1) == 1)
142            {
143                memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
144            }
145            else
146            {
147                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
148                for(k = 0; k < 4; k++)
149                {
150                    o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_src[src_strd]
151                                    + g_ai2_ihevc_trans_8[3][k]
152                                                    * pi2_src[3 * src_strd];
153                }
154                eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_src[2 * src_strd];
155                eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_src[2 * src_strd];
156                ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_src[0];
157                ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_src[0];
158
159                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
160                e[0] = ee[0] + eo[0];
161                e[3] = ee[0] - eo[0];
162                e[1] = ee[1] + eo[1];
163                e[2] = ee[1] - eo[1];
164                for(k = 0; k < 4; k++)
165                {
166                    pi2_tmp[k] =
167                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
168                    pi2_tmp[k + 4] =
169                                    CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
170                }
171            }
172            pi2_src++;
173            pi2_tmp += trans_size;
174            zero_cols = zero_cols >> 1;
175        }
176
177        pi2_tmp = pi2_tmp_orig;
178
179        /* Inverse Transform 2nd stage */
180        shift = IT_SHIFT_STAGE_2;
181        add = 1 << (shift - 1);
182        if((zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */
183        {
184            for(j = 0; j < trans_size; j++)
185            {
186                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
187                for(k = 0; k < 4; k++)
188                {
189                    o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
190                                    + g_ai2_ihevc_trans_8[3][k] * pi2_tmp[3 * trans_size];
191                }
192                eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size];
193                eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size];
194                ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0];
195                ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0];
196
197                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
198                e[0] = ee[0] + eo[0];
199                e[3] = ee[0] - eo[0];
200                e[1] = ee[1] + eo[1];
201                e[2] = ee[1] - eo[1];
202                for(k = 0; k < 4; k++)
203                {
204                    WORD32 itrans_out;
205                    itrans_out =
206                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
207                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
208                    itrans_out =
209                                    CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
210                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
211                }
212                pi2_tmp++;
213                pu1_pred += pred_strd;
214                pu1_dst += dst_strd;
215            }
216        }
217        else /* All rows of output of 1st stage are non-zero */
218        {
219            for(j = 0; j < trans_size; j++)
220            {
221                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
222                for(k = 0; k < 4; k++)
223                {
224                    o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
225                                    + g_ai2_ihevc_trans_8[3][k]
226                                                    * pi2_tmp[3 * trans_size]
227                                    + g_ai2_ihevc_trans_8[5][k]
228                                                    * pi2_tmp[5 * trans_size]
229                                    + g_ai2_ihevc_trans_8[7][k]
230                                                    * pi2_tmp[7 * trans_size];
231                }
232
233                eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size]
234                                + g_ai2_ihevc_trans_8[6][0] * pi2_tmp[6 * trans_size];
235                eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size]
236                                + g_ai2_ihevc_trans_8[6][1] * pi2_tmp[6 * trans_size];
237                ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0]
238                                + g_ai2_ihevc_trans_8[4][0] * pi2_tmp[4 * trans_size];
239                ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0]
240                                + g_ai2_ihevc_trans_8[4][1] * pi2_tmp[4 * trans_size];
241
242                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
243                e[0] = ee[0] + eo[0];
244                e[3] = ee[0] - eo[0];
245                e[1] = ee[1] + eo[1];
246                e[2] = ee[1] - eo[1];
247                for(k = 0; k < 4; k++)
248                {
249                    WORD32 itrans_out;
250                    itrans_out =
251                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
252                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
253                    itrans_out =
254                                    CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
255                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
256                }
257                pi2_tmp++;
258                pu1_pred += pred_strd;
259                pu1_dst += dst_strd;
260            }
261        }
262        /************************************************************************************************/
263        /************************************END - IT_RECON_8x8******************************************/
264        /************************************************************************************************/
265    }
266    else /* All rows of input are non-zero */
267    {
268        /************************************************************************************************/
269        /**********************************START - IT_RECON_8x8******************************************/
270        /************************************************************************************************/
271
272        /* Inverse Transform 1st stage */
273        shift = IT_SHIFT_STAGE_1;
274        add = 1 << (shift - 1);
275
276        for(j = 0; j < row_limit_2nd_stage; j++)
277        {
278            /* Checking for Zero Cols */
279            if((zero_cols & 1) == 1)
280            {
281                memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
282            }
283            else
284            {
285                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
286                for(k = 0; k < 4; k++)
287                {
288                    o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_src[src_strd]
289                                    + g_ai2_ihevc_trans_8[3][k]
290                                                    * pi2_src[3 * src_strd]
291                                    + g_ai2_ihevc_trans_8[5][k]
292                                                    * pi2_src[5 * src_strd]
293                                    + g_ai2_ihevc_trans_8[7][k]
294                                                    * pi2_src[7 * src_strd];
295                }
296
297                eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_src[2 * src_strd]
298                                + g_ai2_ihevc_trans_8[6][0] * pi2_src[6 * src_strd];
299                eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_src[2 * src_strd]
300                                + g_ai2_ihevc_trans_8[6][1] * pi2_src[6 * src_strd];
301                ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_src[0]
302                                + g_ai2_ihevc_trans_8[4][0] * pi2_src[4 * src_strd];
303                ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_src[0]
304                                + g_ai2_ihevc_trans_8[4][1] * pi2_src[4 * src_strd];
305
306                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
307                e[0] = ee[0] + eo[0];
308                e[3] = ee[0] - eo[0];
309                e[1] = ee[1] + eo[1];
310                e[2] = ee[1] - eo[1];
311                for(k = 0; k < 4; k++)
312                {
313                    pi2_tmp[k] =
314                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
315                    pi2_tmp[k + 4] =
316                                    CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
317                }
318            }
319            pi2_src++;
320            pi2_tmp += trans_size;
321            zero_cols = zero_cols >> 1;
322        }
323
324        pi2_tmp = pi2_tmp_orig;
325
326        /* Inverse Transform 2nd stage */
327        shift = IT_SHIFT_STAGE_2;
328        add = 1 << (shift - 1);
329        if((zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */
330        {
331            for(j = 0; j < trans_size; j++)
332            {
333                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
334                for(k = 0; k < 4; k++)
335                {
336                    o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
337                                    + g_ai2_ihevc_trans_8[3][k] * pi2_tmp[3 * trans_size];
338                }
339                eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size];
340                eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size];
341                ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0];
342                ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0];
343
344                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
345                e[0] = ee[0] + eo[0];
346                e[3] = ee[0] - eo[0];
347                e[1] = ee[1] + eo[1];
348                e[2] = ee[1] - eo[1];
349                for(k = 0; k < 4; k++)
350                {
351                    WORD32 itrans_out;
352                    itrans_out =
353                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
354                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
355                    itrans_out =
356                                    CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
357                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
358                }
359                pi2_tmp++;
360                pu1_pred += pred_strd;
361                pu1_dst += dst_strd;
362            }
363        }
364        else /* All rows of output of 1st stage are non-zero */
365        {
366            for(j = 0; j < trans_size; j++)
367            {
368                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
369                for(k = 0; k < 4; k++)
370                {
371                    o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
372                                    + g_ai2_ihevc_trans_8[3][k]
373                                                    * pi2_tmp[3 * trans_size]
374                                    + g_ai2_ihevc_trans_8[5][k]
375                                                    * pi2_tmp[5 * trans_size]
376                                    + g_ai2_ihevc_trans_8[7][k]
377                                                    * pi2_tmp[7 * trans_size];
378                }
379
380                eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size]
381                                + g_ai2_ihevc_trans_8[6][0] * pi2_tmp[6 * trans_size];
382                eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size]
383                                + g_ai2_ihevc_trans_8[6][1] * pi2_tmp[6 * trans_size];
384                ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0]
385                                + g_ai2_ihevc_trans_8[4][0] * pi2_tmp[4 * trans_size];
386                ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0]
387                                + g_ai2_ihevc_trans_8[4][1] * pi2_tmp[4 * trans_size];
388
389                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
390                e[0] = ee[0] + eo[0];
391                e[3] = ee[0] - eo[0];
392                e[1] = ee[1] + eo[1];
393                e[2] = ee[1] - eo[1];
394                for(k = 0; k < 4; k++)
395                {
396                    WORD32 itrans_out;
397                    itrans_out =
398                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
399                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
400                    itrans_out =
401                                    CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
402                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
403                }
404                pi2_tmp++;
405                pu1_pred += pred_strd;
406                pu1_dst += dst_strd;
407            }
408        }
409        /************************************************************************************************/
410        /************************************END - IT_RECON_8x8******************************************/
411        /************************************************************************************************/
412    }
413}
414
415