idct.cpp revision 3306cfee3bf38ab207a0504e49c2d492bb73ffbf
1/* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 * express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
17 */
18/*
19------------------------------------------------------------------------------
20 MODULE DESCRIPTION
21
22 This file contains the functions that transform an 8r8 image block from
23 dequantized DCT coefficients to spatial domain pirel values by calculating
24 inverse discrete cosine transform (IDCT).
25
26------------------------------------------------------------------------------
27*/
28/*----------------------------------------------------------------------------
29; INCLUDES
30----------------------------------------------------------------------------*/
31#include "mp4dec_lib.h"
32#include "idct.h"
33#include "motion_comp.h"
34#ifndef FAST_IDCT
35
36/*
37------------------------------------------------------------------------------
38 FUNCTION NAME: idct
39------------------------------------------------------------------------------
40 INPUT AND OUTPUT DEFINITIONS FOR idct
41
42 Inputs:
43    blk = pointer to the buffer containing the dequantized DCT
44          coefficients of type int for an 8r8 image block;
45          values range from (-2048, 2047) which defined as standard.
46
47 Local Stores/Buffers/Pointers Needed:
48    None
49
50 Global Stores/Buffers/Pointers Needed:
51    None
52
53 Outputs:
54    None
55
56 Pointers and Buffers Modified:
57    blk points to the found IDCT values for an 8r8 image block.
58
59 Local Stores Modified:
60    None
61
62 Global Stores Modified:
63    None
64
65------------------------------------------------------------------------------
66 FUNCTION DESCRIPTION FOR idct
67
68 This function transforms an 8r8 image block from dequantized DCT coefficients
69 (F(u,v)) to spatial domain pirel values (f(r,y)) by performing the two
70 dimensional inverse discrete cosine transform (IDCT).
71
72         _7_ _7_      C(u) C(v)
73    f(r,y) = \   \  F(u,v)---- ----cos[(2r+1)*u*pi/16]cos[(2y+1)*v*pi/16]
74         /__ /__    2    2
75         u=0 v=0
76
77    where   C(i) = 1/sqrt(2)    if i=0
78        C(i) = 1        otherwise
79
80 2-D IDCT can be separated as horizontal(row-wise) and vertical(column-wise)
81 1-D IDCTs. Therefore, 2-D IDCT values are found by the following two steps:
82 1. Find horizontal 1-D IDCT values for each row from 8r8 dequantized DCT
83    coefficients by row IDCT operation.
84
85          _7_        C(u)
86    g(r,v) =  \   F(u,v) ---- cos[(2r+1)*u*pi/16]
87          /__         2
88          u=0
89
90 2. Find vertical 1-D IDCT values for each column from the results of 1
91    by column IDCT operation.
92
93              _7_        C(v)
94    f(r,y) =  \   g(r,v) ---- cos[(2y+1)*v*pi/16]
95          /__         2
96          v=0
97
98------------------------------------------------------------------------------
99 REQUIREMENTS FOR idct
100
101 None
102
103------------------------------------------------------------------------------
104*/
105/*  REFERENCES FOR idct */
106/* idct.c, inverse fast discrete cosine transform
107 inverse two dimensional DCT, Chen-Wang algorithm
108 (cf. IEEE ASSP-32, pp. 803-816, Aug. 1984)
109 32-bit integer arithmetic (8 bit coefficients)
110 11 mults, 29 adds per DCT
111 sE, 18.8.91
112
113 coefficients ertended to 12 bit for IEEE1180-1990
114 compliance                           sE,  2.1.94
115*/
116
117
118/*----------------------------------------------------------------------------
119; Function Code FOR idct
120----------------------------------------------------------------------------*/
121void idct_intra(
122    int *blk, uint8 *comp, int width
123)
124{
125    /*----------------------------------------------------------------------------
126    ; Define all local variables
127    ----------------------------------------------------------------------------*/
128    int i;
129    int32   tmpBLK[64];
130    int32   *tmpBLK32 = &tmpBLK[0];
131    int32   r0, r1, r2, r3, r4, r5, r6, r7, r8; /* butterfly nodes */
132    int32   a;
133    int offset = width - 8;
134    /*----------------------------------------------------------------------------
135    ; Function body here
136    ----------------------------------------------------------------------------*/
137    /* two dimensional inverse discrete cosine transform */
138
139
140    /* column (vertical) IDCT */
141    for (i = B_SIZE - 1; i >= 0; i--)
142    {
143        /* initialize butterfly nodes at first stage */
144
145        r1 = blk[B_SIZE * 4 + i] << 11;
146        /* since row IDCT results have net left shift by 3 */
147        /* this left shift by 8 gives net left shift by 11 */
148        /* in order to maintain the same scale as that of  */
149        /* coefficients Wi */
150
151        r2 = blk[B_SIZE * 6 + i];
152        r3 = blk[B_SIZE * 2 + i];
153        r4 = blk[B_SIZE * 1 + i];
154        r5 = blk[B_SIZE * 7 + i];
155        r6 = blk[B_SIZE * 5 + i];
156        r7 = blk[B_SIZE * 3 + i];
157
158        if (!(r1 | r2 | r3 | r4 | r5 | r6 | r7))
159        {
160            /* shortcut */
161            /* execute if values of g(r,1) to g(r,7) in a column*/
162            /* are all zeros */
163
164            /* make output of IDCT >>3 or scaled by 1/8 and */
165            /* with the proper rounding */
166            a = (blk[B_SIZE * 0 + i]) << 3;
167            tmpBLK32[B_SIZE * 0 + i] = a;
168            tmpBLK32[B_SIZE * 1 + i] = a;
169            tmpBLK32[B_SIZE * 2 + i] = a;
170            tmpBLK32[B_SIZE * 3 + i] = a;
171            tmpBLK32[B_SIZE * 4 + i] = a;
172            tmpBLK32[B_SIZE * 5 + i] = a;
173            tmpBLK32[B_SIZE * 6 + i] = a;
174            tmpBLK32[B_SIZE * 7 + i] = a;
175        }
176        else
177        {
178            r0 = (blk[8 * 0 + i] << 11) + 128;
179
180            /* first stage */
181
182            r8 = W7 * (r4 + r5);
183            r4 = (r8 + (W1 - W7) * r4);
184            /* Multiplication with Wi increases the net left */
185            /* shift from 11 to 14,we have to shift back by 3*/
186            r5 = (r8 - (W1 + W7) * r5);
187            r8 = W3 * (r6 + r7);
188            r6 = (r8 - (W3 - W5) * r6);
189            r7 = (r8 - (W3 + W5) * r7);
190
191            /* second stage */
192            r8 = r0 + r1;
193            r0 -= r1;
194
195            r1 = W6 * (r3 + r2);
196            r2 = (r1 - (W2 + W6) * r2);
197            r3 = (r1 + (W2 - W6) * r3);
198
199            r1 = r4 + r6;
200            r4 -= r6;
201            r6 = r5 + r7;
202            r5 -= r7;
203
204            /* third stage */
205            r7 = r8 + r3;
206            r8 -= r3;
207            r3 = r0 + r2;
208            r0 -= r2;
209            r2 = (181 * (r4 + r5) + 128) >> 8;  /* rounding */
210            r4 = (181 * (r4 - r5) + 128) >> 8;
211
212            /* fourth stage */
213            /* net shift of IDCT is >>3 after the following */
214            /* shift operation, it makes output of 2-D IDCT */
215            /* scaled by 1/8, that is scaled twice by       */
216            /* 1/(2*sqrt(2)) for row IDCT and column IDCT.  */
217            /* see detail analysis in design doc.           */
218            tmpBLK32[0 + i] = (r7 + r1) >> 8;
219            tmpBLK32[(1<<3) + i] = (r3 + r2) >> 8;
220            tmpBLK32[(2<<3) + i] = (r0 + r4) >> 8;
221            tmpBLK32[(3<<3) + i] = (r8 + r6) >> 8;
222            tmpBLK32[(4<<3) + i] = (r8 - r6) >> 8;
223            tmpBLK32[(5<<3) + i] = (r0 - r4) >> 8;
224            tmpBLK32[(6<<3) + i] = (r3 - r2) >> 8;
225            tmpBLK32[(7<<3) + i] = (r7 - r1) >> 8;
226        }
227    }
228    /* row (horizontal) IDCT */
229    for (i = 0 ; i < B_SIZE; i++)
230    {
231        /* initialize butterfly nodes at the first stage */
232
233        r1 = ((int32)tmpBLK32[4+(i<<3)]) << 8;
234        /* r1 left shift by 11 is to maintain the same  */
235        /* scale as that of coefficients (W1,...W7) */
236        /* since blk[4] won't multiply with Wi.     */
237        /* see detail diagram in design document.   */
238
239        r2 = tmpBLK32[6+(i<<3)];
240        r3 = tmpBLK32[2+(i<<3)];
241        r4 = tmpBLK32[1+(i<<3)];
242        r5 = tmpBLK32[7+(i<<3)];
243        r6 = tmpBLK32[5+(i<<3)];
244        r7 = tmpBLK32[3+(i<<3)];
245
246        if (!(r1 | r2 | r3 | r4 | r5 | r6 | r7))
247        {
248            /* shortcut */
249            /* execute if values of F(1,v) to F(7,v) in a row*/
250            /* are all zeros */
251
252            /* output of row IDCT scaled by 8 */
253            a = (((int32)tmpBLK32[0+(i<<3)] + 32) >> 6);
254            CLIP_RESULT(a)
255            *comp++ = a;
256            *comp++ = a;
257            *comp++ = a;
258            *comp++ = a;
259            *comp++ = a;
260            *comp++ = a;
261            *comp++ = a;
262            *comp++ = a;
263
264            comp += offset;
265        }
266
267        else
268        {
269            /* for proper rounding in the fourth stage */
270            r0 = (((int32)tmpBLK32[0+(i<<3)]) << 8) + 8192;
271
272            /* first stage */
273
274            r8 = W7 * (r4 + r5) + 4;
275            r4 = (r8 + (W1 - W7) * r4) >> 3;
276            r5 = (r8 - (W1 + W7) * r5) >> 3;
277
278            r8 = W3 * (r6 + r7) + 4;
279            r6 = (r8 - (W3 - W5) * r6) >> 3;
280            r7 = (r8 - (W3 + W5) * r7) >> 3;
281
282            /* second stage */
283            r8 = r0 + r1;
284            r0 -= r1;
285
286            r1 = W6 * (r3 + r2) + 4;
287            r2 = (r1 - (W2 + W6) * r2) >> 3;
288            r3 = (r1 + (W2 - W6) * r3) >> 3;
289
290            r1 = r4 + r6;
291            r4 -= r6;
292            r6 = r5 + r7;
293            r5 -= r7;
294
295            /* third stage */
296            r7 = r8 + r3;
297            r8 -= r3;
298            r3 = r0 + r2;
299            r0 -= r2;
300            r2 = (181 * (r4 + r5) + 128) >> 8;    /* rounding */
301            r4 = (181 * (r4 - r5) + 128) >> 8;
302
303            /* fourth stage */
304            /* net shift of this function is <<3 after the    */
305            /* following shift operation, it makes output of  */
306            /* row IDCT scaled by 8 to retain 3 bits precision*/
307            a = ((r7 + r1) >> 14);
308            CLIP_RESULT(a)
309            *comp++ = a;
310            a = ((r3 + r2) >> 14);
311            CLIP_RESULT(a)
312            *comp++ = a;
313            a = ((r0 + r4) >> 14);
314            CLIP_RESULT(a)
315            *comp++ = a;
316            a = ((r8 + r6) >> 14);
317            CLIP_RESULT(a)
318            *comp++ = a;
319            a = ((r8 - r6) >> 14);
320            CLIP_RESULT(a)
321            *comp++ = a;
322            a = ((r0 - r4) >> 14);
323            CLIP_RESULT(a)
324            *comp++ = a;
325            a = ((r3 - r2) >> 14);
326            CLIP_RESULT(a)
327            *comp++ = a;
328            a = ((r7 - r1) >> 14);
329            CLIP_RESULT(a)
330            *comp++ = a;
331
332            comp += offset;
333        }
334    }
335
336
337
338    /*----------------------------------------------------------------------------
339    ; Return nothing or data or data pointer
340    ----------------------------------------------------------------------------*/
341    return;
342}
343
344void idct(
345    int *blk, uint8 *pred, uint8 *dst, int width)
346{
347    /*----------------------------------------------------------------------------
348    ; Define all local variables
349    ----------------------------------------------------------------------------*/
350    int i;
351    int32   tmpBLK[64];
352    int32   *tmpBLK32 = &tmpBLK[0];
353    int32   r0, r1, r2, r3, r4, r5, r6, r7, r8; /* butterfly nodes */
354    int32   a;
355    int res;
356
357    /*----------------------------------------------------------------------------
358    ; Function body here
359    ----------------------------------------------------------------------------*/
360    /* two dimensional inverse discrete cosine transform */
361
362
363    /* column (vertical) IDCT */
364    for (i = B_SIZE - 1; i >= 0; i--)
365    {
366        /* initialize butterfly nodes at first stage */
367
368        r1 = blk[B_SIZE * 4 + i] << 11;
369        /* since row IDCT results have net left shift by 3 */
370        /* this left shift by 8 gives net left shift by 11 */
371        /* in order to maintain the same scale as that of  */
372        /* coefficients Wi */
373
374        r2 = blk[B_SIZE * 6 + i];
375        r3 = blk[B_SIZE * 2 + i];
376        r4 = blk[B_SIZE * 1 + i];
377        r5 = blk[B_SIZE * 7 + i];
378        r6 = blk[B_SIZE * 5 + i];
379        r7 = blk[B_SIZE * 3 + i];
380
381        if (!(r1 | r2 | r3 | r4 | r5 | r6 | r7))
382        {
383            /* shortcut */
384            /* execute if values of g(r,1) to g(r,7) in a column*/
385            /* are all zeros */
386
387            /* make output of IDCT >>3 or scaled by 1/8 and */
388            /* with the proper rounding */
389            a = (blk[B_SIZE * 0 + i]) << 3;
390            tmpBLK32[B_SIZE * 0 + i] = a;
391            tmpBLK32[B_SIZE * 1 + i] = a;
392            tmpBLK32[B_SIZE * 2 + i] = a;
393            tmpBLK32[B_SIZE * 3 + i] = a;
394            tmpBLK32[B_SIZE * 4 + i] = a;
395            tmpBLK32[B_SIZE * 5 + i] = a;
396            tmpBLK32[B_SIZE * 6 + i] = a;
397            tmpBLK32[B_SIZE * 7 + i] = a;
398        }
399        else
400        {
401            r0 = (blk[8 * 0 + i] << 11) + 128;
402
403            /* first stage */
404
405            r8 = W7 * (r4 + r5);
406            r4 = (r8 + (W1 - W7) * r4);
407            /* Multiplication with Wi increases the net left */
408            /* shift from 11 to 14,we have to shift back by 3*/
409            r5 = (r8 - (W1 + W7) * r5);
410            r8 = W3 * (r6 + r7);
411            r6 = (r8 - (W3 - W5) * r6);
412            r7 = (r8 - (W3 + W5) * r7);
413
414            /* second stage */
415            r8 = r0 + r1;
416            r0 -= r1;
417
418            r1 = W6 * (r3 + r2);
419            r2 = (r1 - (W2 + W6) * r2);
420            r3 = (r1 + (W2 - W6) * r3);
421
422            r1 = r4 + r6;
423            r4 -= r6;
424            r6 = r5 + r7;
425            r5 -= r7;
426
427            /* third stage */
428            r7 = r8 + r3;
429            r8 -= r3;
430            r3 = r0 + r2;
431            r0 -= r2;
432            r2 = (181 * (r4 + r5) + 128) >> 8;  /* rounding */
433            r4 = (181 * (r4 - r5) + 128) >> 8;
434
435            /* fourth stage */
436            /* net shift of IDCT is >>3 after the following */
437            /* shift operation, it makes output of 2-D IDCT */
438            /* scaled by 1/8, that is scaled twice by       */
439            /* 1/(2*sqrt(2)) for row IDCT and column IDCT.  */
440            /* see detail analysis in design doc.           */
441            tmpBLK32[0 + i] = (r7 + r1) >> 8;
442            tmpBLK32[(1<<3) + i] = (r3 + r2) >> 8;
443            tmpBLK32[(2<<3) + i] = (r0 + r4) >> 8;
444            tmpBLK32[(3<<3) + i] = (r8 + r6) >> 8;
445            tmpBLK32[(4<<3) + i] = (r8 - r6) >> 8;
446            tmpBLK32[(5<<3) + i] = (r0 - r4) >> 8;
447            tmpBLK32[(6<<3) + i] = (r3 - r2) >> 8;
448            tmpBLK32[(7<<3) + i] = (r7 - r1) >> 8;
449        }
450    }
451    /* row (horizontal) IDCT */
452    for (i = B_SIZE - 1; i >= 0; i--)
453    {
454        /* initialize butterfly nodes at the first stage */
455
456        r1 = ((int32)tmpBLK32[4+(i<<3)]) << 8;
457        /* r1 left shift by 11 is to maintain the same  */
458        /* scale as that of coefficients (W1,...W7) */
459        /* since blk[4] won't multiply with Wi.     */
460        /* see detail diagram in design document.   */
461
462        r2 = tmpBLK32[6+(i<<3)];
463        r3 = tmpBLK32[2+(i<<3)];
464        r4 = tmpBLK32[1+(i<<3)];
465        r5 = tmpBLK32[7+(i<<3)];
466        r6 = tmpBLK32[5+(i<<3)];
467        r7 = tmpBLK32[3+(i<<3)];
468
469        if (!(r1 | r2 | r3 | r4 | r5 | r6 | r7))
470        {
471            /* shortcut */
472            /* execute if values of F(1,v) to F(7,v) in a row*/
473            /* are all zeros */
474
475            /* output of row IDCT scaled by 8 */
476            a = (tmpBLK32[0+(i<<3)] + 32) >> 6;
477            blk[0+(i<<3)] = a;
478            blk[1+(i<<3)] = a;
479            blk[2+(i<<3)] = a;
480            blk[3+(i<<3)] = a;
481            blk[4+(i<<3)] = a;
482            blk[5+(i<<3)] = a;
483            blk[6+(i<<3)] = a;
484            blk[7+(i<<3)] = a;
485
486        }
487
488        else
489        {
490            /* for proper rounding in the fourth stage */
491            r0 = (((int32)tmpBLK32[0+(i<<3)]) << 8) + 8192;
492
493            /* first stage */
494
495            r8 = W7 * (r4 + r5) + 4;
496            r4 = (r8 + (W1 - W7) * r4) >> 3;
497            r5 = (r8 - (W1 + W7) * r5) >> 3;
498
499            r8 = W3 * (r6 + r7) + 4;
500            r6 = (r8 - (W3 - W5) * r6) >> 3;
501            r7 = (r8 - (W3 + W5) * r7) >> 3;
502
503            /* second stage */
504            r8 = r0 + r1;
505            r0 -= r1;
506
507            r1 = W6 * (r3 + r2) + 4;
508            r2 = (r1 - (W2 + W6) * r2) >> 3;
509            r3 = (r1 + (W2 - W6) * r3) >> 3;
510
511            r1 = r4 + r6;
512            r4 -= r6;
513            r6 = r5 + r7;
514            r5 -= r7;
515
516            /* third stage */
517            r7 = r8 + r3;
518            r8 -= r3;
519            r3 = r0 + r2;
520            r0 -= r2;
521            r2 = (181 * (r4 + r5) + 128) >> 8;    /* rounding */
522            r4 = (181 * (r4 - r5) + 128) >> 8;
523
524            /* fourth stage */
525            /* net shift of this function is <<3 after the    */
526            /* following shift operation, it makes output of  */
527            /* row IDCT scaled by 8 to retain 3 bits precision*/
528            blk[0+(i<<3)] = (r7 + r1) >> 14;
529            blk[1+(i<<3)] = (r3 + r2) >> 14;
530            blk[2+(i<<3)] = (r0 + r4) >> 14;
531            blk[3+(i<<3)] = (r8 + r6) >> 14;
532            blk[4+(i<<3)] = (r8 - r6) >> 14;
533            blk[5+(i<<3)] = (r0 - r4) >> 14;
534            blk[6+(i<<3)] = (r3 - r2) >> 14;
535            blk[7+(i<<3)] = (r7 - r1) >> 14;
536        }
537        /*  add with prediction ,  08/03/05 */
538        res = (*pred++ + block[0+(i<<3)]);
539        CLIP_RESULT(res);
540        *dst++ = res;
541        res = (*pred++ + block[1+(i<<3)]);
542        CLIP_RESULT(res);
543        *dst++ = res;
544        res = (*pred++ + block[2+(i<<3)]);
545        CLIP_RESULT(res);
546        *dst++ = res;
547        res = (*pred++ + block[3+(i<<3)]);
548        CLIP_RESULT(res);
549        *dst++ = res;
550        res = (*pred++ + block[4+(i<<3)]);
551        CLIP_RESULT(res);
552        *dst++ = res;
553        res = (*pred++ + block[5+(i<<3)]);
554        CLIP_RESULT(res);
555        *dst++ = res;
556        res = (*pred++ + block[6+(i<<3)]);
557        CLIP_RESULT(res);
558        *dst++ = res;
559        res = (*pred++ + block[7+(i<<3)]);
560        CLIP_RESULT(res);
561        *dst++ = res;
562
563        pred += 8;
564        dst += (width - 8);
565    }
566
567
568
569    /*----------------------------------------------------------------------------
570    ; Return nothing or data or data pointer
571    ----------------------------------------------------------------------------*/
572    return;
573}
574
575#endif
576/*----------------------------------------------------------------------------
577; End Function: idct
578----------------------------------------------------------------------------*/
579
580