13306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong/* ------------------------------------------------------------------
23306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong * Copyright (C) 1998-2009 PacketVideo
33306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong *
43306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong * Licensed under the Apache License, Version 2.0 (the "License");
53306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong * you may not use this file except in compliance with the License.
63306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong * You may obtain a copy of the License at
73306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong *
83306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong *      http://www.apache.org/licenses/LICENSE-2.0
93306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong *
103306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong * Unless required by applicable law or agreed to in writing, software
113306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong * distributed under the License is distributed on an "AS IS" BASIS,
123306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
133306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong * express or implied.
143306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong * See the License for the specific language governing permissions
153306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong * and limitations under the License.
163306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong * -------------------------------------------------------------------
173306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong */
/*
------------------------------------------------------------------------------
 MODULE DESCRIPTION

 This file contains the functions that transform an 8x8 image block from
 dequantized DCT coefficients to spatial domain pixel values by calculating
 the inverse discrete cosine transform (IDCT).

------------------------------------------------------------------------------
*/
283306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong/*----------------------------------------------------------------------------
293306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong; INCLUDES
303306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong----------------------------------------------------------------------------*/
313306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong#include "mp4dec_lib.h"
323306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong#include "idct.h"
333306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong#include "motion_comp.h"
343306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong#ifndef FAST_IDCT
353306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
/*
------------------------------------------------------------------------------
 FUNCTION NAME: idct
------------------------------------------------------------------------------
 INPUT AND OUTPUT DEFINITIONS FOR idct

 Inputs:
    blk = pointer to the buffer containing the dequantized DCT
          coefficients of type int for an 8x8 image block;
          values range from (-2048, 2047), as defined by the standard.

 Local Stores/Buffers/Pointers Needed:
    None

 Global Stores/Buffers/Pointers Needed:
    None

 Outputs:
    None

 Pointers and Buffers Modified:
    blk points to the found IDCT values for an 8x8 image block.

 Local Stores Modified:
    None

 Global Stores Modified:
    None

------------------------------------------------------------------------------
 FUNCTION DESCRIPTION FOR idct

 This function transforms an 8x8 image block from dequantized DCT coefficients
 (F(u,v)) to spatial domain pixel values (f(x,y)) by performing the two
 dimensional inverse discrete cosine transform (IDCT).

             _7_ _7_         C(u) C(v)
    f(x,y) = \   \   F(u,v) ---- ---- cos[(2x+1)*u*pi/16] cos[(2y+1)*v*pi/16]
             /__ /__          2    2
             u=0 v=0

    where   C(i) = 1/sqrt(2)    if i=0
            C(i) = 1            otherwise

 2-D IDCT can be separated as horizontal (row-wise) and vertical (column-wise)
 1-D IDCTs. Therefore, 2-D IDCT values are found by the following two steps:
 1. Find horizontal 1-D IDCT values for each row from 8x8 dequantized DCT
    coefficients by row IDCT operation.

             _7_         C(u)
    g(x,v) = \   F(u,v) ---- cos[(2x+1)*u*pi/16]
             /__          2
             u=0

 2. Find vertical 1-D IDCT values for each column from the results of 1
    by column IDCT operation.

             _7_         C(v)
    f(x,y) = \   g(x,v) ---- cos[(2y+1)*v*pi/16]
             /__          2
             v=0

------------------------------------------------------------------------------
 REQUIREMENTS FOR idct

 None

------------------------------------------------------------------------------
*/
/*  REFERENCES FOR idct */
/* idct.c, inverse fast discrete cosine transform
 inverse two dimensional DCT, Chen-Wang algorithm
 (cf. IEEE ASSP-32, pp. 803-816, Aug. 1984)
 32-bit integer arithmetic (8 bit coefficients)
 11 mults, 29 adds per DCT
 sE, 18.8.91

 coefficients extended to 12 bit for IEEE1180-1990
 compliance                           sE,  2.1.94
*/
1163306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
1173306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
1183306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong/*----------------------------------------------------------------------------
1193306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong; Function Code FOR idct
1203306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong----------------------------------------------------------------------------*/
1213306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dongvoid idct_intra(
1223306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int *blk, uint8 *comp, int width
1233306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong)
1243306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong{
1253306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    /*----------------------------------------------------------------------------
1263306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    ; Define all local variables
1273306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    ----------------------------------------------------------------------------*/
1283306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int i;
1293306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int32   tmpBLK[64];
1303306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int32   *tmpBLK32 = &tmpBLK[0];
1313306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int32   r0, r1, r2, r3, r4, r5, r6, r7, r8; /* butterfly nodes */
1323306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int32   a;
1333306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int offset = width - 8;
1343306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    /*----------------------------------------------------------------------------
1353306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    ; Function body here
1363306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    ----------------------------------------------------------------------------*/
1373306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    /* two dimensional inverse discrete cosine transform */
1383306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
1393306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
1403306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    /* column (vertical) IDCT */
1413306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    for (i = B_SIZE - 1; i >= 0; i--)
1423306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    {
1433306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        /* initialize butterfly nodes at first stage */
1443306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
1453306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        r1 = blk[B_SIZE * 4 + i] << 11;
1463306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        /* since row IDCT results have net left shift by 3 */
1473306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        /* this left shift by 8 gives net left shift by 11 */
1483306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        /* in order to maintain the same scale as that of  */
1493306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        /* coefficients Wi */
1503306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
1513306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        r2 = blk[B_SIZE * 6 + i];
1523306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        r3 = blk[B_SIZE * 2 + i];
1533306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        r4 = blk[B_SIZE * 1 + i];
1543306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        r5 = blk[B_SIZE * 7 + i];
1553306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        r6 = blk[B_SIZE * 5 + i];
1563306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        r7 = blk[B_SIZE * 3 + i];
1573306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
1583306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        if (!(r1 | r2 | r3 | r4 | r5 | r6 | r7))
1593306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        {
1603306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* shortcut */
1613306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* execute if values of g(r,1) to g(r,7) in a column*/
1623306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* are all zeros */
1633306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
1643306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* make output of IDCT >>3 or scaled by 1/8 and */
1653306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* with the proper rounding */
1663306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            a = (blk[B_SIZE * 0 + i]) << 3;
1673306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            tmpBLK32[B_SIZE * 0 + i] = a;
1683306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            tmpBLK32[B_SIZE * 1 + i] = a;
1693306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            tmpBLK32[B_SIZE * 2 + i] = a;
1703306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            tmpBLK32[B_SIZE * 3 + i] = a;
1713306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            tmpBLK32[B_SIZE * 4 + i] = a;
1723306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            tmpBLK32[B_SIZE * 5 + i] = a;
1733306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            tmpBLK32[B_SIZE * 6 + i] = a;
1743306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            tmpBLK32[B_SIZE * 7 + i] = a;
1753306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        }
1763306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        else
1773306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        {
1783306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r0 = (blk[8 * 0 + i] << 11) + 128;
1793306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
1803306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* first stage */
1813306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
1823306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r8 = W7 * (r4 + r5);
1833306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r4 = (r8 + (W1 - W7) * r4);
1843306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* Multiplication with Wi increases the net left */
1853306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* shift from 11 to 14,we have to shift back by 3*/
1863306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r5 = (r8 - (W1 + W7) * r5);
1873306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r8 = W3 * (r6 + r7);
1883306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r6 = (r8 - (W3 - W5) * r6);
1893306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r7 = (r8 - (W3 + W5) * r7);
1903306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
1913306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* second stage */
1923306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r8 = r0 + r1;
1933306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r0 -= r1;
1943306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
1953306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r1 = W6 * (r3 + r2);
1963306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r2 = (r1 - (W2 + W6) * r2);
1973306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r3 = (r1 + (W2 - W6) * r3);
1983306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
1993306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r1 = r4 + r6;
2003306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r4 -= r6;
2013306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r6 = r5 + r7;
2023306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r5 -= r7;
2033306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
2043306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* third stage */
2053306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r7 = r8 + r3;
2063306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r8 -= r3;
2073306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r3 = r0 + r2;
2083306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r0 -= r2;
2093306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r2 = (181 * (r4 + r5) + 128) >> 8;  /* rounding */
2103306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r4 = (181 * (r4 - r5) + 128) >> 8;
2113306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
2123306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* fourth stage */
2133306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* net shift of IDCT is >>3 after the following */
2143306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* shift operation, it makes output of 2-D IDCT */
2153306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* scaled by 1/8, that is scaled twice by       */
2163306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* 1/(2*sqrt(2)) for row IDCT and column IDCT.  */
2173306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* see detail analysis in design doc.           */
2183306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            tmpBLK32[0 + i] = (r7 + r1) >> 8;
2193306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            tmpBLK32[(1<<3) + i] = (r3 + r2) >> 8;
2203306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            tmpBLK32[(2<<3) + i] = (r0 + r4) >> 8;
2213306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            tmpBLK32[(3<<3) + i] = (r8 + r6) >> 8;
2223306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            tmpBLK32[(4<<3) + i] = (r8 - r6) >> 8;
2233306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            tmpBLK32[(5<<3) + i] = (r0 - r4) >> 8;
2243306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            tmpBLK32[(6<<3) + i] = (r3 - r2) >> 8;
2253306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            tmpBLK32[(7<<3) + i] = (r7 - r1) >> 8;
2263306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        }
2273306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    }
2283306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    /* row (horizontal) IDCT */
2293306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    for (i = 0 ; i < B_SIZE; i++)
2303306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    {
2313306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        /* initialize butterfly nodes at the first stage */
2323306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
2333306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        r1 = ((int32)tmpBLK32[4+(i<<3)]) << 8;
2343306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        /* r1 left shift by 11 is to maintain the same  */
2353306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        /* scale as that of coefficients (W1,...W7) */
2363306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        /* since blk[4] won't multiply with Wi.     */
2373306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        /* see detail diagram in design document.   */
2383306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
2393306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        r2 = tmpBLK32[6+(i<<3)];
2403306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        r3 = tmpBLK32[2+(i<<3)];
2413306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        r4 = tmpBLK32[1+(i<<3)];
2423306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        r5 = tmpBLK32[7+(i<<3)];
2433306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        r6 = tmpBLK32[5+(i<<3)];
2443306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        r7 = tmpBLK32[3+(i<<3)];
2453306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
2463306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        if (!(r1 | r2 | r3 | r4 | r5 | r6 | r7))
2473306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        {
2483306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* shortcut */
2493306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* execute if values of F(1,v) to F(7,v) in a row*/
2503306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* are all zeros */
2513306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
2523306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* output of row IDCT scaled by 8 */
2533306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            a = (((int32)tmpBLK32[0+(i<<3)] + 32) >> 6);
2543306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            CLIP_RESULT(a)
2553306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            *comp++ = a;
2563306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            *comp++ = a;
2573306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            *comp++ = a;
2583306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            *comp++ = a;
2593306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            *comp++ = a;
2603306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            *comp++ = a;
2613306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            *comp++ = a;
2623306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            *comp++ = a;
2633306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
2643306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            comp += offset;
2653306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        }
2663306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
2673306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        else
2683306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        {
2693306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* for proper rounding in the fourth stage */
2703306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r0 = (((int32)tmpBLK32[0+(i<<3)]) << 8) + 8192;
2713306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
2723306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* first stage */
2733306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
2743306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r8 = W7 * (r4 + r5) + 4;
2753306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r4 = (r8 + (W1 - W7) * r4) >> 3;
2763306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r5 = (r8 - (W1 + W7) * r5) >> 3;
2773306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
2783306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r8 = W3 * (r6 + r7) + 4;
2793306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r6 = (r8 - (W3 - W5) * r6) >> 3;
2803306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r7 = (r8 - (W3 + W5) * r7) >> 3;
2813306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
2823306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* second stage */
2833306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r8 = r0 + r1;
2843306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r0 -= r1;
2853306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
2863306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r1 = W6 * (r3 + r2) + 4;
2873306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r2 = (r1 - (W2 + W6) * r2) >> 3;
2883306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r3 = (r1 + (W2 - W6) * r3) >> 3;
2893306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
2903306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r1 = r4 + r6;
2913306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r4 -= r6;
2923306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r6 = r5 + r7;
2933306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r5 -= r7;
2943306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
2953306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* third stage */
2963306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r7 = r8 + r3;
2973306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r8 -= r3;
2983306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r3 = r0 + r2;
2993306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r0 -= r2;
3003306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r2 = (181 * (r4 + r5) + 128) >> 8;    /* rounding */
3013306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r4 = (181 * (r4 - r5) + 128) >> 8;
3023306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
3033306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* fourth stage */
3043306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* net shift of this function is <<3 after the    */
3053306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* following shift operation, it makes output of  */
3063306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* row IDCT scaled by 8 to retain 3 bits precision*/
3073306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            a = ((r7 + r1) >> 14);
3083306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            CLIP_RESULT(a)
3093306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            *comp++ = a;
3103306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            a = ((r3 + r2) >> 14);
3113306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            CLIP_RESULT(a)
3123306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            *comp++ = a;
3133306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            a = ((r0 + r4) >> 14);
3143306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            CLIP_RESULT(a)
3153306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            *comp++ = a;
3163306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            a = ((r8 + r6) >> 14);
3173306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            CLIP_RESULT(a)
3183306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            *comp++ = a;
3193306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            a = ((r8 - r6) >> 14);
3203306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            CLIP_RESULT(a)
3213306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            *comp++ = a;
3223306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            a = ((r0 - r4) >> 14);
3233306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            CLIP_RESULT(a)
3243306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            *comp++ = a;
3253306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            a = ((r3 - r2) >> 14);
3263306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            CLIP_RESULT(a)
3273306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            *comp++ = a;
3283306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            a = ((r7 - r1) >> 14);
3293306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            CLIP_RESULT(a)
3303306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            *comp++ = a;
3313306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
3323306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            comp += offset;
3333306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        }
3343306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    }
3353306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
3363306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
3373306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
3383306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    /*----------------------------------------------------------------------------
3393306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    ; Return nothing or data or data pointer
3403306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    ----------------------------------------------------------------------------*/
3413306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    return;
3423306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong}
3433306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
3443306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dongvoid idct(
3453306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int *blk, uint8 *pred, uint8 *dst, int width)
3463306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong{
3473306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    /*----------------------------------------------------------------------------
3483306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    ; Define all local variables
3493306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    ----------------------------------------------------------------------------*/
3503306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int i;
3513306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int32   tmpBLK[64];
3523306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int32   *tmpBLK32 = &tmpBLK[0];
3533306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int32   r0, r1, r2, r3, r4, r5, r6, r7, r8; /* butterfly nodes */
3543306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int32   a;
3553306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int res;
3563306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
3573306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    /*----------------------------------------------------------------------------
3583306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    ; Function body here
3593306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    ----------------------------------------------------------------------------*/
3603306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    /* two dimensional inverse discrete cosine transform */
3613306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
3623306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
3633306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    /* column (vertical) IDCT */
3643306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    for (i = B_SIZE - 1; i >= 0; i--)
3653306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    {
3663306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        /* initialize butterfly nodes at first stage */
3673306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
3683306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        r1 = blk[B_SIZE * 4 + i] << 11;
3693306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        /* since row IDCT results have net left shift by 3 */
3703306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        /* this left shift by 8 gives net left shift by 11 */
3713306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        /* in order to maintain the same scale as that of  */
3723306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        /* coefficients Wi */
3733306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
3743306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        r2 = blk[B_SIZE * 6 + i];
3753306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        r3 = blk[B_SIZE * 2 + i];
3763306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        r4 = blk[B_SIZE * 1 + i];
3773306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        r5 = blk[B_SIZE * 7 + i];
3783306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        r6 = blk[B_SIZE * 5 + i];
3793306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        r7 = blk[B_SIZE * 3 + i];
3803306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
3813306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        if (!(r1 | r2 | r3 | r4 | r5 | r6 | r7))
3823306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        {
3833306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* shortcut */
3843306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* execute if values of g(r,1) to g(r,7) in a column*/
3853306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* are all zeros */
3863306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
3873306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* make output of IDCT >>3 or scaled by 1/8 and */
3883306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* with the proper rounding */
3893306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            a = (blk[B_SIZE * 0 + i]) << 3;
3903306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            tmpBLK32[B_SIZE * 0 + i] = a;
3913306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            tmpBLK32[B_SIZE * 1 + i] = a;
3923306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            tmpBLK32[B_SIZE * 2 + i] = a;
3933306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            tmpBLK32[B_SIZE * 3 + i] = a;
3943306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            tmpBLK32[B_SIZE * 4 + i] = a;
3953306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            tmpBLK32[B_SIZE * 5 + i] = a;
3963306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            tmpBLK32[B_SIZE * 6 + i] = a;
3973306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            tmpBLK32[B_SIZE * 7 + i] = a;
3983306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        }
3993306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        else
4003306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        {
4013306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r0 = (blk[8 * 0 + i] << 11) + 128;
4023306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
4033306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* first stage */
4043306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
4053306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r8 = W7 * (r4 + r5);
4063306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r4 = (r8 + (W1 - W7) * r4);
4073306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* Multiplication with Wi increases the net left */
4083306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* shift from 11 to 14,we have to shift back by 3*/
4093306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r5 = (r8 - (W1 + W7) * r5);
4103306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r8 = W3 * (r6 + r7);
4113306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r6 = (r8 - (W3 - W5) * r6);
4123306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r7 = (r8 - (W3 + W5) * r7);
4133306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
4143306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* second stage */
4153306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r8 = r0 + r1;
4163306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r0 -= r1;
4173306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
4183306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r1 = W6 * (r3 + r2);
4193306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r2 = (r1 - (W2 + W6) * r2);
4203306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r3 = (r1 + (W2 - W6) * r3);
4213306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
4223306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r1 = r4 + r6;
4233306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r4 -= r6;
4243306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r6 = r5 + r7;
4253306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r5 -= r7;
4263306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
4273306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* third stage */
4283306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r7 = r8 + r3;
4293306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r8 -= r3;
4303306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r3 = r0 + r2;
4313306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r0 -= r2;
4323306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r2 = (181 * (r4 + r5) + 128) >> 8;  /* rounding */
4333306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r4 = (181 * (r4 - r5) + 128) >> 8;
4343306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
4353306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* fourth stage */
4363306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* net shift of IDCT is >>3 after the following */
4373306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* shift operation, it makes output of 2-D IDCT */
4383306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* scaled by 1/8, that is scaled twice by       */
4393306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* 1/(2*sqrt(2)) for row IDCT and column IDCT.  */
4403306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* see detail analysis in design doc.           */
4413306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            tmpBLK32[0 + i] = (r7 + r1) >> 8;
4423306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            tmpBLK32[(1<<3) + i] = (r3 + r2) >> 8;
4433306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            tmpBLK32[(2<<3) + i] = (r0 + r4) >> 8;
4443306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            tmpBLK32[(3<<3) + i] = (r8 + r6) >> 8;
4453306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            tmpBLK32[(4<<3) + i] = (r8 - r6) >> 8;
4463306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            tmpBLK32[(5<<3) + i] = (r0 - r4) >> 8;
4473306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            tmpBLK32[(6<<3) + i] = (r3 - r2) >> 8;
4483306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            tmpBLK32[(7<<3) + i] = (r7 - r1) >> 8;
4493306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        }
4503306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    }
4513306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    /* row (horizontal) IDCT */
4523306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    for (i = B_SIZE - 1; i >= 0; i--)
4533306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    {
4543306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        /* initialize butterfly nodes at the first stage */
4553306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
4563306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        r1 = ((int32)tmpBLK32[4+(i<<3)]) << 8;
4573306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        /* r1 left shift by 11 is to maintain the same  */
4583306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        /* scale as that of coefficients (W1,...W7) */
4593306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        /* since blk[4] won't multiply with Wi.     */
4603306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        /* see detail diagram in design document.   */
4613306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
4623306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        r2 = tmpBLK32[6+(i<<3)];
4633306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        r3 = tmpBLK32[2+(i<<3)];
4643306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        r4 = tmpBLK32[1+(i<<3)];
4653306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        r5 = tmpBLK32[7+(i<<3)];
4663306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        r6 = tmpBLK32[5+(i<<3)];
4673306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        r7 = tmpBLK32[3+(i<<3)];
4683306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
4693306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        if (!(r1 | r2 | r3 | r4 | r5 | r6 | r7))
4703306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        {
4713306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* shortcut */
4723306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* execute if values of F(1,v) to F(7,v) in a row*/
4733306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* are all zeros */
4743306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
4753306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* output of row IDCT scaled by 8 */
4763306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            a = (tmpBLK32[0+(i<<3)] + 32) >> 6;
4773306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            blk[0+(i<<3)] = a;
4783306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            blk[1+(i<<3)] = a;
4793306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            blk[2+(i<<3)] = a;
4803306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            blk[3+(i<<3)] = a;
4813306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            blk[4+(i<<3)] = a;
4823306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            blk[5+(i<<3)] = a;
4833306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            blk[6+(i<<3)] = a;
4843306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            blk[7+(i<<3)] = a;
4853306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
4863306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        }
4873306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
4883306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        else
4893306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        {
4903306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* for proper rounding in the fourth stage */
4913306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r0 = (((int32)tmpBLK32[0+(i<<3)]) << 8) + 8192;
4923306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
4933306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* first stage */
4943306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
4953306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r8 = W7 * (r4 + r5) + 4;
4963306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r4 = (r8 + (W1 - W7) * r4) >> 3;
4973306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r5 = (r8 - (W1 + W7) * r5) >> 3;
4983306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
4993306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r8 = W3 * (r6 + r7) + 4;
5003306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r6 = (r8 - (W3 - W5) * r6) >> 3;
5013306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r7 = (r8 - (W3 + W5) * r7) >> 3;
5023306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
5033306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* second stage */
5043306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r8 = r0 + r1;
5053306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r0 -= r1;
5063306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
5073306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r1 = W6 * (r3 + r2) + 4;
5083306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r2 = (r1 - (W2 + W6) * r2) >> 3;
5093306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r3 = (r1 + (W2 - W6) * r3) >> 3;
5103306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
5113306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r1 = r4 + r6;
5123306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r4 -= r6;
5133306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r6 = r5 + r7;
5143306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r5 -= r7;
5153306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
5163306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* third stage */
5173306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r7 = r8 + r3;
5183306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r8 -= r3;
5193306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r3 = r0 + r2;
5203306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r0 -= r2;
5213306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r2 = (181 * (r4 + r5) + 128) >> 8;    /* rounding */
5223306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            r4 = (181 * (r4 - r5) + 128) >> 8;
5233306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
5243306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* fourth stage */
5253306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* net shift of this function is <<3 after the    */
5263306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* following shift operation, it makes output of  */
5273306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* row IDCT scaled by 8 to retain 3 bits precision*/
5283306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            blk[0+(i<<3)] = (r7 + r1) >> 14;
5293306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            blk[1+(i<<3)] = (r3 + r2) >> 14;
5303306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            blk[2+(i<<3)] = (r0 + r4) >> 14;
5313306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            blk[3+(i<<3)] = (r8 + r6) >> 14;
5323306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            blk[4+(i<<3)] = (r8 - r6) >> 14;
5333306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            blk[5+(i<<3)] = (r0 - r4) >> 14;
5343306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            blk[6+(i<<3)] = (r3 - r2) >> 14;
5353306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            blk[7+(i<<3)] = (r7 - r1) >> 14;
5363306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        }
5373306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        /*  add with prediction ,  08/03/05 */
5383306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        res = (*pred++ + block[0+(i<<3)]);
5393306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        CLIP_RESULT(res);
5403306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        *dst++ = res;
5413306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        res = (*pred++ + block[1+(i<<3)]);
5423306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        CLIP_RESULT(res);
5433306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        *dst++ = res;
5443306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        res = (*pred++ + block[2+(i<<3)]);
5453306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        CLIP_RESULT(res);
5463306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        *dst++ = res;
5473306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        res = (*pred++ + block[3+(i<<3)]);
5483306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        CLIP_RESULT(res);
5493306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        *dst++ = res;
5503306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        res = (*pred++ + block[4+(i<<3)]);
5513306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        CLIP_RESULT(res);
5523306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        *dst++ = res;
5533306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        res = (*pred++ + block[5+(i<<3)]);
5543306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        CLIP_RESULT(res);
5553306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        *dst++ = res;
5563306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        res = (*pred++ + block[6+(i<<3)]);
5573306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        CLIP_RESULT(res);
5583306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        *dst++ = res;
5593306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        res = (*pred++ + block[7+(i<<3)]);
5603306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        CLIP_RESULT(res);
5613306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        *dst++ = res;
5623306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
5633306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        pred += 8;
5643306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        dst += (width - 8);
5653306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    }
5663306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
5673306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
5683306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
5693306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    /*----------------------------------------------------------------------------
5703306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    ; Return nothing or data or data pointer
5713306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    ----------------------------------------------------------------------------*/
5723306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    return;
5733306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong}
5743306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
5753306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong#endif
5763306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong/*----------------------------------------------------------------------------
5773306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong; End Function: idct
5783306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong----------------------------------------------------------------------------*/
5793306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
580