159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/* ------------------------------------------------------------------
259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * Copyright (C) 1998-2009 PacketVideo
359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong *
459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * Licensed under the Apache License, Version 2.0 (the "License");
559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * you may not use this file except in compliance with the License.
659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * You may obtain a copy of the License at
759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong *
859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong *      http://www.apache.org/licenses/LICENSE-2.0
959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong *
1059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * Unless required by applicable law or agreed to in writing, software
1159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * distributed under the License is distributed on an "AS IS" BASIS,
1259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
1359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * express or implied.
1459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * See the License for the specific language governing permissions
1559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * and limitations under the License.
1659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * -------------------------------------------------------------------
1759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong */
1859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/*
1959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
2059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong------------------------------------------------------------------------------
2159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong REVISION HISTORY
2259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong Who:   Date: July/2001
2359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong Description:   1. Optimized BlockIDCT bitmap checking.
2459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                2. Rearranged functions.
2559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                3. Do column IDCT first, then row IDCT.
2659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                4. Combine motion comp and IDCT, require
2759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                   two sets of row IDCTs one for INTRA
2859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                   and one for INTER.
2959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                5. Add AAN IDCT
3059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
3159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong Who:   Date: 8/16/01
               1. Increase the input precision to 8 bits, i.e. change RDCTBITS
                  to 11. Had to comment out all in-line assembly since 16-bit
                  multiplication doesn't work. Tried to use a different precision
                  with 32-bit multiplication, but that work was not finished. It
                  turns out that without in-line assembly the performance doesn't
                  change much (only 1%).
3759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong Who:   Date: 9/04/05
3859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                1. Replace AAN IDCT with Chen's IDCT to accommodate 16 bit data type.
3959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
4059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong*/
4159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#include "mp4def.h"
4259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#include "mp4enc_lib.h"
4359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#include "mp4lib_int.h"
4459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#include "dct.h"
4559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
/*
 * Pixel clipping helpers.
 *
 * These macros expand to plain statements rather than do { } while (0).
 * That shape is kept so that existing call sites, written with or without a
 * trailing semicolon, continue to compile.  As a consequence they must not be
 * used as the unbraced body of an if/else.
 *
 * The clamp idiom `limit & ~(v >> 31)` yields 0 for a negative v and `limit`
 * for a too-large v; it assumes a 32-bit v and an arithmetic right shift of
 * negative values.
 */

/* Adds the caller's `tmp` to the pixel at *rec, clamps the sum to 0..mask,
 * stores it back and advances rec.  Uses the identifiers `tmp`, `rec` and
 * `mask` from the expansion site. */
#define ADD_CLIP    { \
        tmp = *rec + tmp; \
        if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31)); \
        *rec++ = tmp; \
        }

/* Clamps the caller's `tmp` to 0..mask, stores it at *rec and advances rec. */
#define INTRA_CLIP  { \
        if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31)); \
        *rec++ = tmp; \
        }


/* Clamps the int lvalue x to 0..255 in place.  The argument is parenthesised
 * so that an expression argument is cast and shifted as a whole. */
#define CLIP_RESULT(x)      if ((UInt)(x) > 0xFF) { (x) = 0xFF & (~((x) >> 31)); }

/* Add byte 0/1/2/3 (counting from the least significant byte) of the caller's
 * `pred_word` to x, then clamp x to 0..255.  The byte is converted to int
 * before the addition so a negative x is never routed through unsigned
 * arithmetic. */
#define ADD_AND_CLIP1(x)    (x) += (int)(pred_word & 0xFF); CLIP_RESULT(x);
#define ADD_AND_CLIP2(x)    (x) += (int)((pred_word >> 8) & 0xFF); CLIP_RESULT(x);
#define ADD_AND_CLIP3(x)    (x) += (int)((pred_word >> 16) & 0xFF); CLIP_RESULT(x);
#define ADD_AND_CLIP4(x)    (x) += (int)((pred_word >> 24) & 0xFF); CLIP_RESULT(x);
6359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
6459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
6559f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_col0(Short *blk)
6659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
6759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    OSCL_UNUSED_ARG(blk);
6859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
6959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return;
7059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
7159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
7259f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_col1(Short *blk)
7359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
7459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[0] = blk[8] = blk[16] = blk[24] = blk[32] = blk[40] = blk[48] = blk[56] =
7559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                                              blk[0] << 3;
7659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ;
7759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
7859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
7959f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_col2(Short *blk)
8059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
8159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 x0, x1, x3, x5, x7;//, x8;
8259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
8359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x1 = blk[8];
8459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x0 = ((int32)blk[0] << 11) + 128;
8559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* both upper and lower*/
8659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
8759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x7 = W7 * x1;
8859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x1 = W1 * x1;
8959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
9059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x3 = x7;
9159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x5 = (181 * (x1 - x7) + 128) >> 8;
9259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x7 = (181 * (x1 + x7) + 128) >> 8;
9359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
9459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[0] = (x0 + x1) >> 8;
9559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[8] = (x0 + x7) >> 8;
9659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[16] = (x0 + x5) >> 8;
9759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[24] = (x0 + x3) >> 8;
9859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[56] = (x0 - x1) >> 8;
9959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[48] = (x0 - x7) >> 8;
10059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[40] = (x0 - x5) >> 8;
10159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[32] = (x0 - x3) >> 8;
10259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ;
10359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
10459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
10559f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_col3(Short *blk)
10659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
10759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;
10859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
10959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x2 = blk[16];
11059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x1 = blk[8];
11159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x0 = ((int32)blk[0] << 11) + 128;
11259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
11359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x4 = x0;
11459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x6 = W6 * x2;
11559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x2 = W2 * x2;
11659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x8 = x0 - x2;
11759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x0 += x2;
11859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x2 = x8;
11959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x8 = x4 - x6;
12059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x4 += x6;
12159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x6 = x8;
12259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
12359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x7 = W7 * x1;
12459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x1 = W1 * x1;
12559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x3 = x7;
12659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x5 = (181 * (x1 - x7) + 128) >> 8;
12759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x7 = (181 * (x1 + x7) + 128) >> 8;
12859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
12959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[0] = (x0 + x1) >> 8;
13059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[8] = (x4 + x7) >> 8;
13159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[16] = (x6 + x5) >> 8;
13259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[24] = (x2 + x3) >> 8;
13359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[56] = (x0 - x1) >> 8;
13459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[48] = (x4 - x7) >> 8;
13559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[40] = (x6 - x5) >> 8;
13659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[32] = (x2 - x3) >> 8;
13759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ;
13859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
13959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
14059f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_col4(Short *blk)
14159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
14259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;
14359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x2 = blk[16];
14459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x1 = blk[8];
14559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x3 = blk[24];
14659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x0 = ((int32)blk[0] << 11) + 128;
14759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
14859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x4 = x0;
14959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x6 = W6 * x2;
15059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x2 = W2 * x2;
15159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x8 = x0 - x2;
15259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x0 += x2;
15359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x2 = x8;
15459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x8 = x4 - x6;
15559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x4 += x6;
15659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x6 = x8;
15759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
15859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x7 = W7 * x1;
15959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x1 = W1 * x1;
16059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x5 = W3 * x3;
16159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x3 = -W5 * x3;
16259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x8 = x1 - x5;
16359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x1 += x5;
16459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x5 = x8;
16559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x8 = x7 - x3;
16659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x3 += x7;
16759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x7 = (181 * (x5 + x8) + 128) >> 8;
16859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x5 = (181 * (x5 - x8) + 128) >> 8;
16959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
17059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
17159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[0] = (x0 + x1) >> 8;
17259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[8] = (x4 + x7) >> 8;
17359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[16] = (x6 + x5) >> 8;
17459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[24] = (x2 + x3) >> 8;
17559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[56] = (x0 - x1) >> 8;
17659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[48] = (x4 - x7) >> 8;
17759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[40] = (x6 - x5) >> 8;
17859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[32] = (x2 - x3) >> 8;
17959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ;
18059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
18159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
18259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#ifndef SMALL_DCT
18359f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_col0x40(Short *blk)
18459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
18559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 x1, x3, x5, x7;//, x8;
18659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
18759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x1 = blk[8];
18859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* both upper and lower*/
18959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
19059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x7 = W7 * x1;
19159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x1 = W1 * x1;
19259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
19359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x3 = x7;
19459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x5 = (181 * (x1 - x7) + 128) >> 8;
19559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x7 = (181 * (x1 + x7) + 128) >> 8;
19659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
19759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[0] = (128 + x1) >> 8;
19859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[8] = (128 + x7) >> 8;
19959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[16] = (128 + x5) >> 8;
20059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[24] = (128 + x3) >> 8;
20159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[56] = (128 - x1) >> 8;
20259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[48] = (128 - x7) >> 8;
20359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[40] = (128 - x5) >> 8;
20459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[32] = (128 - x3) >> 8;
20559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
20659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ;
20759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
20859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
20959f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_col0x20(Short *blk)
21059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
21159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 x0, x2, x4, x6;
21259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
21359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x2 = blk[16];
21459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x6 = W6 * x2;
21559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x2 = W2 * x2;
21659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x0 = 128 + x2;
21759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x2 = 128 - x2;
21859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x4 = 128 + x6;
21959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x6 = 128 - x6;
22059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
22159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[0] = (x0) >> 8;
22259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[56] = (x0) >> 8;
22359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[8] = (x4) >> 8;
22459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[48] = (x4) >> 8;
22559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[16] = (x6) >> 8;
22659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[40] = (x6) >> 8;
22759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[24] = (x2) >> 8;
22859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[32] = (x2) >> 8;
22959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
23059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ;
23159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
23259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
23359f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_col0x10(Short *blk)
23459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
23559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 x1, x3, x5,  x7;
23659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
23759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x3 = blk[24];
23859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x1 = W3 * x3;
23959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x3 = W5 * x3;
24059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
24159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x7 = (181 * (x3 - x1) + 128) >> 8;
24259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x5 = (-181 * (x1 + x3) + 128) >> 8;
24359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
24459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
24559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[0] = (128 + x1) >> 8;
24659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[8] = (128 + x7) >> 8;
24759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[16] = (128 + x5) >> 8;
24859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[24] = (128 - x3) >> 8;
24959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[56] = (128 - x1) >> 8;
25059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[48] = (128 - x7) >> 8;
25159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[40] = (128 - x5) >> 8;
25259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[32] = (128 + x3) >> 8;
25359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
25459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ;
25559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
25659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
25759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#endif /* SMALL_DCT */
25859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
25959f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_col(Short *blk)
26059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
26159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;
26259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
26359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x1 = (int32)blk[32] << 11;
26459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x2 = blk[48];
26559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x3 = blk[16];
26659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x4 = blk[8];
26759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x5 = blk[56];
26859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x6 = blk[40];
26959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x7 = blk[24];
27059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x0 = ((int32)blk[0] << 11) + 128;
27159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
27259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* first stage */
27359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x8 = W7 * (x4 + x5);
27459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x4 = x8 + (W1 - W7) * x4;
27559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x5 = x8 - (W1 + W7) * x5;
27659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x8 = W3 * (x6 + x7);
27759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x6 = x8 - (W3 - W5) * x6;
27859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x7 = x8 - (W3 + W5) * x7;
27959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
28059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* second stage */
28159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x8 = x0 + x1;
28259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x0 -= x1;
28359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x1 = W6 * (x3 + x2);
28459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x2 = x1 - (W2 + W6) * x2;
28559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x3 = x1 + (W2 - W6) * x3;
28659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x1 = x4 + x6;
28759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x4 -= x6;
28859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x6 = x5 + x7;
28959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x5 -= x7;
29059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
29159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* third stage */
29259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x7 = x8 + x3;
29359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x8 -= x3;
29459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x3 = x0 + x2;
29559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x0 -= x2;
29659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x2 = (181 * (x4 + x5) + 128) >> 8;
29759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    x4 = (181 * (x4 - x5) + 128) >> 8;
29859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
29959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* fourth stage */
30059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[0]    = (x7 + x1) >> 8;
30159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[8] = (x3 + x2) >> 8;
30259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[16] = (x0 + x4) >> 8;
30359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[24] = (x8 + x6) >> 8;
30459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[32] = (x8 - x6) >> 8;
30559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[40] = (x0 - x4) >> 8;
30659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[48] = (x3 - x2) >> 8;
30759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk[56] = (x7 - x1) >> 8;
30859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
30959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ;
31059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
31159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
31259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/* This function should not be called at all ****/
31359f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_row0Inter(Short *srce, UChar *rec, Int lx)
31459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
31559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    OSCL_UNUSED_ARG(srce);
31659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
31759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    OSCL_UNUSED_ARG(rec);
31859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
31959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    OSCL_UNUSED_ARG(lx);
32059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
32159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return;
32259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
32359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
32459f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_row1Inter(Short *blk, UChar *rec, Int lx)
32559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
32659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int tmp;
32759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int i = 8;
32859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    uint32 pred_word, dst_word;
32959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int res, res2;
33059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
33159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* preset the offset, such that we can take advantage pre-offset addressing mode   */
33259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    rec -= lx;
33359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk -= 8;
33459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
33559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    while (i--)
33659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
33759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp = (*(blk += 8) + 32) >> 6;
33859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *blk = 0;
33959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
34059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pred_word = *((uint32*)(rec += lx)); /* read 4 bytes from pred */
34159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = tmp + (pred_word & 0xFF);
34259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res);
34359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = tmp + ((pred_word >> 8) & 0xFF);
34459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res2);
34559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
34659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = tmp + ((pred_word >> 16) & 0xFF);
34759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res);
34859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
34959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = tmp + ((pred_word >> 24) & 0xFF);
35059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res);
35159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
35259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)rec) = dst_word; /* save 4 bytes to dst */
35359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
35459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pred_word = *((uint32*)(rec + 4)); /* read 4 bytes from pred */
35559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = tmp + (pred_word & 0xFF);
35659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res);
35759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = tmp + ((pred_word >> 8) & 0xFF);
35859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res2);
35959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
36059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = tmp + ((pred_word >> 16) & 0xFF);
36159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res);
36259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
36359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = tmp + ((pred_word >> 24) & 0xFF);
36459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res);
36559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
36659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec + 4)) = dst_word; /* save 4 bytes to dst */
36759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
36859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return;
36959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
37059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
37159f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_row2Inter(Short *blk, UChar *rec, Int lx)
37259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
37359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 x0, x1, x2, x4, x5;
37459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int i = 8;
37559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    uint32 pred_word, dst_word;
37659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int res, res2;
37759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
37859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* preset the offset, such that we can take advantage pre-offset addressing mode   */
37959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    rec -= lx;
38059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk -= 8;
38159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
38259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    while (i--)
38359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
38459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* shortcut */
38559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 = blk[9];
38659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[9] = 0;
38759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x0 = ((*(blk += 8)) << 8) + 8192;
38859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *blk = 0;  /* for proper rounding in the fourth stage */
38959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
39059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* first stage */
39159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x5 = (W7 * x4 + 4) >> 3;
39259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 = (W1 * x4 + 4) >> 3;
39359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
39459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* third stage */
39559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = (181 * (x4 + x5) + 128) >> 8;
39659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 = (181 * (x4 - x5) + 128) >> 8;
39759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
39859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* fourth stage */
39959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pred_word = *((uint32*)(rec += lx)); /* read 4 bytes from pred */
40059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x0 + x4) >> 14;
40159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP1(res);
40259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = (x0 + x2) >> 14;
40359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP2(res2);
40459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
40559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x0 + x1) >> 14;
40659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP3(res);
40759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
40859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x0 + x5) >> 14;
40959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP4(res);
41059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
41159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)rec) = dst_word; /* save 4 bytes to dst */
41259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
41359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pred_word = *((uint32*)(rec + 4)); /* read 4 bytes from pred */
41459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x0 - x5) >> 14;
41559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP1(res);
41659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = (x0 - x1) >> 14;
41759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP2(res2);
41859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
41959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x0 - x2) >> 14;
42059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP3(res);
42159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
42259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x0 - x4) >> 14;
42359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP4(res);
42459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
42559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec + 4)) = dst_word; /* save 4 bytes to dst */
42659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
42759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ;
42859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
42959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
43059f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_row3Inter(Short *blk, UChar *rec, Int lx)
43159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
43259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;
43359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int i = 8;
43459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    uint32 pred_word, dst_word;
43559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int res, res2;
43659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
43759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* preset the offset, such that we can take advantage pre-offset addressing mode   */
43859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    rec -= lx;
43959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk -= 8;
44059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
44159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    while (i--)
44259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
44359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = blk[10];
44459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[10] = 0;
44559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 = blk[9];
44659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[9] = 0;
44759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x0 = ((*(blk += 8)) << 8) + 8192;
44859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *blk = 0;  /* for proper rounding in the fourth stage */
44959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* both upper and lower*/
45059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* both x2orx6 and x0orx4 */
45159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
45259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 = x0;
45359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x6 = (W6 * x2 + 4) >> 3;
45459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = (W2 * x2 + 4) >> 3;
45559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x8 = x0 - x2;
45659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x0 += x2;
45759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = x8;
45859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x8 = x4 - x6;
45959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 += x6;
46059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x6 = x8;
46159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
46259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x7 = (W7 * x1 + 4) >> 3;
46359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 = (W1 * x1 + 4) >> 3;
46459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x3 = x7;
46559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x5 = (181 * (x1 - x7) + 128) >> 8;
46659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x7 = (181 * (x1 + x7) + 128) >> 8;
46759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
46859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pred_word = *((uint32*)(rec += lx)); /* read 4 bytes from pred */
46959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x0 + x1) >> 14;
47059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP1(res);
47159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = (x4 + x7) >> 14;
47259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP2(res2);
47359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
47459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x6 + x5) >> 14;
47559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP3(res);
47659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
47759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x2 + x3) >> 14;
47859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP4(res);
47959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
48059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)rec) = dst_word; /* save 4 bytes to dst */
48159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
48259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pred_word = *((uint32*)(rec + 4)); /* read 4 bytes from pred */
48359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x2 - x3) >> 14;
48459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP1(res);
48559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = (x6 - x5) >> 14;
48659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP2(res2);
48759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
48859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x4 - x7) >> 14;
48959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP3(res);
49059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
49159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x0 - x1) >> 14;
49259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP4(res);
49359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
49459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec + 4)) = dst_word; /* save 4 bytes to dst */
49559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
49659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
49759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ;
49859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
49959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
50059f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_row4Inter(Short *blk, UChar *rec, Int lx)
50159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
50259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;
50359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int i = 8;
50459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    uint32 pred_word, dst_word;
50559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int res, res2;
50659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
50759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* preset the offset, such that we can take advantage pre-offset addressing mode   */
50859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    rec -= lx;
50959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk -= 8;
51059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
51159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    while (i--)
51259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
51359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = blk[10];
51459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[10] = 0;
51559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 = blk[9];
51659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[9] = 0;
51759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x3 = blk[11];
51859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[11] = 0;
51959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x0 = ((*(blk += 8)) << 8) + 8192;
52059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *blk = 0;   /* for proper rounding in the fourth stage */
52159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
52259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 = x0;
52359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x6 = (W6 * x2 + 4) >> 3;
52459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = (W2 * x2 + 4) >> 3;
52559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x8 = x0 - x2;
52659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x0 += x2;
52759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = x8;
52859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x8 = x4 - x6;
52959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 += x6;
53059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x6 = x8;
53159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
53259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x7 = (W7 * x1 + 4) >> 3;
53359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 = (W1 * x1 + 4) >> 3;
53459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x5 = (W3 * x3 + 4) >> 3;
53559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x3 = (- W5 * x3 + 4) >> 3;
53659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x8 = x1 - x5;
53759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 += x5;
53859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x5 = x8;
53959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x8 = x7 - x3;
54059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x3 += x7;
54159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x7 = (181 * (x5 + x8) + 128) >> 8;
54259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x5 = (181 * (x5 - x8) + 128) >> 8;
54359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
54459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pred_word = *((uint32*)(rec += lx)); /* read 4 bytes from pred */
54559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x0 + x1) >> 14;
54659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP1(res);
54759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = (x4 + x7) >> 14;
54859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP2(res2);
54959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
55059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x6 + x5) >> 14;
55159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP3(res);
55259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
55359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x2 + x3) >> 14;
55459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP4(res);
55559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
55659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)rec) = dst_word; /* save 4 bytes to dst */
55759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
55859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pred_word = *((uint32*)(rec + 4)); /* read 4 bytes from pred */
55959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x2 - x3) >> 14;
56059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP1(res);
56159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = (x6 - x5) >> 14;
56259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP2(res2);
56359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
56459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x4 - x7) >> 14;
56559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP3(res);
56659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
56759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x0 - x1) >> 14;
56859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP4(res);
56959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
57059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec + 4)) = dst_word; /* save 4 bytes to dst */
57159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
57259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ;
57359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
57459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
57559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#ifndef SMALL_DCT
57659f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_row0x40Inter(Short *blk, UChar *rec, Int lx)
57759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
57859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 x1, x2, x4, x5;
57959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int i = 8;
58059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    uint32 pred_word, dst_word;
58159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int res, res2;
58259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
58359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* preset the offset, such that we can take advantage pre-offset addressing mode   */
58459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    rec -= lx;
58559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
58659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    while (i--)
58759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
58859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* shortcut */
58959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 = blk[1];
59059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[1] = 0;
59159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk += 8;  /* for proper rounding in the fourth stage */
59259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
59359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* first stage */
59459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x5 = (W7 * x4 + 4) >> 3;
59559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 = (W1 * x4 + 4) >> 3;
59659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
59759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* third stage */
59859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = (181 * (x4 + x5) + 128) >> 8;
59959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 = (181 * (x4 - x5) + 128) >> 8;
60059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
60159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* fourth stage */
60259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pred_word = *((uint32*)(rec += lx)); /* read 4 bytes from pred */
60359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (8192 + x4) >> 14;
60459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP1(res);
60559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = (8192 + x2) >> 14;
60659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP2(res2);
60759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
60859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (8192 + x1) >> 14;
60959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP3(res);
61059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
61159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (8192 + x5) >> 14;
61259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP4(res);
61359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
61459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)rec) = dst_word; /* save 4 bytes to dst */
61559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
61659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pred_word = *((uint32*)(rec + 4)); /* read 4 bytes from pred */
61759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (8192 - x5) >> 14;
61859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP1(res);
61959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = (8192 - x1) >> 14;
62059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP2(res2);
62159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
62259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (8192 - x2) >> 14;
62359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP3(res);
62459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
62559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (8192 - x4) >> 14;
62659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP4(res);
62759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
62859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec + 4)) = dst_word; /* save 4 bytes to dst */
62959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
63059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ;
63159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
63259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
63359f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_row0x20Inter(Short *blk, UChar *rec, Int lx)
63459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
63559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 x0, x2, x4, x6;
63659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int i = 8;
63759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    uint32 pred_word, dst_word;
63859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int res, res2;
63959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
64059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* preset the offset, such that we can take advantage pre-offset addressing mode   */
64159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    rec -= lx;
64259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
64359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    while (i--)
64459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
64559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = blk[2];
64659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[2] = 0;
64759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk += 8; /* for proper rounding in the fourth stage */
64859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* both upper and lower*/
64959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* both x2orx6 and x0orx4 */
65059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x6 = (W6 * x2 + 4) >> 3;
65159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = (W2 * x2 + 4) >> 3;
65259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x0 = 8192 + x2;
65359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = 8192 - x2;
65459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 = 8192 + x6;
65559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x6 = 8192 - x6;
65659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
65759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pred_word = *((uint32*)(rec += lx)); /* read 4 bytes from pred */
65859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x0) >> 14;
65959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP1(res);
66059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = (x4) >> 14;
66159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP2(res2);
66259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
66359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x6) >> 14;
66459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP3(res);
66559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
66659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x2) >> 14;
66759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP4(res);
66859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
66959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)rec) = dst_word; /* save 4 bytes to dst */
67059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
67159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pred_word = *((uint32*)(rec + 4)); /* read 4 bytes from pred */
67259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x2) >> 14;
67359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP1(res);
67459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = (x6) >> 14;
67559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP2(res2);
67659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
67759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x4) >> 14;
67859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP3(res);
67959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
68059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x0) >> 14;
68159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP4(res);
68259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
68359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec + 4)) = dst_word; /* save 4 bytes to dst */
68459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
68559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
68659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ;
68759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
68859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
68959f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_row0x10Inter(Short *blk, UChar *rec, Int lx)
69059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
69159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 x1, x3, x5, x7;
69259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int i = 8;
69359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    uint32 pred_word, dst_word;
69459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int res, res2;
69559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
69659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* preset the offset, such that we can take advantage pre-offset addressing mode   */
69759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    rec -= lx;
69859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
69959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    while (i--)
70059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
70159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x3 = blk[3];
70259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[3] = 0;
70359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk += 8;
70459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
70559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 = (W3 * x3 + 4) >> 3;
70659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x3 = (-W5 * x3 + 4) >> 3;
70759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
70859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x7 = (-181 * (x3 + x1) + 128) >> 8;
70959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x5 = (181 * (x3 - x1) + 128) >> 8;
71059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
71159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pred_word = *((uint32*)(rec += lx)); /* read 4 bytes from pred */
71259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (8192 + x1) >> 14;
71359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP1(res);
71459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = (8192 + x7) >> 14;
71559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP2(res2);
71659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
71759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (8192 + x5) >> 14;
71859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP3(res);
71959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
72059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (8192 + x3) >> 14;
72159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP4(res);
72259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
72359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)rec) = dst_word; /* save 4 bytes to dst */
72459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
72559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pred_word = *((uint32*)(rec + 4)); /* read 4 bytes from pred */
72659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (8192 - x3) >> 14;
72759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP1(res);
72859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = (8192 - x5) >> 14;
72959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP2(res2);
73059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
73159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (8192 - x7) >> 14;
73259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP3(res);
73359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
73459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (8192 - x1) >> 14;
73559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP4(res);
73659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
73759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec + 4)) = dst_word; /* save 4 bytes to dst */
73859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
73959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ;
74059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
74159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
74259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#endif /* SMALL_DCT */
74359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
74459f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_rowInter(Short *blk, UChar *rec, Int lx)
74559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
74659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;
74759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int i = 8;
74859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    uint32 pred_word, dst_word;
74959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int res, res2;
75059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
75159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* preset the offset, such that we can take advantage pre-offset addressing mode   */
75259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    rec -= lx;
75359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk -= 8;
75459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
75559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    while (i--)
75659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
75759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 = (int32)blk[12] << 8;
75859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[12] = 0;
75959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = blk[14];
76059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[14] = 0;
76159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x3 = blk[10];
76259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[10] = 0;
76359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 = blk[9];
76459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[9] = 0;
76559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x5 = blk[15];
76659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[15] = 0;
76759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x6 = blk[13];
76859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[13] = 0;
76959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x7 = blk[11];
77059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[11] = 0;
77159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x0 = ((*(blk += 8)) << 8) + 8192;
77259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *blk = 0;   /* for proper rounding in the fourth stage */
77359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
77459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* first stage */
77559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x8 = W7 * (x4 + x5) + 4;
77659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 = (x8 + (W1 - W7) * x4) >> 3;
77759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x5 = (x8 - (W1 + W7) * x5) >> 3;
77859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x8 = W3 * (x6 + x7) + 4;
77959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x6 = (x8 - (W3 - W5) * x6) >> 3;
78059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x7 = (x8 - (W3 + W5) * x7) >> 3;
78159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
78259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* second stage */
78359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x8 = x0 + x1;
78459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x0 -= x1;
78559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 = W6 * (x3 + x2) + 4;
78659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = (x1 - (W2 + W6) * x2) >> 3;
78759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x3 = (x1 + (W2 - W6) * x3) >> 3;
78859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 = x4 + x6;
78959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 -= x6;
79059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x6 = x5 + x7;
79159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x5 -= x7;
79259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
79359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* third stage */
79459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x7 = x8 + x3;
79559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x8 -= x3;
79659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x3 = x0 + x2;
79759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x0 -= x2;
79859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = (181 * (x4 + x5) + 128) >> 8;
79959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 = (181 * (x4 - x5) + 128) >> 8;
80059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
80159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* fourth stage */
80259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pred_word = *((uint32*)(rec += lx)); /* read 4 bytes from pred */
80359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
80459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x7 + x1) >> 14;
80559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP1(res);
80659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = (x3 + x2) >> 14;
80759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP2(res2);
80859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
80959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x0 + x4) >> 14;
81059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP3(res);
81159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
81259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x8 + x6) >> 14;
81359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP4(res);
81459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
81559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)rec) = dst_word; /* save 4 bytes to dst */
81659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
81759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pred_word = *((uint32*)(rec + 4)); /* read 4 bytes from pred */
81859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
81959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x8 - x6) >> 14;
82059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP1(res);
82159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = (x0 - x4) >> 14;
82259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP2(res2);
82359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
82459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x3 - x2) >> 14;
82559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP3(res);
82659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
82759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x7 - x1) >> 14;
82859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP4(res);
82959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
83059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec + 4)) = dst_word; /* save 4 bytes to dst */
83159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
83259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return;
83359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
83459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
83559f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_row0Intra(Short *srce, UChar *rec, Int lx)
83659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
83759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    OSCL_UNUSED_ARG(srce);
83859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
83959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    OSCL_UNUSED_ARG(rec);
84059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
84159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    OSCL_UNUSED_ARG(lx);
84259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
84359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return;
84459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
84559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
84659f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_row1Intra(Short *blk, UChar *rec, Int lx)
84759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
84859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 tmp;
84959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int i = 8;
85059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
85159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    rec -= lx;
85259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk -= 8;
85359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    while (i--)
85459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
85559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp = ((*(blk += 8) + 32) >> 6);
85659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *blk = 0;
85759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(tmp)
85859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
85959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp |= (tmp << 8);
86059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp |= (tmp << 16);
86159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec += lx)) = tmp;
86259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec + 4)) = tmp;
86359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
86459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return;
86559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
86659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
86759f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_row2Intra(Short *blk, UChar *rec, Int lx)
86859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
86959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 x0, x1, x2, x4, x5;
87059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int res, res2;
87159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    uint32 dst_word;
87259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int i = 8;
87359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
87459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    rec -= lx;
87559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk -= 8;
87659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    while (i--)
87759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
87859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* shortcut */
87959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 = blk[9];
88059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[9] = 0;
88159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x0 = ((*(blk += 8)) << 8) + 8192;
88259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *blk = 0;   /* for proper rounding in the fourth stage */
88359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
88459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* first stage */
88559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x5 = (W7 * x4 + 4) >> 3;
88659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 = (W1 * x4 + 4) >> 3;
88759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
88859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* third stage */
88959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = (181 * (x4 + x5) + 128) >> 8;
89059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 = (181 * (x4 - x5) + 128) >> 8;
89159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
89259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* fourth stage */
89359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((x0 + x4) >> 14);
89459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
89559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = ((x0 + x2) >> 14);
89659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res2)
89759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
89859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((x0 + x1) >> 14);
89959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
90059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
90159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((x0 + x5) >> 14);
90259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
90359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
90459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec += lx)) = dst_word;
90559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
90659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((x0 - x5) >> 14);
90759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
90859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = ((x0 - x1) >> 14);
90959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res2)
91059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
91159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((x0 - x2) >> 14);
91259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
91359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
91459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((x0 - x4) >> 14);
91559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
91659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
91759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec + 4)) = dst_word;
91859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
91959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ;
92059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
92159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
92259f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_row3Intra(Short *blk, UChar *rec, Int lx)
92359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
92459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;
92559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int res, res2;
92659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    uint32 dst_word;
92759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int i = 8;
92859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
92959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    rec -= lx;
93059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk -= 8;
93159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    while (i--)
93259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
93359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = blk[10];
93459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[10] = 0;
93559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 = blk[9];
93659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[9] = 0;
93759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x0 = ((*(blk += 8)) << 8) + 8192;
93859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *blk = 0;/* for proper rounding in the fourth stage */
93959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* both upper and lower*/
94059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* both x2orx6 and x0orx4 */
94159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
94259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 = x0;
94359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x6 = (W6 * x2 + 4) >> 3;
94459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = (W2 * x2 + 4) >> 3;
94559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x8 = x0 - x2;
94659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x0 += x2;
94759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = x8;
94859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x8 = x4 - x6;
94959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 += x6;
95059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x6 = x8;
95159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
95259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x7 = (W7 * x1 + 4) >> 3;
95359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 = (W1 * x1 + 4) >> 3;
95459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x3 = x7;
95559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x5 = (181 * (x1 - x7) + 128) >> 8;
95659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x7 = (181 * (x1 + x7) + 128) >> 8;
95759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
95859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((x0 + x1) >> 14);
95959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
96059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = ((x4 + x7) >> 14);
96159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res2)
96259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
96359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((x6 + x5) >> 14);
96459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
96559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
96659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((x2 + x3) >> 14);
96759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
96859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
96959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec += lx)) = dst_word;
97059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
97159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((x2 - x3) >> 14);
97259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
97359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = ((x6 - x5) >> 14);
97459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res2)
97559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
97659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((x4 - x7) >> 14);
97759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
97859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
97959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((x0 - x1) >> 14);
98059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
98159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
98259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec + 4)) = dst_word;
98359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
98459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
98559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ;
98659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
98759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
98859f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_row4Intra(Short *blk, UChar *rec, Int lx)
98959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
99059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;
99159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int res, res2;
99259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    uint32 dst_word;
99359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int i = 8;
99459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
99559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    rec -= lx;
99659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk -= 8;
99759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    while (i--)
99859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
99959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = blk[10];
100059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[10] = 0;
100159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 = blk[9];
100259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[9] = 0;
100359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x3 = blk[11];
100459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[11] = 0;
100559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x0 = ((*(blk += 8)) << 8) + 8192;
100659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *blk = 0; /* for proper rounding in the fourth stage */
100759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
100859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 = x0;
100959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x6 = (W6 * x2 + 4) >> 3;
101059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = (W2 * x2 + 4) >> 3;
101159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x8 = x0 - x2;
101259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x0 += x2;
101359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = x8;
101459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x8 = x4 - x6;
101559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 += x6;
101659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x6 = x8;
101759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
101859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x7 = (W7 * x1 + 4) >> 3;
101959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 = (W1 * x1 + 4) >> 3;
102059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x5 = (W3 * x3 + 4) >> 3;
102159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x3 = (- W5 * x3 + 4) >> 3;
102259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x8 = x1 - x5;
102359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 += x5;
102459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x5 = x8;
102559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x8 = x7 - x3;
102659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x3 += x7;
102759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x7 = (181 * (x5 + x8) + 128) >> 8;
102859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x5 = (181 * (x5 - x8) + 128) >> 8;
102959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
103059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((x0 + x1) >> 14);
103159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
103259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = ((x4 + x7) >> 14);
103359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res2)
103459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
103559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((x6 + x5) >> 14);
103659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
103759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
103859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((x2 + x3) >> 14);
103959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
104059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
104159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec += lx)) = dst_word;
104259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
104359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((x2 - x3) >> 14);
104459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
104559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = ((x6 - x5) >> 14);
104659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res2)
104759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
104859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((x4 - x7) >> 14);
104959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
105059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
105159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((x0 - x1) >> 14);
105259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
105359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
105459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec + 4)) = dst_word;
105559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
105659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
105759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ;
105859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
105959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
106059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#ifndef SMALL_DCT
106159f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_row0x40Intra(Short *blk, UChar *rec, Int lx)
106259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
106359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32  x1, x2, x4, x5;
106459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int res, res2;
106559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    uint32 dst_word;
106659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int i = 8;
106759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
106859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    rec -= lx;
106959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
107059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    while (i--)
107159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
107259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* shortcut */
107359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 = blk[1];
107459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[1] = 0;
107559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk += 8;
107659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
107759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* first stage */
107859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x5 = (W7 * x4 + 4) >> 3;
107959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 = (W1 * x4 + 4) >> 3;
108059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
108159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* third stage */
108259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = (181 * (x4 + x5) + 128) >> 8;
108359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 = (181 * (x4 - x5) + 128) >> 8;
108459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
108559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* fourth stage */
108659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((8192 + x4) >> 14);
108759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
108859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = ((8192 + x2) >> 14);
108959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res2)
109059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
109159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((8192 + x1) >> 14);
109259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
109359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
109459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((8192 + x5) >> 14);
109559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
109659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
109759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec += lx)) = dst_word;
109859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
109959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((8192 - x5) >> 14);
110059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
110159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = ((8192 - x1) >> 14);
110259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res2)
110359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
110459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((8192 - x2) >> 14);
110559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
110659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
110759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((8192 - x4) >> 14);
110859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
110959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
111059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec + 4)) = dst_word;
111159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
111259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
111359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ;
111459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
111559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
111659f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_row0x20Intra(Short *blk, UChar *rec, Int lx)
111759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
111859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 x0, x2, x4, x6;
111959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int res, res2;
112059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    uint32 dst_word;
112159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int i = 8;
112259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
112359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    rec -= lx;
112459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    while (i--)
112559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
112659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = blk[2];
112759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[2] = 0;
112859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk += 8;
112959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
113059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* both upper and lower*/
113159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* both x2orx6 and x0orx4 */
113259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x6 = (W6 * x2 + 4) >> 3;
113359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = (W2 * x2 + 4) >> 3;
113459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x0 = 8192 + x2;
113559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = 8192 - x2;
113659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 = 8192 + x6;
113759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x6 = 8192 - x6;
113859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
113959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((x0) >> 14);
114059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
114159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = ((x4) >> 14);
114259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res2)
114359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
114459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((x6) >> 14);
114559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
114659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
114759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((x2) >> 14);
114859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
114959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
115059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec += lx)) = dst_word;
115159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
115259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((x2) >> 14);
115359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
115459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = ((x6) >> 14);
115559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res2)
115659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
115759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((x4) >> 14);
115859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
115959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
116059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((x0) >> 14);
116159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
116259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
116359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec + 4)) = dst_word;
116459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
116559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
116659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ;
116759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
116859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
116959f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_row0x10Intra(Short *blk, UChar *rec, Int lx)
117059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
117159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 x1, x3, x5, x7;
117259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int res, res2;
117359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    uint32 dst_word;
117459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int i = 8;
117559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
117659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    rec -= lx;
117759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    while (i--)
117859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
117959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x3 = blk[3];
118059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[3] = 0 ;
118159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk += 8;
118259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
118359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 = (W3 * x3 + 4) >> 3;
118459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x3 = (W5 * x3 + 4) >> 3;
118559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
118659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x7 = (181 * (x3 - x1) + 128) >> 8;
118759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x5 = (-181 * (x1 + x3) + 128) >> 8;
118859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
118959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((8192 + x1) >> 14);
119059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
119159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = ((8192 + x7) >> 14);
119259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res2)
119359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
119459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((8192 + x5) >> 14);
119559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
119659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
119759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((8192 - x3) >> 14);
119859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
119959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
120059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec += lx)) = dst_word;
120159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
120259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((8192 + x3) >> 14);
120359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
120459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = ((8192 - x5) >> 14);
120559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res2)
120659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
120759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((8192 - x7) >> 14);
120859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
120959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
121059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((8192 - x1) >> 14);
121159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
121259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
121359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec + 4)) = dst_word;
121459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
121559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
121659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
121759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ;
121859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
121959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
122059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#endif /* SMALL_DCT */
122159f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_rowIntra(Short *blk, UChar *rec, Int lx)
122259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
122359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;
122459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int i = 8;
122559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int res, res2;
122659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    uint32 dst_word;
122759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
122859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk -= 8;
122959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    rec -= lx;
123059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
123159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    while (i--)
123259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
123359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 = (int32)blk[12] << 8;
123459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[12] = 0;
123559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = blk[14];
123659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[14] = 0;
123759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x3 = blk[10];
123859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[10] = 0;
123959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 = blk[9];
124059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[9] = 0;
124159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x5 = blk[15];
124259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[15] = 0;
124359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x6 = blk[13];
124459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[13] = 0;
124559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x7 = blk[11];
124659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[11] = 0;
124759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x0 = ((*(blk += 8)) << 8) + 8192;
124859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *blk = 0;  /* for proper rounding in the fourth stage */
124959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
125059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* first stage */
125159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x8 = W7 * (x4 + x5) + 4;
125259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 = (x8 + (W1 - W7) * x4) >> 3;
125359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x5 = (x8 - (W1 + W7) * x5) >> 3;
125459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x8 = W3 * (x6 + x7) + 4;
125559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x6 = (x8 - (W3 - W5) * x6) >> 3;
125659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x7 = (x8 - (W3 + W5) * x7) >> 3;
125759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
125859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* second stage */
125959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x8 = x0 + x1;
126059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x0 -= x1;
126159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 = W6 * (x3 + x2) + 4;
126259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = (x1 - (W2 + W6) * x2) >> 3;
126359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x3 = (x1 + (W2 - W6) * x3) >> 3;
126459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 = x4 + x6;
126559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 -= x6;
126659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x6 = x5 + x7;
126759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x5 -= x7;
126859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
126959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* third stage */
127059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x7 = x8 + x3;
127159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x8 -= x3;
127259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x3 = x0 + x2;
127359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x0 -= x2;
127459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = (181 * (x4 + x5) + 128) >> 8;
127559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 = (181 * (x4 - x5) + 128) >> 8;
127659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
127759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* fourth stage */
127859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((x7 + x1) >> 14);
127959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
128059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = ((x3 + x2) >> 14);
128159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res2)
128259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = res | (res2 << 8);
128359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((x0 + x4) >> 14);
128459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
128559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
128659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((x8 + x6) >> 14);
128759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
128859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
128959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec += lx)) = dst_word;
129059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
129159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((x8 - x6) >> 14);
129259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
129359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = ((x0 - x4) >> 14);
129459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res2)
129559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = res | (res2 << 8);
129659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((x3 - x2) >> 14);
129759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
129859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
129959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = ((x7 - x1) >> 14);
130059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res)
130159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
130259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec + 4)) = dst_word;
130359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
130459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return;
130559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
130659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
130759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
130859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/* This function should not be called at all ****/
130959f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_row0zmv(Short *srce, UChar *rec, UChar *pred, Int lx)
131059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
131159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    OSCL_UNUSED_ARG(srce);
131259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    OSCL_UNUSED_ARG(rec);
131359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    OSCL_UNUSED_ARG(pred);
131459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    OSCL_UNUSED_ARG(lx);
131559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
131659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return;
131759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
131859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
131959f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_row1zmv(Short *blk, UChar *rec, UChar *pred, Int lx)
132059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
132159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int tmp;
132259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int i = 8;
132359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    uint32 pred_word, dst_word;
132459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int res, res2;
132559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
132659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* preset the offset, such that we can take advantage pre-offset addressing mode   */
132759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    pred -= 16;
132859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    rec -= lx;
132959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk -= 8;
133059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
133159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    while (i--)
133259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
133359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp = (*(blk += 8) + 32) >> 6;
133459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *blk = 0;
133559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
133659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pred_word = *((uint32*)(pred += 16)); /* read 4 bytes from pred */
133759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = tmp + (pred_word & 0xFF);
133859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res);
133959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = tmp + ((pred_word >> 8) & 0xFF);
134059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res2);
134159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
134259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = tmp + ((pred_word >> 16) & 0xFF);
134359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res);
134459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
134559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = tmp + ((pred_word >> 24) & 0xFF);
134659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res);
134759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
134859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec += lx)) = dst_word; /* save 4 bytes to dst */
134959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
135059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pred_word = *((uint32*)(pred + 4)); /* read 4 bytes from pred */
135159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = tmp + (pred_word & 0xFF);
135259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res);
135359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = tmp + ((pred_word >> 8) & 0xFF);
135459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res2);
135559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
135659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = tmp + ((pred_word >> 16) & 0xFF);
135759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res);
135859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
135959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = tmp + ((pred_word >> 24) & 0xFF);
136059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        CLIP_RESULT(res);
136159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
136259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec + 4)) = dst_word; /* save 4 bytes to dst */
136359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
136459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return;
136559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
136659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
136759f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_row2zmv(Short *blk, UChar *rec, UChar *pred, Int lx)
136859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
136959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 x0, x1, x2, x4, x5;
137059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int i = 8;
137159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    uint32 pred_word, dst_word;
137259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int res, res2;
137359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
137459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* preset the offset, such that we can take advantage pre-offset addressing mode   */
137559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    rec -= lx;
137659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    pred -= 16;
137759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk -= 8;
137859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
137959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    while (i--)
138059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
138159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* shortcut */
138259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 = blk[9];
138359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[9] = 0;
138459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x0 = ((*(blk += 8)) << 8) + 8192;
138559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *blk = 0;  /* for proper rounding in the fourth stage */
138659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
138759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* first stage */
138859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x5 = (W7 * x4 + 4) >> 3;
138959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 = (W1 * x4 + 4) >> 3;
139059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
139159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* third stage */
139259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = (181 * (x4 + x5) + 128) >> 8;
139359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 = (181 * (x4 - x5) + 128) >> 8;
139459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
139559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* fourth stage */
139659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pred_word = *((uint32*)(pred += 16)); /* read 4 bytes from pred */
139759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x0 + x4) >> 14;
139859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP1(res);
139959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = (x0 + x2) >> 14;
140059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP2(res2);
140159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
140259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x0 + x1) >> 14;
140359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP3(res);
140459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
140559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x0 + x5) >> 14;
140659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP4(res);
140759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
140859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec += lx)) = dst_word; /* save 4 bytes to dst */
140959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
141059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pred_word = *((uint32*)(pred + 4)); /* read 4 bytes from pred */
141159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x0 - x5) >> 14;
141259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP1(res);
141359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = (x0 - x1) >> 14;
141459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP2(res2);
141559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
141659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x0 - x2) >> 14;
141759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP3(res);
141859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
141959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x0 - x4) >> 14;
142059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP4(res);
142159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
142259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec + 4)) = dst_word; /* save 4 bytes to dst */
142359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
142459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ;
142559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
142659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
142759f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_row3zmv(Short *blk, UChar *rec, UChar *pred, Int lx)
142859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
142959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;
143059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int i = 8;
143159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    uint32 pred_word, dst_word;
143259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int res, res2;
143359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
143459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* preset the offset, such that we can take advantage pre-offset addressing mode   */
143559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    rec -= lx;
143659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    pred -= 16;
143759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk -= 8;
143859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
143959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    while (i--)
144059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
144159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = blk[10];
144259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[10] = 0;
144359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 = blk[9];
144459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[9] = 0;
144559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x0 = ((*(blk += 8)) << 8) + 8192;
144659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *blk = 0;  /* for proper rounding in the fourth stage */
144759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* both upper and lower*/
144859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* both x2orx6 and x0orx4 */
144959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
145059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 = x0;
145159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x6 = (W6 * x2 + 4) >> 3;
145259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = (W2 * x2 + 4) >> 3;
145359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x8 = x0 - x2;
145459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x0 += x2;
145559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = x8;
145659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x8 = x4 - x6;
145759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 += x6;
145859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x6 = x8;
145959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
146059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x7 = (W7 * x1 + 4) >> 3;
146159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 = (W1 * x1 + 4) >> 3;
146259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x3 = x7;
146359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x5 = (181 * (x1 - x7) + 128) >> 8;
146459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x7 = (181 * (x1 + x7) + 128) >> 8;
146559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
146659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pred_word = *((uint32*)(pred += 16)); /* read 4 bytes from pred */
146759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x0 + x1) >> 14;
146859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP1(res);
146959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = (x4 + x7) >> 14;
147059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP2(res2);
147159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
147259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x6 + x5) >> 14;
147359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP3(res);
147459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
147559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x2 + x3) >> 14;
147659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP4(res);
147759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
147859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec += lx)) = dst_word; /* save 4 bytes to dst */
147959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
148059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pred_word = *((uint32*)(pred + 4)); /* read 4 bytes from pred */
148159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x2 - x3) >> 14;
148259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP1(res);
148359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = (x6 - x5) >> 14;
148459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP2(res2);
148559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
148659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x4 - x7) >> 14;
148759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP3(res);
148859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
148959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x0 - x1) >> 14;
149059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP4(res);
149159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
149259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec + 4)) = dst_word; /* save 4 bytes to dst */
149359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
149459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
149559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ;
149659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
149759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
149859f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_row4zmv(Short *blk, UChar *rec, UChar *pred, Int lx)
149959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
150059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;
150159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int i = 8;
150259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    uint32 pred_word, dst_word;
150359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int res, res2;
150459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
150559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* preset the offset, such that we can take advantage pre-offset addressing mode   */
150659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    rec -= lx;
150759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    pred -= 16;
150859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk -= 8;
150959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
151059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    while (i--)
151159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
151259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = blk[10];
151359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[10] = 0;
151459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 = blk[9];
151559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[9] = 0;
151659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x3 = blk[11];
151759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[11] = 0;
151859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x0 = ((*(blk += 8)) << 8) + 8192;
151959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *blk = 0;   /* for proper rounding in the fourth stage */
152059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
152159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 = x0;
152259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x6 = (W6 * x2 + 4) >> 3;
152359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = (W2 * x2 + 4) >> 3;
152459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x8 = x0 - x2;
152559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x0 += x2;
152659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = x8;
152759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x8 = x4 - x6;
152859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 += x6;
152959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x6 = x8;
153059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
153159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x7 = (W7 * x1 + 4) >> 3;
153259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 = (W1 * x1 + 4) >> 3;
153359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x5 = (W3 * x3 + 4) >> 3;
153459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x3 = (- W5 * x3 + 4) >> 3;
153559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x8 = x1 - x5;
153659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 += x5;
153759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x5 = x8;
153859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x8 = x7 - x3;
153959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x3 += x7;
154059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x7 = (181 * (x5 + x8) + 128) >> 8;
154159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x5 = (181 * (x5 - x8) + 128) >> 8;
154259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
154359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pred_word = *((uint32*)(pred += 16)); /* read 4 bytes from pred */
154459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x0 + x1) >> 14;
154559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP1(res);
154659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = (x4 + x7) >> 14;
154759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP2(res2);
154859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
154959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x6 + x5) >> 14;
155059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP3(res);
155159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
155259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x2 + x3) >> 14;
155359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP4(res);
155459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
155559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec += lx)) = dst_word; /* save 4 bytes to dst */
155659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
155759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pred_word = *((uint32*)(pred + 4)); /* read 4 bytes from pred */
155859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x2 - x3) >> 14;
155959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP1(res);
156059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = (x6 - x5) >> 14;
156159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP2(res2);
156259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
156359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x4 - x7) >> 14;
156459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP3(res);
156559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
156659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x0 - x1) >> 14;
156759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP4(res);
156859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
156959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec + 4)) = dst_word; /* save 4 bytes to dst */
157059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
157159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ;
157259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
157359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
157459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#ifndef SMALL_DCT
157559f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_row0x40zmv(Short *blk, UChar *rec, UChar *pred, Int lx)
157659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
157759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 x1, x2, x4, x5;
157859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int i = 8;
157959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    uint32 pred_word, dst_word;
158059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int res, res2;
158159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
158259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* preset the offset, such that we can take advantage pre-offset addressing mode   */
158359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    rec -= lx;
158459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    pred -= 16;
158559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
158659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    while (i--)
158759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
158859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* shortcut */
158959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 = blk[1];
159059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[1] = 0;
159159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk += 8;  /* for proper rounding in the fourth stage */
159259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
159359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* first stage */
159459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x5 = (W7 * x4 + 4) >> 3;
159559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 = (W1 * x4 + 4) >> 3;
159659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
159759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* third stage */
159859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = (181 * (x4 + x5) + 128) >> 8;
159959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 = (181 * (x4 - x5) + 128) >> 8;
160059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
160159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* fourth stage */
160259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pred_word = *((uint32*)(pred += 16)); /* read 4 bytes from pred */
160359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (8192 + x4) >> 14;
160459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP1(res);
160559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = (8192 + x2) >> 14;
160659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP2(res2);
160759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
160859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (8192 + x1) >> 14;
160959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP3(res);
161059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
161159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (8192 + x5) >> 14;
161259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP4(res);
161359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
161459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec += lx)) = dst_word; /* save 4 bytes to dst */
161559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
161659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pred_word = *((uint32*)(pred + 4)); /* read 4 bytes from pred */
161759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (8192 - x5) >> 14;
161859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP1(res);
161959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = (8192 - x1) >> 14;
162059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP2(res2);
162159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
162259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (8192 - x2) >> 14;
162359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP3(res);
162459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
162559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (8192 - x4) >> 14;
162659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP4(res);
162759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
162859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec + 4)) = dst_word; /* save 4 bytes to dst */
162959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
163059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ;
163159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
163259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
163359f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_row0x20zmv(Short *blk, UChar *rec, UChar *pred, Int lx)
163459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
163559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 x0, x2, x4, x6;
163659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int i = 8;
163759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    uint32 pred_word, dst_word;
163859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int res, res2;
163959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
164059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* preset the offset, such that we can take advantage pre-offset addressing mode   */
164159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    rec -= lx;
164259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    pred -= 16;
164359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
164459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    while (i--)
164559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
164659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = blk[2];
164759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[2] = 0;
164859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk += 8; /* for proper rounding in the fourth stage */
164959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* both upper and lower*/
165059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* both x2orx6 and x0orx4 */
165159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x6 = (W6 * x2 + 4) >> 3;
165259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = (W2 * x2 + 4) >> 3;
165359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x0 = 8192 + x2;
165459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = 8192 - x2;
165559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 = 8192 + x6;
165659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x6 = 8192 - x6;
165759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
165859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pred_word = *((uint32*)(pred += 16)); /* read 4 bytes from pred */
165959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x0) >> 14;
166059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP1(res);
166159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = (x4) >> 14;
166259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP2(res2);
166359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
166459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x6) >> 14;
166559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP3(res);
166659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
166759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x2) >> 14;
166859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP4(res);
166959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
167059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec += lx)) = dst_word; /* save 4 bytes to dst */
167159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
167259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pred_word = *((uint32*)(pred + 4)); /* read 4 bytes from pred */
167359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x2) >> 14;
167459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP1(res);
167559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = (x6) >> 14;
167659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP2(res2);
167759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
167859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x4) >> 14;
167959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP3(res);
168059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
168159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x0) >> 14;
168259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP4(res);
168359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
168459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec + 4)) = dst_word; /* save 4 bytes to dst */
168559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
168659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
168759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ;
168859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
168959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
169059f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_row0x10zmv(Short *blk, UChar *rec, UChar *pred, Int lx)
169159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
169259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 x1, x3, x5, x7;
169359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int i = 8;
169459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    uint32 pred_word, dst_word;
169559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int res, res2;
169659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
169759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* preset the offset, such that we can take advantage pre-offset addressing mode   */
169859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    rec -= lx;
169959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    pred -= 16;
170059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
170159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    while (i--)
170259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
170359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x3 = blk[3];
170459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[3] = 0;
170559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk += 8;
170659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
170759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 = (W3 * x3 + 4) >> 3;
170859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x3 = (-W5 * x3 + 4) >> 3;
170959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
171059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x7 = (-181 * (x3 + x1) + 128) >> 8;
171159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x5 = (181 * (x3 - x1) + 128) >> 8;
171259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
171359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pred_word = *((uint32*)(pred += 16)); /* read 4 bytes from pred */
171459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (8192 + x1) >> 14;
171559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP1(res);
171659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = (8192 + x7) >> 14;
171759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP2(res2);
171859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
171959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (8192 + x5) >> 14;
172059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP3(res);
172159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
172259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (8192 + x3) >> 14;
172359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP4(res);
172459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
172559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec += lx)) = dst_word; /* save 4 bytes to dst */
172659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
172759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pred_word = *((uint32*)(pred + 4)); /* read 4 bytes from pred */
172859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (8192 - x3) >> 14;
172959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP1(res);
173059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = (8192 - x5) >> 14;
173159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP2(res2);
173259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
173359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (8192 - x7) >> 14;
173459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP3(res);
173559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
173659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (8192 - x1) >> 14;
173759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP4(res);
173859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
173959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec + 4)) = dst_word; /* save 4 bytes to dst */
174059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
174159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ;
174259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
174359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
174459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#endif /* SMALL_DCT */
174559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
174659f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid idct_rowzmv(Short *blk, UChar *rec, UChar *pred, Int lx)
174759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
174859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;
174959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int i = 8;
175059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    uint32 pred_word, dst_word;
175159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    int res, res2;
175259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
175359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* preset the offset, such that we can take advantage pre-offset addressing mode   */
175459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    rec -= lx;
175559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    pred -= 16;
175659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    blk -= 8;
175759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
175859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    while (i--)
175959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
176059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 = (int32)blk[12] << 8;
176159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[12] = 0;
176259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = blk[14];
176359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[14] = 0;
176459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x3 = blk[10];
176559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[10] = 0;
176659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 = blk[9];
176759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[9] = 0;
176859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x5 = blk[15];
176959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[15] = 0;
177059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x6 = blk[13];
177159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[13] = 0;
177259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x7 = blk[11];
177359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        blk[11] = 0;
177459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x0 = ((*(blk += 8)) << 8) + 8192;
177559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *blk = 0;   /* for proper rounding in the fourth stage */
177659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
177759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* first stage */
177859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x8 = W7 * (x4 + x5) + 4;
177959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 = (x8 + (W1 - W7) * x4) >> 3;
178059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x5 = (x8 - (W1 + W7) * x5) >> 3;
178159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x8 = W3 * (x6 + x7) + 4;
178259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x6 = (x8 - (W3 - W5) * x6) >> 3;
178359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x7 = (x8 - (W3 + W5) * x7) >> 3;
178459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
178559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* second stage */
178659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x8 = x0 + x1;
178759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x0 -= x1;
178859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 = W6 * (x3 + x2) + 4;
178959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = (x1 - (W2 + W6) * x2) >> 3;
179059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x3 = (x1 + (W2 - W6) * x3) >> 3;
179159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x1 = x4 + x6;
179259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 -= x6;
179359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x6 = x5 + x7;
179459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x5 -= x7;
179559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
179659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* third stage */
179759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x7 = x8 + x3;
179859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x8 -= x3;
179959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x3 = x0 + x2;
180059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x0 -= x2;
180159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x2 = (181 * (x4 + x5) + 128) >> 8;
180259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        x4 = (181 * (x4 - x5) + 128) >> 8;
180359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
180459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* fourth stage */
180559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pred_word = *((uint32*)(pred += 16)); /* read 4 bytes from pred */
180659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
180759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x7 + x1) >> 14;
180859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP1(res);
180959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = (x3 + x2) >> 14;
181059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP2(res2);
181159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
181259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x0 + x4) >> 14;
181359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP3(res);
181459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
181559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x8 + x6) >> 14;
181659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP4(res);
181759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
181859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec += lx)) = dst_word; /* save 4 bytes to dst */
181959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
182059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pred_word = *((uint32*)(pred + 4)); /* read 4 bytes from pred */
182159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
182259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x8 - x6) >> 14;
182359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP1(res);
182459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res2 = (x0 - x4) >> 14;
182559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP2(res2);
182659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word = (res2 << 8) | res;
182759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x3 - x2) >> 14;
182859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP3(res);
182959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 16);
183059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        res = (x7 - x1) >> 14;
183159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ADD_AND_CLIP4(res);
183259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst_word |= (res << 24);
183359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((uint32*)(rec + 4)) = dst_word; /* save 4 bytes to dst */
183459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
183559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return;
183659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
183759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
183859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/*----------------------------------------------------------------------------
183959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong;  End Function: idctcol
184059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong----------------------------------------------------------------------------*/
184159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/* ======================================================================== */
184259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/*  Function : BlockIDCTMotionComp                                              */
184359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/*  Date     : 10/16/2000                                                   */
184459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/*  Purpose  : fast IDCT routine                                    */
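/*  In/out   :                                                              */
/*      Short *block      coefficient block, modified in place
        UChar *bitmapcol  per-column bitmaps, read for columns 0..dctMode-1
        UChar bitmaprow   row bitmap; 0 is treated as an all-zero block
        Int dctMode       0: all-zero block, 1: DC-only block, otherwise the
                          number of columns run through the column IDCT
        UChar *rec        output pixels, row stride lx = lx_intra >> 1
        UChar *pred       prediction pixels, row stride 16; not read when
                          the intra flag is set
        Int lx_intra      output stride in the upper bits (lx_intra >> 1),
                          intra flag in bit 0                               */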
184959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/*  Modified :   7/31/01, add checking for all-zero and DC-only block.  */
185059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/*              do 8 columns at a time                                      */
185159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/*               8/2/01, do column first then row-IDCT.                 */
185259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/*               8/2/01, remove clipping (included in motion comp).     */
185359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/*               8/7/01, combine with motion comp.                      */
185459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/*               8/8/01, use AAN IDCT                                       */
185559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/*               9/4/05, use Chen's IDCT and 16 bit block                   */
185659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/* ======================================================================== */
185759f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid BlockIDCTMotionComp(Short *block, UChar *bitmapcol, UChar bitmaprow,
185859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                         Int dctMode, UChar *rec, UChar *pred, Int lx_intra)
185959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
186059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Int i;
186159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Int tmp, tmp2;
186259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    ULong tmp4;
186359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Int bmap;
186459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Short *ptr = block;
186559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    UChar *endcol;
186659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    UInt mask = 0xFF;
186759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Int lx = lx_intra >> 1;
186859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Int intra = (lx_intra & 1);
186959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
187059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /*  all-zero block */
187159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    if (dctMode == 0 || bitmaprow == 0)
187259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
187359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        if (intra)
187459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        {
187559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)rec) = *((ULong*)(rec + 4)) = 0;
187659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec += lx)) = 0;
187759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec + 4)) = 0;
187859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec += lx)) = 0;
187959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec + 4)) = 0;
188059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec += lx)) = 0;
188159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec + 4)) = 0;
188259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec += lx)) = 0;
188359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec + 4)) = 0;
188459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec += lx)) = 0;
188559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec + 4)) = 0;
188659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec += lx)) = 0;
188759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec + 4)) = 0;
188859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec += lx)) = 0;
188959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec + 4)) = 0;
189059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            return ;
189159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        }
189259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        else /* copy from previous frame */
189359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        {
189459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)rec) = *((ULong*)pred);
189559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec + 4)) = *((ULong*)(pred + 4));
189659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec += lx)) = *((ULong*)(pred += 16));
189759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec + 4)) = *((ULong*)(pred + 4));
189859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec += lx)) = *((ULong*)(pred += 16));
189959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec + 4)) = *((ULong*)(pred + 4));
190059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec += lx)) = *((ULong*)(pred += 16));
190159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec + 4)) = *((ULong*)(pred + 4));
190259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec += lx)) = *((ULong*)(pred += 16));
190359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec + 4)) = *((ULong*)(pred + 4));
190459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec += lx)) = *((ULong*)(pred += 16));
190559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec + 4)) = *((ULong*)(pred + 4));
190659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec += lx)) = *((ULong*)(pred += 16));
190759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec + 4)) = *((ULong*)(pred + 4));
190859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec += lx)) = *((ULong*)(pred += 16));
190959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec + 4)) = *((ULong*)(pred + 4));
191059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            return ;
191159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        }
191259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
191359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
191459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* Test for DC only block */
191559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    if (dctMode == 1 || (bitmaprow == 0x80 && bitmapcol[0] == 0x80))
191659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
191759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        i = ((block[0] << 3) + 32) >> 6;
191859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        block[0] = 0;
191959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        if (intra)
192059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        {
192159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            if ((UInt)i > mask) i = mask & (~(i >> 31));
192259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
192359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            tmp = i | (i << 8);
192459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            tmp |= (tmp << 16);
192559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
192659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)rec) = *((ULong*)(rec + 4)) = tmp;
192759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec += lx)) = tmp;
192859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec + 4)) = tmp;
192959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec += lx)) = tmp;
193059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec + 4)) = tmp;
193159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec += lx)) = tmp;
193259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec + 4)) = tmp;
193359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec += lx)) = tmp;
193459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec + 4)) = tmp;
193559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec += lx)) = tmp;
193659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec + 4)) = tmp;
193759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec += lx)) = tmp;
193859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec + 4)) = tmp;
193959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec += lx)) = tmp;
194059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *((ULong*)(rec + 4)) = tmp;
194159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
194259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            return ;
194359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        }
194459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        else
194559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        {
194659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            endcol = rec + (lx << 3);
194759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            do
194859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            {
194959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                tmp4 = *((ULong*)pred);
195059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                tmp2 = tmp4 & 0xFF;
195159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                tmp2 += i;
195259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                if ((UInt)tmp2 > mask) tmp2 = mask & (~(tmp2 >> 31));
195359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                tmp = (tmp4 >> 8) & 0xFF;
195459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                tmp += i;
195559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31));
195659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                tmp2 |= (tmp << 8);
195759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                tmp = (tmp4 >> 16) & 0xFF;
195859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                tmp += i;
195959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31));
196059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                tmp2 |= (tmp << 16);
196159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                tmp = (tmp4 >> 24) & 0xFF;
196259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                tmp += i;
196359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31));
196459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                tmp2 |= (tmp << 24);
196559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                *((ULong*)rec) = tmp2;
196659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
196759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                tmp4 = *((ULong*)(pred + 4));
196859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                tmp2 = tmp4 & 0xFF;
196959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                tmp2 += i;
197059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                if ((UInt)tmp2 > mask) tmp2 = mask & (~(tmp2 >> 31));
197159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                tmp = (tmp4 >> 8) & 0xFF;
197259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                tmp += i;
197359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31));
197459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                tmp2 |= (tmp << 8);
197559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                tmp = (tmp4 >> 16) & 0xFF;
197659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                tmp += i;
197759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31));
197859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                tmp2 |= (tmp << 16);
197959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                tmp = (tmp4 >> 24) & 0xFF;
198059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                tmp += i;
198159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31));
198259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                tmp2 |= (tmp << 24);
198359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                *((ULong*)(rec + 4)) = tmp2;
198459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
198559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                rec += lx;
198659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                pred += 16;
198759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            }
198859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            while (rec < endcol);
198959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            return ;
199059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        }
199159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
199259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
199359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    for (i = 0; i < dctMode; i++)
199459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
199559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        bmap = (Int)bitmapcol[i];
199659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        if (bmap)
199759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        {
199859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            if ((bmap&0xf) == 0)
199959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                (*(idctcolVCA[bmap>>4]))(ptr);
200059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            else
200159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                idct_col(ptr);
200259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        }
200359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        ptr++;
200459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
200559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
200659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    if ((bitmaprow&0xf) == 0)
200759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
200859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        if (intra)
200959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            (*(idctrowVCAIntra[(Int)(bitmaprow>>4)]))(block, rec, lx);
201059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        else
201159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            (*(idctrowVCAzmv[(Int)(bitmaprow>>4)]))(block, rec, pred, lx);
201259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
201359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    else
201459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
201559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        if (intra)
201659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            idct_rowIntra(block, rec, lx);
201759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        else
201859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            idct_rowzmv(block, rec, pred, lx);
201959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
202059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
2021