/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
1090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/****************************************************************************
 * Notes:
 *
 * This implementation makes use of 16 bit fixed point versions of two multiply
 * constants:
 *         1.   sqrt(2) * cos (pi/8)
 *         2.   sqrt(2) * sin (pi/8)
 * Because the first constant is bigger than 1, to maintain the same 16 bit
 * fixed point precision as the second one, we use a trick of
 *         x * a = x + x*(a-1)
 * so
 *         x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
 **************************************************************************/
/* (sqrt(2) * cos(pi/8) - 1) scaled by 65536 (16 bit fixed point). */
static const int cospi8sqrt2minus1 = 20091;
/* (sqrt(2) * sin(pi/8)) scaled by 65536 (16 bit fixed point). */
static const int sinpi8sqrt2      = 35468;
/* Bias added to each fixed point product before the >> 16; currently none. */
static const int rounding = 0;

/*
 * 4x4 inverse DCT on 16 bit coefficients.
 *
 * input  : 16 coefficients, read as a 4x4 block whose rows are 4 shorts apart.
 * output : receives the 4x4 result; consecutive rows are (pitch >> 1) shorts
 *          apart. Only the first 4 shorts of each of the 4 rows are written.
 * pitch  : output row stride; it is halved to index shorts, so it is
 *          presumably expressed in bytes.
 *
 * The transform runs in two passes. The first pass works down the columns of
 * input and stores its intermediate values in output; the second pass works
 * along the rows of output in place and applies the final (x + 4) >> 3
 * rounding.
 *
 * Negative intermediates are shifted right with >>, so the result relies on
 * the compiler implementing that as an arithmetic shift.
 */
void vp8_short_idct4x4llm_c(short *input, short *output, int pitch)
{
    int i;
    int a1, b1, c1, d1;
    int temp1, temp2;

    short *ip = input;
    short *op = output;
    int shortpitch = pitch >> 1;

    /* Pass 1: vertical butterflies, one input column per iteration. */
    for (i = 0; i < 4; i++)
    {
        a1 = ip[0] + ip[8];
        b1 = ip[0] - ip[8];

        temp1 = (ip[4] * sinpi8sqrt2 + rounding) >> 16;
        /* x * sqrt(2) * cos(pi/8) computed as x + x * (sqrt(2)*cos(pi/8) - 1) */
        temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1 + rounding) >> 16);
        c1 = temp1 - temp2;

        temp1 = ip[4] + ((ip[4] * cospi8sqrt2minus1 + rounding) >> 16);
        temp2 = (ip[12] * sinpi8sqrt2 + rounding) >> 16;
        d1 = temp1 + temp2;

        op[shortpitch*0] = a1 + d1;
        op[shortpitch*3] = a1 - d1;

        op[shortpitch*1] = b1 + c1;
        op[shortpitch*2] = b1 - c1;

        ip++;
        op++;
    }

    /* Pass 2: horizontal butterflies over the intermediate rows, in place.
     * All four values of a row are read before any of them is overwritten. */
    ip = output;
    op = output;

    for (i = 0; i < 4; i++)
    {
        a1 = ip[0] + ip[2];
        b1 = ip[0] - ip[2];

        temp1 = (ip[1] * sinpi8sqrt2 + rounding) >> 16;
        temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1 + rounding) >> 16);
        c1 = temp1 - temp2;

        temp1 = ip[1] + ((ip[1] * cospi8sqrt2minus1 + rounding) >> 16);
        temp2 = (ip[3] * sinpi8sqrt2 + rounding) >> 16;
        d1 = temp1 + temp2;

        /* Final scaling: add 4, then shift right by 3. */
        op[0] = (a1 + d1 + 4) >> 3;
        op[3] = (a1 - d1 + 4) >> 3;

        op[1] = (b1 + c1 + 4) >> 3;
        op[2] = (b1 - c1 + 4) >> 3;

        ip += shortpitch;
        op += shortpitch;
    }
}
8890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/*
 * DC-only variant of the 4x4 inverse DCT: only input[0] is read.
 *
 * input  : coefficient block; only element 0 is used.
 * output : receives a 4x4 block in which every entry is (input[0] + 4) >> 3;
 *          consecutive rows are (pitch >> 1) shorts apart.
 * pitch  : output row stride; it is halved to index shorts, so it is
 *          presumably expressed in bytes.
 */
void vp8_short_idct4x4llm_1_c(short *input, short *output, int pitch)
{
    int i;
    int a1;
    short *op = output;
    int shortpitch = pitch >> 1;

    /* Same final rounding as the full transform: add 4, shift right by 3. */
    a1 = ((input[0] + 4) >> 3);

    for (i = 0; i < 4; i++)
    {
        op[0] = a1;
        op[1] = a1;
        op[2] = a1;
        op[3] = a1;
        op += shortpitch;
    }
}
10690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/*
 * Adds a DC-only residual to a 4x4 block of predictor pixels and stores the
 * result clamped to the range 0..255.
 *
 * input_dc : DC coefficient; the value added to every pixel is
 *            (input_dc + 4) >> 3.
 * pred_ptr : top-left of the 4x4 predictor block (read only here).
 * dst_ptr  : top-left of the 4x4 destination block.
 * pitch    : distance, in unsigned chars, between predictor rows.
 * stride   : distance, in unsigned chars, between destination rows.
 */
void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride)
{
    int a1 = ((input_dc + 4) >> 3);
    int r, c;

    for (r = 0; r < 4; r++)
    {
        for (c = 0; c < 4; c++)
        {
            int a = a1 + pred_ptr[c];

            /* Saturate to what an unsigned char pixel can hold. */
            if (a < 0)
                a = 0;

            if (a > 255)
                a = 255;

            dst_ptr[c] = (unsigned char) a;
        }

        dst_ptr += stride;
        pred_ptr += pitch;
    }
}
13290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/*
 * 4x4 inverse Walsh-Hadamard transform on 16 bit values.
 *
 * input  : 16 coefficients, read as a 4x4 block whose rows are 4 shorts apart.
 * output : receives the 16 results with the same layout (rows 4 shorts
 *          apart).
 *
 * The first pass combines the values of each column and stores the
 * intermediates in output; the second pass combines the values of each row
 * of output in place and applies the final (x + 3) >> 3 scaling.
 *
 * Negative intermediates are shifted right with >>, so the result relies on
 * the compiler implementing that as an arithmetic shift.
 */
void vp8_short_inv_walsh4x4_c(short *input, short *output)
{
    int i;
    int a1, b1, c1, d1;
    int a2, b2, c2, d2;
    short *ip = input;
    short *op = output;

    /* Pass 1: one column per iteration (elements 0, 4, 8 and 12 from ip). */
    for (i = 0; i < 4; i++)
    {
        a1 = ip[0] + ip[12];
        b1 = ip[4] + ip[8];
        c1 = ip[4] - ip[8];
        d1 = ip[0] - ip[12];

        op[0] = a1 + b1;
        op[4] = c1 + d1;
        op[8] = a1 - b1;
        op[12] = d1 - c1;
        ip++;
        op++;
    }

    /* Pass 2: one row per iteration, reading and writing output in place.
     * All four values of a row are read before any of them is overwritten. */
    ip = output;
    op = output;

    for (i = 0; i < 4; i++)
    {
        a1 = ip[0] + ip[3];
        b1 = ip[1] + ip[2];
        c1 = ip[1] - ip[2];
        d1 = ip[0] - ip[3];

        a2 = a1 + b1;
        b2 = c1 + d1;
        c2 = a1 - b1;
        d2 = d1 - c1;

        /* Final scaling: add 3, then shift right by 3. */
        op[0] = (a2 + 3) >> 3;
        op[1] = (b2 + 3) >> 3;
        op[2] = (c2 + 3) >> 3;
        op[3] = (d2 + 3) >> 3;

        ip += 4;
        op += 4;
    }
}
18090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/*
 * DC-only variant of the 4x4 inverse Walsh-Hadamard transform: only
 * input[0] is read.
 *
 * input  : coefficient block; only element 0 is used.
 * output : all 16 entries are set to (input[0] + 3) >> 3.
 */
void vp8_short_inv_walsh4x4_1_c(short *input, short *output)
{
    int i;
    int a1;
    short *op = output;

    /* Same final scaling as the full transform: add 3, shift right by 3. */
    a1 = ((input[0] + 3) >> 3);

    for (i = 0; i < 4; i++)
    {
        op[0] = a1;
        op[1] = a1;
        op[2] = a1;
        op[3] = a1;
        op += 4;
    }
}
198