1233d2500723e5594f3e7c70896ffeeef32b9c950ywan/*
2233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3233d2500723e5594f3e7c70896ffeeef32b9c950ywan *
4233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  Use of this source code is governed by a BSD-style license
5233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  that can be found in the LICENSE file in the root of the source
6233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  tree. An additional intellectual property rights grant can be found
7233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  in the file PATENTS.  All contributing project authors may
8233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  be found in the AUTHORS file in the root of the source tree.
9233d2500723e5594f3e7c70896ffeeef32b9c950ywan */
10233d2500723e5594f3e7c70896ffeeef32b9c950ywan
11233d2500723e5594f3e7c70896ffeeef32b9c950ywan
12233d2500723e5594f3e7c70896ffeeef32b9c950ywan/****************************************************************************
13233d2500723e5594f3e7c70896ffeeef32b9c950ywan * Notes:
14233d2500723e5594f3e7c70896ffeeef32b9c950ywan *
 * This implementation makes use of 16 bit fixed point versions of two multiply
16233d2500723e5594f3e7c70896ffeeef32b9c950ywan * constants:
17233d2500723e5594f3e7c70896ffeeef32b9c950ywan *         1.   sqrt(2) * cos (pi/8)
18233d2500723e5594f3e7c70896ffeeef32b9c950ywan *         2.   sqrt(2) * sin (pi/8)
 * Because the first constant is bigger than 1, to maintain the same 16 bit
20233d2500723e5594f3e7c70896ffeeef32b9c950ywan * fixed point precision as the second one, we use a trick of
21233d2500723e5594f3e7c70896ffeeef32b9c950ywan *         x * a = x + x*(a-1)
22233d2500723e5594f3e7c70896ffeeef32b9c950ywan * so
23233d2500723e5594f3e7c70896ffeeef32b9c950ywan *         x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
24233d2500723e5594f3e7c70896ffeeef32b9c950ywan **************************************************************************/
/* Multipliers for the 4x4 inverse DCT, scaled by 2^16:
 *   cospi8sqrt2minus1 ~ (sqrt(2) * cos(pi/8) - 1) * 65536
 *   sinpi8sqrt2       ~  sqrt(2) * sin(pi/8)      * 65536
 * Every product with these constants is shifted right by 16 to undo the
 * scaling.
 */
static const int cospi8sqrt2minus1 = 20091;
static const int sinpi8sqrt2      = 35468;

/* Inverse-transform a 4x4 block of coefficients, add the result to a
 * prediction block and store the clamped pixels.
 *
 *   input        16 coefficients, row-major (4 per row); only read.
 *   pred_ptr     top-left of the 4x4 prediction block.
 *   pred_stride  distance, in bytes, between prediction rows.
 *   dst_ptr      top-left of the 4x4 destination block.
 *   dst_stride   distance, in bytes, between destination rows.
 *
 * The transform runs as a column pass followed by a row pass; the row pass
 * rounds with +4 and shifts right by 3.  Each output pixel is
 * prediction + residual, saturated to the range 0..255.
 */
void vp8_short_idct4x4llm_c(short *input, unsigned char *pred_ptr,
                            int pred_stride, unsigned char *dst_ptr,
                            int dst_stride)
{
    short residual[16];
    int row, col;

    /* Pass 1: 1-D transform down each of the four columns. */
    for (col = 0; col < 4; col++)
    {
        const int x0 = input[col];
        const int x1 = input[col + 4];
        const int x2 = input[col + 8];
        const int x3 = input[col + 12];

        const int even_sum  = x0 + x2;
        const int even_diff = x0 - x2;

        /* x * sqrt(2)*cos(pi/8) is evaluated as x + x * (that - 1). */
        const int odd_diff  = ((x1 * sinpi8sqrt2) >> 16)
                              - (x3 + ((x3 * cospi8sqrt2minus1) >> 16));
        const int odd_sum   = (x1 + ((x1 * cospi8sqrt2minus1) >> 16))
                              + ((x3 * sinpi8sqrt2) >> 16);

        residual[col]      = (short)(even_sum + odd_sum);
        residual[col + 4]  = (short)(even_diff + odd_diff);
        residual[col + 8]  = (short)(even_diff - odd_diff);
        residual[col + 12] = (short)(even_sum - odd_sum);
    }

    /* Pass 2: 1-D transform along each row, in place, with final rounding. */
    for (row = 0; row < 4; row++)
    {
        short *line = residual + 4 * row;

        const int x0 = line[0];
        const int x1 = line[1];
        const int x2 = line[2];
        const int x3 = line[3];

        const int even_sum  = x0 + x2;
        const int even_diff = x0 - x2;
        const int odd_diff  = ((x1 * sinpi8sqrt2) >> 16)
                              - (x3 + ((x3 * cospi8sqrt2minus1) >> 16));
        const int odd_sum   = (x1 + ((x1 * cospi8sqrt2minus1) >> 16))
                              + ((x3 * sinpi8sqrt2) >> 16);

        line[0] = (short)((even_sum + odd_sum + 4) >> 3);
        line[1] = (short)((even_diff + odd_diff + 4) >> 3);
        line[2] = (short)((even_diff - odd_diff + 4) >> 3);
        line[3] = (short)((even_sum - odd_sum + 4) >> 3);
    }

    /* Reconstruction: prediction + residual, saturated to 8 bits. */
    for (row = 0; row < 4; row++)
    {
        const short *line = residual + 4 * row;

        for (col = 0; col < 4; col++)
        {
            const int pixel = line[col] + pred_ptr[col];

            dst_ptr[col] = (unsigned char)(pixel < 0 ? 0
                                           : (pixel > 255 ? 255 : pixel));
        }

        dst_ptr += dst_stride;
        pred_ptr += pred_stride;
    }
}
111233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/* Reconstruct a 4x4 block from a single DC value.
 *
 * The DC value is rounded as (input_dc + 4) >> 3 and that one offset is
 * added to all 16 prediction pixels; each sum is saturated to 0..255 before
 * being stored.
 *
 *   input_dc     DC value to add.
 *   pred_ptr     top-left of the 4x4 prediction block.
 *   pred_stride  distance, in bytes, between prediction rows.
 *   dst_ptr      top-left of the 4x4 destination block.
 *   dst_stride   distance, in bytes, between destination rows.
 */
void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr,
                            int pred_stride, unsigned char *dst_ptr,
                            int dst_stride)
{
    const int dc_offset = (input_dc + 4) >> 3;
    int row;

    for (row = 0; row < 4; row++)
    {
        int col;

        for (col = 0; col < 4; col++)
        {
            const int sum = dc_offset + pred_ptr[col];

            dst_ptr[col] = (unsigned char)(sum < 0 ? 0
                                           : (sum > 255 ? 255 : sum));
        }

        pred_ptr += pred_stride;
        dst_ptr += dst_stride;
    }
}
139233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/* 4x4 inverse Walsh transform (per the function name) built from
 * add/subtract butterflies only.
 *
 *   input       16 values, row-major (4 per row); only read.
 *   mb_dqcoeff  destination; result i (0..15, row-major) is stored at
 *               mb_dqcoeff[i * 16], so indices up to 240 are written.
 *               Presumably these are the first slots of 16 consecutive
 *               16-coefficient blocks -- confirm against the caller.
 *
 * A column pass is followed by a row pass; the row pass rounds with +3 and
 * shifts right by 3.
 */
void vp8_short_inv_walsh4x4_c(short *input, short *mb_dqcoeff)
{
    short stage[16];
    int row, col;

    /* Column pass: input is fully consumed here, before any output is
     * written. */
    for (col = 0; col < 4; col++)
    {
        const int top    = input[col];
        const int upper  = input[col + 4];
        const int lower  = input[col + 8];
        const int bottom = input[col + 12];

        const int outer_sum  = top + bottom;
        const int inner_sum  = upper + lower;
        const int inner_diff = upper - lower;
        const int outer_diff = top - bottom;

        stage[col]      = (short)(outer_sum + inner_sum);
        stage[col + 4]  = (short)(inner_diff + outer_diff);
        stage[col + 8]  = (short)(outer_sum - inner_sum);
        stage[col + 12] = (short)(outer_diff - inner_diff);
    }

    /* Row pass: round and scatter each result to every 16th output slot. */
    for (row = 0; row < 4; row++)
    {
        const short *line = stage + 4 * row;
        short *out = mb_dqcoeff + 64 * row;

        const int outer_sum  = line[0] + line[3];
        const int inner_sum  = line[1] + line[2];
        const int inner_diff = line[1] - line[2];
        const int outer_diff = line[0] - line[3];

        out[0]  = (short)((outer_sum + inner_sum + 3) >> 3);
        out[16] = (short)((inner_diff + outer_diff + 3) >> 3);
        out[32] = (short)((outer_sum - inner_sum + 3) >> 3);
        out[48] = (short)((outer_diff - inner_diff + 3) >> 3);
    }
}
193233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/* Shortcut form of the 4x4 inverse Walsh transform that uses only input[0].
 *
 * The single value (input[0] + 3) >> 3 is written to mb_dqcoeff[i * 16] for
 * i = 0..15, so indices up to 240 are written.
 *
 *   input       only element 0 is read.
 *   mb_dqcoeff  destination; every 16th element receives the rounded value.
 */
void vp8_short_inv_walsh4x4_1_c(short *input, short *mb_dqcoeff)
{
    const short dc = (short)((input[0] + 3) >> 3);
    int offset;

    for (offset = 0; offset < 16 * 16; offset += 16)
    {
        mb_dqcoeff[offset] = dc;
    }
}
205