1/*
2 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11
/****************************************************************************
 * Notes:
 *
 * This implementation makes use of 16 bit fixed point versions of two
 * multiply constants:
 *         1.   sqrt(2) * cos (pi/8)
 *         2.   sqrt(2) * sin (pi/8)
 * Because the first constant is bigger than 1, to maintain the same 16 bit
 * fixed point precision as the second one, we use a trick of
 *         x * a = x + x*(a-1)
 * so
 *         x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
 **************************************************************************/
/* Fixed-point multipliers, scaled by 2^16 (results are shifted right by 16):
 *   cospi8sqrt2minus1 ~ sqrt(2) * cos(pi/8) - 1
 *   sinpi8sqrt2       ~ sqrt(2) * sin(pi/8)
 * rounding is the bias added before each >> 16; it is currently zero.
 */
static const int cospi8sqrt2minus1 = 20091;
static const int sinpi8sqrt2      = 35468;
static const int rounding = 0;

/* One 4-point pass of the inverse transform.
 *
 * x0..x3 are the four inputs of a single column or row; the four results
 * are written to result[0..3] in output order. No final scaling is applied
 * here, callers decide how to round and store the values.
 */
static void idct4x4llm_1d(int x0, int x1, int x2, int x3, int result[4])
{
    const int even_sum  = x0 + x2;
    const int even_diff = x0 - x2;

    /* x * sqrt(2)*cos(pi/8) is evaluated as x + x * (sqrt(2)*cos(pi/8) - 1) */
    const int odd_c = ((x1 * sinpi8sqrt2 + rounding) >> 16)
                      - (x3 + ((x3 * cospi8sqrt2minus1 + rounding) >> 16));
    const int odd_d = (x1 + ((x1 * cospi8sqrt2minus1 + rounding) >> 16))
                      + ((x3 * sinpi8sqrt2 + rounding) >> 16);

    result[0] = even_sum + odd_d;
    result[1] = even_diff + odd_c;
    result[2] = even_diff - odd_c;
    result[3] = even_sum - odd_d;
}

/* 4x4 inverse DCT.
 *
 * input  : 16 coefficients, read with a fixed row step of 4 shorts.
 * output : destination block; consecutive rows are (pitch >> 1) shorts apart
 *          (presumably pitch is given in bytes - confirm against callers).
 * pitch  : output row pitch, halved to obtain the step in short elements.
 *
 * The first pass transforms each column of input into output; the second
 * pass transforms each row of output in place and applies (x + 4) >> 3.
 */
void vp8_short_idct4x4llm_c(short *input, short *output, int pitch)
{
    const int row_step = pitch >> 1;
    int v[4];
    int k;

    /* Pass 1: columns of input -> columns of output. */
    for (k = 0; k < 4; k++)
    {
        const short *src = input + k;
        short *dst = output + k;

        idct4x4llm_1d(src[0], src[4], src[8], src[12], v);

        dst[row_step * 0] = v[0];
        dst[row_step * 3] = v[3];
        dst[row_step * 1] = v[1];
        dst[row_step * 2] = v[2];
    }

    /* Pass 2: rows of output, in place, with rounding and a shift by 3. */
    for (k = 0; k < 4; k++)
    {
        short *row = output + k * row_step;

        /* All four values are read before any of them is overwritten. */
        idct4x4llm_1d(row[0], row[1], row[2], row[3], v);

        row[0] = (v[0] + 4) >> 3;
        row[3] = (v[3] + 4) >> 3;
        row[1] = (v[1] + 4) >> 3;
        row[2] = (v[2] + 4) >> 3;
    }
}
88
/* Inverse DCT shortcut that uses only the first coefficient.
 *
 * input  : only input[0] is read.
 * output : a 4x4 block whose rows are (pitch >> 1) shorts apart; every
 *          element is set to (input[0] + 4) >> 3.
 * pitch  : output row pitch, halved to obtain the step in short elements.
 */
void vp8_short_idct4x4llm_1_c(short *input, short *output, int pitch)
{
    const int dc = (input[0] + 4) >> 3;
    const int row_step = pitch >> 1;
    short *row = output;
    int r, c;

    for (r = 0; r < 4; r++)
    {
        for (c = 0; c < 4; c++)
            row[c] = dc;

        row += row_step;
    }
}
106
/* Add a DC-only residual to a 4x4 prediction block and store it clamped.
 *
 * input_dc : DC value; the per-pixel offset is (input_dc + 4) >> 3.
 * pred_ptr : prediction pixels, rows are pitch bytes apart.
 * dst_ptr  : destination pixels, rows are stride bytes apart.
 *
 * Each destination pixel is the prediction plus the offset, limited to the
 * range 0..255.
 */
void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride)
{
    const int offset = (input_dc + 4) >> 3;
    int row, col;

    for (row = 0; row < 4; row++)
    {
        for (col = 0; col < 4; col++)
        {
            int pixel = pred_ptr[col] + offset;

            if (pixel < 0)
                pixel = 0;
            else if (pixel > 255)
                pixel = 255;

            dst_ptr[col] = (unsigned char) pixel;
        }

        pred_ptr += pitch;
        dst_ptr += stride;
    }
}
132
/* 4x4 inverse Walsh transform (as named), done as two butterfly passes.
 *
 * input  : 16 values, 4 per row.
 * output : 16 values, 4 per row. The first pass processes columns of input
 *          into output; the second pass processes rows of output in place
 *          and applies (x + 3) >> 3 to every result.
 */
void vp8_short_inv_walsh4x4_c(short *input, short *output)
{
    int n;

    /* Pass 1: one column per iteration. */
    for (n = 0; n < 4; n++)
    {
        const short *col = input + n;
        short *dst = output + n;

        const int outer_sum  = col[0] + col[12];
        const int inner_sum  = col[4] + col[8];
        const int inner_diff = col[4] - col[8];
        const int outer_diff = col[0] - col[12];

        dst[0]  = outer_sum + inner_sum;
        dst[4]  = inner_diff + outer_diff;
        dst[8]  = outer_sum - inner_sum;
        dst[12] = outer_diff - inner_diff;
    }

    /* Pass 2: one row per iteration; n is the offset of the row start. */
    for (n = 0; n < 16; n += 4)
    {
        short *row = output + n;

        /* Read the whole row before overwriting any of it. */
        const int outer_sum  = row[0] + row[3];
        const int inner_sum  = row[1] + row[2];
        const int inner_diff = row[1] - row[2];
        const int outer_diff = row[0] - row[3];

        row[0] = (outer_sum + inner_sum + 3) >> 3;
        row[1] = (inner_diff + outer_diff + 3) >> 3;
        row[2] = (outer_sum - inner_sum + 3) >> 3;
        row[3] = (outer_diff - inner_diff + 3) >> 3;
    }
}
180
/* Inverse Walsh shortcut that uses only the first input value.
 *
 * input  : only input[0] is read.
 * output : 16 consecutive shorts, each set to (input[0] + 3) >> 3.
 */
void vp8_short_inv_walsh4x4_1_c(short *input, short *output)
{
    const int dc = (input[0] + 3) >> 3;
    int k;

    /* The 4x4 block is stored contiguously, so fill it as a flat array. */
    for (k = 0; k < 16; k++)
        output[k] = dc;
}
198