dct_inline.h revision 59f566c4ec3dfc097ad8163523e522280b27e5c3
1/* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 * express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
17 */
18/*  Filename: dct_inline.h                                                      */
19/*  Description: Implementation for in-line functions used in dct.cpp           */
20/*  Modified:                                                                   */
21/*********************************************************************************/
22#ifndef _DCT_INLINE_H_
23#define _DCT_INLINE_H_
24
25#if !defined(PV_ARM_GCC_V5) && !defined(PV_ARM_GCC_V4)
26
27__inline int32 mla724(int32 op1, int32 op2, int32 op3)
28{
29    int32 out;
30
31    OSCL_UNUSED_ARG(op1);
32
33    out = op2 * 724 + op3; /* op1 is not used here */
34
35    return out;
36}
37
38__inline int32 mla392(int32 k0, int32 k14, int32 round)
39{
40    int32 k1;
41
42    OSCL_UNUSED_ARG(k14);
43
44    k1 = k0 * 392 + round;
45
46    return k1;
47}
48
49__inline int32 mla554(int32 k4, int32 k12, int32 k1)
50{
51    int32 k0;
52
53    OSCL_UNUSED_ARG(k12);
54
55    k0 = k4 * 554 + k1;
56
57    return k0;
58}
59
60__inline int32 mla1338(int32 k6, int32 k14, int32 k1)
61{
62    int32 out;
63
64    OSCL_UNUSED_ARG(k14);
65
66    out = k6 * 1338 + k1;
67
68    return out;
69}
70
71__inline int32 mla946(int32 k6, int32 k14, int32 k1)
72{
73    int32 out;
74
75    OSCL_UNUSED_ARG(k14);
76
77    out = k6 * 946 + k1;
78
79    return out;
80}
81
82__inline int32 sum_abs(int32 k0, int32 k1, int32 k2, int32 k3,
83                       int32 k4, int32 k5, int32 k6, int32 k7)
84{
85    int32 carry, abs_sum;
86
87    carry = k0 >> 31;
88    abs_sum = (k0 ^ carry);
89    carry = k1 >> 31;
90    abs_sum += (k1 ^ carry) - carry;
91    carry = k2 >> 31;
92    abs_sum += (k2 ^ carry) - carry;
93    carry = k3 >> 31;
94    abs_sum += (k3 ^ carry) - carry;
95    carry = k4 >> 31;
96    abs_sum += (k4 ^ carry) - carry;
97    carry = k5 >> 31;
98    abs_sum += (k5 ^ carry) - carry;
99    carry = k6 >> 31;
100    abs_sum += (k6 ^ carry) - carry;
101    carry = k7 >> 31;
102    abs_sum += (k7 ^ carry) - carry;
103
104    return abs_sum;
105}
106
107#elif defined(__CC_ARM)  /* only work with arm v5 */
108
109#if defined(__TARGET_ARCH_5TE)
110
111__inline int32 mla724(int32 op1, int32 op2, int32 op3)
112{
113    int32 out;
114
115    __asm
116    {
117        smlabb out, op1, op2, op3
118    }
119
120    return out;
121}
122
123__inline int32 mla392(int32 k0, int32 k14, int32 round)
124{
125    int32 k1;
126
127    __asm
128    {
129        smlabt k1, k0, k14, round
130    }
131
132    return k1;
133}
134
135__inline int32 mla554(int32 k4, int32 k12, int32 k1)
136{
137    int32 k0;
138
139    __asm
140    {
141        smlabt k0, k4, k12, k1
142    }
143
144    return k0;
145}
146
147__inline int32 mla1338(int32 k6, int32 k14, int32 k1)
148{
149    int32 out;
150
151    __asm
152    {
153        smlabb out, k6, k14, k1
154    }
155
156    return out;
157}
158
159__inline int32 mla946(int32 k6, int32 k14, int32 k1)
160{
161    int32 out;
162
163    __asm
164    {
165        smlabb out, k6, k14, k1
166    }
167
168    return out;
169}
170
171#else // not ARM5TE
172
173
174__inline int32 mla724(int32 op1, int32 op2, int32 op3)
175{
176    int32 out;
177
178    __asm
179    {
180        and out, op2, #0xFFFF
181        mla out, op1, out, op3
182    }
183
184    return out;
185}
186
187__inline int32 mla392(int32 k0, int32 k14, int32 round)
188{
189    int32 k1;
190
191    __asm
192    {
193        mov k1, k14, asr #16
194        mla k1, k0, k1, round
195    }
196
197    return k1;
198}
199
200__inline int32 mla554(int32 k4, int32 k12, int32 k1)
201{
202    int32 k0;
203
204    __asm
205    {
206        mov  k0, k12, asr #16
207        mla k0, k4, k0, k1
208    }
209
210    return k0;
211}
212
213__inline int32 mla1338(int32 k6, int32 k14, int32 k1)
214{
215    int32 out;
216
217    __asm
218    {
219        and out, k14, 0xFFFF
220        mla out, k6, out, k1
221    }
222
223    return out;
224}
225
226__inline int32 mla946(int32 k6, int32 k14, int32 k1)
227{
228    int32 out;
229
230    __asm
231    {
232        and out, k14, 0xFFFF
233        mla out, k6, out, k1
234    }
235
236    return out;
237}
238
239#endif
240
241__inline int32 sum_abs(int32 k0, int32 k1, int32 k2, int32 k3,
242                       int32 k4, int32 k5, int32 k6, int32 k7)
243{
244    int32 carry, abs_sum;
245    __asm
246    {
247        eor     carry, k0, k0, asr #31 ;
248        eors    abs_sum, k1, k1, asr #31 ;
249        adc     abs_sum, abs_sum, carry ;
250        eors    carry,  k2, k2, asr #31 ;
251        adc     abs_sum, abs_sum, carry ;
252        eors    carry,  k3, k3, asr #31 ;
253        adc     abs_sum, abs_sum, carry ;
254        eors    carry,  k4, k4, asr #31 ;
255        adc     abs_sum, abs_sum, carry ;
256        eors    carry,  k5, k5, asr #31 ;
257        adc     abs_sum, abs_sum, carry ;
258        eors    carry,  k6, k6, asr #31 ;
259        adc     abs_sum, abs_sum, carry ;
260        eors    carry,  k7, k7, asr #31 ;
261        adc     abs_sum, abs_sum, carry ;
262    }
263
264    return abs_sum;
265}
266
267#elif ( defined(PV_ARM_GCC_V5) || defined(PV_ARM_GCC_V4) )  /* ARM GNU COMPILER  */
268
269__inline int32 mla724(int32 op1, int32 op2, int32 op3)
270{
271    register int32 out;
272    register int32 aa = (int32)op1;
273    register int32 bb = (int32)op2;
274    register int32 cc = (int32)op3;
275
276    asm volatile("smlabb %0, %1, %2, %3"
277             : "=&r"(out)
278                         : "r"(aa),
279                         "r"(bb),
280                         "r"(cc));
281    return out;
282}
283
284
285__inline int32 mla392(int32 k0, int32 k14, int32 round)
286{
287    register int32 out;
288    register int32 aa = (int32)k0;
289    register int32 bb = (int32)k14;
290    register int32 cc = (int32)round;
291
292    asm volatile("smlabt %0, %1, %2, %3"
293             : "=&r"(out)
294                         : "r"(aa),
295                         "r"(bb),
296                         "r"(cc));
297
298    return out;
299}
300
301__inline int32 mla554(int32 k4, int32 k12, int32 k1)
302{
303    register int32 out;
304    register int32 aa = (int32)k4;
305    register int32 bb = (int32)k12;
306    register int32 cc = (int32)k1;
307
308    asm volatile("smlabt %0, %1, %2, %3"
309             : "=&r"(out)
310                         : "r"(aa),
311                         "r"(bb),
312                         "r"(cc));
313
314    return out;
315}
316
317__inline int32 mla1338(int32 k6, int32 k14, int32 k1)
318{
319    register int32 out;
320    register int32 aa = (int32)k6;
321    register int32 bb = (int32)k14;
322    register int32 cc = (int32)k1;
323
324    asm volatile("smlabb %0, %1, %2, %3"
325             : "=&r"(out)
326                         : "r"(aa),
327                         "r"(bb),
328                         "r"(cc));
329    return out;
330}
331
332__inline int32 mla946(int32 k6, int32 k14, int32 k1)
333{
334    register int32 out;
335    register int32 aa = (int32)k6;
336    register int32 bb = (int32)k14;
337    register int32 cc = (int32)k1;
338
339    asm volatile("smlabb %0, %1, %2, %3"
340             : "=&r"(out)
341                         : "r"(aa),
342                         "r"(bb),
343                         "r"(cc));
344    return out;
345}
346
347__inline int32 sum_abs(int32 k0, int32 k1, int32 k2, int32 k3,
348                       int32 k4, int32 k5, int32 k6, int32 k7)
349{
350    register int32 carry;
351    register int32 abs_sum;
352    register int32 aa = (int32)k0;
353    register int32 bb = (int32)k1;
354    register int32 cc = (int32)k2;
355    register int32 dd = (int32)k3;
356    register int32 ee = (int32)k4;
357    register int32 ff = (int32)k5;
358    register int32 gg = (int32)k6;
359    register int32 hh = (int32)k7;
360
361    asm volatile("eor  %0, %2, %2, asr #31\n\t"
362                 "eors %1, %3, %3, asr #31\n\t"
363                 "adc  %1, %1, %0\n\t"
364                 "eors %0, %4, %4, asr #31\n\t"
365                 "adc  %1, %1, %0\n\t"
366                 "eors %0, %5, %5, asr #31\n\t"
367                 "adc  %1, %1, %0\n\t"
368                 "eors %0, %6, %6, asr #31\n\t"
369                 "adc  %1, %1, %0\n\t"
370                 "eors %0, %7, %7, asr #31\n\t"
371                 "adc  %1, %1, %0\n\t"
372                 "eors %0, %8, %8, asr #31\n\t"
373                 "adc  %1, %1, %0\n\t"
374                 "eors %0, %9, %9, asr #31\n\t"
375                 "adc  %1, %1, %0\n\t"
376
377             : "=&r"(carry),
378                 "=&r"(abs_sum):
379                         "r"(aa),
380                         "r"(bb),
381                         "r"(cc),
382                         "r"(dd),
383                         "r"(ee),
384                         "r"(ff),
385                         "r"(gg),
386                         "r"(hh));
387
388    return abs_sum;
389}
390
391#endif // Diff. OS
392
393#endif //_DCT_INLINE_H_
394
395
396