/************************************************************************
 * Copyright (C) 2002-2009, Xiph.org Foundation
 * Copyright (C) 2010, Robin Watts for Pinknoise Productions Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the names of the Xiph.org Foundation nor Pinknoise
 * Productions Ltd nor the names of its contributors may be used to
 * endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ************************************************************************

 function: arm7 and later wide math functions

 ************************************************************************/

#ifdef _ARM_ASSEM_

#if !defined(_V_WIDE_MATH) && !defined(_LOW_ACCURACY_)
#define _V_WIDE_MATH

static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {
  int lo,hi;
  asm volatile("smull\t%0, %1, %2, %3"
               : "=&r"(lo),"=&r"(hi)
               : "%r"(x),"r"(y)
               : "cc");
  return(hi);
}
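
/* For reference (not part of the build): MULT32 returns the upper 32 bits of
   the full 64-bit signed product, roughly
   (ogg_int32_t)(((ogg_int64_t)x * y) >> 32) on a compiler with a 64-bit
   type; MULT31 below is then the usual Q31 multiply, (x*y)>>31. */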

static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
  return MULT32(x,y)<<1;
}

static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
  int lo,hi;
  asm volatile("smull\t%0, %1, %2, %3\n\t"
               "movs\t%0, %0, lsr #15\n\t"
               "adc\t%1, %0, %1, lsl #17\n\t"
               : "=&r"(lo),"=&r"(hi)
               : "%r"(x),"r"(y)
               : "cc");
  return(hi);
}
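
/* For reference (not part of the build): MULT31_SHIFT15 returns the 64-bit
   product shifted down by 15, with the carry from the shift folded back in,
   i.e. roughly (ogg_int32_t)(((ogg_int64_t)x * y + (1 << 14)) >> 15). */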

/* MB(): a compiler-level memory barrier; it keeps the compiler from
   reordering the *x and *y stores in the XPROD/XNPROD helpers below. */
#define MB() asm volatile ("" : : : "memory")

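/* x = (a*t + b*v)>>32,    y = (b*t - a*v)>>32 */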
static inline void XPROD32(ogg_int32_t  a, ogg_int32_t  b,
                           ogg_int32_t  t, ogg_int32_t  v,
                           ogg_int32_t *x, ogg_int32_t *y)
{
  int x1, y1, l;
  asm("smull\t%0, %1, %4, %6\n\t"
      "smlal\t%0, %1, %5, %7\n\t"
      "rsb\t%3, %4, #0\n\t"
      "smull\t%0, %2, %5, %6\n\t"
      "smlal\t%0, %2, %3, %7"
      : "=&r" (l), "=&r" (x1), "=&r" (y1), "=r" (a)
      : "3" (a), "r" (b), "r" (t), "r" (v)
      : "cc" );
  *x = x1;
  MB();
  *y = y1;
}

/* x = (a*t + b*v)>>31,    y = (b*t - a*v)>>31 */
static inline void XPROD31(ogg_int32_t  a, ogg_int32_t  b,
                           ogg_int32_t  t, ogg_int32_t  v,
                           ogg_int32_t *x, ogg_int32_t *y)
{
  int x1, y1, l;
  asm("smull\t%0, %1, %4, %6\n\t"
      "smlal\t%0, %1, %5, %7\n\t"
      "rsb\t%3, %4, #0\n\t"
      "smull\t%0, %2, %5, %6\n\t"
      "smlal\t%0, %2, %3, %7"
      : "=&r" (l), "=&r" (x1), "=&r" (y1), "=r" (a)
      : "3" (a), "r" (b), "r" (t), "r" (v)
      : "cc" );
  *x = x1 << 1;
  MB();
  *y = y1 << 1;
}

/* x = (a*t - b*v)>>31,     y = (b*t + a*v)>>31 */
static inline void XNPROD31(ogg_int32_t  a, ogg_int32_t  b,
                            ogg_int32_t  t, ogg_int32_t  v,
                            ogg_int32_t *x, ogg_int32_t *y)
{
  int x1, y1, l;
  asm("rsb\t%2, %4, #0\n\t"
      "smull\t%0, %1, %3, %5\n\t"
      "smlal\t%0, %1, %2, %6\n\t"
      "smull\t%0, %2, %4, %5\n\t"
      "smlal\t%0, %2, %3, %6"
      : "=&r" (l), "=&r" (x1), "=&r" (y1)
      : "r" (a), "r" (b), "r" (t), "r" (v)
      : "cc" );
  *x = x1 << 1;
  MB();
  *y = y1 << 1;
}

#endif

#ifndef _V_CLIP_MATH
#define _V_CLIP_MATH

static inline ogg_int32_t CLIP_TO_15(ogg_int32_t x) {
  int tmp;
  asm volatile("subs\t%1, %0, #32768\n\t"
               "movpl\t%0, #0x7f00\n\t"
               "orrpl\t%0, %0, #0xff\n"
               "adds\t%1, %0, #32768\n\t"
               "movmi\t%0, #0x8000"
               : "+r"(x),"=r"(tmp)
               :
               : "cc");
  return(x);
}
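
/* For reference (not part of the build): CLIP_TO_15 saturates x to the
   signed 16-bit range, roughly

     if (x >  32767) x = 0x7fff;
     if (x < -32768) x = 0x8000;   // reads as -32768 once stored as 16 bits
     return x;
*/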

#endif

#ifndef _V_LSP_MATH_ASM
#define _V_LSP_MATH_ASM

static inline void lsp_loop_asm(ogg_uint32_t *qip,ogg_uint32_t *pip,
                                ogg_int32_t *qexpp,
                                ogg_int32_t *ilsp,ogg_int32_t wi,
                                ogg_int32_t m){

  ogg_uint32_t qi=*qip,pi=*pip;
  ogg_int32_t qexp=*qexpp;

  asm("mov     r0,%3;"
      "mov     r1,%5,asr#1;"
      "add     r0,r0,r1,lsl#3;"
      "1:"

      "ldmdb   r0!,{r1,r3};"
      "subs    r1,r1,%4;"          //ilsp[j]-wi
      "rsbmi   r1,r1,#0;"          //labs(ilsp[j]-wi)
      "umull   %0,r2,r1,%0;"       //qi*=labs(ilsp[j]-wi)

      "subs    r1,r3,%4;"          //ilsp[j+1]-wi
      "rsbmi   r1,r1,#0;"          //labs(ilsp[j+1]-wi)
      "umull   %1,r3,r1,%1;"       //pi*=labs(ilsp[j+1]-wi)

      "cmn     r2,r3;"             // shift down 16?
      "beq     0f;"
      "add     %2,%2,#16;"
      "mov     %0,%0,lsr #16;"
      "orr     %0,%0,r2,lsl #16;"
      "mov     %1,%1,lsr #16;"
      "orr     %1,%1,r3,lsl #16;"
      "0:"
      "cmp     r0,%3;\n"
      "bhi     1b;\n"

      // odd filter asymmetry
      "ands    r0,%5,#1;\n"
      "beq     2f;\n"
      "add     r0,%3,%5,lsl#2;\n"

      "ldr     r1,[r0,#-4];\n"
      "mov     r0,#0x4000;\n"

      "subs    r1,r1,%4;\n"          //ilsp[j]-wi
      "rsbmi   r1,r1,#0;\n"          //labs(ilsp[j]-wi)
      "umull   %0,r2,r1,%0;\n"       //qi*=labs(ilsp[j]-wi)
      "umull   %1,r3,r0,%1;\n"       //pi*=0x4000

      "cmn     r2,r3;\n"             // shift down 16?
      "beq     2f;\n"
      "add     %2,%2,#16;\n"
      "mov     %0,%0,lsr #16;\n"
      "orr     %0,%0,r2,lsl #16;\n"
      "mov     %1,%1,lsr #16;\n"
      "orr     %1,%1,r3,lsl #16;\n"

      //qi=(pi>>shift)*labs(ilsp[j]-wi);
      //pi=(qi>>shift)*labs(ilsp[j+1]-wi);
      //qexp+=shift;

      //}

      /* normalize to max 16 sig figs */
      "2:"
      "mov     r2,#0;"
      "orr     r1,%0,%1;"
      "tst     r1,#0xff000000;"
      "addne   r2,r2,#8;"
      "movne   r1,r1,lsr #8;"
      "tst     r1,#0x00f00000;"
      "addne   r2,r2,#4;"
      "movne   r1,r1,lsr #4;"
      "tst     r1,#0x000c0000;"
      "addne   r2,r2,#2;"
      "movne   r1,r1,lsr #2;"
      "tst     r1,#0x00020000;"
      "addne   r2,r2,#1;"
      "movne   r1,r1,lsr #1;"
      "tst     r1,#0x00010000;"
      "addne   r2,r2,#1;"
      "mov     %0,%0,lsr r2;"
      "mov     %1,%1,lsr r2;"
      "add     %2,%2,r2;"

      : "+r"(qi),"+r"(pi),"+r"(qexp)
      : "r"(ilsp),"r"(wi),"r"(m)
      : "r0","r1","r2","r3","cc");

  *qip=qi;
  *pip=pi;
  *qexpp=qexp;
}
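
/* For reference (not part of the build), a rough sketch of what the loop
   above computes; j is a hypothetical loop index:

     for (j = (m & ~1) - 2; j >= 0; j -= 2) {
       qi *= labs(ilsp[j]   - wi);        // 64-bit intermediate
       pi *= labs(ilsp[j+1] - wi);        // 64-bit intermediate
       if (either product no longer fits in 32 bits) {
         qi >>= 16;  pi >>= 16;  qexp += 16;
       }
     }
     if (m & 1) {                         // odd filter order: unpaired term
       qi *= labs(ilsp[m-1] - wi);
       pi *= 0x4000;
       // same conditional >>16 renormalisation as above
     }
     // finally shift qi and pi down together until at most 16 significant
     // bits remain, adding the shift count to qexp
*/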

static inline void lsp_norm_asm(ogg_uint32_t *qip,ogg_int32_t *qexpp){

  ogg_uint32_t qi=*qip;
  ogg_int32_t qexp=*qexpp;

  asm("tst     %0,#0x0000ff00;"
      "moveq   %0,%0,lsl #8;"
      "subeq   %1,%1,#8;"
      "tst     %0,#0x0000f000;"
      "moveq   %0,%0,lsl #4;"
      "subeq   %1,%1,#4;"
      "tst     %0,#0x0000c000;"
      "moveq   %0,%0,lsl #2;"
      "subeq   %1,%1,#2;"
      "tst     %0,#0x00008000;"
      "moveq   %0,%0,lsl #1;"
      "subeq   %1,%1,#1;"
      : "+r"(qi),"+r"(qexp)
      :
      : "cc");
  *qip=qi;
  *qexpp=qexp;
}
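
/* For reference (not part of the build): for a nonzero qi that already fits
   in 16 bits, lsp_norm_asm shifts qi left until bit 15 is set, decrementing
   qexp by the same amount; roughly
   while (qi < 0x8000) { qi <<= 1; qexp--; } */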

#endif
#endif