1/*
2 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
12#include "webrtc/typedefs.h"
13
// Applies the fixed reordering used for the 128-float buffer (n is 128).
//
// a[] is treated as consecutive (even, odd) float pairs. For every table
// entry {p, q} below, a[p] <-> a[q] and a[p + 1] <-> a[q + 1] are exchanged.
// Halving the listed indices gives pair numbers whose 6-bit representations
// are bit-reversals of each other; the 8 pair numbers that equal their own
// reversal (0, 12, 18, 30, 33, 45, 51, 63) are intentionally left in place.
//
// The 28 exchanges touch disjoint elements, so their order does not affect
// the result; the table keeps the order of the original unrolled code.
//
// a: buffer modified in place. The highest index accessed is 125, so the
//    caller must provide at least 126 floats (128 in normal use).
static void bitrv2_128_mips(float* a) {
  // Even (first-of-pair) float indices of the two pairs to exchange.
  static const unsigned char kSwapTable[28][2] = {
    {8, 16},    {64, 2},    {72, 18},   {80, 10},
    {88, 26},   {74, 82},   {32, 4},    {40, 20},
    {48, 12},   {56, 28},   {34, 68},   {42, 84},
    {50, 76},   {58, 92},   {44, 52},   {96, 6},
    {104, 22},  {112, 14},  {120, 30},  {98, 70},
    {106, 86},  {114, 78},  {122, 94},  {100, 38},
    {108, 54},  {116, 46},  {124, 62},  {110, 118},
  };
  const int kNumSwaps = (int)(sizeof(kSwapTable) / sizeof(kSwapTable[0]));
  int i;

  for (i = 0; i < kNumSwaps; ++i) {
    float* lhs = a + kSwapTable[i][0];
    float* rhs = a + kSwapTable[i][1];
    const float lhs_first = lhs[0];
    const float lhs_second = lhs[1];

    lhs[0] = rhs[0];
    lhs[1] = rhs[1];
    rhs[0] = lhs_first;
    rhs[1] = lhs_second;
  }
}
270
// Processes the float buffer a[0..127] in place with one MIPS inline-asm
// statement (presumably the first complex-FFT pass, going by the name
// "cft1st" - confirm against the generic C implementation).
//
// The buffer is consumed in groups of 8 floats:
//   - a[0..7]:    combined with additions/subtractions only.
//   - a[8..15]:   additions/subtractions, then four of the eight results are
//                 multiplied by rdft_w[2].
//   - a[16..127]: 7 loop iterations of 16 floats (two groups of 8) each; the
//                 results are multiplied by factors read from rdft_w,
//                 rdft_wk3ri_first and rdft_wk3ri_second.
//
// Table elements read: rdft_w[2..31], rdft_wk3ri_first[2..15] and
// rdft_wk3ri_second[2..15]. These tables are not defined in this block;
// presumably they are declared in aec_rdft.h - confirm.
//
// a: buffer of at least 128 floats, read and written in place.
//
// NOTE(review): every line of this block carries a decimal number fused onto
// its start (271, 272, ...). That looks like extraction residue and has to be
// stripped before the block can compile - confirm against the upstream file.
271static void cft1st_128_mips(float* a) {
  // f0..f8 are butterfly scratch values, f9..f14 hold table factors.
272  float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14;
  // NOTE(review): a_ptr, p1_rdft and p2_rdft hold addresses in plain int, which
  // assumes 32-bit pointers - confirm this file is only built for MIPS32.
273  int a_ptr, p1_rdft, p2_rdft, count;
  // Local cursors into the two tables; the asm advances them (see "+r" below).
274  const float* first = rdft_wk3ri_first;
275  const float* second = rdft_wk3ri_second;
276
277  __asm __volatile (
278    ".set       push                                                    \n\t"
    // noreorder: the assembler must not reorder instructions or fill branch
    // delay slots; the instruction written after the branch is the delay slot.
279    ".set       noreorder                                               \n\t"
    // a[0..7]: load all eight values, form sums/differences of (a0,a2),
    // (a4,a6), (a1,a3), (a5,a7), combine them once more and store the eight
    // results back into the same slots. No multiplications in this group.
280    // first 8
281    "lwc1       %[f0],        0(%[a])                                   \n\t"
282    "lwc1       %[f1],        4(%[a])                                   \n\t"
283    "lwc1       %[f2],        8(%[a])                                   \n\t"
284    "lwc1       %[f3],        12(%[a])                                  \n\t"
285    "lwc1       %[f4],        16(%[a])                                  \n\t"
286    "lwc1       %[f5],        20(%[a])                                  \n\t"
287    "lwc1       %[f6],        24(%[a])                                  \n\t"
288    "lwc1       %[f7],        28(%[a])                                  \n\t"
289    "add.s      %[f8],        %[f0],        %[f2]                       \n\t"
290    "sub.s      %[f0],        %[f0],        %[f2]                       \n\t"
291    "add.s      %[f2],        %[f4],        %[f6]                       \n\t"
292    "sub.s      %[f4],        %[f4],        %[f6]                       \n\t"
293    "add.s      %[f6],        %[f1],        %[f3]                       \n\t"
294    "sub.s      %[f1],        %[f1],        %[f3]                       \n\t"
295    "add.s      %[f3],        %[f5],        %[f7]                       \n\t"
296    "sub.s      %[f5],        %[f5],        %[f7]                       \n\t"
297    "add.s      %[f7],        %[f8],        %[f2]                       \n\t"
298    "sub.s      %[f8],        %[f8],        %[f2]                       \n\t"
299    "sub.s      %[f2],        %[f1],        %[f4]                       \n\t"
300    "add.s      %[f1],        %[f1],        %[f4]                       \n\t"
301    "add.s      %[f4],        %[f6],        %[f3]                       \n\t"
302    "sub.s      %[f6],        %[f6],        %[f3]                       \n\t"
303    "sub.s      %[f3],        %[f0],        %[f5]                       \n\t"
304    "add.s      %[f0],        %[f0],        %[f5]                       \n\t"
305    "swc1       %[f7],        0(%[a])                                   \n\t"
306    "swc1       %[f8],        16(%[a])                                  \n\t"
307    "swc1       %[f2],        28(%[a])                                  \n\t"
308    "swc1       %[f1],        12(%[a])                                  \n\t"
309    "swc1       %[f4],        4(%[a])                                   \n\t"
310    "swc1       %[f6],        20(%[a])                                  \n\t"
311    "swc1       %[f3],        8(%[a])                                   \n\t"
312    "swc1       %[f0],        24(%[a])                                  \n\t"
    // a[8..15]: same load / add / sub pattern; f9 = rdft_w[2] is loaded in the
    // middle of the arithmetic and scales four of the results further down.
313    // second 8
314    "lwc1       %[f0],        32(%[a])                                  \n\t"
315    "lwc1       %[f1],        36(%[a])                                  \n\t"
316    "lwc1       %[f2],        40(%[a])                                  \n\t"
317    "lwc1       %[f3],        44(%[a])                                  \n\t"
318    "lwc1       %[f4],        48(%[a])                                  \n\t"
319    "lwc1       %[f5],        52(%[a])                                  \n\t"
320    "lwc1       %[f6],        56(%[a])                                  \n\t"
321    "lwc1       %[f7],        60(%[a])                                  \n\t"
322    "add.s      %[f8],        %[f4],        %[f6]                       \n\t"
323    "sub.s      %[f4],        %[f4],        %[f6]                       \n\t"
324    "add.s      %[f6],        %[f1],        %[f3]                       \n\t"
325    "sub.s      %[f1],        %[f1],        %[f3]                       \n\t"
326    "add.s      %[f3],        %[f0],        %[f2]                       \n\t"
327    "sub.s      %[f0],        %[f0],        %[f2]                       \n\t"
328    "add.s      %[f2],        %[f5],        %[f7]                       \n\t"
329    "sub.s      %[f5],        %[f5],        %[f7]                       \n\t"
330    "add.s      %[f7],        %[f4],        %[f1]                       \n\t"
331    "sub.s      %[f4],        %[f4],        %[f1]                       \n\t"
332    "add.s      %[f1],        %[f3],        %[f8]                       \n\t"
333    "sub.s      %[f3],        %[f3],        %[f8]                       \n\t"
334    "sub.s      %[f8],        %[f0],        %[f5]                       \n\t"
335    "add.s      %[f0],        %[f0],        %[f5]                       \n\t"
336    "add.s      %[f5],        %[f6],        %[f2]                       \n\t"
337    "sub.s      %[f6],        %[f2],        %[f6]                       \n\t"
338    "lwc1       %[f9],        8(%[rdft_w])                              \n\t"
339    "sub.s      %[f2],        %[f8],        %[f7]                       \n\t"
340    "add.s      %[f8],        %[f8],        %[f7]                       \n\t"
341    "sub.s      %[f7],        %[f4],        %[f0]                       \n\t"
342    "add.s      %[f4],        %[f4],        %[f0]                       \n\t"
    // Loop setup, interleaved with the tail of the second group:
    //   a_ptr   = &a[16]      (byte offset 64)
    //   p1_rdft = &rdft_w[2]  (byte offset 8)
    //   p2_rdft = &rdft_w[4]  (byte offset 16)
    //   count   = 7 iterations
343    // prepare for loop
344    "addiu      %[a_ptr],     %[a],         64                          \n\t"
345    "addiu      %[p1_rdft],   %[rdft_w],    8                           \n\t"
346    "addiu      %[p2_rdft],   %[rdft_w],    16                          \n\t"
347    "addiu      %[count],     $zero,        7                           \n\t"
    // Scale four results by f9 (rdft_w[2]) and store all of a[8..15].
348    // finish second 8
349    "mul.s      %[f2],        %[f9],        %[f2]                       \n\t"
350    "mul.s      %[f8],        %[f9],        %[f8]                       \n\t"
351    "mul.s      %[f7],        %[f9],        %[f7]                       \n\t"
352    "mul.s      %[f4],        %[f9],        %[f4]                       \n\t"
353    "swc1       %[f1],        32(%[a])                                  \n\t"
354    "swc1       %[f3],        52(%[a])                                  \n\t"
355    "swc1       %[f5],        36(%[a])                                  \n\t"
356    "swc1       %[f6],        48(%[a])                                  \n\t"
357    "swc1       %[f2],        40(%[a])                                  \n\t"
358    "swc1       %[f8],        44(%[a])                                  \n\t"
359    "swc1       %[f7],        56(%[a])                                  \n\t"
360    "swc1       %[f4],        60(%[a])                                  \n\t"
    // Each iteration handles 16 floats at a_ptr: a first group of 8 (byte
    // offsets 0..28) followed by a second group of 8 (byte offsets 32..60).
    // On entry f9 holds rdft_w[2] (first iteration) or the value reloaded at
    // the bottom of the previous iteration.
361    // loop
362   "1:                                                                  \n\t"
363    "lwc1       %[f0],        0(%[a_ptr])                               \n\t"
364    "lwc1       %[f1],        4(%[a_ptr])                               \n\t"
365    "lwc1       %[f2],        8(%[a_ptr])                               \n\t"
366    "lwc1       %[f3],        12(%[a_ptr])                              \n\t"
367    "lwc1       %[f4],        16(%[a_ptr])                              \n\t"
368    "lwc1       %[f5],        20(%[a_ptr])                              \n\t"
369    "lwc1       %[f6],        24(%[a_ptr])                              \n\t"
370    "lwc1       %[f7],        28(%[a_ptr])                              \n\t"
371    "add.s      %[f8],        %[f0],        %[f2]                       \n\t"
372    "sub.s      %[f0],        %[f0],        %[f2]                       \n\t"
373    "add.s      %[f2],        %[f4],        %[f6]                       \n\t"
374    "sub.s      %[f4],        %[f4],        %[f6]                       \n\t"
375    "add.s      %[f6],        %[f1],        %[f3]                       \n\t"
376    "sub.s      %[f1],        %[f1],        %[f3]                       \n\t"
377    "add.s      %[f3],        %[f5],        %[f7]                       \n\t"
378    "sub.s      %[f5],        %[f5],        %[f7]                       \n\t"
    // Factors for the first group: f10 = p1_rdft[1], f11/f12 = p2_rdft[0..1],
    // f13/f14 = first[2..3].
379    "lwc1       %[f10],       4(%[p1_rdft])                             \n\t"
380    "lwc1       %[f11],       0(%[p2_rdft])                             \n\t"
381    "lwc1       %[f12],       4(%[p2_rdft])                             \n\t"
382    "lwc1       %[f13],       8(%[first])                               \n\t"
383    "lwc1       %[f14],       12(%[first])                              \n\t"
384    "add.s      %[f7],        %[f8],        %[f2]                       \n\t"
385    "sub.s      %[f8],        %[f8],        %[f2]                       \n\t"
386    "add.s      %[f2],        %[f6],        %[f3]                       \n\t"
387    "sub.s      %[f6],        %[f6],        %[f3]                       \n\t"
388    "add.s      %[f3],        %[f0],        %[f5]                       \n\t"
389    "sub.s      %[f0],        %[f0],        %[f5]                       \n\t"
390    "add.s      %[f5],        %[f1],        %[f4]                       \n\t"
391    "sub.s      %[f1],        %[f1],        %[f4]                       \n\t"
    // The first two outputs of the group need no multiplication.
392    "swc1       %[f7],        0(%[a_ptr])                               \n\t"
393    "swc1       %[f2],        4(%[a_ptr])                               \n\t"
394    "mul.s      %[f4],        %[f9],        %[f8]                       \n\t"
    // Three two-term product combinations using (f9, f10), (f11, f12) and
    // (f13, f14). With MIPS32_R2_LE defined the second product of each term
    // is folded in with madd.s / nmsub.s; otherwise separate mul.s + add.s /
    // sub.s instructions are used. Both paths leave their results in f4, f8,
    // f7, f0, f2 and f3 for the stores that follow.
395#if defined(MIPS32_R2_LE)
396    "mul.s      %[f8],        %[f10],       %[f8]                       \n\t"
397    "mul.s      %[f7],        %[f11],       %[f0]                       \n\t"
398    "mul.s      %[f0],        %[f12],       %[f0]                       \n\t"
399    "mul.s      %[f2],        %[f13],       %[f3]                       \n\t"
400    "mul.s      %[f3],        %[f14],       %[f3]                       \n\t"
401    "nmsub.s    %[f4],        %[f4],        %[f10],       %[f6]         \n\t"
402    "madd.s     %[f8],        %[f8],        %[f9],        %[f6]         \n\t"
403    "nmsub.s    %[f7],        %[f7],        %[f12],       %[f5]         \n\t"
404    "madd.s     %[f0],        %[f0],        %[f11],       %[f5]         \n\t"
405    "nmsub.s    %[f2],        %[f2],        %[f14],       %[f1]         \n\t"
406    "madd.s     %[f3],        %[f3],        %[f13],       %[f1]         \n\t"
407#else
408    "mul.s      %[f7],        %[f10],       %[f6]                       \n\t"
409    "mul.s      %[f6],        %[f9],        %[f6]                       \n\t"
410    "mul.s      %[f8],        %[f10],       %[f8]                       \n\t"
411    "mul.s      %[f2],        %[f11],       %[f0]                       \n\t"
412    "mul.s      %[f11],       %[f11],       %[f5]                       \n\t"
413    "mul.s      %[f5],        %[f12],       %[f5]                       \n\t"
414    "mul.s      %[f0],        %[f12],       %[f0]                       \n\t"
415    "mul.s      %[f12],       %[f13],       %[f3]                       \n\t"
416    "mul.s      %[f13],       %[f13],       %[f1]                       \n\t"
417    "mul.s      %[f1],        %[f14],       %[f1]                       \n\t"
418    "mul.s      %[f3],        %[f14],       %[f3]                       \n\t"
419    "sub.s      %[f4],        %[f4],        %[f7]                       \n\t"
420    "add.s      %[f8],        %[f6],        %[f8]                       \n\t"
421    "sub.s      %[f7],        %[f2],        %[f5]                       \n\t"
422    "add.s      %[f0],        %[f11],       %[f0]                       \n\t"
423    "sub.s      %[f2],        %[f12],       %[f1]                       \n\t"
424    "add.s      %[f3],        %[f13],       %[f3]                       \n\t"
425#endif
426    "swc1       %[f4],        16(%[a_ptr])                              \n\t"
427    "swc1       %[f8],        20(%[a_ptr])                              \n\t"
428    "swc1       %[f7],        8(%[a_ptr])                               \n\t"
429    "swc1       %[f0],        12(%[a_ptr])                              \n\t"
430    "swc1       %[f2],        24(%[a_ptr])                              \n\t"
431    "swc1       %[f3],        28(%[a_ptr])                              \n\t"
    // Second group of this iteration (byte offsets 32..60 from a_ptr).
432    "lwc1       %[f0],        32(%[a_ptr])                              \n\t"
433    "lwc1       %[f1],        36(%[a_ptr])                              \n\t"
434    "lwc1       %[f2],        40(%[a_ptr])                              \n\t"
435    "lwc1       %[f3],        44(%[a_ptr])                              \n\t"
436    "lwc1       %[f4],        48(%[a_ptr])                              \n\t"
437    "lwc1       %[f5],        52(%[a_ptr])                              \n\t"
438    "lwc1       %[f6],        56(%[a_ptr])                              \n\t"
439    "lwc1       %[f7],        60(%[a_ptr])                              \n\t"
440    "add.s      %[f8],        %[f0],        %[f2]                       \n\t"
441    "sub.s      %[f0],        %[f0],        %[f2]                       \n\t"
442    "add.s      %[f2],        %[f4],        %[f6]                       \n\t"
443    "sub.s      %[f4],        %[f4],        %[f6]                       \n\t"
444    "add.s      %[f6],        %[f1],        %[f3]                       \n\t"
445    "sub.s      %[f1],        %[f1],        %[f3]                       \n\t"
446    "add.s      %[f3],        %[f5],        %[f7]                       \n\t"
447    "sub.s      %[f5],        %[f5],        %[f7]                       \n\t"
    // Factors for the second group: f11/f12 = p2_rdft[2..3],
    // f13/f14 = second[2..3]; f9 and f10 are reused from the first group.
448    "lwc1       %[f11],       8(%[p2_rdft])                             \n\t"
449    "lwc1       %[f12],       12(%[p2_rdft])                            \n\t"
450    "lwc1       %[f13],       8(%[second])                              \n\t"
451    "lwc1       %[f14],       12(%[second])                             \n\t"
452    "add.s      %[f7],        %[f8],        %[f2]                       \n\t"
    // Unlike the first group, these two differences are taken with the
    // operands swapped (f2 - f8 and f3 - f6).
453    "sub.s      %[f8],        %[f2],        %[f8]                       \n\t"
454    "add.s      %[f2],        %[f6],        %[f3]                       \n\t"
455    "sub.s      %[f6],        %[f3],        %[f6]                       \n\t"
456    "add.s      %[f3],        %[f0],        %[f5]                       \n\t"
457    "sub.s      %[f0],        %[f0],        %[f5]                       \n\t"
458    "add.s      %[f5],        %[f1],        %[f4]                       \n\t"
459    "sub.s      %[f1],        %[f1],        %[f4]                       \n\t"
460    "swc1       %[f7],        32(%[a_ptr])                              \n\t"
461    "swc1       %[f2],        36(%[a_ptr])                              \n\t"
462    "mul.s      %[f4],        %[f10],       %[f8]                       \n\t"
    // Same two code paths as above. Both leave their results in f4, f10, f7,
    // f11, f2 and f13. The non-R2 path overwrites f9; it is reloaded below
    // before the next iteration needs it.
463#if defined(MIPS32_R2_LE)
464    "mul.s      %[f10],       %[f10],       %[f6]                       \n\t"
465    "mul.s      %[f7],        %[f11],       %[f0]                       \n\t"
466    "mul.s      %[f11],       %[f11],       %[f5]                       \n\t"
467    "mul.s      %[f2],        %[f13],       %[f3]                       \n\t"
468    "mul.s      %[f13],       %[f13],       %[f1]                       \n\t"
469    "madd.s     %[f4],        %[f4],        %[f9],        %[f6]         \n\t"
470    "nmsub.s    %[f10],       %[f10],       %[f9],        %[f8]         \n\t"
471    "nmsub.s    %[f7],        %[f7],        %[f12],       %[f5]         \n\t"
472    "madd.s     %[f11],       %[f11],       %[f12],       %[f0]         \n\t"
473    "nmsub.s    %[f2],        %[f2],        %[f14],       %[f1]         \n\t"
474    "madd.s     %[f13],       %[f13],       %[f14],       %[f3]         \n\t"
475#else
476    "mul.s      %[f2],        %[f9],        %[f6]                       \n\t"
477    "mul.s      %[f10],       %[f10],       %[f6]                       \n\t"
478    "mul.s      %[f9],        %[f9],        %[f8]                       \n\t"
479    "mul.s      %[f7],        %[f11],       %[f0]                       \n\t"
480    "mul.s      %[f8],        %[f12],       %[f5]                       \n\t"
481    "mul.s      %[f11],       %[f11],       %[f5]                       \n\t"
482    "mul.s      %[f12],       %[f12],       %[f0]                       \n\t"
483    "mul.s      %[f5],        %[f13],       %[f3]                       \n\t"
484    "mul.s      %[f0],        %[f14],       %[f1]                       \n\t"
485    "mul.s      %[f13],       %[f13],       %[f1]                       \n\t"
486    "mul.s      %[f14],       %[f14],       %[f3]                       \n\t"
487    "add.s      %[f4],        %[f4],        %[f2]                       \n\t"
488    "sub.s      %[f10],       %[f10],       %[f9]                       \n\t"
489    "sub.s      %[f7],        %[f7],        %[f8]                       \n\t"
490    "add.s      %[f11],       %[f11],       %[f12]                      \n\t"
491    "sub.s      %[f2],        %[f5],        %[f0]                       \n\t"
492    "add.s      %[f13],       %[f13],       %[f14]                      \n\t"
493#endif
494    "swc1       %[f4],        48(%[a_ptr])                              \n\t"
495    "swc1       %[f10],       52(%[a_ptr])                              \n\t"
496    "swc1       %[f7],        40(%[a_ptr])                              \n\t"
497    "swc1       %[f11],       44(%[a_ptr])                              \n\t"
498    "swc1       %[f2],        56(%[a_ptr])                              \n\t"
499    "swc1       %[f13],       60(%[a_ptr])                              \n\t"
    // Loop bookkeeping: decrement the counter, reload f9 for the next
    // iteration from p1_rdft[2] (read before p1_rdft is advanced), then step
    // a_ptr by 16 floats, p1_rdft by 2 floats, p2_rdft by 4 floats and
    // first/second by 2 floats each.
500    "addiu      %[count],     %[count],     -1                          \n\t"
501    "lwc1       %[f9],        8(%[p1_rdft])                             \n\t"
502    "addiu      %[a_ptr],     %[a_ptr],     64                          \n\t"
503    "addiu      %[p1_rdft],   %[p1_rdft],   8                           \n\t"
504    "addiu      %[p2_rdft],   %[p2_rdft],   16                          \n\t"
505    "addiu      %[first],     %[first],     8                           \n\t"
    // Branch back while count > 0. The addiu on the next line sits in the
    // branch delay slot, so 'second' is advanced on every pass, taken or not.
506    "bgtz       %[count],     1b                                        \n\t"
507    " addiu     %[second],    %[second],    8                           \n\t"
508    ".set       pop                                                     \n\t"
    // Outputs: all float and integer temporaries are write-only early-clobber
    // ("=&"), so they are not allocated to the registers of the inputs.
    // 'first' and 'second' are read-write ("+r") because the asm advances them.
509    : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
510      [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
511      [f8] "=&f" (f8), [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11),
512      [f12] "=&f" (f12), [f13] "=&f" (f13), [f14] "=&f" (f14),
513      [a_ptr] "=&r" (a_ptr), [p1_rdft] "=&r" (p1_rdft), [first] "+r" (first),
514      [p2_rdft] "=&r" (p2_rdft), [count] "=&r" (count), [second] "+r" (second)
    // Inputs: base addresses of the data buffer and of the rdft_w table.
515    : [a] "r" (a), [rdft_w] "r" (rdft_w)
    // "memory": the asm reads and writes a[] through pointers the compiler
    // cannot see, so cached memory values must not be kept across it.
516    : "memory"
517  );
518}
519
520static void cftmdl_128_mips(float* a) {
521  float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14;
522  int tmp_a, count;
523  __asm __volatile (
524    ".set       push                                      \n\t"
525    ".set       noreorder                                 \n\t"
526    "addiu      %[tmp_a],   %[a],         0               \n\t"
527    "addiu      %[count],   $zero,        4               \n\t"
528   "1:                                                    \n\t"
529    "addiu      %[count],   %[count],     -1              \n\t"
530    "lwc1       %[f0],      0(%[tmp_a])                   \n\t"
531    "lwc1       %[f2],      32(%[tmp_a])                  \n\t"
532    "lwc1       %[f4],      64(%[tmp_a])                  \n\t"
533    "lwc1       %[f6],      96(%[tmp_a])                  \n\t"
534    "lwc1       %[f1],      4(%[tmp_a])                   \n\t"
535    "lwc1       %[f3],      36(%[tmp_a])                  \n\t"
536    "lwc1       %[f5],      68(%[tmp_a])                  \n\t"
537    "lwc1       %[f7],      100(%[tmp_a])                 \n\t"
538    "add.s      %[f8],      %[f0],        %[f2]           \n\t"
539    "sub.s      %[f0],      %[f0],        %[f2]           \n\t"
540    "add.s      %[f2],      %[f4],        %[f6]           \n\t"
541    "sub.s      %[f4],      %[f4],        %[f6]           \n\t"
542    "add.s      %[f6],      %[f1],        %[f3]           \n\t"
543    "sub.s      %[f1],      %[f1],        %[f3]           \n\t"
544    "add.s      %[f3],      %[f5],        %[f7]           \n\t"
545    "sub.s      %[f5],      %[f5],        %[f7]           \n\t"
546    "add.s      %[f7],      %[f8],        %[f2]           \n\t"
547    "sub.s      %[f8],      %[f8],        %[f2]           \n\t"
548    "add.s      %[f2],      %[f1],        %[f4]           \n\t"
549    "sub.s      %[f1],      %[f1],        %[f4]           \n\t"
550    "add.s      %[f4],      %[f6],        %[f3]           \n\t"
551    "sub.s      %[f6],      %[f6],        %[f3]           \n\t"
552    "sub.s      %[f3],      %[f0],        %[f5]           \n\t"
553    "add.s      %[f0],      %[f0],        %[f5]           \n\t"
554    "swc1       %[f7],      0(%[tmp_a])                   \n\t"
555    "swc1       %[f8],      64(%[tmp_a])                  \n\t"
556    "swc1       %[f2],      36(%[tmp_a])                  \n\t"
557    "swc1       %[f1],      100(%[tmp_a])                 \n\t"
558    "swc1       %[f4],      4(%[tmp_a])                   \n\t"
559    "swc1       %[f6],      68(%[tmp_a])                  \n\t"
560    "swc1       %[f3],      32(%[tmp_a])                  \n\t"
561    "swc1       %[f0],      96(%[tmp_a])                  \n\t"
562    "bgtz       %[count],   1b                            \n\t"
563    " addiu     %[tmp_a],   %[tmp_a],     8               \n\t"
564    ".set       pop                                       \n\t"
565    : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
566      [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
567      [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count)
568    : [a] "r" (a)
569    : "memory"
570  );
571  f9 = rdft_w[2];
572  __asm __volatile (
573    ".set       push                                      \n\t"
574    ".set       noreorder                                 \n\t"
575    "addiu      %[tmp_a],   %[a],         128             \n\t"
576    "addiu      %[count],   $zero,        4               \n\t"
577   "1:                                                    \n\t"
578    "addiu      %[count],   %[count],     -1              \n\t"
579    "lwc1       %[f0],      0(%[tmp_a])                   \n\t"
580    "lwc1       %[f2],      32(%[tmp_a])                  \n\t"
581    "lwc1       %[f5],      68(%[tmp_a])                  \n\t"
582    "lwc1       %[f7],      100(%[tmp_a])                 \n\t"
583    "lwc1       %[f1],      4(%[tmp_a])                   \n\t"
584    "lwc1       %[f3],      36(%[tmp_a])                  \n\t"
585    "lwc1       %[f4],      64(%[tmp_a])                  \n\t"
586    "lwc1       %[f6],      96(%[tmp_a])                  \n\t"
587    "sub.s      %[f8],      %[f0],        %[f2]           \n\t"
588    "add.s      %[f0],      %[f0],        %[f2]           \n\t"
589    "sub.s      %[f2],      %[f5],        %[f7]           \n\t"
590    "add.s      %[f5],      %[f5],        %[f7]           \n\t"
591    "sub.s      %[f7],      %[f1],        %[f3]           \n\t"
592    "add.s      %[f1],      %[f1],        %[f3]           \n\t"
593    "sub.s      %[f3],      %[f4],        %[f6]           \n\t"
594    "add.s      %[f4],      %[f4],        %[f6]           \n\t"
595    "sub.s      %[f6],      %[f8],        %[f2]           \n\t"
596    "add.s      %[f8],      %[f8],        %[f2]           \n\t"
597    "add.s      %[f2],      %[f5],        %[f1]           \n\t"
598    "sub.s      %[f5],      %[f5],        %[f1]           \n\t"
599    "add.s      %[f1],      %[f3],        %[f7]           \n\t"
600    "sub.s      %[f3],      %[f3],        %[f7]           \n\t"
601    "add.s      %[f7],      %[f0],        %[f4]           \n\t"
602    "sub.s      %[f0],      %[f0],        %[f4]           \n\t"
603    "sub.s      %[f4],      %[f6],        %[f1]           \n\t"
604    "add.s      %[f6],      %[f6],        %[f1]           \n\t"
605    "sub.s      %[f1],      %[f3],        %[f8]           \n\t"
606    "add.s      %[f3],      %[f3],        %[f8]           \n\t"
607    "mul.s      %[f4],      %[f4],        %[f9]           \n\t"
608    "mul.s      %[f6],      %[f6],        %[f9]           \n\t"
609    "mul.s      %[f1],      %[f1],        %[f9]           \n\t"
610    "mul.s      %[f3],      %[f3],        %[f9]           \n\t"
611    "swc1       %[f7],      0(%[tmp_a])                   \n\t"
612    "swc1       %[f2],      4(%[tmp_a])                   \n\t"
613    "swc1       %[f5],      64(%[tmp_a])                  \n\t"
614    "swc1       %[f0],      68(%[tmp_a])                  \n\t"
615    "swc1       %[f4],      32(%[tmp_a])                  \n\t"
616    "swc1       %[f6],      36(%[tmp_a])                  \n\t"
617    "swc1       %[f1],      96(%[tmp_a])                  \n\t"
618    "swc1       %[f3],      100(%[tmp_a])                 \n\t"
619    "bgtz       %[count],   1b                            \n\t"
620    " addiu     %[tmp_a],   %[tmp_a],     8               \n\t"
621    ".set       pop                                       \n\t"
622    : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
623      [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
624      [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count)
625    : [a] "r" (a), [f9] "f" (f9)
626    : "memory"
627  );
628  f10 = rdft_w[3];
629  f11 = rdft_w[4];
630  f12 = rdft_w[5];
631  f13 = rdft_wk3ri_first[2];
632  f14 = rdft_wk3ri_first[3];
633
634  __asm __volatile (
635    ".set       push                                                    \n\t"
636    ".set       noreorder                                               \n\t"
637    "addiu      %[tmp_a],     %[a],         256                         \n\t"
638    "addiu      %[count],     $zero,        4                           \n\t"
639   "1:                                                                  \n\t"
640    "addiu      %[count],     %[count],     -1                          \n\t"
641    "lwc1       %[f0],        0(%[tmp_a])                               \n\t"
642    "lwc1       %[f2],        32(%[tmp_a])                              \n\t"
643    "lwc1       %[f4],        64(%[tmp_a])                              \n\t"
644    "lwc1       %[f6],        96(%[tmp_a])                              \n\t"
645    "lwc1       %[f1],        4(%[tmp_a])                               \n\t"
646    "lwc1       %[f3],        36(%[tmp_a])                              \n\t"
647    "lwc1       %[f5],        68(%[tmp_a])                              \n\t"
648    "lwc1       %[f7],        100(%[tmp_a])                             \n\t"
649    "add.s      %[f8],        %[f0],        %[f2]                       \n\t"
650    "sub.s      %[f0],        %[f0],        %[f2]                       \n\t"
651    "add.s      %[f2],        %[f4],        %[f6]                       \n\t"
652    "sub.s      %[f4],        %[f4],        %[f6]                       \n\t"
653    "add.s      %[f6],        %[f1],        %[f3]                       \n\t"
654    "sub.s      %[f1],        %[f1],        %[f3]                       \n\t"
655    "add.s      %[f3],        %[f5],        %[f7]                       \n\t"
656    "sub.s      %[f5],        %[f5],        %[f7]                       \n\t"
657    "sub.s      %[f7],        %[f8],        %[f2]                       \n\t"
658    "add.s      %[f8],        %[f8],        %[f2]                       \n\t"
659    "add.s      %[f2],        %[f1],        %[f4]                       \n\t"
660    "sub.s      %[f1],        %[f1],        %[f4]                       \n\t"
661    "sub.s      %[f4],        %[f6],        %[f3]                       \n\t"
662    "add.s      %[f6],        %[f6],        %[f3]                       \n\t"
663    "sub.s      %[f3],        %[f0],        %[f5]                       \n\t"
664    "add.s      %[f0],        %[f0],        %[f5]                       \n\t"
665    "swc1       %[f8],        0(%[tmp_a])                               \n\t"
666    "swc1       %[f6],        4(%[tmp_a])                               \n\t"
667    "mul.s      %[f5],        %[f9],        %[f7]                       \n\t"
668#if defined(MIPS32_R2_LE)
669    "mul.s      %[f7],        %[f10],       %[f7]                       \n\t"
670    "mul.s      %[f8],        %[f11],       %[f3]                       \n\t"
671    "mul.s      %[f3],        %[f12],       %[f3]                       \n\t"
672    "mul.s      %[f6],        %[f13],       %[f0]                       \n\t"
673    "mul.s      %[f0],        %[f14],       %[f0]                       \n\t"
674    "nmsub.s    %[f5],        %[f5],        %[f10],       %[f4]         \n\t"
675    "madd.s     %[f7],        %[f7],        %[f9],        %[f4]         \n\t"
676    "nmsub.s    %[f8],        %[f8],        %[f12],       %[f2]         \n\t"
677    "madd.s     %[f3],        %[f3],        %[f11],       %[f2]         \n\t"
678    "nmsub.s    %[f6],        %[f6],        %[f14],       %[f1]         \n\t"
679    "madd.s     %[f0],        %[f0],        %[f13],       %[f1]         \n\t"
680    "swc1       %[f5],        64(%[tmp_a])                              \n\t"
681    "swc1       %[f7],        68(%[tmp_a])                              \n\t"
682#else
683    "mul.s      %[f8],        %[f10],       %[f4]                       \n\t"
684    "mul.s      %[f4],        %[f9],        %[f4]                       \n\t"
685    "mul.s      %[f7],        %[f10],       %[f7]                       \n\t"
686    "mul.s      %[f6],        %[f11],       %[f3]                       \n\t"
687    "mul.s      %[f3],        %[f12],       %[f3]                       \n\t"
688    "sub.s      %[f5],        %[f5],        %[f8]                       \n\t"
689    "mul.s      %[f8],        %[f12],       %[f2]                       \n\t"
690    "mul.s      %[f2],        %[f11],       %[f2]                       \n\t"
691    "add.s      %[f7],        %[f4],        %[f7]                       \n\t"
692    "mul.s      %[f4],        %[f13],       %[f0]                       \n\t"
693    "mul.s      %[f0],        %[f14],       %[f0]                       \n\t"
694    "sub.s      %[f8],        %[f6],        %[f8]                       \n\t"
695    "mul.s      %[f6],        %[f14],       %[f1]                       \n\t"
696    "mul.s      %[f1],        %[f13],       %[f1]                       \n\t"
697    "add.s      %[f3],        %[f2],        %[f3]                       \n\t"
698    "swc1       %[f5],        64(%[tmp_a])                              \n\t"
699    "swc1       %[f7],        68(%[tmp_a])                              \n\t"
700    "sub.s      %[f6],        %[f4],        %[f6]                       \n\t"
701    "add.s      %[f0],        %[f1],        %[f0]                       \n\t"
702#endif
703    "swc1       %[f8],        32(%[tmp_a])                              \n\t"
704    "swc1       %[f3],        36(%[tmp_a])                              \n\t"
705    "swc1       %[f6],        96(%[tmp_a])                              \n\t"
706    "swc1       %[f0],        100(%[tmp_a])                             \n\t"
707    "bgtz       %[count],     1b                                        \n\t"
708    " addiu     %[tmp_a],     %[tmp_a],     8                           \n\t"
709    ".set       pop                                                     \n\t"
710    : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
711      [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
712      [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count)
713    : [a] "r" (a),  [f9] "f" (f9), [f10] "f" (f10), [f11] "f" (f11),
714      [f12] "f" (f12), [f13] "f" (f13), [f14] "f" (f14)
715    : "memory"
716  );
717  f11 = rdft_w[6];
718  f12 = rdft_w[7];
719  f13 = rdft_wk3ri_second[2];
720  f14 = rdft_wk3ri_second[3];
721  __asm __volatile (
722    ".set       push                                                       \n\t"
723    ".set       noreorder                                                  \n\t"
724    "addiu      %[tmp_a],       %[a],           384                        \n\t"
725    "addiu      %[count],       $zero,          4                          \n\t"
726   "1:                                                                     \n\t"
727    "addiu      %[count],       %[count],       -1                         \n\t"
728    "lwc1       %[f0],          0(%[tmp_a])                                \n\t"
729    "lwc1       %[f1],          4(%[tmp_a])                                \n\t"
730    "lwc1       %[f2],          32(%[tmp_a])                               \n\t"
731    "lwc1       %[f3],          36(%[tmp_a])                               \n\t"
732    "lwc1       %[f4],          64(%[tmp_a])                               \n\t"
733    "lwc1       %[f5],          68(%[tmp_a])                               \n\t"
734    "lwc1       %[f6],          96(%[tmp_a])                               \n\t"
735    "lwc1       %[f7],          100(%[tmp_a])                              \n\t"
736    "add.s      %[f8],          %[f0],          %[f2]                      \n\t"
737    "sub.s      %[f0],          %[f0],          %[f2]                      \n\t"
738    "add.s      %[f2],          %[f4],          %[f6]                      \n\t"
739    "sub.s      %[f4],          %[f4],          %[f6]                      \n\t"
740    "add.s      %[f6],          %[f1],          %[f3]                      \n\t"
741    "sub.s      %[f1],          %[f1],          %[f3]                      \n\t"
742    "add.s      %[f3],          %[f5],          %[f7]                      \n\t"
743    "sub.s      %[f5],          %[f5],          %[f7]                      \n\t"
744    "sub.s      %[f7],          %[f2],          %[f8]                      \n\t"
745    "add.s      %[f2],          %[f2],          %[f8]                      \n\t"
746    "add.s      %[f8],          %[f1],          %[f4]                      \n\t"
747    "sub.s      %[f1],          %[f1],          %[f4]                      \n\t"
748    "sub.s      %[f4],          %[f3],          %[f6]                      \n\t"
749    "add.s      %[f3],          %[f3],          %[f6]                      \n\t"
750    "sub.s      %[f6],          %[f0],          %[f5]                      \n\t"
751    "add.s      %[f0],          %[f0],          %[f5]                      \n\t"
752    "swc1       %[f2],          0(%[tmp_a])                                \n\t"
753    "swc1       %[f3],          4(%[tmp_a])                                \n\t"
754    "mul.s      %[f5],          %[f10],         %[f7]                      \n\t"
755#if defined(MIPS32_R2_LE)
756    "mul.s      %[f7],          %[f9],          %[f7]                      \n\t"
757    "mul.s      %[f2],          %[f12],         %[f8]                      \n\t"
758    "mul.s      %[f8],          %[f11],         %[f8]                      \n\t"
759    "mul.s      %[f3],          %[f14],         %[f1]                      \n\t"
760    "mul.s      %[f1],          %[f13],         %[f1]                      \n\t"
761    "madd.s     %[f5],          %[f5],          %[f9],       %[f4]         \n\t"
762    "msub.s     %[f7],          %[f7],          %[f10],      %[f4]         \n\t"
763    "msub.s     %[f2],          %[f2],          %[f11],      %[f6]         \n\t"
764    "madd.s     %[f8],          %[f8],          %[f12],      %[f6]         \n\t"
765    "msub.s     %[f3],          %[f3],          %[f13],      %[f0]         \n\t"
766    "madd.s     %[f1],          %[f1],          %[f14],      %[f0]         \n\t"
767    "swc1       %[f5],          64(%[tmp_a])                               \n\t"
768    "swc1       %[f7],          68(%[tmp_a])                               \n\t"
769#else
770    "mul.s      %[f2],          %[f9],          %[f4]                      \n\t"
771    "mul.s      %[f4],          %[f10],         %[f4]                      \n\t"
772    "mul.s      %[f7],          %[f9],          %[f7]                      \n\t"
773    "mul.s      %[f3],          %[f11],         %[f6]                      \n\t"
774    "mul.s      %[f6],          %[f12],         %[f6]                      \n\t"
775    "add.s      %[f5],          %[f5],          %[f2]                      \n\t"
776    "sub.s      %[f7],          %[f4],          %[f7]                      \n\t"
777    "mul.s      %[f2],          %[f12],         %[f8]                      \n\t"
778    "mul.s      %[f8],          %[f11],         %[f8]                      \n\t"
779    "mul.s      %[f4],          %[f14],         %[f1]                      \n\t"
780    "mul.s      %[f1],          %[f13],         %[f1]                      \n\t"
781    "sub.s      %[f2],          %[f3],          %[f2]                      \n\t"
782    "mul.s      %[f3],          %[f13],         %[f0]                      \n\t"
783    "mul.s      %[f0],          %[f14],         %[f0]                      \n\t"
784    "add.s      %[f8],          %[f8],          %[f6]                      \n\t"
785    "swc1       %[f5],          64(%[tmp_a])                               \n\t"
786    "swc1       %[f7],          68(%[tmp_a])                               \n\t"
787    "sub.s      %[f3],          %[f3],          %[f4]                      \n\t"
788    "add.s      %[f1],          %[f1],          %[f0]                      \n\t"
789#endif
790    "swc1       %[f2],          32(%[tmp_a])                               \n\t"
791    "swc1       %[f8],          36(%[tmp_a])                               \n\t"
792    "swc1       %[f3],          96(%[tmp_a])                               \n\t"
793    "swc1       %[f1],          100(%[tmp_a])                              \n\t"
794    "bgtz       %[count],       1b                                         \n\t"
795    " addiu     %[tmp_a],       %[tmp_a],       8                          \n\t"
796    ".set       pop                                                        \n\t"
797    : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
798      [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
799      [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count)
800    : [a] "r" (a), [f9] "f" (f9), [f10] "f" (f10), [f11] "f" (f11),
801      [f12] "f" (f12), [f13] "f" (f13), [f14] "f" (f14)
802    : "memory"
803  );
804}
805
// Runs cft1st_128(a) and cftmdl_128(a), then applies a final in-place
// combining pass over a[0..127] using MIPS FPU inline assembly.
//
// The pass runs 16 iterations. With j = 0, 2, ..., 30 (float index) each
// iteration reads the four (even, odd) float pairs at j, j+32, j+64 and j+96
// and writes them back as:
//
//   x0r = a[j]    + a[j+32];   x0i = a[j+1]  + a[j+33];
//   x1r = a[j]    - a[j+32];   x1i = a[j+1]  - a[j+33];
//   x2r = a[j+64] + a[j+96];   x2i = a[j+65] + a[j+97];
//   x3r = a[j+64] - a[j+96];   x3i = a[j+65] - a[j+97];
//
//   a[j]    = x0r + x2r;       a[j+1]  = x0i + x2i;
//   a[j+64] = x0r - x2r;       a[j+65] = x0i - x2i;
//   a[j+32] = x1r - x3i;       a[j+33] = x1i + x3r;
//   a[j+96] = x1r + x3i;       a[j+97] = x1i - x3r;
//
// a must point to at least 128 floats; the highest element accessed is a[127].
// NOTE(review): tmp_a keeps a byte address derived from a in an int, which
// assumes pointers fit in 32 bits on the target - confirm for any 64-bit build.
static void cftfsub_128_mips(float* a) {
  float f0, f1, f2, f3, f4, f5, f6, f7, f8;
  int tmp_a, count;

  cft1st_128(a);
  cftmdl_128(a);

  __asm __volatile (
    ".set       push                                      \n\t"
    // noreorder: the assembler must not reorder instructions or fill the
    // branch delay slot itself; the slot is filled by hand at the loop end.
    ".set       noreorder                                 \n\t"
    // tmp_a = a (byte address), count = 16 iterations.
    "addiu      %[tmp_a],       %[a],         0           \n\t"
    "addiu      %[count],       $zero,        16          \n\t"
   "1:                                                    \n\t"
    "addiu      %[count],       %[count],     -1          \n\t"
    // Even elements: f0 = a[j], f2 = a[j+32], f4 = a[j+64], f6 = a[j+96].
    "lwc1       %[f0],          0(%[tmp_a])               \n\t"
    "lwc1       %[f2],          128(%[tmp_a])             \n\t"
    "lwc1       %[f4],          256(%[tmp_a])             \n\t"
    "lwc1       %[f6],          384(%[tmp_a])             \n\t"
    // Odd elements: f1 = a[j+1], f3 = a[j+33], f5 = a[j+65], f7 = a[j+97].
    "lwc1       %[f1],          4(%[tmp_a])               \n\t"
    "lwc1       %[f3],          132(%[tmp_a])             \n\t"
    "lwc1       %[f5],          260(%[tmp_a])             \n\t"
    "lwc1       %[f7],          388(%[tmp_a])             \n\t"
    // First stage: f8 = x0r, f0 = x1r, f2 = x2r, f4 = x3r,
    //              f6 = x0i, f1 = x1i, f3 = x2i, f5 = x3i.
    "add.s      %[f8],          %[f0],        %[f2]       \n\t"
    "sub.s      %[f0],          %[f0],        %[f2]       \n\t"
    "add.s      %[f2],          %[f4],        %[f6]       \n\t"
    "sub.s      %[f4],          %[f4],        %[f6]       \n\t"
    "add.s      %[f6],          %[f1],        %[f3]       \n\t"
    "sub.s      %[f1],          %[f1],        %[f3]       \n\t"
    "add.s      %[f3],          %[f5],        %[f7]       \n\t"
    "sub.s      %[f5],          %[f5],        %[f7]       \n\t"
    // Second stage: f7 = x0r + x2r, f8 = x0r - x2r, f2 = x1i + x3r,
    //               f1 = x1i - x3r, f4 = x0i + x2i, f6 = x0i - x2i,
    //               f3 = x1r - x3i, f0 = x1r + x3i.
    "add.s      %[f7],          %[f8],        %[f2]       \n\t"
    "sub.s      %[f8],          %[f8],        %[f2]       \n\t"
    "add.s      %[f2],          %[f1],        %[f4]       \n\t"
    "sub.s      %[f1],          %[f1],        %[f4]       \n\t"
    "add.s      %[f4],          %[f6],        %[f3]       \n\t"
    "sub.s      %[f6],          %[f6],        %[f3]       \n\t"
    "sub.s      %[f3],          %[f0],        %[f5]       \n\t"
    "add.s      %[f0],          %[f0],        %[f5]       \n\t"
    // Write the eight results back over the inputs.
    "swc1       %[f7],          0(%[tmp_a])               \n\t"
    "swc1       %[f8],          256(%[tmp_a])             \n\t"
    "swc1       %[f2],          132(%[tmp_a])             \n\t"
    "swc1       %[f1],          388(%[tmp_a])             \n\t"
    "swc1       %[f4],          4(%[tmp_a])               \n\t"
    "swc1       %[f6],          260(%[tmp_a])             \n\t"
    "swc1       %[f3],          128(%[tmp_a])             \n\t"
    "swc1       %[f0],          384(%[tmp_a])             \n\t"
    // Loop while count > 0. The addiu below sits in the branch delay slot,
    // so tmp_a advances by 8 bytes (two floats) on every pass.
    "bgtz       %[count],       1b                        \n\t"
    " addiu     %[tmp_a],       %[tmp_a],   8             \n\t"
    ".set       pop                                       \n\t"
    // All outputs are early-clobber ("=&"): they are written before the input
    // a is finished with, so they must not share a register with it.
    : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
      [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
      [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a),
      [count] "=&r" (count)
    : [a] "r" (a)
    // The asm reads and writes the array behind a, which is not listed as an
    // operand; the "memory" clobber tells the compiler about it.
    : "memory"
  );
}
863
// Runs cft1st_128(a) and cftmdl_128(a), then applies a final in-place
// combining pass over a[0..127] using MIPS FPU inline assembly. Structurally
// the same as cftfsub_128_mips, but with different signs on the odd-indexed
// (second-of-pair) elements.
//
// The pass runs 16 iterations. With j = 0, 2, ..., 30 (float index):
//
//   sr = a[j]    + a[j+32];    dr = a[j]    - a[j+32];
//   si = a[j+1]  + a[j+33];    di = a[j+33] - a[j+1];
//   tr = a[j+64] + a[j+96];    ur = a[j+64] - a[j+96];
//   ti = a[j+65] + a[j+97];    ui = a[j+65] - a[j+97];
//
//   a[j]    = sr + tr;         a[j+1]  = -(ti + si);
//   a[j+64] = sr - tr;         a[j+65] = ti - si;
//   a[j+32] = dr - ui;         a[j+33] = di - ur;
//   a[j+96] = dr + ui;         a[j+97] = di + ur;
//
// a must point to at least 128 floats; the highest element accessed is a[127].
// NOTE(review): tmp_a keeps a byte address derived from a in an int, which
// assumes pointers fit in 32 bits on the target - confirm for any 64-bit build.
static void cftbsub_128_mips(float* a) {
  float f0, f1, f2, f3, f4, f5, f6, f7, f8;
  int tmp_a, count;

  cft1st_128(a);
  cftmdl_128(a);

  __asm __volatile (
    ".set       push                                        \n\t"
    // noreorder: the branch delay slot at the loop end is filled by hand.
    ".set       noreorder                                   \n\t"
    // tmp_a = a (byte address), count = 16 iterations.
    "addiu      %[tmp_a],   %[a],           0               \n\t"
    "addiu      %[count],   $zero,          16              \n\t"
   "1:                                                      \n\t"
    "addiu      %[count],   %[count],       -1              \n\t"
    // Even elements: f0 = a[j], f2 = a[j+32], f4 = a[j+64], f6 = a[j+96].
    "lwc1       %[f0],      0(%[tmp_a])                     \n\t"
    "lwc1       %[f2],      128(%[tmp_a])                   \n\t"
    "lwc1       %[f4],      256(%[tmp_a])                   \n\t"
    "lwc1       %[f6],      384(%[tmp_a])                   \n\t"
    // Odd elements: f1 = a[j+1], f3 = a[j+33], f5 = a[j+65], f7 = a[j+97].
    "lwc1       %[f1],      4(%[tmp_a])                     \n\t"
    "lwc1       %[f3],      132(%[tmp_a])                   \n\t"
    "lwc1       %[f5],      260(%[tmp_a])                   \n\t"
    "lwc1       %[f7],      388(%[tmp_a])                   \n\t"
    // First stage: f8 = sr, f0 = dr, f2 = tr, f4 = ur,
    //              f6 = si, f1 = di (note operand order f3 - f1),
    //              f3 = ti, f5 = ui.
    "add.s      %[f8],      %[f0],          %[f2]           \n\t"
    "sub.s      %[f0],      %[f0],          %[f2]           \n\t"
    "add.s      %[f2],      %[f4],          %[f6]           \n\t"
    "sub.s      %[f4],      %[f4],          %[f6]           \n\t"
    "add.s      %[f6],      %[f1],          %[f3]           \n\t"
    "sub.s      %[f1],      %[f3],          %[f1]           \n\t"
    "add.s      %[f3],      %[f5],          %[f7]           \n\t"
    "sub.s      %[f5],      %[f5],          %[f7]           \n\t"
    // Second stage: f7 = sr + tr, f8 = sr - tr, f2 = di - ur, f1 = di + ur,
    //               f4 = ti + si (negated below), f6 = ti - si,
    //               f3 = dr - ui, f0 = dr + ui.
    "add.s      %[f7],      %[f8],          %[f2]           \n\t"
    "sub.s      %[f8],      %[f8],          %[f2]           \n\t"
    "sub.s      %[f2],      %[f1],          %[f4]           \n\t"
    "add.s      %[f1],      %[f1],          %[f4]           \n\t"
    "add.s      %[f4],      %[f3],          %[f6]           \n\t"
    "sub.s      %[f6],      %[f3],          %[f6]           \n\t"
    "sub.s      %[f3],      %[f0],          %[f5]           \n\t"
    "add.s      %[f0],      %[f0],          %[f5]           \n\t"
    // f4 = -(ti + si); it is stored last, after the other seven results.
    "neg.s      %[f4],      %[f4]                           \n\t"
    "swc1       %[f7],      0(%[tmp_a])                     \n\t"
    "swc1       %[f8],      256(%[tmp_a])                   \n\t"
    "swc1       %[f2],      132(%[tmp_a])                   \n\t"
    "swc1       %[f1],      388(%[tmp_a])                   \n\t"
    "swc1       %[f6],      260(%[tmp_a])                   \n\t"
    "swc1       %[f3],      128(%[tmp_a])                   \n\t"
    "swc1       %[f0],      384(%[tmp_a])                   \n\t"
    "swc1       %[f4],       4(%[tmp_a])                     \n\t"
    // Loop while count > 0. The addiu below sits in the branch delay slot,
    // so tmp_a advances by 8 bytes (two floats) on every pass.
    "bgtz       %[count],   1b                              \n\t"
    " addiu     %[tmp_a],   %[tmp_a],       8               \n\t"
    ".set       pop                                         \n\t"
    // Early-clobber outputs: none may share a register with the input a.
    : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
      [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
      [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count)
    : [a] "r" (a)
    // The array behind a is read and written without being an operand.
    : "memory"
  );
}
921
// In-place pass over a[2..63] and a[66..127] that combines each low pair
// (a[j2], a[j2+1]) with its mirrored high pair (a[k2], a[k2+1]), k2 = 128 - j2,
// using coefficients read from rdft_w[33..63] (c = rdft_w + 32, c[1..31]).
//
// For j1 = 1..31, j2 = 2*j1, k2 = 128 - j2, k1 = 32 - j1:
//
//   wkr = 0.5f - c[k1];            wki = c[j1];
//   xr  = a[j2]   - a[k2];         xi  = a[j2+1] + a[k2+1];
//   yr  = wkr * xr - wki * xi;     yi  = wkr * xi + wki * xr;
//   a[j2]   -= yr;                 a[j2+1] -= yi;
//   a[k2]   += yr;                 a[k2+1] -= yi;
//
// The assembly handles the first pair (j1 = 1) before the loop, then runs 15
// loop iterations that each handle two pairs, for 31 pairs in total. Loads for
// the second pair of an iteration are interleaved with the arithmetic of the
// first. a[0], a[1], a[64] and a[65] are not accessed here.
//
// When MIPS32_R2_LE is defined, the multiply/add pairs are replaced by
// madd.s (d = s*t + r) and nmsub.s (d = -(s*t - r) = r - s*t).
static void rftfsub_128_mips(float* a) {
  const float* c = rdft_w + 32;
  // The constant 0.5 used to form wkr; passed to the asm as an input register.
  const float f0 = 0.5f;
  // a1 walks upward from a[2]; a2 walks downward from a[126].
  float* a1 = &a[2];
  float* a2 = &a[126];
  // c1 walks upward from c[1] (wki); c2 walks downward from c[31] (for wkr).
  const float* c1 = &c[1];
  const float* c2 = &c[31];
  float f1, f2, f3 ,f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15;
  int count;

  __asm __volatile (
    ".set      push                                             \n\t"
    // noreorder: the branch delay slot at the loop end is filled by hand.
    ".set      noreorder                                        \n\t"
    // ---- Peeled first pair (j1 = 1) ----
    // f6 = *c2, f1 = a1[0], f2 = a2[0], f3 = a1[1], f4 = a2[1], f5 = *c1 (wki).
    "lwc1      %[f6],       0(%[c2])                            \n\t"
    "lwc1      %[f1],       0(%[a1])                            \n\t"
    "lwc1      %[f2],       0(%[a2])                            \n\t"
    "lwc1      %[f3],       4(%[a1])                            \n\t"
    "lwc1      %[f4],       4(%[a2])                            \n\t"
    "lwc1      %[f5],       0(%[c1])                            \n\t"
    // f6 = wkr = 0.5 - *c2, f7 = xr, f8 = xi.
    "sub.s     %[f6],       %[f0],        %[f6]                 \n\t"
    "sub.s     %[f7],       %[f1],        %[f2]                 \n\t"
    "add.s     %[f8],       %[f3],        %[f4]                 \n\t"
    // Loop counter for the 15 two-pair iterations that follow.
    "addiu     %[count],    $zero,        15                    \n\t"
    // f9 = wkr * xr, f6 = wkr * xi.
    "mul.s     %[f9],       %[f6],        %[f7]                 \n\t"
    "mul.s     %[f6],       %[f6],        %[f8]                 \n\t"
#if !defined(MIPS32_R2_LE)
    // f9 = wkr*xr - wki*xi = yr, f6 = wkr*xi + wki*xr = yi.
    "mul.s     %[f8],       %[f5],        %[f8]                 \n\t"
    "mul.s     %[f5],       %[f5],        %[f7]                 \n\t"
    "sub.s     %[f9],       %[f9],        %[f8]                 \n\t"
    "add.s     %[f6],       %[f6],        %[f5]                 \n\t"
#else
    // Same results: f9 = f9 - f5*f8 = yr, f6 = f5*f7 + f6 = yi.
    "nmsub.s   %[f9],       %[f9],        %[f5],      %[f8]     \n\t"
    "madd.s    %[f6],       %[f6],        %[f5],      %[f7]     \n\t"
#endif
    // a1[0] -= yr, a2[0] += yr, a1[1] -= yi, a2[1] -= yi.
    "sub.s     %[f1],       %[f1],        %[f9]                 \n\t"
    "add.s     %[f2],       %[f2],        %[f9]                 \n\t"
    "sub.s     %[f3],       %[f3],        %[f6]                 \n\t"
    "sub.s     %[f4],       %[f4],        %[f6]                 \n\t"
    "swc1      %[f1],       0(%[a1])                            \n\t"
    "swc1      %[f2],       0(%[a2])                            \n\t"
    "swc1      %[f3],       4(%[a1])                            \n\t"
    "swc1      %[f4],       4(%[a2])                            \n\t"
    // Step to the next pair: a1 +2 floats, a2 -2 floats, c1 +1, c2 -1.
    "addiu     %[a1],       %[a1],        8                     \n\t"
    "addiu     %[a2],       %[a2],        -8                    \n\t"
    "addiu     %[c1],       %[c1],        4                     \n\t"
    "addiu     %[c2],       %[c2],        -4                    \n\t"
    // ---- Main loop: two pairs per iteration ----
   "1:                                                          \n\t"
    // Pair A operands, same register roles as in the peeled pair above.
    "lwc1      %[f6],       0(%[c2])                            \n\t"
    "lwc1      %[f1],       0(%[a1])                            \n\t"
    "lwc1      %[f2],       0(%[a2])                            \n\t"
    "lwc1      %[f3],       4(%[a1])                            \n\t"
    "lwc1      %[f4],       4(%[a2])                            \n\t"
    "lwc1      %[f5],       0(%[c1])                            \n\t"
    "sub.s     %[f6],       %[f0],        %[f6]                 \n\t"
    "sub.s     %[f7],       %[f1],        %[f2]                 \n\t"
    "add.s     %[f8],       %[f3],        %[f4]                 \n\t"
    // Start loading pair B: f10 = c2[-1], f11 = a1[2], f12 = a2[-2].
    "lwc1      %[f10],      -4(%[c2])                           \n\t"
    "lwc1      %[f11],      8(%[a1])                            \n\t"
    "lwc1      %[f12],      -8(%[a2])                           \n\t"
    "mul.s     %[f9],       %[f6],        %[f7]                 \n\t"
    "mul.s     %[f6],       %[f6],        %[f8]                 \n\t"
#if !defined(MIPS32_R2_LE)
    "mul.s     %[f8],       %[f5],        %[f8]                 \n\t"
    "mul.s     %[f5],       %[f5],        %[f7]                 \n\t"
    // Rest of pair B: f13 = a1[3], f14 = a2[-1], f15 = c1[1] (wki for B).
    "lwc1      %[f13],      12(%[a1])                           \n\t"
    "lwc1      %[f14],      -4(%[a2])                           \n\t"
    "lwc1      %[f15],      4(%[c1])                            \n\t"
    // Pair A: f9 = yr, f6 = yi.
    "sub.s     %[f9],       %[f9],        %[f8]                 \n\t"
    "add.s     %[f6],       %[f6],        %[f5]                 \n\t"
#else
    // Rest of pair B: f13 = a1[3], f14 = a2[-1], f15 = c1[1] (wki for B).
    "lwc1      %[f13],      12(%[a1])                           \n\t"
    "lwc1      %[f14],      -4(%[a2])                           \n\t"
    "lwc1      %[f15],      4(%[c1])                            \n\t"
    // Pair A: f9 = f9 - f5*f8 = yr, f6 = f5*f7 + f6 = yi.
    "nmsub.s   %[f9],       %[f9],        %[f5],      %[f8]     \n\t"
    "madd.s    %[f6],       %[f6],        %[f5],      %[f7]     \n\t"
#endif
    // Pair B: f10 = wkr, f5 = xr, f7 = xi (f5/f7 are free once A's yr/yi exist).
    "sub.s     %[f10],      %[f0],        %[f10]                \n\t"
    "sub.s     %[f5],       %[f11],       %[f12]                \n\t"
    "add.s     %[f7],       %[f13],       %[f14]                \n\t"
    // Pair A updates: a1[0] -= yr, a2[0] += yr, a1[1] -= yi.
    "sub.s     %[f1],       %[f1],        %[f9]                 \n\t"
    "add.s     %[f2],       %[f2],        %[f9]                 \n\t"
    "sub.s     %[f3],       %[f3],        %[f6]                 \n\t"
    // Pair B: f8 = wkr * xr, f10 = wkr * xi.
    "mul.s     %[f8],       %[f10],       %[f5]                 \n\t"
    "mul.s     %[f10],      %[f10],       %[f7]                 \n\t"
#if !defined(MIPS32_R2_LE)
    "mul.s     %[f9],       %[f15],       %[f7]                 \n\t"
    "mul.s     %[f15],      %[f15],       %[f5]                 \n\t"
    // Pair A: a2[1] -= yi, then store A's first two results.
    "sub.s     %[f4],       %[f4],        %[f6]                 \n\t"
    "swc1      %[f1],       0(%[a1])                            \n\t"
    "swc1      %[f2],       0(%[a2])                            \n\t"
    // Pair B: f8 = yr, f10 = yi.
    "sub.s     %[f8],       %[f8],        %[f9]                 \n\t"
    "add.s     %[f10],      %[f10],       %[f15]                \n\t"
#else
    "swc1      %[f1],       0(%[a1])                            \n\t"
    "swc1      %[f2],       0(%[a2])                            \n\t"
    // Pair A: a2[1] -= yi.
    "sub.s     %[f4],       %[f4],        %[f6]                 \n\t"
    // Pair B: f8 = f8 - f15*f7 = yr, f10 = f15*f5 + f10 = yi.
    "nmsub.s   %[f8],       %[f8],        %[f15],     %[f7]     \n\t"
    "madd.s    %[f10],      %[f10],       %[f15],     %[f5]     \n\t"
#endif
    "swc1      %[f3],       4(%[a1])                            \n\t"
    "swc1      %[f4],       4(%[a2])                            \n\t"
    // Pair B updates: a1[2] -= yr, a2[-2] += yr, a1[3] -= yi, a2[-1] -= yi.
    "sub.s     %[f11],      %[f11],       %[f8]                 \n\t"
    "add.s     %[f12],      %[f12],       %[f8]                 \n\t"
    "sub.s     %[f13],      %[f13],       %[f10]                \n\t"
    "sub.s     %[f14],      %[f14],       %[f10]                \n\t"
    // Advance the coefficient pointers by two entries each.
    "addiu     %[c2],       %[c2],        -8                    \n\t"
    "addiu     %[c1],       %[c1],        8                     \n\t"
    "swc1      %[f11],      8(%[a1])                            \n\t"
    "swc1      %[f12],      -8(%[a2])                           \n\t"
    "swc1      %[f13],      12(%[a1])                           \n\t"
    "swc1      %[f14],      -4(%[a2])                           \n\t"
    // Advance a1 by four floats; a2 moves back four floats in the delay slot.
    "addiu     %[a1],       %[a1],        16                    \n\t"
    "addiu     %[count],    %[count],     -1                    \n\t"
    "bgtz      %[count],    1b                                  \n\t"
    " addiu    %[a2],       %[a2],        -16                   \n\t"
    ".set      pop                                              \n\t"
    // The four pointers are read-write ("+r") because the asm advances them.
    // The FP temporaries and count are early-clobber outputs.
    : [a1] "+r" (a1), [a2] "+r" (a2), [c1] "+r" (c1), [c2] "+r" (c2),
      [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), [f4] "=&f" (f4),
      [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
      [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), [f12] "=&f" (f12),
      [f13] "=&f" (f13), [f14] "=&f" (f14), [f15] "=&f" (f15),
      [count] "=&r" (count)
    : [f0] "f" (f0)
    // Memory behind a1/a2 is modified and memory behind c1/c2 is read.
    : "memory"
  );
}
1048
// Negates a[1] and a[65], then runs an in-place pass over a[2..63] and
// a[66..127] that combines each low pair (a[j2], a[j2+1]) with its mirrored
// high pair (a[k2], a[k2+1]), k2 = 128 - j2, using coefficients read from
// rdft_w[33..63] (c = rdft_w + 32, c[1..31]).
//
// For j1 = 1..31, j2 = 2*j1, k2 = 128 - j2, k1 = 32 - j1:
//
//   wkr = 0.5f - c[k1];            wki = c[j1];
//   xr  = a[j2]   - a[k2];         xi  = a[j2+1] + a[k2+1];
//   yr  = wkr * xr + wki * xi;     yi  = wkr * xi - wki * xr;
//   a[j2]   = a[j2] - yr;          a[j2+1] = yi - a[j2+1];
//   a[k2]   = a[k2] + yr;          a[k2+1] = yi - a[k2+1];
//
// Compared with rftfsub_128_mips, the wki terms enter yr/yi with the opposite
// sign and the odd-indexed outputs are (yi - old) instead of (old - yi).
//
// The assembly handles the first pair (j1 = 1) before the loop, then runs 15
// loop iterations that each handle two pairs, for 31 pairs in total. The asm
// never accesses a[1] or a[65], so the two C negations below do not interact
// with it.
//
// When MIPS32_R2_LE is defined, the multiply/add pairs are replaced by
// madd.s (d = s*t + r) and nmsub.s (d = -(s*t - r) = r - s*t).
static void rftbsub_128_mips(float* a) {
  const float *c = rdft_w + 32;
  // The constant 0.5 used to form wkr; passed to the asm as an input register.
  const float f0 = 0.5f;
  // a1 walks upward from a[2]; a2 walks downward from a[126].
  float* a1 = &a[2];
  float* a2 = &a[126];
  // c1 walks upward from c[1] (wki); c2 walks downward from c[31] (for wkr).
  const float* c1 = &c[1];
  const float* c2 = &c[31];
  float f1, f2, f3 ,f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15;
  int count;

  a[1] = -a[1];
  a[65] = -a[65];

  __asm __volatile (
    ".set      push                                             \n\t"
    // noreorder: the branch delay slot at the loop end is filled by hand.
    ".set      noreorder                                        \n\t"
    // ---- Peeled first pair (j1 = 1) ----
    // f6 = *c2, f1 = a1[0], f2 = a2[0], f3 = a1[1], f4 = a2[1], f5 = *c1 (wki).
    "lwc1      %[f6],       0(%[c2])                            \n\t"
    "lwc1      %[f1],       0(%[a1])                            \n\t"
    "lwc1      %[f2],       0(%[a2])                            \n\t"
    "lwc1      %[f3],       4(%[a1])                            \n\t"
    "lwc1      %[f4],       4(%[a2])                            \n\t"
    "lwc1      %[f5],       0(%[c1])                            \n\t"
    // f6 = wkr = 0.5 - *c2, f7 = xr, f8 = xi.
    "sub.s     %[f6],       %[f0],        %[f6]                 \n\t"
    "sub.s     %[f7],       %[f1],        %[f2]                 \n\t"
    "add.s     %[f8],       %[f3],        %[f4]                 \n\t"
    // Loop counter for the 15 two-pair iterations that follow.
    "addiu     %[count],    $zero,        15                    \n\t"
    // f9 = wkr * xr, f6 = wkr * xi.
    "mul.s     %[f9],       %[f6],        %[f7]                 \n\t"
    "mul.s     %[f6],       %[f6],        %[f8]                 \n\t"
#if !defined(MIPS32_R2_LE)
    // f9 = wkr*xr + wki*xi = yr, f6 = wkr*xi - wki*xr = yi.
    "mul.s     %[f8],       %[f5],        %[f8]                 \n\t"
    "mul.s     %[f5],       %[f5],        %[f7]                 \n\t"
    "add.s     %[f9],       %[f9],        %[f8]                 \n\t"
    "sub.s     %[f6],       %[f6],        %[f5]                 \n\t"
#else
    // Same results: f9 = f5*f8 + f9 = yr, f6 = f6 - f5*f7 = yi.
    "madd.s    %[f9],       %[f9],        %[f5],      %[f8]     \n\t"
    "nmsub.s   %[f6],       %[f6],        %[f5],      %[f7]     \n\t"
#endif
    // a1[0] -= yr, a2[0] += yr, a1[1] = yi - a1[1], a2[1] = yi - a2[1].
    "sub.s     %[f1],       %[f1],        %[f9]                 \n\t"
    "add.s     %[f2],       %[f2],        %[f9]                 \n\t"
    "sub.s     %[f3],       %[f6],        %[f3]                 \n\t"
    "sub.s     %[f4],       %[f6],        %[f4]                 \n\t"
    "swc1      %[f1],       0(%[a1])                            \n\t"
    "swc1      %[f2],       0(%[a2])                            \n\t"
    "swc1      %[f3],       4(%[a1])                            \n\t"
    "swc1      %[f4],       4(%[a2])                            \n\t"
    // Step to the next pair: a1 +2 floats, a2 -2 floats, c1 +1, c2 -1.
    "addiu     %[a1],       %[a1],        8                     \n\t"
    "addiu     %[a2],       %[a2],        -8                    \n\t"
    "addiu     %[c1],       %[c1],        4                     \n\t"
    "addiu     %[c2],       %[c2],        -4                    \n\t"
    // ---- Main loop: two pairs per iteration ----
   "1:                                                          \n\t"
    // Pair A operands, same register roles as in the peeled pair above.
    "lwc1      %[f6],       0(%[c2])                            \n\t"
    "lwc1      %[f1],       0(%[a1])                            \n\t"
    "lwc1      %[f2],       0(%[a2])                            \n\t"
    "lwc1      %[f3],       4(%[a1])                            \n\t"
    "lwc1      %[f4],       4(%[a2])                            \n\t"
    "lwc1      %[f5],       0(%[c1])                            \n\t"
    "sub.s     %[f6],       %[f0],        %[f6]                 \n\t"
    "sub.s     %[f7],       %[f1],        %[f2]                 \n\t"
    "add.s     %[f8],       %[f3],        %[f4]                 \n\t"
    // Start loading pair B: f10 = c2[-1], f11 = a1[2], f12 = a2[-2].
    "lwc1      %[f10],      -4(%[c2])                           \n\t"
    "lwc1      %[f11],      8(%[a1])                            \n\t"
    "lwc1      %[f12],      -8(%[a2])                           \n\t"
    "mul.s     %[f9],       %[f6],        %[f7]                 \n\t"
    "mul.s     %[f6],       %[f6],        %[f8]                 \n\t"
#if !defined(MIPS32_R2_LE)
    "mul.s     %[f8],       %[f5],        %[f8]                 \n\t"
    "mul.s     %[f5],       %[f5],        %[f7]                 \n\t"
    // Rest of pair B: f13 = a1[3], f14 = a2[-1], f15 = c1[1] (wki for B).
    "lwc1      %[f13],      12(%[a1])                           \n\t"
    "lwc1      %[f14],      -4(%[a2])                           \n\t"
    "lwc1      %[f15],      4(%[c1])                            \n\t"
    // Pair A: f9 = yr, f6 = yi.
    "add.s     %[f9],       %[f9],        %[f8]                 \n\t"
    "sub.s     %[f6],       %[f6],        %[f5]                 \n\t"
#else
    // Rest of pair B: f13 = a1[3], f14 = a2[-1], f15 = c1[1] (wki for B).
    "lwc1      %[f13],      12(%[a1])                           \n\t"
    "lwc1      %[f14],      -4(%[a2])                           \n\t"
    "lwc1      %[f15],      4(%[c1])                            \n\t"
    // Pair A: f9 = f5*f8 + f9 = yr, f6 = f6 - f5*f7 = yi.
    "madd.s    %[f9],       %[f9],        %[f5],      %[f8]     \n\t"
    "nmsub.s   %[f6],       %[f6],        %[f5],      %[f7]     \n\t"
#endif
    // Pair B: f10 = wkr, f5 = xr, f7 = xi (f5/f7 are free once A's yr/yi exist).
    "sub.s     %[f10],      %[f0],        %[f10]                \n\t"
    "sub.s     %[f5],       %[f11],       %[f12]                \n\t"
    "add.s     %[f7],       %[f13],       %[f14]                \n\t"
    // Pair A updates: a1[0] -= yr, a2[0] += yr, a1[1] = yi - a1[1].
    "sub.s     %[f1],       %[f1],        %[f9]                 \n\t"
    "add.s     %[f2],       %[f2],        %[f9]                 \n\t"
    "sub.s     %[f3],       %[f6],        %[f3]                 \n\t"
    // Pair B: f8 = wkr * xr, f10 = wkr * xi.
    "mul.s     %[f8],       %[f10],       %[f5]                 \n\t"
    "mul.s     %[f10],      %[f10],       %[f7]                 \n\t"
#if !defined(MIPS32_R2_LE)
    "mul.s     %[f9],       %[f15],       %[f7]                 \n\t"
    "mul.s     %[f15],      %[f15],       %[f5]                 \n\t"
    // Pair A: a2[1] = yi - a2[1], then store A's first two results.
    "sub.s     %[f4],       %[f6],        %[f4]                 \n\t"
    "swc1      %[f1],       0(%[a1])                            \n\t"
    "swc1      %[f2],       0(%[a2])                            \n\t"
    // Pair B: f8 = yr, f10 = yi.
    "add.s     %[f8],       %[f8],        %[f9]                 \n\t"
    "sub.s     %[f10],      %[f10],       %[f15]                \n\t"
#else
    "swc1      %[f1],       0(%[a1])                            \n\t"
    "swc1      %[f2],       0(%[a2])                            \n\t"
    // Pair A: a2[1] = yi - a2[1].
    "sub.s     %[f4],       %[f6],        %[f4]                 \n\t"
    // Pair B: f8 = f15*f7 + f8 = yr, f10 = f10 - f15*f5 = yi.
    "madd.s    %[f8],       %[f8],        %[f15],     %[f7]     \n\t"
    "nmsub.s   %[f10],      %[f10],       %[f15],     %[f5]     \n\t"
#endif
    "swc1      %[f3],       4(%[a1])                            \n\t"
    "swc1      %[f4],       4(%[a2])                            \n\t"
    // Pair B updates: a1[2] -= yr, a2[-2] += yr,
    //                 a1[3] = yi - a1[3], a2[-1] = yi - a2[-1].
    "sub.s     %[f11],      %[f11],       %[f8]                 \n\t"
    "add.s     %[f12],      %[f12],       %[f8]                 \n\t"
    "sub.s     %[f13],      %[f10],       %[f13]                \n\t"
    "sub.s     %[f14],      %[f10],       %[f14]                \n\t"
    // Advance the coefficient pointers by two entries each.
    "addiu     %[c2],       %[c2],        -8                    \n\t"
    "addiu     %[c1],       %[c1],        8                     \n\t"
    "swc1      %[f11],      8(%[a1])                            \n\t"
    "swc1      %[f12],      -8(%[a2])                           \n\t"
    "swc1      %[f13],      12(%[a1])                           \n\t"
    "swc1      %[f14],      -4(%[a2])                           \n\t"
    // Advance a1 by four floats; a2 moves back four floats in the delay slot.
    "addiu     %[a1],       %[a1],        16                    \n\t"
    "addiu     %[count],    %[count],     -1                    \n\t"
    "bgtz      %[count],    1b                                  \n\t"
    " addiu    %[a2],       %[a2],        -16                   \n\t"
    ".set      pop                                              \n\t"
    // The four pointers are read-write ("+r") because the asm advances them.
    // The FP temporaries and count are early-clobber outputs.
    : [a1] "+r" (a1), [a2] "+r" (a2), [c1] "+r" (c1), [c2] "+r" (c2),
      [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), [f4] "=&f" (f4),
      [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
      [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), [f12] "=&f" (f12),
      [f13] "=&f" (f13), [f14] "=&f" (f14), [f15] "=&f" (f15),
      [count] "=&r" (count)
    : [f0] "f" (f0)
    // Memory behind a1/a2 is modified and memory behind c1/c2 is read.
    : "memory"
  );
}
1178
1179void aec_rdft_init_mips(void) {
1180  cft1st_128 = cft1st_128_mips;
1181  cftmdl_128 = cftmdl_128_mips;
1182  rftfsub_128 = rftfsub_128_mips;
1183  rftbsub_128 = rftbsub_128_mips;
1184  cftfsub_128 = cftfsub_128_mips;
1185  cftbsub_128 = cftbsub_128_mips;
1186  bitrv2_128 = bitrv2_128_mips;
1187}
1188