smallft.c revision 98913fed6520d8849fb2e246be943e04474aefa4
1/********************************************************************
2 *                                                                  *
3 * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE.   *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
7 *                                                                  *
8 * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001             *
9 * by the XIPHOPHORUS Company http://www.xiph.org/                  *
10 *                                                                  *
11 ********************************************************************
12
13 function: *unnormalized* fft transform
14 last mod: $Id: smallft.c,v 1.19 2003/10/08 05:12:37 jm Exp $
15
16 ********************************************************************/
17
18/* FFT implementation from OggSquish, minus cosine transforms,
19 * minus all but radix 2/4 case.  In Vorbis we only need this
20 * cut-down version.
21 *
22 * To do more than just power-of-two sized vectors, see the full
23 * version I wrote for NetLib.
24 *
25 * Note that the packing is a little strange; rather than the FFT r/i
26 * packing following R_0, I_n, R_1, I_1, R_2, I_2 ... R_n-1, I_n-1,
27 * it follows R_0, R_1, I_1, R_2, I_2 ... R_n-1, I_n-1, I_n like the
28 * FORTRAN version
29 */
30
31#ifdef HAVE_CONFIG_H
32#include "config.h"
33#endif
34
35#include <math.h>
36#include "smallft.h"
37#include "arch.h"
38#include "os_support.h"
39
40static void drfti1(int n, float *wa, int *ifac){
41  static int ntryh[4] = { 4,2,3,5 };
42  static float tpi = 6.28318530717958648f;
43  float arg,argh,argld,fi;
44  int ntry=0,i,j=-1;
45  int k1, l1, l2, ib;
46  int ld, ii, ip, is, nq, nr;
47  int ido, ipm, nfm1;
48  int nl=n;
49  int nf=0;
50
51 L101:
52  j++;
53  if (j < 4)
54    ntry=ntryh[j];
55  else
56    ntry+=2;
57
58 L104:
59  nq=nl/ntry;
60  nr=nl-ntry*nq;
61  if (nr!=0) goto L101;
62
63  nf++;
64  ifac[nf+1]=ntry;
65  nl=nq;
66  if(ntry!=2)goto L107;
67  if(nf==1)goto L107;
68
69  for (i=1;i<nf;i++){
70    ib=nf-i+1;
71    ifac[ib+1]=ifac[ib];
72  }
73  ifac[2] = 2;
74
75 L107:
76  if(nl!=1)goto L104;
77  ifac[0]=n;
78  ifac[1]=nf;
79  argh=tpi/n;
80  is=0;
81  nfm1=nf-1;
82  l1=1;
83
84  if(nfm1==0)return;
85
86  for (k1=0;k1<nfm1;k1++){
87    ip=ifac[k1+2];
88    ld=0;
89    l2=l1*ip;
90    ido=n/l2;
91    ipm=ip-1;
92
93    for (j=0;j<ipm;j++){
94      ld+=l1;
95      i=is;
96      argld=(float)ld*argh;
97      fi=0.f;
98      for (ii=2;ii<ido;ii+=2){
99	fi+=1.f;
100	arg=fi*argld;
101	wa[i++]=cos(arg);
102	wa[i++]=sin(arg);
103      }
104      is+=ido;
105    }
106    l1=l2;
107  }
108}
109
110static void fdrffti(int n, float *wsave, int *ifac){
111
112  if (n == 1) return;
113  drfti1(n, wsave+n, ifac);
114}
115
116static void dradf2(int ido,int l1,float *cc,float *ch,float *wa1){
117  int i,k;
118  float ti2,tr2;
119  int t0,t1,t2,t3,t4,t5,t6;
120
121  t1=0;
122  t0=(t2=l1*ido);
123  t3=ido<<1;
124  for(k=0;k<l1;k++){
125    ch[t1<<1]=cc[t1]+cc[t2];
126    ch[(t1<<1)+t3-1]=cc[t1]-cc[t2];
127    t1+=ido;
128    t2+=ido;
129  }
130
131  if(ido<2)return;
132  if(ido==2)goto L105;
133
134  t1=0;
135  t2=t0;
136  for(k=0;k<l1;k++){
137    t3=t2;
138    t4=(t1<<1)+(ido<<1);
139    t5=t1;
140    t6=t1+t1;
141    for(i=2;i<ido;i+=2){
142      t3+=2;
143      t4-=2;
144      t5+=2;
145      t6+=2;
146      tr2=wa1[i-2]*cc[t3-1]+wa1[i-1]*cc[t3];
147      ti2=wa1[i-2]*cc[t3]-wa1[i-1]*cc[t3-1];
148      ch[t6]=cc[t5]+ti2;
149      ch[t4]=ti2-cc[t5];
150      ch[t6-1]=cc[t5-1]+tr2;
151      ch[t4-1]=cc[t5-1]-tr2;
152    }
153    t1+=ido;
154    t2+=ido;
155  }
156
157  if(ido%2==1)return;
158
159 L105:
160  t3=(t2=(t1=ido)-1);
161  t2+=t0;
162  for(k=0;k<l1;k++){
163    ch[t1]=-cc[t2];
164    ch[t1-1]=cc[t3];
165    t1+=ido<<1;
166    t2+=ido;
167    t3+=ido;
168  }
169}
170
171static void dradf4(int ido,int l1,float *cc,float *ch,float *wa1,
172	    float *wa2,float *wa3){
173  static float hsqt2 = .70710678118654752f;
174  int i,k,t0,t1,t2,t3,t4,t5,t6;
175  float ci2,ci3,ci4,cr2,cr3,cr4,ti1,ti2,ti3,ti4,tr1,tr2,tr3,tr4;
176  t0=l1*ido;
177
178  t1=t0;
179  t4=t1<<1;
180  t2=t1+(t1<<1);
181  t3=0;
182
183  for(k=0;k<l1;k++){
184    tr1=cc[t1]+cc[t2];
185    tr2=cc[t3]+cc[t4];
186
187    ch[t5=t3<<2]=tr1+tr2;
188    ch[(ido<<2)+t5-1]=tr2-tr1;
189    ch[(t5+=(ido<<1))-1]=cc[t3]-cc[t4];
190    ch[t5]=cc[t2]-cc[t1];
191
192    t1+=ido;
193    t2+=ido;
194    t3+=ido;
195    t4+=ido;
196  }
197
198  if(ido<2)return;
199  if(ido==2)goto L105;
200
201
202  t1=0;
203  for(k=0;k<l1;k++){
204    t2=t1;
205    t4=t1<<2;
206    t5=(t6=ido<<1)+t4;
207    for(i=2;i<ido;i+=2){
208      t3=(t2+=2);
209      t4+=2;
210      t5-=2;
211
212      t3+=t0;
213      cr2=wa1[i-2]*cc[t3-1]+wa1[i-1]*cc[t3];
214      ci2=wa1[i-2]*cc[t3]-wa1[i-1]*cc[t3-1];
215      t3+=t0;
216      cr3=wa2[i-2]*cc[t3-1]+wa2[i-1]*cc[t3];
217      ci3=wa2[i-2]*cc[t3]-wa2[i-1]*cc[t3-1];
218      t3+=t0;
219      cr4=wa3[i-2]*cc[t3-1]+wa3[i-1]*cc[t3];
220      ci4=wa3[i-2]*cc[t3]-wa3[i-1]*cc[t3-1];
221
222      tr1=cr2+cr4;
223      tr4=cr4-cr2;
224      ti1=ci2+ci4;
225      ti4=ci2-ci4;
226
227      ti2=cc[t2]+ci3;
228      ti3=cc[t2]-ci3;
229      tr2=cc[t2-1]+cr3;
230      tr3=cc[t2-1]-cr3;
231
232      ch[t4-1]=tr1+tr2;
233      ch[t4]=ti1+ti2;
234
235      ch[t5-1]=tr3-ti4;
236      ch[t5]=tr4-ti3;
237
238      ch[t4+t6-1]=ti4+tr3;
239      ch[t4+t6]=tr4+ti3;
240
241      ch[t5+t6-1]=tr2-tr1;
242      ch[t5+t6]=ti1-ti2;
243    }
244    t1+=ido;
245  }
246  if(ido&1)return;
247
248 L105:
249
250  t2=(t1=t0+ido-1)+(t0<<1);
251  t3=ido<<2;
252  t4=ido;
253  t5=ido<<1;
254  t6=ido;
255
256  for(k=0;k<l1;k++){
257    ti1=-hsqt2*(cc[t1]+cc[t2]);
258    tr1=hsqt2*(cc[t1]-cc[t2]);
259
260    ch[t4-1]=tr1+cc[t6-1];
261    ch[t4+t5-1]=cc[t6-1]-tr1;
262
263    ch[t4]=ti1-cc[t1+t0];
264    ch[t4+t5]=ti1+cc[t1+t0];
265
266    t1+=ido;
267    t2+=ido;
268    t4+=t3;
269    t6+=ido;
270  }
271}
272
273static void dradfg(int ido,int ip,int l1,int idl1,float *cc,float *c1,
274                          float *c2,float *ch,float *ch2,float *wa){
275
276  static float tpi=6.283185307179586f;
277  int idij,ipph,i,j,k,l,ic,ik,is;
278  int t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10;
279  float dc2,ai1,ai2,ar1,ar2,ds2;
280  int nbd;
281  float dcp,arg,dsp,ar1h,ar2h;
282  int idp2,ipp2;
283
284  arg=tpi/(float)ip;
285  dcp=cos(arg);
286  dsp=sin(arg);
287  ipph=(ip+1)>>1;
288  ipp2=ip;
289  idp2=ido;
290  nbd=(ido-1)>>1;
291  t0=l1*ido;
292  t10=ip*ido;
293
294  if(ido==1)goto L119;
295  for(ik=0;ik<idl1;ik++)ch2[ik]=c2[ik];
296
297  t1=0;
298  for(j=1;j<ip;j++){
299    t1+=t0;
300    t2=t1;
301    for(k=0;k<l1;k++){
302      ch[t2]=c1[t2];
303      t2+=ido;
304    }
305  }
306
307  is=-ido;
308  t1=0;
309  if(nbd>l1){
310    for(j=1;j<ip;j++){
311      t1+=t0;
312      is+=ido;
313      t2= -ido+t1;
314      for(k=0;k<l1;k++){
315        idij=is-1;
316        t2+=ido;
317        t3=t2;
318        for(i=2;i<ido;i+=2){
319          idij+=2;
320          t3+=2;
321          ch[t3-1]=wa[idij-1]*c1[t3-1]+wa[idij]*c1[t3];
322          ch[t3]=wa[idij-1]*c1[t3]-wa[idij]*c1[t3-1];
323        }
324      }
325    }
326  }else{
327
328    for(j=1;j<ip;j++){
329      is+=ido;
330      idij=is-1;
331      t1+=t0;
332      t2=t1;
333      for(i=2;i<ido;i+=2){
334        idij+=2;
335        t2+=2;
336        t3=t2;
337        for(k=0;k<l1;k++){
338          ch[t3-1]=wa[idij-1]*c1[t3-1]+wa[idij]*c1[t3];
339          ch[t3]=wa[idij-1]*c1[t3]-wa[idij]*c1[t3-1];
340          t3+=ido;
341        }
342      }
343    }
344  }
345
346  t1=0;
347  t2=ipp2*t0;
348  if(nbd<l1){
349    for(j=1;j<ipph;j++){
350      t1+=t0;
351      t2-=t0;
352      t3=t1;
353      t4=t2;
354      for(i=2;i<ido;i+=2){
355        t3+=2;
356        t4+=2;
357        t5=t3-ido;
358        t6=t4-ido;
359        for(k=0;k<l1;k++){
360          t5+=ido;
361          t6+=ido;
362          c1[t5-1]=ch[t5-1]+ch[t6-1];
363          c1[t6-1]=ch[t5]-ch[t6];
364          c1[t5]=ch[t5]+ch[t6];
365          c1[t6]=ch[t6-1]-ch[t5-1];
366        }
367      }
368    }
369  }else{
370    for(j=1;j<ipph;j++){
371      t1+=t0;
372      t2-=t0;
373      t3=t1;
374      t4=t2;
375      for(k=0;k<l1;k++){
376        t5=t3;
377        t6=t4;
378        for(i=2;i<ido;i+=2){
379          t5+=2;
380          t6+=2;
381          c1[t5-1]=ch[t5-1]+ch[t6-1];
382          c1[t6-1]=ch[t5]-ch[t6];
383          c1[t5]=ch[t5]+ch[t6];
384          c1[t6]=ch[t6-1]-ch[t5-1];
385        }
386        t3+=ido;
387        t4+=ido;
388      }
389    }
390  }
391
392L119:
393  for(ik=0;ik<idl1;ik++)c2[ik]=ch2[ik];
394
395  t1=0;
396  t2=ipp2*idl1;
397  for(j=1;j<ipph;j++){
398    t1+=t0;
399    t2-=t0;
400    t3=t1-ido;
401    t4=t2-ido;
402    for(k=0;k<l1;k++){
403      t3+=ido;
404      t4+=ido;
405      c1[t3]=ch[t3]+ch[t4];
406      c1[t4]=ch[t4]-ch[t3];
407    }
408  }
409
410  ar1=1.f;
411  ai1=0.f;
412  t1=0;
413  t2=ipp2*idl1;
414  t3=(ip-1)*idl1;
415  for(l=1;l<ipph;l++){
416    t1+=idl1;
417    t2-=idl1;
418    ar1h=dcp*ar1-dsp*ai1;
419    ai1=dcp*ai1+dsp*ar1;
420    ar1=ar1h;
421    t4=t1;
422    t5=t2;
423    t6=t3;
424    t7=idl1;
425
426    for(ik=0;ik<idl1;ik++){
427      ch2[t4++]=c2[ik]+ar1*c2[t7++];
428      ch2[t5++]=ai1*c2[t6++];
429    }
430
431    dc2=ar1;
432    ds2=ai1;
433    ar2=ar1;
434    ai2=ai1;
435
436    t4=idl1;
437    t5=(ipp2-1)*idl1;
438    for(j=2;j<ipph;j++){
439      t4+=idl1;
440      t5-=idl1;
441
442      ar2h=dc2*ar2-ds2*ai2;
443      ai2=dc2*ai2+ds2*ar2;
444      ar2=ar2h;
445
446      t6=t1;
447      t7=t2;
448      t8=t4;
449      t9=t5;
450      for(ik=0;ik<idl1;ik++){
451        ch2[t6++]+=ar2*c2[t8++];
452        ch2[t7++]+=ai2*c2[t9++];
453      }
454    }
455  }
456
457  t1=0;
458  for(j=1;j<ipph;j++){
459    t1+=idl1;
460    t2=t1;
461    for(ik=0;ik<idl1;ik++)ch2[ik]+=c2[t2++];
462  }
463
464  if(ido<l1)goto L132;
465
466  t1=0;
467  t2=0;
468  for(k=0;k<l1;k++){
469    t3=t1;
470    t4=t2;
471    for(i=0;i<ido;i++)cc[t4++]=ch[t3++];
472    t1+=ido;
473    t2+=t10;
474  }
475
476  goto L135;
477
478 L132:
479  for(i=0;i<ido;i++){
480    t1=i;
481    t2=i;
482    for(k=0;k<l1;k++){
483      cc[t2]=ch[t1];
484      t1+=ido;
485      t2+=t10;
486    }
487  }
488
489 L135:
490  t1=0;
491  t2=ido<<1;
492  t3=0;
493  t4=ipp2*t0;
494  for(j=1;j<ipph;j++){
495
496    t1+=t2;
497    t3+=t0;
498    t4-=t0;
499
500    t5=t1;
501    t6=t3;
502    t7=t4;
503
504    for(k=0;k<l1;k++){
505      cc[t5-1]=ch[t6];
506      cc[t5]=ch[t7];
507      t5+=t10;
508      t6+=ido;
509      t7+=ido;
510    }
511  }
512
513  if(ido==1)return;
514  if(nbd<l1)goto L141;
515
516  t1=-ido;
517  t3=0;
518  t4=0;
519  t5=ipp2*t0;
520  for(j=1;j<ipph;j++){
521    t1+=t2;
522    t3+=t2;
523    t4+=t0;
524    t5-=t0;
525    t6=t1;
526    t7=t3;
527    t8=t4;
528    t9=t5;
529    for(k=0;k<l1;k++){
530      for(i=2;i<ido;i+=2){
531        ic=idp2-i;
532        cc[i+t7-1]=ch[i+t8-1]+ch[i+t9-1];
533        cc[ic+t6-1]=ch[i+t8-1]-ch[i+t9-1];
534        cc[i+t7]=ch[i+t8]+ch[i+t9];
535        cc[ic+t6]=ch[i+t9]-ch[i+t8];
536      }
537      t6+=t10;
538      t7+=t10;
539      t8+=ido;
540      t9+=ido;
541    }
542  }
543  return;
544
545 L141:
546
547  t1=-ido;
548  t3=0;
549  t4=0;
550  t5=ipp2*t0;
551  for(j=1;j<ipph;j++){
552    t1+=t2;
553    t3+=t2;
554    t4+=t0;
555    t5-=t0;
556    for(i=2;i<ido;i+=2){
557      t6=idp2+t1-i;
558      t7=i+t3;
559      t8=i+t4;
560      t9=i+t5;
561      for(k=0;k<l1;k++){
562        cc[t7-1]=ch[t8-1]+ch[t9-1];
563        cc[t6-1]=ch[t8-1]-ch[t9-1];
564        cc[t7]=ch[t8]+ch[t9];
565        cc[t6]=ch[t9]-ch[t8];
566        t6+=t10;
567        t7+=t10;
568        t8+=ido;
569        t9+=ido;
570      }
571    }
572  }
573}
574
575static void drftf1(int n,float *c,float *ch,float *wa,int *ifac){
576  int i,k1,l1,l2;
577  int na,kh,nf;
578  int ip,iw,ido,idl1,ix2,ix3;
579
580  nf=ifac[1];
581  na=1;
582  l2=n;
583  iw=n;
584
585  for(k1=0;k1<nf;k1++){
586    kh=nf-k1;
587    ip=ifac[kh+1];
588    l1=l2/ip;
589    ido=n/l2;
590    idl1=ido*l1;
591    iw-=(ip-1)*ido;
592    na=1-na;
593
594    if(ip!=4)goto L102;
595
596    ix2=iw+ido;
597    ix3=ix2+ido;
598    if(na!=0)
599      dradf4(ido,l1,ch,c,wa+iw-1,wa+ix2-1,wa+ix3-1);
600    else
601      dradf4(ido,l1,c,ch,wa+iw-1,wa+ix2-1,wa+ix3-1);
602    goto L110;
603
604 L102:
605    if(ip!=2)goto L104;
606    if(na!=0)goto L103;
607
608    dradf2(ido,l1,c,ch,wa+iw-1);
609    goto L110;
610
611  L103:
612    dradf2(ido,l1,ch,c,wa+iw-1);
613    goto L110;
614
615  L104:
616    if(ido==1)na=1-na;
617    if(na!=0)goto L109;
618
619    dradfg(ido,ip,l1,idl1,c,c,c,ch,ch,wa+iw-1);
620    na=1;
621    goto L110;
622
623  L109:
624    dradfg(ido,ip,l1,idl1,ch,ch,ch,c,c,wa+iw-1);
625    na=0;
626
627  L110:
628    l2=l1;
629  }
630
631  if(na==1)return;
632
633  for(i=0;i<n;i++)c[i]=ch[i];
634}
635
636static void dradb2(int ido,int l1,float *cc,float *ch,float *wa1){
637  int i,k,t0,t1,t2,t3,t4,t5,t6;
638  float ti2,tr2;
639
640  t0=l1*ido;
641
642  t1=0;
643  t2=0;
644  t3=(ido<<1)-1;
645  for(k=0;k<l1;k++){
646    ch[t1]=cc[t2]+cc[t3+t2];
647    ch[t1+t0]=cc[t2]-cc[t3+t2];
648    t2=(t1+=ido)<<1;
649  }
650
651  if(ido<2)return;
652  if(ido==2)goto L105;
653
654  t1=0;
655  t2=0;
656  for(k=0;k<l1;k++){
657    t3=t1;
658    t5=(t4=t2)+(ido<<1);
659    t6=t0+t1;
660    for(i=2;i<ido;i+=2){
661      t3+=2;
662      t4+=2;
663      t5-=2;
664      t6+=2;
665      ch[t3-1]=cc[t4-1]+cc[t5-1];
666      tr2=cc[t4-1]-cc[t5-1];
667      ch[t3]=cc[t4]-cc[t5];
668      ti2=cc[t4]+cc[t5];
669      ch[t6-1]=wa1[i-2]*tr2-wa1[i-1]*ti2;
670      ch[t6]=wa1[i-2]*ti2+wa1[i-1]*tr2;
671    }
672    t2=(t1+=ido)<<1;
673  }
674
675  if(ido%2==1)return;
676
677L105:
678  t1=ido-1;
679  t2=ido-1;
680  for(k=0;k<l1;k++){
681    ch[t1]=cc[t2]+cc[t2];
682    ch[t1+t0]=-(cc[t2+1]+cc[t2+1]);
683    t1+=ido;
684    t2+=ido<<1;
685  }
686}
687
688static void dradb3(int ido,int l1,float *cc,float *ch,float *wa1,
689                          float *wa2){
690  static float taur = -.5f;
691  static float taui = .8660254037844386f;
692  int i,k,t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10;
693  float ci2,ci3,di2,di3,cr2,cr3,dr2,dr3,ti2,tr2;
694  t0=l1*ido;
695
696  t1=0;
697  t2=t0<<1;
698  t3=ido<<1;
699  t4=ido+(ido<<1);
700  t5=0;
701  for(k=0;k<l1;k++){
702    tr2=cc[t3-1]+cc[t3-1];
703    cr2=cc[t5]+(taur*tr2);
704    ch[t1]=cc[t5]+tr2;
705    ci3=taui*(cc[t3]+cc[t3]);
706    ch[t1+t0]=cr2-ci3;
707    ch[t1+t2]=cr2+ci3;
708    t1+=ido;
709    t3+=t4;
710    t5+=t4;
711  }
712
713  if(ido==1)return;
714
715  t1=0;
716  t3=ido<<1;
717  for(k=0;k<l1;k++){
718    t7=t1+(t1<<1);
719    t6=(t5=t7+t3);
720    t8=t1;
721    t10=(t9=t1+t0)+t0;
722
723    for(i=2;i<ido;i+=2){
724      t5+=2;
725      t6-=2;
726      t7+=2;
727      t8+=2;
728      t9+=2;
729      t10+=2;
730      tr2=cc[t5-1]+cc[t6-1];
731      cr2=cc[t7-1]+(taur*tr2);
732      ch[t8-1]=cc[t7-1]+tr2;
733      ti2=cc[t5]-cc[t6];
734      ci2=cc[t7]+(taur*ti2);
735      ch[t8]=cc[t7]+ti2;
736      cr3=taui*(cc[t5-1]-cc[t6-1]);
737      ci3=taui*(cc[t5]+cc[t6]);
738      dr2=cr2-ci3;
739      dr3=cr2+ci3;
740      di2=ci2+cr3;
741      di3=ci2-cr3;
742      ch[t9-1]=wa1[i-2]*dr2-wa1[i-1]*di2;
743      ch[t9]=wa1[i-2]*di2+wa1[i-1]*dr2;
744      ch[t10-1]=wa2[i-2]*dr3-wa2[i-1]*di3;
745      ch[t10]=wa2[i-2]*di3+wa2[i-1]*dr3;
746    }
747    t1+=ido;
748  }
749}
750
751static void dradb4(int ido,int l1,float *cc,float *ch,float *wa1,
752			  float *wa2,float *wa3){
753  static float sqrt2=1.414213562373095f;
754  int i,k,t0,t1,t2,t3,t4,t5,t6,t7,t8;
755  float ci2,ci3,ci4,cr2,cr3,cr4,ti1,ti2,ti3,ti4,tr1,tr2,tr3,tr4;
756  t0=l1*ido;
757
758  t1=0;
759  t2=ido<<2;
760  t3=0;
761  t6=ido<<1;
762  for(k=0;k<l1;k++){
763    t4=t3+t6;
764    t5=t1;
765    tr3=cc[t4-1]+cc[t4-1];
766    tr4=cc[t4]+cc[t4];
767    tr1=cc[t3]-cc[(t4+=t6)-1];
768    tr2=cc[t3]+cc[t4-1];
769    ch[t5]=tr2+tr3;
770    ch[t5+=t0]=tr1-tr4;
771    ch[t5+=t0]=tr2-tr3;
772    ch[t5+=t0]=tr1+tr4;
773    t1+=ido;
774    t3+=t2;
775  }
776
777  if(ido<2)return;
778  if(ido==2)goto L105;
779
780  t1=0;
781  for(k=0;k<l1;k++){
782    t5=(t4=(t3=(t2=t1<<2)+t6))+t6;
783    t7=t1;
784    for(i=2;i<ido;i+=2){
785      t2+=2;
786      t3+=2;
787      t4-=2;
788      t5-=2;
789      t7+=2;
790      ti1=cc[t2]+cc[t5];
791      ti2=cc[t2]-cc[t5];
792      ti3=cc[t3]-cc[t4];
793      tr4=cc[t3]+cc[t4];
794      tr1=cc[t2-1]-cc[t5-1];
795      tr2=cc[t2-1]+cc[t5-1];
796      ti4=cc[t3-1]-cc[t4-1];
797      tr3=cc[t3-1]+cc[t4-1];
798      ch[t7-1]=tr2+tr3;
799      cr3=tr2-tr3;
800      ch[t7]=ti2+ti3;
801      ci3=ti2-ti3;
802      cr2=tr1-tr4;
803      cr4=tr1+tr4;
804      ci2=ti1+ti4;
805      ci4=ti1-ti4;
806
807      ch[(t8=t7+t0)-1]=wa1[i-2]*cr2-wa1[i-1]*ci2;
808      ch[t8]=wa1[i-2]*ci2+wa1[i-1]*cr2;
809      ch[(t8+=t0)-1]=wa2[i-2]*cr3-wa2[i-1]*ci3;
810      ch[t8]=wa2[i-2]*ci3+wa2[i-1]*cr3;
811      ch[(t8+=t0)-1]=wa3[i-2]*cr4-wa3[i-1]*ci4;
812      ch[t8]=wa3[i-2]*ci4+wa3[i-1]*cr4;
813    }
814    t1+=ido;
815  }
816
817  if(ido%2 == 1)return;
818
819 L105:
820
821  t1=ido;
822  t2=ido<<2;
823  t3=ido-1;
824  t4=ido+(ido<<1);
825  for(k=0;k<l1;k++){
826    t5=t3;
827    ti1=cc[t1]+cc[t4];
828    ti2=cc[t4]-cc[t1];
829    tr1=cc[t1-1]-cc[t4-1];
830    tr2=cc[t1-1]+cc[t4-1];
831    ch[t5]=tr2+tr2;
832    ch[t5+=t0]=sqrt2*(tr1-ti1);
833    ch[t5+=t0]=ti2+ti2;
834    ch[t5+=t0]=-sqrt2*(tr1+ti1);
835
836    t3+=ido;
837    t1+=t2;
838    t4+=t2;
839  }
840}
841
842static void dradbg(int ido,int ip,int l1,int idl1,float *cc,float *c1,
843            float *c2,float *ch,float *ch2,float *wa){
844  static float tpi=6.283185307179586f;
845  int idij,ipph,i,j,k,l,ik,is,t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,
846      t11,t12;
847  float dc2,ai1,ai2,ar1,ar2,ds2;
848  int nbd;
849  float dcp,arg,dsp,ar1h,ar2h;
850  int ipp2;
851
852  t10=ip*ido;
853  t0=l1*ido;
854  arg=tpi/(float)ip;
855  dcp=cos(arg);
856  dsp=sin(arg);
857  nbd=(ido-1)>>1;
858  ipp2=ip;
859  ipph=(ip+1)>>1;
860  if(ido<l1)goto L103;
861
862  t1=0;
863  t2=0;
864  for(k=0;k<l1;k++){
865    t3=t1;
866    t4=t2;
867    for(i=0;i<ido;i++){
868      ch[t3]=cc[t4];
869      t3++;
870      t4++;
871    }
872    t1+=ido;
873    t2+=t10;
874  }
875  goto L106;
876
877 L103:
878  t1=0;
879  for(i=0;i<ido;i++){
880    t2=t1;
881    t3=t1;
882    for(k=0;k<l1;k++){
883      ch[t2]=cc[t3];
884      t2+=ido;
885      t3+=t10;
886    }
887    t1++;
888  }
889
890 L106:
891  t1=0;
892  t2=ipp2*t0;
893  t7=(t5=ido<<1);
894  for(j=1;j<ipph;j++){
895    t1+=t0;
896    t2-=t0;
897    t3=t1;
898    t4=t2;
899    t6=t5;
900    for(k=0;k<l1;k++){
901      ch[t3]=cc[t6-1]+cc[t6-1];
902      ch[t4]=cc[t6]+cc[t6];
903      t3+=ido;
904      t4+=ido;
905      t6+=t10;
906    }
907    t5+=t7;
908  }
909
910  if (ido == 1)goto L116;
911  if(nbd<l1)goto L112;
912
913  t1=0;
914  t2=ipp2*t0;
915  t7=0;
916  for(j=1;j<ipph;j++){
917    t1+=t0;
918    t2-=t0;
919    t3=t1;
920    t4=t2;
921
922    t7+=(ido<<1);
923    t8=t7;
924    for(k=0;k<l1;k++){
925      t5=t3;
926      t6=t4;
927      t9=t8;
928      t11=t8;
929      for(i=2;i<ido;i+=2){
930        t5+=2;
931        t6+=2;
932        t9+=2;
933        t11-=2;
934        ch[t5-1]=cc[t9-1]+cc[t11-1];
935        ch[t6-1]=cc[t9-1]-cc[t11-1];
936        ch[t5]=cc[t9]-cc[t11];
937        ch[t6]=cc[t9]+cc[t11];
938      }
939      t3+=ido;
940      t4+=ido;
941      t8+=t10;
942    }
943  }
944  goto L116;
945
946 L112:
947  t1=0;
948  t2=ipp2*t0;
949  t7=0;
950  for(j=1;j<ipph;j++){
951    t1+=t0;
952    t2-=t0;
953    t3=t1;
954    t4=t2;
955    t7+=(ido<<1);
956    t8=t7;
957    t9=t7;
958    for(i=2;i<ido;i+=2){
959      t3+=2;
960      t4+=2;
961      t8+=2;
962      t9-=2;
963      t5=t3;
964      t6=t4;
965      t11=t8;
966      t12=t9;
967      for(k=0;k<l1;k++){
968        ch[t5-1]=cc[t11-1]+cc[t12-1];
969        ch[t6-1]=cc[t11-1]-cc[t12-1];
970        ch[t5]=cc[t11]-cc[t12];
971        ch[t6]=cc[t11]+cc[t12];
972        t5+=ido;
973        t6+=ido;
974        t11+=t10;
975        t12+=t10;
976      }
977    }
978  }
979
980L116:
981  ar1=1.f;
982  ai1=0.f;
983  t1=0;
984  t9=(t2=ipp2*idl1);
985  t3=(ip-1)*idl1;
986  for(l=1;l<ipph;l++){
987    t1+=idl1;
988    t2-=idl1;
989
990    ar1h=dcp*ar1-dsp*ai1;
991    ai1=dcp*ai1+dsp*ar1;
992    ar1=ar1h;
993    t4=t1;
994    t5=t2;
995    t6=0;
996    t7=idl1;
997    t8=t3;
998    for(ik=0;ik<idl1;ik++){
999      c2[t4++]=ch2[t6++]+ar1*ch2[t7++];
1000      c2[t5++]=ai1*ch2[t8++];
1001    }
1002    dc2=ar1;
1003    ds2=ai1;
1004    ar2=ar1;
1005    ai2=ai1;
1006
1007    t6=idl1;
1008    t7=t9-idl1;
1009    for(j=2;j<ipph;j++){
1010      t6+=idl1;
1011      t7-=idl1;
1012      ar2h=dc2*ar2-ds2*ai2;
1013      ai2=dc2*ai2+ds2*ar2;
1014      ar2=ar2h;
1015      t4=t1;
1016      t5=t2;
1017      t11=t6;
1018      t12=t7;
1019      for(ik=0;ik<idl1;ik++){
1020        c2[t4++]+=ar2*ch2[t11++];
1021        c2[t5++]+=ai2*ch2[t12++];
1022      }
1023    }
1024  }
1025
1026  t1=0;
1027  for(j=1;j<ipph;j++){
1028    t1+=idl1;
1029    t2=t1;
1030    for(ik=0;ik<idl1;ik++)ch2[ik]+=ch2[t2++];
1031  }
1032
1033  t1=0;
1034  t2=ipp2*t0;
1035  for(j=1;j<ipph;j++){
1036    t1+=t0;
1037    t2-=t0;
1038    t3=t1;
1039    t4=t2;
1040    for(k=0;k<l1;k++){
1041      ch[t3]=c1[t3]-c1[t4];
1042      ch[t4]=c1[t3]+c1[t4];
1043      t3+=ido;
1044      t4+=ido;
1045    }
1046  }
1047
1048  if(ido==1)goto L132;
1049  if(nbd<l1)goto L128;
1050
1051  t1=0;
1052  t2=ipp2*t0;
1053  for(j=1;j<ipph;j++){
1054    t1+=t0;
1055    t2-=t0;
1056    t3=t1;
1057    t4=t2;
1058    for(k=0;k<l1;k++){
1059      t5=t3;
1060      t6=t4;
1061      for(i=2;i<ido;i+=2){
1062        t5+=2;
1063        t6+=2;
1064        ch[t5-1]=c1[t5-1]-c1[t6];
1065        ch[t6-1]=c1[t5-1]+c1[t6];
1066        ch[t5]=c1[t5]+c1[t6-1];
1067        ch[t6]=c1[t5]-c1[t6-1];
1068      }
1069      t3+=ido;
1070      t4+=ido;
1071    }
1072  }
1073  goto L132;
1074
1075 L128:
1076  t1=0;
1077  t2=ipp2*t0;
1078  for(j=1;j<ipph;j++){
1079    t1+=t0;
1080    t2-=t0;
1081    t3=t1;
1082    t4=t2;
1083    for(i=2;i<ido;i+=2){
1084      t3+=2;
1085      t4+=2;
1086      t5=t3;
1087      t6=t4;
1088      for(k=0;k<l1;k++){
1089        ch[t5-1]=c1[t5-1]-c1[t6];
1090        ch[t6-1]=c1[t5-1]+c1[t6];
1091        ch[t5]=c1[t5]+c1[t6-1];
1092        ch[t6]=c1[t5]-c1[t6-1];
1093        t5+=ido;
1094        t6+=ido;
1095      }
1096    }
1097  }
1098
1099L132:
1100  if(ido==1)return;
1101
1102  for(ik=0;ik<idl1;ik++)c2[ik]=ch2[ik];
1103
1104  t1=0;
1105  for(j=1;j<ip;j++){
1106    t2=(t1+=t0);
1107    for(k=0;k<l1;k++){
1108      c1[t2]=ch[t2];
1109      t2+=ido;
1110    }
1111  }
1112
1113  if(nbd>l1)goto L139;
1114
1115  is= -ido-1;
1116  t1=0;
1117  for(j=1;j<ip;j++){
1118    is+=ido;
1119    t1+=t0;
1120    idij=is;
1121    t2=t1;
1122    for(i=2;i<ido;i+=2){
1123      t2+=2;
1124      idij+=2;
1125      t3=t2;
1126      for(k=0;k<l1;k++){
1127        c1[t3-1]=wa[idij-1]*ch[t3-1]-wa[idij]*ch[t3];
1128        c1[t3]=wa[idij-1]*ch[t3]+wa[idij]*ch[t3-1];
1129        t3+=ido;
1130      }
1131    }
1132  }
1133  return;
1134
1135 L139:
1136  is= -ido-1;
1137  t1=0;
1138  for(j=1;j<ip;j++){
1139    is+=ido;
1140    t1+=t0;
1141    t2=t1;
1142    for(k=0;k<l1;k++){
1143      idij=is;
1144      t3=t2;
1145      for(i=2;i<ido;i+=2){
1146        idij+=2;
1147        t3+=2;
1148        c1[t3-1]=wa[idij-1]*ch[t3-1]-wa[idij]*ch[t3];
1149        c1[t3]=wa[idij-1]*ch[t3]+wa[idij]*ch[t3-1];
1150      }
1151      t2+=ido;
1152    }
1153  }
1154}
1155
1156static void drftb1(int n, float *c, float *ch, float *wa, int *ifac){
1157  int i,k1,l1,l2;
1158  int na;
1159  int nf,ip,iw,ix2,ix3,ido,idl1;
1160
1161  nf=ifac[1];
1162  na=0;
1163  l1=1;
1164  iw=1;
1165
1166  for(k1=0;k1<nf;k1++){
1167    ip=ifac[k1 + 2];
1168    l2=ip*l1;
1169    ido=n/l2;
1170    idl1=ido*l1;
1171    if(ip!=4)goto L103;
1172    ix2=iw+ido;
1173    ix3=ix2+ido;
1174
1175    if(na!=0)
1176      dradb4(ido,l1,ch,c,wa+iw-1,wa+ix2-1,wa+ix3-1);
1177    else
1178      dradb4(ido,l1,c,ch,wa+iw-1,wa+ix2-1,wa+ix3-1);
1179    na=1-na;
1180    goto L115;
1181
1182  L103:
1183    if(ip!=2)goto L106;
1184
1185    if(na!=0)
1186      dradb2(ido,l1,ch,c,wa+iw-1);
1187    else
1188      dradb2(ido,l1,c,ch,wa+iw-1);
1189    na=1-na;
1190    goto L115;
1191
1192  L106:
1193    if(ip!=3)goto L109;
1194
1195    ix2=iw+ido;
1196    if(na!=0)
1197      dradb3(ido,l1,ch,c,wa+iw-1,wa+ix2-1);
1198    else
1199      dradb3(ido,l1,c,ch,wa+iw-1,wa+ix2-1);
1200    na=1-na;
1201    goto L115;
1202
1203  L109:
1204/*    The radix five case can be translated later..... */
1205/*    if(ip!=5)goto L112;
1206
1207    ix2=iw+ido;
1208    ix3=ix2+ido;
1209    ix4=ix3+ido;
1210    if(na!=0)
1211      dradb5(ido,l1,ch,c,wa+iw-1,wa+ix2-1,wa+ix3-1,wa+ix4-1);
1212    else
1213      dradb5(ido,l1,c,ch,wa+iw-1,wa+ix2-1,wa+ix3-1,wa+ix4-1);
1214    na=1-na;
1215    goto L115;
1216
1217  L112:*/
1218    if(na!=0)
1219      dradbg(ido,ip,l1,idl1,ch,ch,ch,c,c,wa+iw-1);
1220    else
1221      dradbg(ido,ip,l1,idl1,c,c,c,ch,ch,wa+iw-1);
1222    if(ido==1)na=1-na;
1223
1224  L115:
1225    l1=l2;
1226    iw+=(ip-1)*ido;
1227  }
1228
1229  if(na==0)return;
1230
1231  for(i=0;i<n;i++)c[i]=ch[i];
1232}
1233
1234void spx_drft_forward(struct drft_lookup *l,float *data){
1235  if(l->n==1)return;
1236  drftf1(l->n,data,l->trigcache,l->trigcache+l->n,l->splitcache);
1237}
1238
1239void spx_drft_backward(struct drft_lookup *l,float *data){
1240  if (l->n==1)return;
1241  drftb1(l->n,data,l->trigcache,l->trigcache+l->n,l->splitcache);
1242}
1243
1244void spx_drft_init(struct drft_lookup *l,int n)
1245{
1246  l->n=n;
1247  l->trigcache=(float*)speex_alloc(3*n*sizeof(*l->trigcache));
1248  l->splitcache=(int*)speex_alloc(32*sizeof(*l->splitcache));
1249  fdrffti(n, l->trigcache, l->splitcache);
1250}
1251
1252void spx_drft_clear(struct drft_lookup *l)
1253{
1254  if(l)
1255  {
1256    if(l->trigcache)
1257      speex_free(l->trigcache);
1258    if(l->splitcache)
1259      speex_free(l->splitcache);
1260  }
1261}
1262