1ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn// This small program computes a Fast Fourier Transform.  It tests
2ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn// Valgrind's handling of FP operations.  It is representative of all
3ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn// programs that do a lot of FP operations.
4ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
586c2349f3dd6574a20f58b31ed901d0006bced4bnjn// Licensing: This program is closely based on the one of the same name from
686c2349f3dd6574a20f58b31ed901d0006bced4bnjn// http://www.fourmilab.ch/.  The front page of that site says:
7ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn//
8ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn//   "Except for a few clearly-marked exceptions, all the material on this
9ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn//   site is in the public domain and may be used in any manner without
10ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn//   permission, restriction, attribution, or compensation."
11ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
12ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn/*
13ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
14ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	Two-dimensional FFT benchmark
15ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
16ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	Designed and implemented by John Walker in April of 1989.
17ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
18ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	This  benchmark executes a specified number of passes (default
19ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	20) through a loop in which each  iteration  performs  a  fast
20ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	Fourier transform of a square matrix (default size 256x256) of
21ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	complex numbers (default precision double),  followed  by  the
22ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	inverse  transform.   After  all loop iterations are performed
23ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	the results are checked against known correct values.
24ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
25ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	This benchmark is intended for use on C implementations  which
26ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn        define  "int"  as  32 bits or longer and permit allocation and
27ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	direct addressing of arrays larger than one megabyte.
28ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
29ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	If CAPOUT is defined,  the  result  after  all	iterations  is
30ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	written  as  a	CA  Lab  pattern  file.   This is intended for
31ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	debugging in case horribly wrong results  are  obtained  on  a
32ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	given machine.
33ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
34ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	Archival  timings  are	run  with the definitions below set as
35ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	follows: Float = double, Asize = 256, Passes = 20, CAPOUT  not
36ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	defined.
37ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
38ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	Time (seconds)		    System
39ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
40ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn            2393.93       Sun 3/260, SunOS 3.4, C, "-f68881 -O".
41ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn			  (John Walker).
42ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
43ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn            1928          Macintosh IIx, MPW C 3.0, "-mc68020
44ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn                          -mc68881 -elems881 -m".  (Hugh Hoover).
45ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
46ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn            1636.1        Sun 4/110, "cc -O3 -lm".  (Michael McClary).
47ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn			  The suspicion is that this is software
48ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn			  floating point.
49ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
50ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn            1556.7        Macintosh II, A/UX, "cc -O -lm"
51ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn			  (Michael McClary).
52ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
53ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	    1388.8	  Sun 386i/250, SunOS 4.0.1 C
54ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn                          "-O /usr/lib/trig.il".  (James Carrington).
55ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
56ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	    1331.93	  Sun 3/60, SunOS 4.0.1, C,
57ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn                          "-O4 -f68881 /usr/lib/libm.il"
58ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn			  (Bob Elman).
59ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
60ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn            1204.0        Apollo Domain DN4000, C, "-cpu 3000 -opt 4".
61ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn			  (Sam Crupi).
62ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
63ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	    1174.66	  Compaq 386/25, SCO Xenix 386 C.
64ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn			  (Peter Shieh).
65ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
66ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	    1068	  Compaq 386/25, SCO Xenix 386,
67ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn			  Metaware High C.  (Robert Wenig).
68ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
69ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	    1064.0	  Sun 3/80, SunOS 4.0.3 Beta C
70ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn                          "-O3 -f68881 /usr/lib/libm.il".  (James Carrington).
71ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
72ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	    1061.4	  Compaq 386/25, SCO Xenix, High C 1.4.
73ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn			  (James Carrington).
74ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
75ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	    1059.79	  Compaq 386/25, 387/25, High C 1.4,
76ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn			  DOS|Extender 2.2, 387 inline code
77ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn			  generation.  (Nathan Bender).
78ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
79ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	     777.14	  Compaq 386/25, IIT 3C87-25 (387 Compatible),
80ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn			  High C 1.5, DOS|Extender 2.2, 387 inline
81ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn			  code generation.  (Nathan Bender).
82ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
83ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	     751	  Compaq DeskPro 386/33, High C 1.5 + DOS|Extender,
84ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn			  387 code generation.	(James Carrington).
85ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
86ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	     431.44	  Compaq 386/25, Weitek 3167-25, DOS 3.31,
87ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn			  High C 1.4, DOS|Extender, Weitek code generation.
88ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn			  (Nathan Bender).
89ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
90ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	     344.9	  Compaq 486/25, Metaware High C 1.6, Phar Lap
91ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn			  DOS|Extender, in-line floating point.  (Nathan
92ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn			  Bender).
93ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
94ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	     324.2	  Data General Motorola 88000, 16 Mhz, Gnu C.
95ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
96ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn             323.1        Sun 4/280, C, "-O4".  (Eric Hill).
97ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
98ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	     254	  Compaq SystemPro 486/33, High C 1.5 + DOS|Extender,
99ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn			  387 code generation.	(James Carrington).
100ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
101ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	     242.8	  Silicon Graphics Personal IRIS, MIPS R2000A,
102ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn                          12.5 Mhz, "-O3" (highest level optimisation).
103ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn			  (Mike Zentner).
104ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
105ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn             233.0        Sun SPARCStation 1, C, "-O4", SunOS 4.0.3.
106ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn			  (Nathan Bender).
107ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
108ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	     187.30	  DEC PMAX 3100, MIPS 2000 chip.
109ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn			  (Robert Wenig).
110ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
111ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn             120.46       Sun SparcStation 2, C, "-O4", SunOS 4.1.1.
112ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn			  (John Walker).
113ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
114ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn             120.21       DEC 3MAX, MIPS 3000, "-O4".
115ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
116ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      98.0	  Intel i860 experimental environment,
117ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn			  OS/2, data caching disabled.	(Kern
118ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn			  Sibbald).
119ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
120ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      34.9	  Silicon Graphics Indigo�, MIPS R4400,
121ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn                          175 Mhz, IRIX 5.2, "-O".
122ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
123ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      32.4	  Pentium 133, Windows NT, Microsoft Visual
124ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn			  C++ 4.0.
125ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
126ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      17.25	  Silicon Graphics Indigo�, MIPS R4400,
127ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn                          175 Mhz, IRIX 6.5, "-O3".
128ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
129ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      14.10	  Dell Dimension XPS R100, Pentium II 400 MHz,
130ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn			  Windows 98, Microsoft Visual C 5.0.
131ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
132ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      10.7	  Hewlett-Packard Kayak XU 450Mhz Pentium II,
133ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn			  Microsoft Visual C++ 6.0, Windows NT 4.0sp3.	(Nathan Bender).
134ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
135ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	       5.09	  Sun Ultra 2, UltraSPARC V9, 300 MHz, gcc -O3.
136ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
137ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	       0.846	  Dell Inspiron 9100, Pentium 4, 3.4 GHz, gcc -O3.
138ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
139ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn*/
140ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
141ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#include <stdio.h>
142ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#include <stdlib.h>
143ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#include <math.h>
144ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#include <string.h>
145ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
146ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn/*  The  program  may  be  run	with  Float defined as either float or
147ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn    double.  With IEEE arithmetic, the same answers are generated  for
148ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn    either floating point mode.  */
149ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
150ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#define Float	 double 	   /* Floating point type used in FFT */
151ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
152ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#define Asize	 256		   /* Array edge size */
153ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#define Passes	 20		   /* Number of FFT/Inverse passes */
154ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
155ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#define max(a,b) ((a)>(b)?(a):(b))
156ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#define min(a,b) ((a)<=(b)?(a):(b))
157ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
158ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn/*
159ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
160ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	Multi-dimensional fast Fourier transform
161ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
162ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn        Adapted from Press et al., "Numerical Recipes in C".
163ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
164ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn*/
165ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
166ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#define SWAP(a,b) tempr=(a); (a)=(b); (b)=tempr
167ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
168ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njnstatic void fourn(data, nn, ndim, isign)
169ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn  Float data[];
170ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn  int nn[], ndim, isign;
171ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn{
172ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	register int i1, i2, i3;
173ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	int i2rev, i3rev, ip1, ip2, ip3, ifp1, ifp2;
174ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	int ibit, idim, k1, k2, n, nprev, nrem, ntot;
175ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	Float tempi, tempr;
176ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	double theta, wi, wpi, wpr, wr, wtemp;
177ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
178ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	ntot = 1;
179ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	for (idim = 1; idim <= ndim; idim++)
180ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   ntot *= nn[idim];
181ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	nprev = 1;
182ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	for (idim = ndim; idim >= 1; idim--) {
183ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   n = nn[idim];
184ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   nrem = ntot / (n * nprev);
185ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   ip1 = nprev << 1;
186ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   ip2 = ip1 * n;
187ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   ip3 = ip2 * nrem;
188ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   i2rev = 1;
189ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   for (i2 = 1; i2 <= ip2; i2 += ip1) {
190ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      if (i2 < i2rev) {
191ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn		 for (i1 = i2; i1 <= i2 + ip1 - 2; i1 += 2) {
192ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn		    for (i3 = i1; i3 <= ip3; i3 += ip2) {
193ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn		       i3rev = i2rev + i3 - i2;
194ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn		       SWAP(data[i3], data[i3rev]);
195ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn		       SWAP(data[i3 + 1], data[i3rev + 1]);
196ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn		    }
197ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn		 }
198ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      }
199ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      ibit = ip2 >> 1;
200ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      while (ibit >= ip1 && i2rev > ibit) {
201ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn		 i2rev -= ibit;
202ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn		 ibit >>= 1;
203ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      }
204ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      i2rev += ibit;
205ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   }
206ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   ifp1 = ip1;
207ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   while (ifp1 < ip2) {
208ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      ifp2 = ifp1 << 1;
209ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      theta = isign * 6.28318530717959 / (ifp2 / ip1);
210ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      wtemp = sin(0.5 * theta);
211ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      wpr = -2.0 * wtemp * wtemp;
212ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      wpi = sin(theta);
213ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      wr = 1.0;
214ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      wi = 0.0;
215ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      for (i3 = 1; i3 <= ifp1; i3 += ip1) {
216ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn		 for (i1 = i3; i1 <= i3 + ip1 - 2; i1 += 2) {
217ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn		    for (i2 = i1; i2 <= ip3; i2 += ifp2) {
218ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn		       k1 = i2;
219ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn		       k2 = k1 + ifp1;
220ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn		       tempr = wr * data[k2] - wi * data[k2 + 1];
221ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn		       tempi = wr * data[k2 + 1] + wi * data[k2];
222ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn		       data[k2] = data[k1] - tempr;
223ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn		       data[k2 + 1] = data[k1 + 1] - tempi;
224ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn		       data[k1] += tempr;
225ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn		       data[k1 + 1] += tempi;
226ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn		    }
227ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn		 }
228ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn		 wr = (wtemp = wr) * wpr - wi * wpi + wr;
229ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn		 wi = wi * wpr + wtemp * wpi + wi;
230ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      }
231ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      ifp1 = ifp2;
232ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   }
233ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   nprev *= n;
234ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	}
235ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn}
236ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#undef SWAP
237ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
238ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njnint main()
239ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn{
240ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	int i, j, k, l, m, npasses = Passes, faedge;
241ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	Float *fdata /* , *fd */ ;
242ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	static int nsize[] = {0, 0, 0};
243ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	long fanum, fasize;
244ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	double mapbase, mapscale, /* x, */ rmin, rmax, imin, imax;
245ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
246ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	faedge = Asize; 	   /* FFT array edge size */
247ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	fanum = faedge * faedge;   /* Elements in FFT array */
248ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	fasize = ((fanum + 1) * 2 * sizeof(Float)); /* FFT array size */
249ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	nsize[1] = nsize[2] = faedge;
250ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
251ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	fdata = (Float *) malloc(fasize);
252ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	if (fdata == NULL) {
253ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn           fprintf(stdout, "Can't allocate data array.\n");
254ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   exit(1);
255ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	}
256ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
257ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	/*  Generate data array to process.  */
258ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
259ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#define Re(x,y) fdata[1 + (faedge * (x) + (y)) * 2]
260ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#define Im(x,y) fdata[2 + (faedge * (x) + (y)) * 2]
261ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
262ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	memset(fdata, 0, fasize);
263ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	for (i = 0; i < faedge; i++) {
264ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   for (j = 0; j < faedge; j++) {
265ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      if (((i & 15) == 8) || ((j & 15) == 8))
266ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn		 Re(i, j) = 128.0;
267ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   }
268ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	}
269ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
270ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	for (i = 0; i < npasses; i++) {
271ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn/*printf("Pass %d\n", i);*/
272ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   /* Transform image to frequency domain. */
273ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   fourn(fdata, nsize, 2, 1);
274ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
275ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   /*  Back-transform to image. */
276ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   fourn(fdata, nsize, 2, -1);
277ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	}
278ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
279ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	{
280ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   double r, ij, ar, ai;
281ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   rmin = 1e10; rmax = -1e10;
282ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   imin = 1e10; imax = -1e10;
283ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   ar = 0;
284ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   ai = 0;
285ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
286ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   for (i = 1; i <= fanum; i += 2) {
287ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      r = fdata[i];
288ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      ij = fdata[i + 1];
289ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      ar += r;
290ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      ai += ij;
291ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      rmin = min(r, rmin);
292ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      rmax = max(r, rmax);
293ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      imin = min(ij, imin);
294ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      imax = max(ij, imax);
295ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   }
296ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#ifdef DEBUG
297ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn           printf("Real min %.4g, max %.4g.  Imaginary min %.4g, max %.4g.\n",
298ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      rmin, rmax, imin, imax);
299ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn           printf("Average real %.4g, imaginary %.4g.\n",
300ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      ar / fanum, ai / fanum);
301ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#endif
302ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   mapbase = rmin;
303ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   mapscale = 255 / (rmax - rmin);
304ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	}
305ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
306ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	/* See if we got the right answers. */
307ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
308ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	m = 0;
309ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	for (i = 0; i < faedge; i++) {
310ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   for (j = 0; j < faedge; j++) {
311ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      k = (Re(i, j) - mapbase) * mapscale;
312ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      l = (((i & 15) == 8) || ((j & 15) == 8)) ? 255 : 0;
313ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      if (k != l) {
314ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn		 m++;
315ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn		 fprintf(stdout,
316ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn                    "Wrong answer at (%d,%d)!  Expected %d, got %d.\n",
317ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn		    i, j, l, k);
318ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      }
319ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   }
320ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	}
321ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	if (m == 0) {
322ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn           fprintf(stdout, "%d passes.  No errors in results.\n", npasses);
323ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	} else {
324ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn           fprintf(stdout, "%d passes.  %d errors in results.\n",
325ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      npasses, m);
326ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	}
327ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
328ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#ifdef CAPOUT
329ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
330ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	/* Output the result of the transform as a CA Lab pattern
331ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   file for debugging. */
332ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
333ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	{
334ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#define SCRX  322
335ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#define SCRY  200
336ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#define SCRN  (SCRX * SCRY)
337ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   unsigned char patarr[SCRY][SCRX];
338ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   FILE *fp;
339ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
340ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn/*  Map user external state numbers to internal state index  */
341ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
342ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#define UtoI(x)     (((((x) >> 1) & 0x7F) | ((x) << 7)) & 0xFF)
343ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
344ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   /* Copy data from FFT buffer to map. */
345ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
346ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   memset(patarr, 0, sizeof patarr);
347ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   l = (SCRX - faedge) / 2;
348ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   m = (faedge > SCRY) ? 0 : ((SCRY - faedge) / 2);
349ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   for (i = 1; i < faedge; i++) {
350ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      for (j = 0; j < min(SCRY, faedge); j++) {
351ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn		 k = (Re(i, j) - mapbase) * mapscale;
352ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn		 patarr[j + m][i + l] = UtoI(k);
353ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      }
354ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   }
355ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
356ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   /* Dump pattern map to file. */
357ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
358ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn           fp = fopen("fft.cap", "w");
359ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   if (fp == NULL) {
360ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn              fprintf(stdout, "Cannot open output file.\n");
361ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	      exit(0);
362ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   }
363ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn           putc(':', fp);
364ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   putc(1, fp);
365ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   fwrite(patarr, SCRN, 1, fp);
366ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   putc(6, fp);
367ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	   fclose(fp);
368ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	}
369ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#endif
370ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn
371ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn	return 0;
372ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn}
373