1ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn// This small program computes a Fast Fourier Transform. It tests 2ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn// Valgrind's handling of FP operations. It is representative of all 3ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn// programs that do a lot of FP operations. 4ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 586c2349f3dd6574a20f58b31ed901d0006bced4bnjn// Licensing: This program is closely based on the one of the same name from 686c2349f3dd6574a20f58b31ed901d0006bced4bnjn// http://www.fourmilab.ch/. The front page of that site says: 7ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn// 8ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn// "Except for a few clearly-marked exceptions, all the material on this 9ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn// site is in the public domain and may be used in any manner without 10ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn// permission, restriction, attribution, or compensation." 11ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 12ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn/* 13ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 14ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn Two-dimensional FFT benchmark 15ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 16ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn Designed and implemented by John Walker in April of 1989. 17ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 18ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn This benchmark executes a specified number of passes (default 19ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 20) through a loop in which each iteration performs a fast 20ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn Fourier transform of a square matrix (default size 256x256) of 21ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn complex numbers (default precision double), followed by the 22ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn inverse transform. After all loop iterations are performed 23ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn the results are checked against known correct values. 24ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 25ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn This benchmark is intended for use on C implementations which 26ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn define "int" as 32 bits or longer and permit allocation and 27ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn direct addressing of arrays larger than one megabyte. 28ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 29ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn If CAPOUT is defined, the result after all iterations is 30ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn written as a CA Lab pattern file. This is intended for 31ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn debugging in case horribly wrong results are obtained on a 32ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn given machine. 33ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 34ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn Archival timings are run with the definitions below set as 35ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn follows: Float = double, Asize = 256, Passes = 20, CAPOUT not 36ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn defined. 37ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 38ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn Time (seconds) System 39ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 40ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 2393.93 Sun 3/260, SunOS 3.4, C, "-f68881 -O". 41ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn (John Walker). 42ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 43ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 1928 Macintosh IIx, MPW C 3.0, "-mc68020 44ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn -mc68881 -elems881 -m". (Hugh Hoover). 45ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 46ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 1636.1 Sun 4/110, "cc -O3 -lm". (Michael McClary). 47ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn The suspicion is that this is software 48ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn floating point. 49ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 50ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 1556.7 Macintosh II, A/UX, "cc -O -lm" 51ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn (Michael McClary). 52ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 53ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 1388.8 Sun 386i/250, SunOS 4.0.1 C 54ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn "-O /usr/lib/trig.il". (James Carrington). 55ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 56ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 1331.93 Sun 3/60, SunOS 4.0.1, C, 57ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn "-O4 -f68881 /usr/lib/libm.il" 58ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn (Bob Elman). 59ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 60ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 1204.0 Apollo Domain DN4000, C, "-cpu 3000 -opt 4". 61ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn (Sam Crupi). 62ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 63ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 1174.66 Compaq 386/25, SCO Xenix 386 C. 64ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn (Peter Shieh). 65ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 66ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 1068 Compaq 386/25, SCO Xenix 386, 67ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn Metaware High C. (Robert Wenig). 68ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 69ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 1064.0 Sun 3/80, SunOS 4.0.3 Beta C 70ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn "-O3 -f68881 /usr/lib/libm.il". (James Carrington). 71ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 72ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 1061.4 Compaq 386/25, SCO Xenix, High C 1.4. 73ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn (James Carrington). 74ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 75ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 1059.79 Compaq 386/25, 387/25, High C 1.4, 76ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn DOS|Extender 2.2, 387 inline code 77ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn generation. (Nathan Bender). 78ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 79ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 777.14 Compaq 386/25, IIT 3C87-25 (387 Compatible), 80ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn High C 1.5, DOS|Extender 2.2, 387 inline 81ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn code generation. (Nathan Bender). 82ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 83ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 751 Compaq DeskPro 386/33, High C 1.5 + DOS|Extender, 84ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 387 code generation. (James Carrington). 85ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 86ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 431.44 Compaq 386/25, Weitek 3167-25, DOS 3.31, 87ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn High C 1.4, DOS|Extender, Weitek code generation. 88ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn (Nathan Bender). 89ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 90ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 344.9 Compaq 486/25, Metaware High C 1.6, Phar Lap 91ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn DOS|Extender, in-line floating point. (Nathan 92ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn Bender). 93ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 94ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 324.2 Data General Motorola 88000, 16 Mhz, Gnu C. 95ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 96ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 323.1 Sun 4/280, C, "-O4". (Eric Hill). 97ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 98ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 254 Compaq SystemPro 486/33, High C 1.5 + DOS|Extender, 99ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 387 code generation. (James Carrington). 100ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 101ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 242.8 Silicon Graphics Personal IRIS, MIPS R2000A, 102ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 12.5 Mhz, "-O3" (highest level optimisation). 103ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn (Mike Zentner). 104ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 105ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 233.0 Sun SPARCStation 1, C, "-O4", SunOS 4.0.3. 106ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn (Nathan Bender). 107ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 108ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 187.30 DEC PMAX 3100, MIPS 2000 chip. 109ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn (Robert Wenig). 110ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 111ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 120.46 Sun SparcStation 2, C, "-O4", SunOS 4.1.1. 112ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn (John Walker). 113ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 114ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 120.21 DEC 3MAX, MIPS 3000, "-O4". 115ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 116ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 98.0 Intel i860 experimental environment, 117ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn OS/2, data caching disabled. (Kern 118ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn Sibbald). 119ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 120ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 34.9 Silicon Graphics Indigo�, MIPS R4400, 121ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 175 Mhz, IRIX 5.2, "-O". 122ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 123ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 32.4 Pentium 133, Windows NT, Microsoft Visual 124ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn C++ 4.0. 125ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 126ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 17.25 Silicon Graphics Indigo�, MIPS R4400, 127ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 175 Mhz, IRIX 6.5, "-O3". 128ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 129ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 14.10 Dell Dimension XPS R100, Pentium II 400 MHz, 130ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn Windows 98, Microsoft Visual C 5.0. 131ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 132ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 10.7 Hewlett-Packard Kayak XU 450Mhz Pentium II, 133ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn Microsoft Visual C++ 6.0, Windows NT 4.0sp3. (Nathan Bender). 134ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 135ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 5.09 Sun Ultra 2, UltraSPARC V9, 300 MHz, gcc -O3. 136ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 137ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 0.846 Dell Inspiron 9100, Pentium 4, 3.4 GHz, gcc -O3. 138ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 139ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn*/ 140ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 141ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#include <stdio.h> 142ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#include <stdlib.h> 143ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#include <math.h> 144ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#include <string.h> 145ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 146ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn/* The program may be run with Float defined as either float or 147ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn double. With IEEE arithmetic, the same answers are generated for 148ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn either floating point mode. */ 149ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 150ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#define Float double /* Floating point type used in FFT */ 151ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 152ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#define Asize 256 /* Array edge size */ 153ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#define Passes 20 /* Number of FFT/Inverse passes */ 154ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 155ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#define max(a,b) ((a)>(b)?(a):(b)) 156ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#define min(a,b) ((a)<=(b)?(a):(b)) 157ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 158ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn/* 159ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 160ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn Multi-dimensional fast Fourier transform 161ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 162ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn Adapted from Press et al., "Numerical Recipes in C". 163ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 164ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn*/ 165ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 166ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#define SWAP(a,b) tempr=(a); (a)=(b); (b)=tempr 167ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 168ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njnstatic void fourn(data, nn, ndim, isign) 169ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn Float data[]; 170ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn int nn[], ndim, isign; 171ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn{ 172ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn register int i1, i2, i3; 173ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn int i2rev, i3rev, ip1, ip2, ip3, ifp1, ifp2; 174ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn int ibit, idim, k1, k2, n, nprev, nrem, ntot; 175ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn Float tempi, tempr; 176ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn double theta, wi, wpi, wpr, wr, wtemp; 177ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 178ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn ntot = 1; 179ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn for (idim = 1; idim <= ndim; idim++) 180ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn ntot *= nn[idim]; 181ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn nprev = 1; 182ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn for (idim = ndim; idim >= 1; idim--) { 183ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn n = nn[idim]; 184ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn nrem = ntot / (n * nprev); 185ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn ip1 = nprev << 1; 186ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn ip2 = ip1 * n; 187ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn ip3 = ip2 * nrem; 188ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn i2rev = 1; 189ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn for (i2 = 1; i2 <= ip2; i2 += ip1) { 190ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn if (i2 < i2rev) { 191ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn for (i1 = i2; i1 <= i2 + ip1 - 2; i1 += 2) { 192ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn for (i3 = i1; i3 <= ip3; i3 += ip2) { 193ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn i3rev = i2rev + i3 - i2; 194ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn SWAP(data[i3], data[i3rev]); 195ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn SWAP(data[i3 + 1], data[i3rev + 1]); 196ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn } 197ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn } 198ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn } 199ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn ibit = ip2 >> 1; 200ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn while (ibit >= ip1 && i2rev > ibit) { 201ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn i2rev -= ibit; 202ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn ibit >>= 1; 203ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn } 204ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn i2rev += ibit; 205ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn } 206ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn ifp1 = ip1; 207ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn while (ifp1 < ip2) { 208ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn ifp2 = ifp1 << 1; 209ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn theta = isign * 6.28318530717959 / (ifp2 / ip1); 210ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn wtemp = sin(0.5 * theta); 211ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn wpr = -2.0 * wtemp * wtemp; 212ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn wpi = sin(theta); 213ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn wr = 1.0; 214ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn wi = 0.0; 215ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn for (i3 = 1; i3 <= ifp1; i3 += ip1) { 216ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn for (i1 = i3; i1 <= i3 + ip1 - 2; i1 += 2) { 217ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn for (i2 = i1; i2 <= ip3; i2 += ifp2) { 218ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn k1 = i2; 219ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn k2 = k1 + ifp1; 220ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn tempr = wr * data[k2] - wi * data[k2 + 1]; 221ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn tempi = wr * data[k2 + 1] + wi * data[k2]; 222ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn data[k2] = data[k1] - tempr; 223ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn data[k2 + 1] = data[k1 + 1] - tempi; 224ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn data[k1] += tempr; 225ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn data[k1 + 1] += tempi; 226ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn } 227ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn } 228ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn wr = (wtemp = wr) * wpr - wi * wpi + wr; 229ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn wi = wi * wpr + wtemp * wpi + wi; 230ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn } 231ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn ifp1 = ifp2; 232ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn } 233ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn nprev *= n; 234ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn } 235ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn} 236ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#undef SWAP 237ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 238ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njnint main() 239ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn{ 240ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn int i, j, k, l, m, npasses = Passes, faedge; 241ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn Float *fdata /* , *fd */ ; 242ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn static int nsize[] = {0, 0, 0}; 243ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn long fanum, fasize; 244ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn double mapbase, mapscale, /* x, */ rmin, rmax, imin, imax; 245ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 246ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn faedge = Asize; /* FFT array edge size */ 247ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn fanum = faedge * faedge; /* Elements in FFT array */ 248ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn fasize = ((fanum + 1) * 2 * sizeof(Float)); /* FFT array size */ 249ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn nsize[1] = nsize[2] = faedge; 250ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 251ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn fdata = (Float *) malloc(fasize); 252ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn if (fdata == NULL) { 253ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn fprintf(stdout, "Can't allocate data array.\n"); 254ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn exit(1); 255ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn } 256ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 257ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn /* Generate data array to process. */ 258ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 259ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#define Re(x,y) fdata[1 + (faedge * (x) + (y)) * 2] 260ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#define Im(x,y) fdata[2 + (faedge * (x) + (y)) * 2] 261ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 262ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn memset(fdata, 0, fasize); 263ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn for (i = 0; i < faedge; i++) { 264ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn for (j = 0; j < faedge; j++) { 265ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn if (((i & 15) == 8) || ((j & 15) == 8)) 266ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn Re(i, j) = 128.0; 267ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn } 268ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn } 269ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 270ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn for (i = 0; i < npasses; i++) { 271ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn/*printf("Pass %d\n", i);*/ 272ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn /* Transform image to frequency domain. */ 273ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn fourn(fdata, nsize, 2, 1); 274ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 275ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn /* Back-transform to image. */ 276ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn fourn(fdata, nsize, 2, -1); 277ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn } 278ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 279ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn { 280ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn double r, ij, ar, ai; 281ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn rmin = 1e10; rmax = -1e10; 282ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn imin = 1e10; imax = -1e10; 283ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn ar = 0; 284ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn ai = 0; 285ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 286ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn for (i = 1; i <= fanum; i += 2) { 287ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn r = fdata[i]; 288ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn ij = fdata[i + 1]; 289ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn ar += r; 290ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn ai += ij; 291ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn rmin = min(r, rmin); 292ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn rmax = max(r, rmax); 293ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn imin = min(ij, imin); 294ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn imax = max(ij, imax); 295ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn } 296ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#ifdef DEBUG 297ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn printf("Real min %.4g, max %.4g. Imaginary min %.4g, max %.4g.\n", 298ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn rmin, rmax, imin, imax); 299ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn printf("Average real %.4g, imaginary %.4g.\n", 300ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn ar / fanum, ai / fanum); 301ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#endif 302ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn mapbase = rmin; 303ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn mapscale = 255 / (rmax - rmin); 304ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn } 305ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 306ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn /* See if we got the right answers. */ 307ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 308ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn m = 0; 309ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn for (i = 0; i < faedge; i++) { 310ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn for (j = 0; j < faedge; j++) { 311ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn k = (Re(i, j) - mapbase) * mapscale; 312ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn l = (((i & 15) == 8) || ((j & 15) == 8)) ? 255 : 0; 313ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn if (k != l) { 314ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn m++; 315ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn fprintf(stdout, 316ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn "Wrong answer at (%d,%d)! Expected %d, got %d.\n", 317ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn i, j, l, k); 318ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn } 319ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn } 320ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn } 321ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn if (m == 0) { 322ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn fprintf(stdout, "%d passes. No errors in results.\n", npasses); 323ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn } else { 324ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn fprintf(stdout, "%d passes. %d errors in results.\n", 325ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn npasses, m); 326ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn } 327ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 328ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#ifdef CAPOUT 329ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 330ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn /* Output the result of the transform as a CA Lab pattern 331ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn file for debugging. */ 332ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 333ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn { 334ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#define SCRX 322 335ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#define SCRY 200 336ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#define SCRN (SCRX * SCRY) 337ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn unsigned char patarr[SCRY][SCRX]; 338ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn FILE *fp; 339ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 340ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn/* Map user external state numbers to internal state index */ 341ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 342ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#define UtoI(x) (((((x) >> 1) & 0x7F) | ((x) << 7)) & 0xFF) 343ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 344ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn /* Copy data from FFT buffer to map. */ 345ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 346ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn memset(patarr, 0, sizeof patarr); 347ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn l = (SCRX - faedge) / 2; 348ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn m = (faedge > SCRY) ? 0 : ((SCRY - faedge) / 2); 349ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn for (i = 1; i < faedge; i++) { 350ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn for (j = 0; j < min(SCRY, faedge); j++) { 351ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn k = (Re(i, j) - mapbase) * mapscale; 352ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn patarr[j + m][i + l] = UtoI(k); 353ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn } 354ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn } 355ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 356ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn /* Dump pattern map to file. */ 357ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 358ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn fp = fopen("fft.cap", "w"); 359ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn if (fp == NULL) { 360ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn fprintf(stdout, "Cannot open output file.\n"); 361ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn exit(0); 362ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn } 363ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn putc(':', fp); 364ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn putc(1, fp); 365ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn fwrite(patarr, SCRN, 1, fp); 366ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn putc(6, fp); 367ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn fclose(fp); 368ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn } 369ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn#endif 370ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn 371ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn return 0; 372ec0c27a40ae0777cc5c5f96f219d1d72cbcaafd6njn} 373