1/*
2 * Experimental data  distribution table generator
3 * Taken from the uncopyrighted NISTnet code (public domain).
4 *
5 * Read in a series of "random" data values, either
6 * experimentally or generated from some probability distribution.
7 * From this, create the inverse distribution table used to approximate
8 * the distribution.
9 */
10#include <stdio.h>
11#include <stdlib.h>
12#include <math.h>
13#include <malloc.h>
14#include <string.h>
15#include <sys/types.h>
16#include <sys/stat.h>
17
18
19double *
20readdoubles(FILE *fp, int *number)
21{
22	struct stat info;
23	double *x;
24	int limit;
25	int n=0, i;
26
27	fstat(fileno(fp), &info);
28	if (info.st_size > 0) {
29		limit = 2*info.st_size/sizeof(double);	/* @@ approximate */
30	} else {
31		limit = 10000;
32	}
33
34	x = calloc(limit, sizeof(double));
35	if (!x) {
36		perror("double alloc");
37		exit(3);
38	}
39
40	for (i=0; i<limit; ++i){
41		fscanf(fp, "%lf", &x[i]);
42		if (feof(fp))
43			break;
44		++n;
45	}
46	*number = n;
47	return x;
48}
49
50void
51arraystats(double *x, int limit, double *mu, double *sigma, double *rho)
52{
53	int n=0, i;
54	double sumsquare=0.0, sum=0.0, top=0.0;
55	double sigma2=0.0;
56
57	for (i=0; i<limit; ++i){
58		sumsquare += x[i]*x[i];
59		sum += x[i];
60		++n;
61	}
62	*mu = sum/(double)n;
63	*sigma = sqrt((sumsquare - (double)n*(*mu)*(*mu))/(double)(n-1));
64
65	for (i=1; i < n; ++i){
66		top += ((double)x[i]- *mu)*((double)x[i-1]- *mu);
67		sigma2 += ((double)x[i-1] - *mu)*((double)x[i-1] - *mu);
68
69	}
70	*rho = top/sigma2;
71}
72
73/* Create a (normalized) distribution table from a set of observed
74 * values.  The table is fixed to run from (as it happens) -4 to +4,
75 * with granularity .00002.
76 */
77
78#define TABLESIZE	16384/4
79#define TABLEFACTOR	8192
80#ifndef MINSHORT
81#define MINSHORT	-32768
82#define MAXSHORT	32767
83#endif
84
85/* Since entries in the inverse are scaled by TABLEFACTOR, and can't be bigger
86 * than MAXSHORT, we don't bother looking at a larger domain than this:
87 */
88#define DISTTABLEDOMAIN ((MAXSHORT/TABLEFACTOR)+1)
89#define DISTTABLEGRANULARITY 50000
90#define DISTTABLESIZE (DISTTABLEDOMAIN*DISTTABLEGRANULARITY*2)
91
92static int *
93makedist(double *x, int limit, double mu, double sigma)
94{
95	int *table;
96	int i, index, first=DISTTABLESIZE, last=0;
97	double input;
98
99	table = calloc(DISTTABLESIZE, sizeof(int));
100	if (!table) {
101		perror("table alloc");
102		exit(3);
103	}
104
105	for (i=0; i < limit; ++i) {
106		/* Normalize value */
107		input = (x[i]-mu)/sigma;
108
109		index = (int)rint((input+DISTTABLEDOMAIN)*DISTTABLEGRANULARITY);
110		if (index < 0) index = 0;
111		if (index >= DISTTABLESIZE) index = DISTTABLESIZE-1;
112		++table[index];
113		if (index > last)
114			last = index +1;
115		if (index < first)
116			first = index;
117	}
118	return table;
119}
120
121/* replace an array by its cumulative distribution */
122static void
123cumulativedist(int *table, int limit, int *total)
124{
125	int accum=0;
126
127	while (--limit >= 0) {
128		accum += *table;
129		*table++ = accum;
130	}
131	*total = accum;
132}
133
134static short *
135inverttable(int *table, int inversesize, int tablesize, int cumulative)
136{
137	int i, inverseindex, inversevalue;
138	short *inverse;
139	double findex, fvalue;
140
141	inverse = (short *)malloc(inversesize*sizeof(short));
142	for (i=0; i < inversesize; ++i) {
143		inverse[i] = MINSHORT;
144	}
145	for (i=0; i < tablesize; ++i) {
146		findex = ((double)i/(double)DISTTABLEGRANULARITY) - DISTTABLEDOMAIN;
147		fvalue = (double)table[i]/(double)cumulative;
148		inverseindex = (int)rint(fvalue*inversesize);
149		inversevalue = (int)rint(findex*TABLEFACTOR);
150		if (inversevalue <= MINSHORT) inversevalue = MINSHORT+1;
151		if (inversevalue > MAXSHORT) inversevalue = MAXSHORT;
152		inverse[inverseindex] = inversevalue;
153	}
154	return inverse;
155
156}
157
158/* Run simple linear interpolation over the table to fill in missing entries */
159static void
160interpolatetable(short *table, int limit)
161{
162	int i, j, last, lasti = -1;
163
164	last = MINSHORT;
165	for (i=0; i < limit; ++i) {
166		if (table[i] == MINSHORT) {
167			for (j=i; j < limit; ++j)
168				if (table[j] != MINSHORT)
169					break;
170			if (j < limit) {
171				table[i] = last + (i-lasti)*(table[j]-last)/(j-lasti);
172			} else {
173				table[i] = last + (i-lasti)*(MAXSHORT-last)/(limit-lasti);
174			}
175		} else {
176			last = table[i];
177			lasti = i;
178		}
179	}
180}
181
182static void
183printtable(const short *table, int limit)
184{
185	int i;
186
187	printf("# This is the distribution table for the experimental distribution.\n");
188
189	for (i=0 ; i < limit; ++i) {
190		printf("%d%c", table[i],
191		       (i % 8) == 7 ? '\n' : ' ');
192	}
193}
194
195int
196main(int argc, char **argv)
197{
198	FILE *fp;
199	double *x;
200	double mu, sigma, rho;
201	int limit;
202	int *table;
203	short *inverse;
204	int total;
205
206	if (argc > 1) {
207		if (!(fp = fopen(argv[1], "r"))) {
208			perror(argv[1]);
209			exit(1);
210		}
211	} else {
212		fp = stdin;
213	}
214	x = readdoubles(fp, &limit);
215	if (limit <= 0) {
216		fprintf(stderr, "Nothing much read!\n");
217		exit(2);
218	}
219	arraystats(x, limit, &mu, &sigma, &rho);
220#ifdef DEBUG
221	fprintf(stderr, "%d values, mu %10.4f, sigma %10.4f, rho %10.4f\n",
222		limit, mu, sigma, rho);
223#endif
224
225	table = makedist(x, limit, mu, sigma);
226	free((void *) x);
227	cumulativedist(table, DISTTABLESIZE, &total);
228	inverse = inverttable(table, TABLESIZE, DISTTABLESIZE, total);
229	interpolatetable(inverse, TABLESIZE);
230	printtable(inverse, TABLESIZE);
231	return 0;
232}
233