1/********************************************************************
2 *                                                                  *
3 * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE.   *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
7 *                                                                  *
8 * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001             *
9 * by the Xiph.Org Foundation http://www.xiph.org/                  *
10 *                                                                  *
11 ********************************************************************
12
13 function: function calls to collect codebook metrics
14 last mod: $Id: metrics.c 16037 2009-05-26 21:10:58Z xiphmont $
15
16 ********************************************************************/
17
18
19#include <stdlib.h>
20#include <unistd.h>
21#include <math.h>
22#include "bookutil.h"
23
24/* collect the following metrics:
25
26   mean and mean squared amplitude
27   mean and mean squared error
28   mean and mean squared error (per sample) by entry
29   worst case fit by entry
30   entry cell size
31   hits by entry
32   total bits
33   total samples
34   (average bits per sample)*/
35
36
37/* set up metrics */
38
39float meanamplitude_acc=0.f;
40float meanamplitudesq_acc=0.f;
41float meanerror_acc=0.f;
42float meanerrorsq_acc=0.f;
43
44float **histogram=NULL;
45float **histogram_error=NULL;
46float **histogram_errorsq=NULL;
47float **histogram_hi=NULL;
48float **histogram_lo=NULL;
49float bits=0.f;
50float count=0.f;
51
52static float *_now(codebook *c, int i){
53  return c->valuelist+i*c->c->dim;
54}
55
56int books=0;
57
58void process_preprocess(codebook **bs,char *basename){
59  int i;
60  while(bs[books])books++;
61
62  if(books){
63    histogram=_ogg_calloc(books,sizeof(float *));
64    histogram_error=_ogg_calloc(books,sizeof(float *));
65    histogram_errorsq=_ogg_calloc(books,sizeof(float *));
66    histogram_hi=_ogg_calloc(books,sizeof(float *));
67    histogram_lo=_ogg_calloc(books,sizeof(float *));
68  }else{
69    fprintf(stderr,"Specify at least one codebook\n");
70    exit(1);
71  }
72
73  for(i=0;i<books;i++){
74    codebook *b=bs[i];
75    histogram[i]=_ogg_calloc(b->entries,sizeof(float));
76    histogram_error[i]=_ogg_calloc(b->entries*b->dim,sizeof(float));
77    histogram_errorsq[i]=_ogg_calloc(b->entries*b->dim,sizeof(float));
78    histogram_hi[i]=_ogg_calloc(b->entries*b->dim,sizeof(float));
79    histogram_lo[i]=_ogg_calloc(b->entries*b->dim,sizeof(float));
80  }
81}
82
83static float _dist(int el,float *a, float *b){
84  int i;
85  float acc=0.f;
86  for(i=0;i<el;i++){
87    float val=(a[i]-b[i]);
88    acc+=val*val;
89  }
90  return acc;
91}
92
93void cell_spacing(codebook *c){
94  int j,k;
95  float min=-1.f,max=-1.f,mean=0.f,meansq=0.f;
96  long total=0;
97
98  /* minimum, maximum, mean, ms cell spacing */
99  for(j=0;j<c->c->entries;j++){
100    if(c->c->lengthlist[j]>0){
101      float localmin=-1.;
102      for(k=0;k<c->c->entries;k++){
103        if(c->c->lengthlist[k]>0){
104          float this=_dist(c->c->dim,_now(c,j),_now(c,k));
105          if(j!=k &&
106             (localmin==-1 || this<localmin))
107            localmin=this;
108        }
109      }
110
111      if(min==-1 || localmin<min)min=localmin;
112      if(max==-1 || localmin>max)max=localmin;
113      mean+=sqrt(localmin);
114      meansq+=localmin;
115      total++;
116    }
117  }
118
119  fprintf(stderr,"\tminimum cell spacing (closest side): %g\n",sqrt(min));
120  fprintf(stderr,"\tmaximum cell spacing (closest side): %g\n",sqrt(max));
121  fprintf(stderr,"\tmean closest side spacing: %g\n",mean/total);
122  fprintf(stderr,"\tmean sq closest side spacing: %g\n",sqrt(meansq/total));
123}
124
125void process_postprocess(codebook **bs,char *basename){
126  int i,k,book;
127  char *buffer=alloca(strlen(basename)+80);
128
129  fprintf(stderr,"Done.  Processed %ld data points:\n\n",
130          (long)count);
131
132  fprintf(stderr,"Global statistics:******************\n\n");
133
134  fprintf(stderr,"\ttotal samples: %ld\n",(long)count);
135  fprintf(stderr,"\ttotal bits required to code: %ld\n",(long)bits);
136  fprintf(stderr,"\taverage bits per sample: %g\n\n",bits/count);
137
138  fprintf(stderr,"\tmean sample amplitude: %g\n",
139          meanamplitude_acc/count);
140  fprintf(stderr,"\tmean squared sample amplitude: %g\n\n",
141          sqrt(meanamplitudesq_acc/count));
142
143  fprintf(stderr,"\tmean code error: %g\n",
144          meanerror_acc/count);
145  fprintf(stderr,"\tmean squared code error: %g\n\n",
146          sqrt(meanerrorsq_acc/count));
147
148  for(book=0;book<books;book++){
149    FILE *out;
150    codebook *b=bs[book];
151    int n=b->c->entries;
152    int dim=b->c->dim;
153
154    fprintf(stderr,"Book %d statistics:------------------\n",book);
155
156    cell_spacing(b);
157
158    sprintf(buffer,"%s-%d-mse.m",basename,book);
159    out=fopen(buffer,"w");
160    if(!out){
161      fprintf(stderr,"Could not open file %s for writing\n",buffer);
162      exit(1);
163    }
164
165    for(i=0;i<n;i++){
166      for(k=0;k<dim;k++){
167        fprintf(out,"%d, %g, %g\n",
168                i*dim+k,(b->valuelist+i*dim)[k],
169                sqrt((histogram_errorsq[book]+i*dim)[k]/histogram[book][i]));
170      }
171    }
172    fclose(out);
173
174    sprintf(buffer,"%s-%d-me.m",basename,book);
175    out=fopen(buffer,"w");
176    if(!out){
177      fprintf(stderr,"Could not open file %s for writing\n",buffer);
178      exit(1);
179    }
180
181    for(i=0;i<n;i++){
182      for(k=0;k<dim;k++){
183        fprintf(out,"%d, %g, %g\n",
184                i*dim+k,(b->valuelist+i*dim)[k],
185                (histogram_error[book]+i*dim)[k]/histogram[book][i]);
186      }
187    }
188    fclose(out);
189
190    sprintf(buffer,"%s-%d-worst.m",basename,book);
191    out=fopen(buffer,"w");
192    if(!out){
193      fprintf(stderr,"Could not open file %s for writing\n",buffer);
194      exit(1);
195    }
196
197    for(i=0;i<n;i++){
198      for(k=0;k<dim;k++){
199        fprintf(out,"%d, %g, %g, %g\n",
200                i*dim+k,(b->valuelist+i*dim)[k],
201                (b->valuelist+i*dim)[k]+(histogram_lo[book]+i*dim)[k],
202                (b->valuelist+i*dim)[k]+(histogram_hi[book]+i*dim)[k]);
203      }
204    }
205    fclose(out);
206  }
207}
208
209float process_one(codebook *b,int book,float *a,int dim,int step,int addmul,
210                   float base){
211  int j,entry;
212  float amplitude=0.f;
213
214  if(book==0){
215    float last=base;
216    for(j=0;j<dim;j++){
217      amplitude=a[j*step]-(b->c->q_sequencep?last:0);
218      meanamplitude_acc+=fabs(amplitude);
219      meanamplitudesq_acc+=amplitude*amplitude;
220      count++;
221      last=a[j*step];
222    }
223  }
224
225  if(b->c->q_sequencep){
226    float temp;
227    for(j=0;j<dim;j++){
228      temp=a[j*step];
229      a[j*step]-=base;
230    }
231    base=temp;
232  }
233
234  entry=vorbis_book_besterror(b,a,step,addmul);
235
236  if(entry==-1){
237    fprintf(stderr,"Internal error: _best returned -1.\n");
238    exit(1);
239  }
240
241  histogram[book][entry]++;
242  bits+=vorbis_book_codelen(b,entry);
243
244  for(j=0;j<dim;j++){
245    float error=a[j*step];
246
247    if(book==books-1){
248      meanerror_acc+=fabs(error);
249      meanerrorsq_acc+=error*error;
250    }
251    histogram_errorsq[book][entry*dim+j]+=error*error;
252    histogram_error[book][entry*dim+j]+=fabs(error);
253    if(histogram[book][entry]==0 || histogram_hi[book][entry*dim+j]<error)
254      histogram_hi[book][entry*dim+j]=error;
255    if(histogram[book][entry]==0 || histogram_lo[book][entry*dim+j]>error)
256      histogram_lo[book][entry*dim+j]=error;
257  }
258  return base;
259}
260
261
262void process_vector(codebook **bs,int *addmul,int inter,float *a,int n){
263  int bi;
264  int i;
265
266  for(bi=0;bi<books;bi++){
267    codebook *b=bs[bi];
268    int dim=b->dim;
269    float base=0.f;
270
271    if(inter){
272      for(i=0;i<n/dim;i++)
273        base=process_one(b,bi,a+i,dim,n/dim,addmul[bi],base);
274    }else{
275      for(i=0;i<=n-dim;i+=dim)
276        base=process_one(b,bi,a+i,dim,1,addmul[bi],base);
277    }
278  }
279
280  if((long)(count)%100)spinnit("working.... samples: ",count);
281}
282
283void process_usage(void){
284  fprintf(stderr,
285          "usage: vqmetrics [-i] +|*<codebook>.vqh [ +|*<codebook.vqh> ]... \n"
286          "                 datafile.vqd [datafile.vqd]...\n\n"
287          "       data can be taken on stdin.  -i indicates interleaved coding.\n"
288          "       Output goes to output files:\n"
289          "       basename-me.m:       gnuplot: mean error by entry value\n"
290          "       basename-mse.m:      gnuplot: mean square error by entry value\n"
291          "       basename-worst.m:    gnuplot: worst error by entry value\n"
292          "       basename-distance.m: gnuplot file showing distance probability\n"
293          "\n");
294
295}
296