1/*---------------------------------------------------------------------------*
2 *  specnorm.c  *
3 *                                                                           *
4 *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
5 *                                                                           *
6 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7 *  you may not use this file except in compliance with the License.         *
8 *                                                                           *
9 *  You may obtain a copy of the License at                                  *
10 *      http://www.apache.org/licenses/LICENSE-2.0                           *
11 *                                                                           *
12 *  Unless required by applicable law or agreed to in writing, software      *
13 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 *  See the License for the specific language governing permissions and      *
16 *  limitations under the License.                                           *
17 *                                                                           *
18 *---------------------------------------------------------------------------*/
19
20
21#ifndef _RTT
22#include <stdio.h>
23#endif
24#include <stdlib.h>
25#include <math.h>
26#include <string.h>
27#include <assert.h>
28
29#include "duk_err.h"
30#include "specnorm.h"
31#include "portable.h"
32
33#define DEBUG   0
34
35static const char specnorm[] = "$Id: specnorm.c,v 1.2.10.7 2007/10/15 18:06:24 dahan Exp $";
36
37int copy_distribution_counts(spect_dist_info *spec, spect_dist_info *base);
38
39
40int add_distribution_data(spect_dist_info *spec, int spec_val)
41{
42  /*  Median calculations
43  */
44  ASSERT(spec);
45#if USE_MEDIAN
46  if (spec_val < spec->low_entry) spec->low_counts += UNIT_SIZE;
47  else if (spec_val > spec->high_entry) spec->high_counts += UNIT_SIZE;
48  else spec->hist[spec_val - spec->low_entry] += UNIT_SIZE;
49#endif
50
51  /*  Mean calculations
52  */
53#if 1
54  spec->running_total += spec_val - spec->mean;
55  spec->running_total_devn += (spec_val - spec->mean)
56                              * (spec_val - spec->mean);
57#else
58  spec->running_total += spec_val;
59  spec->running_total_devn += spec_val * spec_val;
60#endif
61
62  spec->count++;
63  if (spec->estimate_period > 0 && spec->count >= spec->estimate_period)
64  {
65    evaluate_parameters(spec);
66    spec->gain_used = False;
67    spec->count = 0;
68    return (1);
69  }
70  return (0);
71}
72
73void evaluate_parameters(spect_dist_info *spec)
74{
75  ASSERT(spec);
76#if USE_MEDIAN
77  estimate_sv6(spec);
78  spec->median = estimate_percentile(spec, spec->estimate_percentile);
79  spec->perc_high = estimate_percentile(spec, 90);  /* check this value */
80#endif
81#if USE_MEAN
82  estimate_mean(spec, spec->forget_factor);
83#endif
84#if USE_MEDIAN
85  forget_distribution_counts(spec, spec->forget_factor);
86#endif
87  spec->count = 0;
88  return;
89}
90
91#if USE_MEDIAN
92int estimate_percentile(spect_dist_info *spec, int percentile)
93{
94  int ii, jj, count, cumval;
95  long    accum = 0;
96
97  /*  Calculate the median
98  */
99  ASSERT(spec);
100  if (spec->count < MIN_COUNT) return(spec->median);
101  if (percentile == 0)
102    percentile = spec->estimate_percentile;
103  count = spec->low_counts + spec->high_counts;
104  for (ii = 0; ii <= (spec->high_entry - spec->low_entry); ii++)
105    count += spec->hist[ii];
106  count = (count * percentile) / 100;
107
108  cumval = spec->low_counts;
109  for (ii = 0; ii <= (spec->high_entry - spec->low_entry)
110       && cumval < count; ii++)
111    cumval += spec->hist[ii];
112
113  count = 0;
114  for (jj = ii; jj <= (spec->high_entry - spec->low_entry); jj++)
115  {
116    count += spec->hist[jj];
117    accum += spec->hist[jj] * (jj - ii);
118  }
119#if DEBUG
120  if (count > 0)
121    log_report("Median margin %d\n", accum / count);
122#endif
123  return (spec->low_entry + ii);
124}
125
126void estimate_sv6(spect_dist_info *spec)
127{
128  int ii, jj, count, span, totcount;
129  long    accum;
130
131  /*  Calculate the median
132  */
133  ASSERT(spec);
134  if (spec->count < MIN_COUNT) return;
135  count = spec->high_counts;
136  accum = 0;
137  span = spec->high_entry - spec->low_entry;
138  for (ii = 0, jj = spec->high_entry - spec->low_entry;
139       ii <= span; ii++, jj--)
140  {
141    count += spec->hist[jj];
142    accum += spec->hist[jj] * ii;
143    if (count > 0 && (ii - accum / count) > spec->sv6_margin)
144      break;
145  }
146
147  totcount = count;
148  for (; ii <= span; ii++, jj--)
149    totcount += spec->hist[jj];
150  totcount += spec->high_counts;
151
152#if DEBUG
153  log_report("SV6 (%d) Percentage %d, %d, %d\n", spec->sv6_margin,
154             (count * 100) / totcount,
155             totcount, spec->count);
156#endif
157  if (count > 0)
158    spec->sv6 = spec->high_entry - accum / count;
159  return;
160}
161#endif
162
163void estimate_mean(spect_dist_info *spec, int forget_factor)
164{
165  /*  Calculate the mean and standard deviation
166  */
167  ASSERT(spec);
168  if (spec->count < MIN_COUNT) return;
169#if DEBUG
170  log_report("old mean= %d, ", spec->mean);
171#endif
172  spec->mean_count = (spec->mean_count * (100 - forget_factor)) / 100;
173  spec->mean_count += spec->count;
174  if (spec->mean_count > 0)
175  {
176    spec->devn = spec->running_total_devn / spec->mean_count
177                 - (spec->running_total * spec->running_total)
178                 / (spec->mean_count * spec->mean_count);
179    spec->devn = (int) sqrt((double)  spec->devn);
180    if (spec->running_total >= 0)
181      spec->mean += (spec->running_total + spec->mean_count / 2)
182                    / spec->mean_count;
183    else
184      spec->mean += (spec->running_total - spec->mean_count / 2)
185                    / spec->mean_count;
186  }
187#if DEBUG
188  log_report("accumulates= %d and %d (%d), ", spec->running_total,
189             spec->mean_count, spec->count);
190  log_report("new mean= %d\n", spec->mean);
191#endif
192  spec->running_total = 0;
193  spec->running_total_devn = 0;
194
195  return;
196}
197
198#if USE_MEDIAN
199int median_normalize_data(spect_dist_info *spec, int spec_val)
200{
201  return (spec_val - spec->median + spec->offset);
202}
203
204int sv6_normalize_data(spect_dist_info *spec, int spec_val)
205{
206  return (spec_val - spec->sv6 + spec->offset);
207}
208#endif
209
210int mean_normalize_data(spect_dist_info *spec, int spec_val)
211{
212  return (spec_val - spec->mean + spec->offset);
213}
214
215spect_dist_info *create_spectrum_distribution(int offset, int initial_median,
216    int low_entry, int high_entry,
217    int forget_factor, int estimate_period,
218    int estimate_percentile,
219    int sv6_margin)
220{
221  spect_dist_info *spec;
222
223  if (high_entry < low_entry) return(NULL);
224
225  spec = (spect_dist_info *) CALLOC_CLR(1,
226         sizeof(spect_dist_info), "clib.spec");
227  if (estimate_period == 0) /* basically disable 0 as an estimate period */
228    spec->estimate_period = 1;
229  else
230    spec->estimate_period = estimate_period;
231  spec->forget_factor = forget_factor;
232  spec->offset = offset;
233
234#if USE_MEDIAN
235  spec->hist = (long *) CALLOC_CLR(high_entry - low_entry + 1,
236                                         sizeof(int), "clib.spec.hist");
237  spec->low_entry = low_entry;
238  spec->high_entry = high_entry;
239  spec->median = initial_median;
240  spec->estimate_percentile = estimate_percentile;
241  spec->sv6_margin = sv6_margin;
242  clear_distribution_counts(spec);
243#endif
244#if USE_MEAN
245  spec->mean = initial_median;
246  spec->devn = 0;
247  clear_mean_counts(spec);
248#endif
249  spec->sv6 = initial_median;
250
251  return (spec);
252}
253
254void destroy_spectrum_distribution(spect_dist_info *spec)
255{
256  ASSERT(spec);
257#if USE_MEDIAN
258  FREE((char *)spec->hist);
259#endif
260  FREE((char *)spec);
261  return;
262}
263
264#if USE_MEDIAN
265void clear_distribution_counts(spect_dist_info *spec)
266{
267  int ii;
268
269  ASSERT(spec);
270  spec->high_counts = 0;
271  spec->low_counts = 0;
272  spec->count = 0;
273  for (ii = 0; ii <= (spec->high_entry - spec->low_entry); ii++)
274    spec->hist[ii] = 0;
275  return;
276}
277
278int copy_distribution_counts(spect_dist_info *spec, spect_dist_info *base)
279{
280  int ii;
281
282  ASSERT(spec);
283  ASSERT(base);
284  ASSERT(spec->hist);
285  ASSERT(base->hist);
286  if (base->low_entry != spec->low_entry ||
287      base->high_entry != spec->high_entry)
288    return (False);
289  spec->high_counts = base->high_counts;
290  spec->low_counts = base->low_counts;
291  for (ii = 0; ii <= (spec->high_entry - spec->low_entry); ii++)
292    spec->hist[ii] = base->hist[ii];
293  return (True);
294}
295
296void forget_distribution_counts(spect_dist_info *spec, int forget_factor)
297{
298  int ii, remember;
299
300  ASSERT(spec);
301  /*    remember= 100 - (forget_factor * spec->count)/spec->estimate_period; */
302  remember = 100 - forget_factor;
303  spec->high_counts = (spec->high_counts * remember) / 100;
304  spec->low_counts = (spec->low_counts * remember) / 100;
305  for (ii = 0; ii <= (spec->high_entry - spec->low_entry); ii++)
306    spec->hist[ii] = (spec->hist[ii] * remember) / 100;
307  return;
308}
309
310void shift_distribution_counts(spect_dist_info *spec, int shift)
311{
312  int ii;
313
314  ASSERT(spec);
315  if (shift > 0)
316  {
317    if (shift > (spec->high_entry - spec->low_entry))
318      SERVICE_ERROR(UNEXPECTED_DATA_ERROR); /* TODO: find a new error code */
319    for (ii = 0; ii < shift; ii++)
320      spec->high_counts += spec->hist[spec->high_entry
321                                      - spec->low_entry - shift + ii];
322
323    MEMMOVE(&spec->hist[shift], spec->hist,
324            (spec->high_entry - spec->low_entry - shift + 1),
325            sizeof(int));
326    for (ii = 0; ii < shift; ii++)
327      spec->hist[ii] = 0;
328  }
329  else if (shift < 0)
330  {
331    if (shift < (spec->low_entry - spec->high_entry))
332      SERVICE_ERROR(UNEXPECTED_DATA_ERROR); /* TODO: find a new error code */
333    for (ii = 0; ii < -shift; ii++)
334      spec->low_counts += spec->hist[ii];
335
336    MEMMOVE(spec->hist, spec->hist - shift,
337            (spec->high_entry - spec->low_entry + shift + 1),
338            sizeof(int));
339    for (ii = shift; ii < 0; ii++)
340      spec->hist[ii + spec->high_entry - spec->low_entry + 1] = 0;
341  }
342  return;
343}
344#endif
345
346void clear_mean_counts(spect_dist_info *spec)
347{
348  ASSERT(spec);
349  spec->mean_count = 0;
350  spec->count = 0;
351  spec->running_total = 0;
352  spec->running_total_devn = 0;
353  return;
354}
355
356void shift_parameters(spect_dist_info *spec, int shift)
357{
358  ASSERT(spec);
359  spec->mean += shift;
360#if USE_MEDIAN
361  spec->median += shift;
362  spec->sv6 += shift;
363#endif
364  return;
365}
366