1/*---------------------------------------------------------------------------*
2 *  swicms.c                                                                 *
3 *                                                                           *
4 *  Copyright 2007, 2008 Nuance Communciations, Inc.                         *
5 *                                                                           *
6 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7 *  you may not use this file except in compliance with the License.         *
8 *                                                                           *
9 *  You may obtain a copy of the License at                                  *
10 *      http://www.apache.org/licenses/LICENSE-2.0                           *
11 *                                                                           *
12 *  Unless required by applicable law or agreed to in writing, software      *
13 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 *  See the License for the specific language governing permissions and      *
16 *  limitations under the License.                                           *
17 *                                                                           *
18 *---------------------------------------------------------------------------*/
19
20#include <string.h>
21#include"swicms.h"
22#include"srec_sizes.h"
23#include"prelib.h"
24
25#include "passert.h"
26#include "ESR_Session.h"
27#include "ESR_SessionType.h"
28#include "IntArrayList.h"
29#include "portable.h"
30
/*
 * Debug helper: logs HEAD, the low bits of PTR's address in hex, and then the
 * first NN elements of PTR (each formatted with FMT) as one PLogMessage line.
 *
 * - Wrapped in do { } while (0) so it behaves as a single statement, including
 *   directly before an `else`.
 * - PTR is parenthesized so arguments such as `&x[0]` index correctly.
 * - Appending stops while at least 16 characters of the 256-character scratch
 *   buffer remain, so a long vector is truncated instead of overrunning it.
 *   The margin assumes FMT renders one int-sized value.
 * - The assembled text is logged through "%s" so it is never interpreted as a
 *   format string.
 */
#define printf_vector(HEAD, FMT, PTR, NN) \
  do { \
    int pv_idx_; \
    LCHAR pv_buf_[256]; \
    sprintf(pv_buf_, HEAD); \
    sprintf(pv_buf_ + LSTRLEN(pv_buf_), " %x", (unsigned int)(size_t)(PTR)); \
    for (pv_idx_ = 0; pv_idx_ < (NN); ++pv_idx_) { \
      if (LSTRLEN(pv_buf_) + 16 >= sizeof(pv_buf_) / sizeof(pv_buf_[0])) \
        break; \
      sprintf(pv_buf_ + LSTRLEN(pv_buf_), FMT, (PTR)[pv_idx_]); \
    } \
    PLogMessage(L("%s"), pv_buf_); \
  } while (0)
32
33/* Cross-utterance CMN calculation:
34   We try to normalize the speech frames before they get to the recognizer.
35   The speech frames are LDA-processed mfcc-with-dynamic feature vectors.
36   We collect these speech frames during recognition. At the end of
37   recognition we exclude the silence frames from the collected data, and
38   generate a new channel average based on the previous average and the new
39   data, using an exponential decay formula.
40
41   In-utterance CMN calculation:
42   A new short-term average mechanism was introduced, with faster update,
43   to improve recognition on the very first recognition after init or reset.
44   We wait for a minimum number of new data frames to apply this. We also
45   disable the fast updater after some frames, because we assume the
46   cross-utterance estimator to be more reliable, particularly in its
47   ability to exclude silence frames from the calculation.
48*/
49
50/* default settings for cross-utterance cms */
51#define SWICMS_FORGET_FACTOR_DEFAULT        400 /* effective frms of history */
52#define SWICMS_SBINDEX_DEFAULT              100 /* use speech frames only */
53/* #define SWICMS_CACHE_RESOLUTION_DEFAULT  see swicms.h */
54/* #define SWICMS_CACHE_SIZE_DEFAULT        see swicms.h */
55
56/* default settings for in-utterance cms */
57#define SWICMS_INUTT_FORGET_FACTOR2_DISABLE 65535 /* any large number */
58#define SWICMS_INUTT_FORGET_FACTOR2_DEFAULT SWICMS_INUTT_FORGET_FACTOR2_DISABLE
/* disable in-utt cms once the cross-utt estimate becomes more reliable */
60#define SWICMS_INUTT_DISABLE_AFTER_FRAMES   200
61/* wait while the estimate is poor */
62#define SWICMS_INUTT_ENABLE_AFTER_FRAMES    10
63
/**
 * Logging Stuff
 */
#define LOG_LEVEL 2
#define MODULE_NAME L("swicms.c")
//static const char* MTAG = MODULE_NAME;

/* Revision id string. The never-taken `0 ? &rcsid : ...` arm makes the
   initializer reference rcsid itself. */
static const char *rcsid = 0 ? (const char *) &rcsid :
                           "$Id: swicms.c,v 1.21.6.16 2008/06/05 19:00:55 stever Exp $";

/* File-wide debug switch: swicms_init() loads it from the session key
   "CREC.Frontend.swicms.debug"; when true, swicms_update() and
   swicms_lda_process() dump their vectors through printf_vector. */
static ESR_BOOL SWICMS_DEBUG = ESR_FALSE;
75
/* these are good values from cmn/tmn files */
/* Built-in defaults copied into swicms->cmn / swicms->tmn by swicms_init().
   The *_8 tables are selected when the session sample rate is 8000; the
   *_11 tables are used for every other rate (including the 11025 fallback
   used when no session exists). Only the first 12 entries differ from the
   127 filler value. */

/* default cmn for 8000 Hz */
static const imeldata gswicms_cmn1_8 [MAX_CHAN_DIM] =
  {
    158, 141,  99, 125, 101, 162, 113, 138, 128, 143, 123, 141,
    127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
    127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127
  };

/* default cmn for all other sample rates */
static const imeldata gswicms_cmn1_11 [MAX_CHAN_DIM] =
  {
    163, 121, 120, 114, 124, 139, 144, 108, 150, 119, 146, 124,
    127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
    127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127
  };

/* default tmn for 8000 Hz */
static const imeldata gswicms_tmn1_8 [MAX_CHAN_DIM] =
  {
    108, 138, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
    127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
    127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127
  };

/* default tmn for all other sample rates (currently identical to the 8000 Hz table) */
static const imeldata gswicms_tmn1_11 [MAX_CHAN_DIM] =
  {
    108, 138, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
    127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
    127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127
  };
104
105static ESR_ReturnCode GetSomeIntsIfAny( const LCHAR* parname, imeldata* parvalue, size_t reqSize)
106{
107  size_t i, size;
108  ESR_ReturnCode rc;
109  ESR_BOOL exists;
110  IntArrayList* intList = 0;
111
112  CHKLOG(rc, ESR_SessionContains(parname, &exists));
113  if (exists) {
114    rc = ESR_SessionGetProperty(parname, (void**)&intList, TYPES_INTARRAYLIST);
115    if (rc != ESR_SUCCESS && rc != ESR_NO_MATCH_ERROR) {
116      /* no match will revert to default data already in static array */
117      PLogError(L("Error reading %s from session: %s"), parname, ESR_rc2str(rc));
118      return ESR_FATAL_ERROR;
119    }
120    else if (rc == ESR_SUCCESS) {
121      CHKLOG(rc, IntArrayListGetSize(intList, &size));
122      if(size != reqSize) {
123	PLogError(L("Error reading %s from session, expected len %d: %s"), parname, reqSize, ESR_rc2str(rc));
124	return ESR_FATAL_ERROR;
125      }
126      if(reqSize == 1)
127	CHKLOG(rc, IntArrayListGet(intList, 0, parvalue));
128      else {
129	for (i=0; i<size; ++i)
130	  CHKLOG(rc, IntArrayListGet(intList, i, &parvalue[i]));
131      }
132    }
133  }
134  return ESR_SUCCESS;
135 CLEANUP:
136  return rc;
137}
138
139int swicms_init(swicms_norm_info* swicms)
140{
141  ESR_ReturnCode    rc = ESR_SUCCESS;
142  size_t            i;
143  ESR_BOOL          exists, sessionExists;
144  size_t 	    sample_rate;
145
146  /* defaults */
147  swicms->sbindex          = SWICMS_SBINDEX_DEFAULT;
148  swicms->cached_num_frames = 0;
149  swicms->forget_factor    = SWICMS_FORGET_FACTOR_DEFAULT;
150  swicms->cache_resolution = SWICMS_CACHE_RESOLUTION_DEFAULT;
151  swicms->num_frames_in_cmn = 0;
152
153  CHKLOG(rc, ESR_SessionExists(&sessionExists));
154
155  if (sessionExists)
156  {  /* We'll assume this rate is valid or someone else will be complaining.   SteveR */
157    rc = ESR_SessionGetSize_t ( L ( "CREC.Frontend.samplerate" ), &sample_rate );
158
159    if ( rc != ESR_SUCCESS )
160      return ( rc );
161  }
162  else
163    sample_rate = 11025;
164
165  /* init the data structures by copying the static data so that we can have a copy if we need to reset */
166  if ( sample_rate == 8000 )
167  {
168    for ( i = 0; i < MAX_CHAN_DIM; i++ )
169    {
170      swicms->cmn [i] = gswicms_cmn1_8 [i];
171      swicms->tmn [i] = gswicms_tmn1_8 [i];
172// _lda_*mn below are OK, but are recalculated in swicms_lda_process()
173      swicms->lda_cmn [i] = 0; /* calculated by swicms_lda_process() */
174      swicms->lda_tmn [i] = 0; /* calculated by swicms_lda_process() */
175    }
176  }
177  else
178  {
179    for ( i = 0; i < MAX_CHAN_DIM; i++ )
180    {
181      swicms->cmn [i] = gswicms_cmn1_11 [i];
182      swicms->tmn [i] = gswicms_tmn1_11 [i];
183// _lda_*mn below are OK, but are recalculated in swicms_lda_process()
184      swicms->lda_cmn [i] = 0; /* calculated by swicms_lda_process() */
185      swicms->lda_tmn [i] = 0; /* calculated by swicms_lda_process() */
186    }
187  }
188  CHKLOG(rc, ESR_SessionExists(&sessionExists));
189
190  if (sessionExists)
191  {
192    const LCHAR* parname = L("CREC.Frontend.swicms.debug");
193    CHKLOG(rc, ESR_SessionContains(parname, &exists));
194    if (exists) {
195      rc = ESR_SessionGetBool(parname, &SWICMS_DEBUG);
196      if (rc != ESR_SUCCESS && rc != ESR_NO_MATCH_ERROR) {
197        PLOG_DBG_ERROR((L("Error reading %s from session: %s"), parname, ESR_rc2str(rc)));
198        return rc;
199      }
200    }
201
202    rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.forget_factor"),
203			   &swicms->forget_factor, 1);
204    if(rc != ESR_SUCCESS) return rc;
205
206    rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.sbindex"),
207			   &swicms->sbindex, 1);
208    if(rc != ESR_SUCCESS) return rc;
209
210    rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.cmn"),
211			   &swicms->cmn[0], MAX_CHAN_DIM);
212    if(rc != ESR_SUCCESS) return rc;
213
214    if ( sample_rate == 8000 )
215    {
216      rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.cmn8"), &swicms->cmn[0], MAX_CHAN_DIM);
217
218      if(rc != ESR_SUCCESS)
219        return rc;
220    }
221    else
222    {
223      rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.cmn11"), &swicms->cmn[0], MAX_CHAN_DIM);
224
225      if(rc != ESR_SUCCESS)
226        return rc;
227    }
228
229    rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.tmn"),
230			   &swicms->tmn[0], MAX_CHAN_DIM);
231    if(rc != ESR_SUCCESS) return rc;
232  }
233
234  swicms->is_valid = 0;
235  for (i = 0; i < MAX_CHAN_DIM; i++)
236    swicms->adjust[i] = 255;
237
238#ifdef SREC_ENGINE_VERBOSE_LOGGING
239  PLogMessage("swicms->forget_factor    = %d\n", swicms->forget_factor);
240  PLogMessage("swicms->cache_resolution = %d\n", swicms->cache_resolution);
241  PLogMessage("swicms->sbindex          = %d\n", swicms->sbindex);
242#endif
243
244  /* in-utt cms parameters */
245  swicms->inutt.forget_factor2 = SWICMS_INUTT_FORGET_FACTOR2_DEFAULT;
246  swicms->inutt.disable_after  = 200;
247  swicms->inutt.enable_after   = 10;    /* in-utt is less reliable       */
248  swicms->inutt.num_bou_frames_to_skip = 20; /* silence frames! see windback */
249  swicms->inutt.num_frames_since_bou = 0;
250  swicms->inutt.num_frames_in_accum = 0;
251  for(i=0; i<MAX_CHAN_DIM; i++) swicms->inutt.accum[i] = 0;
252
253  if (sessionExists) {
254    rc = GetSomeIntsIfAny(L("CREC.Frontend.swicms.inutt.forget_factor2"),
255			  &swicms->inutt.forget_factor2, 1);
256    if(rc != ESR_SUCCESS) return rc;
257
258    rc = GetSomeIntsIfAny(L("CREC.Frontend.swicms.inutt.disable_after"),
259			  &swicms->inutt.disable_after, 1);
260    if(rc != ESR_SUCCESS) return rc;
261
262    rc = GetSomeIntsIfAny(L("CREC.Frontend.swicms.inutt.enable_after"),
263			  &swicms->inutt.enable_after, 1);
264    if(rc != ESR_SUCCESS) return rc;
265
266    /* we need to estimate the in-utt cmn from speech frames only! so let's
267       make sure to skip some frames before collecting data, */
268    ESR_SessionContains(L("CREC.Frontend.start_windback"), &exists);
269    if (exists) {
270      ESR_BOOL do_skip_even_frames = ESR_TRUE;
271      ESR_SessionGetBool(L("CREC.Frontend.do_skip_even_frames"), &do_skip_even_frames);
272      ESR_SessionGetInt(L("CREC.Frontend.start_windback"), &swicms->inutt.num_bou_frames_to_skip);
273      if( do_skip_even_frames)
274	swicms->inutt.num_bou_frames_to_skip /= 2;
275      swicms->inutt.num_bou_frames_to_skip -= 5; /* ensure spch frames only */
276    }
277  }
278
279  return 0;
280 CLEANUP:
281  return rc;
282}
283
284
285ESR_ReturnCode swicms_get_cmn ( swicms_norm_info* swicms, LCHAR *cmn_params, size_t* len )
286{
287  int dim_count;
288  int i;
289  imeldata temp[MAX_CHAN_DIM];
290  const size_t INT_LENGTH = 12;
291
292  if (  swicms->_prep != NULL )	/* lda exists give them transformed lda. */
293  {
294    for ( dim_count = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
295      temp [dim_count] = swicms->lda_cmn [dim_count];
296    inverse_transform_frame( swicms->_prep, temp, 1 /*do_shift*/);
297  }
298  else	/* lda does not exist give them raw cmn values */
299  {
300    for ( dim_count = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
301      temp [dim_count] = swicms->cmn [dim_count];
302  }
303
304  for ( dim_count = 0, i = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
305  {
306    i += sprintf( cmn_params + i, dim_count==0 ? "%d" : ",%d", temp [dim_count] );
307    if (i + INT_LENGTH >= *len) {
308        *len = MAX_CHAN_DIM * (INT_LENGTH + 2) * sizeof(LCHAR);
309        return ESR_BUFFER_OVERFLOW;
310    }
311  }
312
313  return ESR_SUCCESS;
314}
315
316
317ESR_ReturnCode swicms_set_cmn ( swicms_norm_info* swicms, const char *cmn_params )
318{
319  ESR_ReturnCode    set_status;
320  int               length_of_params;
321  int               dim_count;
322  int               got_word;
323  int               current_position;
324  char              *copy_of_params;
325  char              *parsed_strings [MAX_CHAN_DIM];
326  int               temp_cmn [MAX_CHAN_DIM];
327
328  length_of_params = strlen ( cmn_params ) + 1;
329  copy_of_params = (char*)MALLOC ( length_of_params, NULL );
330
331  if ( copy_of_params != NULL )
332  {
333    set_status = ESR_SUCCESS;
334    memcpy ( copy_of_params, cmn_params, length_of_params );
335    dim_count = 0;
336    current_position = 0;
337    got_word = 0;
338    parsed_strings [dim_count] = copy_of_params + current_position;
339
340    while ( ( dim_count < MAX_CHAN_DIM ) && ( set_status == ESR_SUCCESS ) )
341    {
342      switch ( *( copy_of_params + current_position ) )
343      {
344        case '\0':
345          if ( got_word == 1 )
346          {
347            if ( dim_count == ( MAX_CHAN_DIM - 1 ) )
348              dim_count++;
349            else
350            {
351              PLogError ( "Channel Normalization : Missing Params Must Contain %d Params\n", MAX_CHAN_DIM );
352              set_status = ESR_INVALID_ARGUMENT;
353            }
354          }
355          else
356          {
357            PLogError ( "Channel Normalization : Missing Params Mus Contain %d Params\n", MAX_CHAN_DIM );
358            set_status = ESR_INVALID_ARGUMENT;
359          }
360          break;
361
362        case ',':
363          if ( got_word == 1 )
364          {
365            if ( dim_count < ( MAX_CHAN_DIM - 1 ) )
366            {
367              dim_count++;
368              *( copy_of_params + current_position) = '\0';
369              current_position++;
370
371              if ( current_position == length_of_params )
372              {
373                PLogError ( "Channel Normalization : Delimiter At End Of Param String\n" );
374                set_status = ESR_INVALID_ARGUMENT;
375              }
376              parsed_strings [dim_count] = copy_of_params + current_position;
377              got_word = 0;
378            }
379            else
380            {
381              PLogError ( "Channel Normalization : Too Many Params Must Contain %d Params\n", MAX_CHAN_DIM );
382              set_status = ESR_INVALID_ARGUMENT;
383            }
384          }
385          else
386          {
387            PLogError ( "Channel Normalization : Too Many Params Must Contain %d Params\n", MAX_CHAN_DIM );
388            set_status = ESR_INVALID_ARGUMENT;
389          }
390          break;
391
392        case '0':
393        case '1':
394        case '2':
395        case '3':
396        case '4':
397        case '5':
398        case '6':
399        case '7':
400        case '8':
401        case '9':
402          got_word = 1;
403          current_position++;
404
405          if ( current_position == length_of_params )
406          {
407            PLogError ( "Channel Normalization : Too Many Params Must Contain %d Params\n", MAX_CHAN_DIM );
408            set_status = ESR_INVALID_ARGUMENT;
409          }
410          break;
411
412        default:
413          PLogError ( "Channel Normalization : Invalid Param : %c : Params Must Contain Only Digits\n" );
414          set_status = ESR_INVALID_ARGUMENT;
415          break;
416      }
417    }
418    if ( set_status == ESR_SUCCESS )
419    {
420      dim_count = 0;
421
422      while ( ( dim_count < MAX_CHAN_DIM ) && (  set_status == ESR_SUCCESS ) )
423      {
424        temp_cmn [dim_count] = atoi ( parsed_strings [dim_count] );
425
426        if ( ( temp_cmn [dim_count] < 0 ) || ( temp_cmn [dim_count] > 255 ) )
427        {
428          set_status = ESR_INVALID_ARGUMENT;
429        }
430      }
431      if ( set_status == ESR_SUCCESS )
432      {
433        for ( dim_count = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
434          swicms->cmn [dim_count] = temp_cmn [dim_count];
435        if ( swicms->_prep != NULL )	/* Set now if NULL it will automatically be set on first utterance */
436          linear_transform_frame(swicms->_prep, swicms->lda_cmn, 1 /*do_shift*/);
437      }
438    }
439    FREE ( copy_of_params );
440  }
441  else
442  {
443    PLogError ( "Channel Normalization Out Of Memory Error\n" );
444    set_status = ESR_OUT_OF_MEMORY;
445  }
446  swicms->num_frames_in_cmn = 0;
447  return ( set_status );
448}
449
450
451int swicms_cache_frame(swicms_norm_info* swicms, imeldata* frame, int dimen)
452{
453  int i;
454  imeldata *pcache, *pframe;
455
456  ASSERT(dimen == MAX_CHAN_DIM);
457  i = swicms->cached_num_frames / swicms->cache_resolution;
458  if (i < SWICMS_CACHE_SIZE_DEFAULT)
459  {
460    pcache = swicms->cached_sections[ i];
461    if (swicms->cached_num_frames % swicms->cache_resolution == 0)
462    {
463      for (i = 0; i < MAX_CHAN_DIM; i++) *pcache++ = 0;
464      pcache -= MAX_CHAN_DIM;
465    }
466    pframe = frame;
467    for (i = 0; i < MAX_CHAN_DIM; i++) *pcache++ += *pframe++;
468    swicms->cached_num_frames++;
469  }
470
471  return 0;
472}
473
474int apply_channel_normalization_in_swicms(swicms_norm_info *swicms,
475    imeldata* oframe,
476    imeldata* iframe, int dimen)
477{
478  int ii;
479  ASSERT(dimen == MAX_CHAN_DIM);
480
481  /* IF inutt is activated at all */
482  if(swicms->inutt.forget_factor2 != SWICMS_INUTT_FORGET_FACTOR2_DISABLE) {
483    /* AND IF we have not disabled it (due to x-utt more reliable) */
484    if(swicms->inutt.num_frames_in_accum < swicms->inutt.disable_after) {
485      /* AND IF we have skipped past the silence frames */
486      if( swicms->inutt.num_frames_since_bou >= swicms->inutt.num_bou_frames_to_skip){
487	swicms->inutt.num_frames_in_accum++;
488	for(ii=0;ii<dimen;ii++) swicms->inutt.accum[ii] += iframe[ii];
489	/* AND IF we've already seen at least 10 frames (presumably) of speech */
490	if(swicms->inutt.num_frames_in_accum>swicms->inutt.enable_after) {
491	  /* THEN we update the adjustment in-line with the current utterance! */
492	  for(ii=0;ii<dimen;ii++) {
493	    imeldata denom = ( swicms->inutt.forget_factor2
494			       + swicms->inutt.num_frames_in_accum );
495	    /* tmp: weighted average of the old lda_cmn and the new accum */
496	    imeldata tmp=(swicms->lda_cmn[ii]*swicms->inutt.forget_factor2
497			  + swicms->inutt.accum[ii] + denom/2) / denom;
498	    swicms->adjust[ii] = swicms->lda_tmn[ii] - tmp;
499	  }
500	  //printf_vector("swicms->adjust2 "," %d",swicms->adjust, dimen);
501	}
502      }
503    }
504    swicms->inutt.num_frames_since_bou++;
505  }
506
507  for (ii = 0; ii < dimen; ii++)
508    oframe[ii] = MAKEBYTE(iframe[ii] + swicms->adjust[ii]);
509  return 0;
510}
511
512int swicms_update(swicms_norm_info* swicms, int speech_start, int speech_end)
513{
514  int i, j;
515  asr_int32_t speech_avg[MAX_CHAN_DIM], backgr_avg[MAX_CHAN_DIM], avg[MAX_CHAN_DIM];
516  int ff;
517  int nn, speech_nn, backgr_nn;
518  int num_frames = swicms->cached_num_frames;
519  int cache_start, cache_end, backgr_cache_end;
520  int sbindex = swicms->sbindex;
521
522  /* init for utterance */
523  swicms->inutt.num_frames_since_bou = 0;
524
525  swicms->cached_num_frames = 0;
526  cache_start = speech_start;
527  cache_start -= (cache_start % swicms->cache_resolution);
528  cache_start /= swicms->cache_resolution;
529
530  if (speech_end == MAXframeID)
531  {
532    cache_end = SWICMS_CACHE_SIZE_DEFAULT;
533  }
534  else
535  {
536    if (speech_end < num_frames)
537      cache_end = speech_end;
538    else
539      cache_end = num_frames;
540    cache_end -= (cache_end % swicms->cache_resolution);
541    cache_end /= swicms->cache_resolution;
542  }
543
544  if (num_frames == 0 || speech_end == 0 || speech_start == speech_end || speech_end == MAXframeID)
545  {
546    if (speech_end != 0 || speech_start != 0)
547      PLogError("Warning: speech_bounds (%d,%d) swicms->cached_num_frames (%d)\n",
548                speech_start, speech_end, num_frames);
549	if (SWICMS_DEBUG) {
550      //printf_vector("swicms->adjust.rep", " %d", swicms->adjust, MAX_CHAN_DIM);
551    }
552    return 1;
553  }
554
555  backgr_cache_end = (num_frames - num_frames % swicms->cache_resolution) / swicms->cache_resolution;
556
557  speech_nn = (cache_end - cache_start) * swicms->cache_resolution;
558  backgr_nn = backgr_cache_end * swicms->cache_resolution - speech_nn;
559
560  for (i = 0; i < MAX_CHAN_DIM; i++)
561  {
562    speech_avg[i] = 0;
563    backgr_avg[i] = 0;
564    for (j = cache_start; j < cache_end; j++)
565      speech_avg[i] += swicms->cached_sections[j][i];
566    for (j = 0; j < cache_start; j++)
567      backgr_avg[i] += swicms->cached_sections[j][i];
568    for (j = cache_end; j < backgr_cache_end; j++)
569      backgr_avg[i] += swicms->cached_sections[j][i];
570    if (speech_nn == 0 && backgr_nn > 0)
571    {
572      backgr_avg[i] /= backgr_nn;
573      speech_avg[i] = backgr_avg[i];
574      speech_nn = backgr_nn;
575    }
576    else if (speech_nn > 0 && backgr_nn == 0)
577    {
578      speech_avg[i] /= speech_nn;
579      backgr_avg[i] = speech_avg[i];
580      backgr_nn = speech_nn;
581    }
582    else if (speech_nn > 0 && backgr_nn > 0)
583    {
584      speech_avg[i] /= speech_nn;
585      backgr_avg[i] /= backgr_nn;
586    }
587    else
588    {
589      return 0;
590    }
591
592    avg[i] = (sbindex * speech_avg[i] + (100 - sbindex) * backgr_avg[i] + 50) / 100;
593  }
594  nn = (sbindex * speech_nn + (100 - sbindex) * backgr_nn + 50) / 100;
595
596  for (i = 0, ff = 0; i < MAX_CHAN_DIM; i++)
597  {
598    ff += (swicms->lda_tmn[i] - avg[i]);
599  }
600  ff /= MAX_CHAN_DIM; /* sum is now the average offset from TMN */
601  if (ff > 5)
602  {
603    PLogError("Warning: bad utt mean during swicms_update() (moffs=%d)\n", ff);
604    //printf_vector("swicms->adjust.rep", " %d", swicms->adjust, MAX_CHAN_DIM);
605    return 1;
606  }
607  ff = swicms->forget_factor;
608  if (ff < 9999)
609  {
610    for (i = 0; i < MAX_CHAN_DIM; i++)
611    {
612      swicms->lda_cmn[i] = (swicms->lda_cmn[i] * ff + avg[i] * nn + (ff + nn) / 2)  / (ff + nn);
613      swicms->adjust[i] = swicms->lda_tmn[i] - swicms->lda_cmn[i];
614    }
615  }
616
617  if (SWICMS_DEBUG)
618    {
619      imeldata temp[MAX_CHAN_DIM];
620      PLogMessage("swicms_update() used %d frames (%d-%d)", nn, speech_start, speech_end);
621
622      for(i=0;i<MAX_CHAN_DIM;i++) temp[i]=swicms->lda_cmn[i];
623      inverse_transform_frame( swicms->_prep, temp, 1 /*do_shift*/);
624      /* use this dump, to put back into CREC.Frontend.swicms.cmn */
625      printf_vector("swicms.cmn(r)  ", " %d", temp, MAX_CHAN_DIM);
626
627      //printf_vector("swicms.lda_cmn   ", " %d", &swicms.lda_cmn [0], MAX_CHAN_DIM);
628      //printf_vector("swicms.lda_tmn   ", " %d", &swicms.lda_tmn [0], MAX_CHAN_DIM);
629      //printf_vector("swicms->adjust", " %d", swicms->adjust, MAX_CHAN_DIM);
630      //printf_vector("avg.speech    ", " %d", avg, MAX_CHAN_DIM);
631    }
632  else
633    {
634#ifndef NDEBUG
635      //printf_vector("swicms->adjust", " %d", swicms->adjust, MAX_CHAN_DIM);
636#endif
637    }
638  swicms->num_frames_in_cmn += nn;
639  return 0;
640}
641
642int swicms_lda_process(swicms_norm_info* swicms, preprocessed* prep)
643{
644  int i;
645
646  for (i = 0; i < MAX_CHAN_DIM; i++) swicms->lda_tmn[i] = swicms->tmn[i];
647  for (i = 0; i < MAX_CHAN_DIM; i++) swicms->lda_cmn[i] = swicms->cmn[i];
648  linear_transform_frame(prep, swicms->lda_tmn, 1 /*do_shift*/);
649  linear_transform_frame(prep, swicms->lda_cmn, 1 /*do_shift*/);
650
651  for (i = 0; i < MAX_CHAN_DIM; i++)
652  {
653    swicms->adjust[i] = swicms->lda_tmn[i] - swicms->lda_cmn[i];
654  }
655
656#ifndef NDEBUG
657  //printf_vector("swicms->adjust", " %d", swicms->adjust, MAX_CHAN_DIM);
658#endif
659  swicms->is_valid = 1;
660  swicms->_prep = prep;
661
662  if(SWICMS_DEBUG) {
663    imeldata temp[MAX_CHAN_DIM];
664    printf_vector("swicms->cmn     ", " %d", swicms->cmn,     MAX_CHAN_DIM);
665    printf_vector("swicms->lda_cmn ", " %d", swicms->lda_cmn, MAX_CHAN_DIM);
666    //printf_vector("swicms->tmn     ", " %d", swicms->tmn,     MAX_CHAN_DIM);
667    //printf_vector("swicms->lda_tmn ", " %d", swicms->lda_tmn, MAX_CHAN_DIM);
668    //printf_vector("swicms->adjust  ", " %d", swicms->adjust,  MAX_CHAN_DIM);
669
670    //for(i=0;i<MAX_CHAN_DIM;i++) temp[i]=swicms->lda_tmn[i];
671    //inverse_transform_frame( swicms->_prep, temp, 1 /*do_shift*/);
672    //printf_vector("swicms->tmn(r)  ", " %d", temp, MAX_CHAN_DIM);
673
674    for(i=0;i<MAX_CHAN_DIM;i++) temp[i]=swicms->lda_cmn[i];
675    inverse_transform_frame( swicms->_prep, temp, 1 /*do_shift*/);
676    printf_vector("swicms->cmn(r)  ", " %d", temp, MAX_CHAN_DIM);
677  }
678  return 0;
679}
680
681
682
683