1/*---------------------------------------------------------------------------*
2 *  swicms.c                                                                 *
3 *                                                                           *
4 *  Copyright 2007, 2008 Nuance Communciations, Inc.                         *
5 *                                                                           *
6 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7 *  you may not use this file except in compliance with the License.         *
8 *                                                                           *
9 *  You may obtain a copy of the License at                                  *
10 *      http://www.apache.org/licenses/LICENSE-2.0                           *
11 *                                                                           *
12 *  Unless required by applicable law or agreed to in writing, software      *
13 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 *  See the License for the specific language governing permissions and      *
16 *  limitations under the License.                                           *
17 *                                                                           *
18 *---------------------------------------------------------------------------*/
19
20#include <string.h>
21#include"swicms.h"
22#include"srec_sizes.h"
23#include"prelib.h"
24
25#include "passert.h"
26#include "ESR_Session.h"
27#include "ESR_SessionType.h"
28#include "IntArrayList.h"
29#include "portable.h"
30
/*
 * printf_vector(HEAD, FMT, PTR, NN)
 *
 * Debug helper: formats HEAD, the address PTR and the first NN elements of
 * PTR (each rendered with FMT) into one line and hands it to PLogMessage().
 *
 * - Every append is bounded by the size of the local buffer; an element that
 *   does not fit is dropped entirely (no partially printed number), as are
 *   all elements after it.
 * - Wrapped in do { } while (0) so it behaves as a single statement.
 * - Internal names carry a pv_ prefix so they cannot shadow caller locals
 *   that appear in the PTR / NN arguments.
 * - The finished text is passed as a "%s" argument, never as a format string.
 *
 * PTR and NN are evaluated more than once; pass side-effect-free expressions.
 */
#define printf_vector(HEAD, FMT, PTR, NN)                                      \
  do {                                                                         \
    LCHAR pv_buf_[256];                                                        \
    const size_t pv_cap_ = sizeof(pv_buf_) / sizeof(pv_buf_[0]);               \
    size_t pv_len_ = 0;                                                        \
    int pv_idx_ = 0;                                                           \
    int pv_rc_;                                                                \
    pv_rc_ = snprintf(pv_buf_, pv_cap_, "%s %p", HEAD, (void *)(PTR));         \
    while (pv_rc_ >= 0 && (size_t)pv_rc_ < pv_cap_ - pv_len_) {                \
      pv_len_ += (size_t)pv_rc_;                                               \
      if (pv_idx_ >= (NN))                                                     \
        break;                                                                 \
      pv_rc_ = snprintf(pv_buf_ + pv_len_, pv_cap_ - pv_len_, FMT,             \
                        (PTR)[pv_idx_]);                                       \
      ++pv_idx_;                                                               \
    }                                                                          \
    pv_buf_[pv_len_] = 0; /* cut off any element that was truncated */         \
    PLogMessage("%s", pv_buf_);                                                \
  } while (0)
32
33/* Cross-utterance CMN calculation:
34   We try to normalize the speech frames before they get to the recognizer.
35   The speech frames are LDA-processed mfcc-with-dynamic feature vectors.
36   We collect these speech frames during recognition. At the end of
37   recognition we exclude the silence frames from the collected data, and
38   generate a new channel average based on the previous average and the new
39   data, using an exponential decay formula.
40
41   In-utterance CMN calculation:
42   A new short-term average mechanism was introduced, with faster update,
43   to improve recognition on the very first recognition after init or reset.
44   We wait for a minimum number of new data frames to apply this. We also
45   disable the fast updater after some frames, because we assume the
46   cross-utterance estimator to be more reliable, particularly in its
47   ability to exclude silence frames from the calculation.
48*/
49
50/* default settings for cross-utterance cms */
51#define SWICMS_FORGET_FACTOR_DEFAULT        400 /* effective frms of history */
52#define SWICMS_SBINDEX_DEFAULT              100 /* use speech frames only */
53/* #define SWICMS_CACHE_RESOLUTION_DEFAULT  see swicms.h */
54/* #define SWICMS_CACHE_SIZE_DEFAULT        see swicms.h */
55
56/* default settings for in-utterance cms */
57#define SWICMS_INUTT_FORGET_FACTOR2_DISABLE 65535 /* any large number */
58#define SWICMS_INUTT_FORGET_FACTOR2_DEFAULT SWICMS_INUTT_FORGET_FACTOR2_DISABLE
59/* disable this when cross-utt become more reliable */
60#define SWICMS_INUTT_DISABLE_AFTER_FRAMES   200
61/* wait while the estimate is poor */
62#define SWICMS_INUTT_ENABLE_AFTER_FRAMES    10
63
64/**
65 * Logging Stuff
66 */
67#define LOG_LEVEL 2
68#define MODULE_NAME L("swicms.c")
69//static const char* MTAG = MODULE_NAME;
70
71static const char *rcsid = 0 ? (const char *) &rcsid :
72                           "$Id: swicms.c,v 1.21.6.16 2008/06/05 19:00:55 stever Exp $";
73
74static ESR_BOOL SWICMS_DEBUG = ESR_FALSE;
75
76/* these are good values from cmn/tmn files */
77static const imeldata gswicms_cmn1_8 [MAX_CHAN_DIM] =
78  {
79    158, 141,  99, 125, 101, 162, 113, 138, 128, 143, 123, 141,
80    127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
81    127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127
82  };
83
84static const imeldata gswicms_cmn1_11 [MAX_CHAN_DIM] =
85  {
86    163, 121, 120, 114, 124, 139, 144, 108, 150, 119, 146, 124,
87    127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
88    127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127
89  };
90
91static const imeldata gswicms_tmn1_8 [MAX_CHAN_DIM] =
92  {
93    108, 138, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
94    127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
95    127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127
96  };
97
98static const imeldata gswicms_tmn1_11 [MAX_CHAN_DIM] =
99  {
100    108, 138, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
101    127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
102    127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127
103  };
104
105static ESR_ReturnCode GetSomeIntsIfAny( const LCHAR* parname, imeldata* parvalue, size_t reqSize)
106{
107  size_t i, size;
108  ESR_ReturnCode rc;
109  ESR_BOOL exists;
110  IntArrayList* intList = 0;
111
112  CHKLOG(rc, ESR_SessionContains(parname, &exists));
113  if (exists) {
114    rc = ESR_SessionGetProperty(parname, (void**)&intList, TYPES_INTARRAYLIST);
115    if (rc != ESR_SUCCESS && rc != ESR_NO_MATCH_ERROR) {
116      /* no match will revert to default data already in static array */
117      PLogError(L("Error reading %s from session: %s"), parname, ESR_rc2str(rc));
118      return ESR_FATAL_ERROR;
119    }
120    else if (rc == ESR_SUCCESS) {
121      CHKLOG(rc, IntArrayListGetSize(intList, &size));
122      if(size != reqSize) {
123	PLogError(L("Error reading %s from session, expected len %d: %s"), parname, reqSize, ESR_rc2str(rc));
124	return ESR_FATAL_ERROR;
125      }
126      if(reqSize == 1)
127	CHKLOG(rc, IntArrayListGet(intList, 0, parvalue));
128      else {
129	for (i=0; i<size; ++i)
130	  CHKLOG(rc, IntArrayListGet(intList, i, &parvalue[i]));
131      }
132    }
133  }
134  return ESR_SUCCESS;
135 CLEANUP:
136  return rc;
137}
138
139int swicms_init(swicms_norm_info* swicms)
140{
141  ESR_ReturnCode    rc = ESR_SUCCESS;
142  size_t            i;
143  ESR_BOOL          exists, sessionExists;
144  size_t 	    sample_rate;
145
146  /* defaults */
147  swicms->sbindex          = SWICMS_SBINDEX_DEFAULT;
148  swicms->cached_num_frames = 0;
149  swicms->forget_factor    = SWICMS_FORGET_FACTOR_DEFAULT;
150  swicms->cache_resolution = SWICMS_CACHE_RESOLUTION_DEFAULT;
151  swicms->num_frames_in_cmn = 0;
152
153  CHKLOG(rc, ESR_SessionExists(&sessionExists));
154
155  if (sessionExists)
156  {  /* We'll assume this rate is valid or someone else will be complaining.   SteveR */
157    rc = ESR_SessionGetSize_t ( L ( "CREC.Frontend.samplerate" ), &sample_rate );
158
159    if ( rc != ESR_SUCCESS )
160      return ( rc );
161  }
162  else
163    sample_rate = 11025;
164
165  /* init the data structures by copying the static data so that we can have a copy if we need to reset */
166  if ( sample_rate == 8000 )
167  {
168    for ( i = 0; i < MAX_CHAN_DIM; i++ )
169    {
170      swicms->cmn [i] = gswicms_cmn1_8 [i];
171      swicms->tmn [i] = gswicms_tmn1_8 [i];
172// _lda_*mn below are OK, but are recalculated in swicms_lda_process()
173      swicms->lda_cmn [i] = 0; /* calculated by swicms_lda_process() */
174      swicms->lda_tmn [i] = 0; /* calculated by swicms_lda_process() */
175    }
176  }
177  else
178  {
179    for ( i = 0; i < MAX_CHAN_DIM; i++ )
180    {
181      swicms->cmn [i] = gswicms_cmn1_11 [i];
182      swicms->tmn [i] = gswicms_tmn1_11 [i];
183// _lda_*mn below are OK, but are recalculated in swicms_lda_process()
184      swicms->lda_cmn [i] = 0; /* calculated by swicms_lda_process() */
185      swicms->lda_tmn [i] = 0; /* calculated by swicms_lda_process() */
186    }
187  }
188  CHKLOG(rc, ESR_SessionExists(&sessionExists));
189
190  if (sessionExists)
191  {
192    const LCHAR* parname = L("CREC.Frontend.swicms.debug");
193    CHKLOG(rc, ESR_SessionContains(parname, &exists));
194    if (exists) {
195      rc = ESR_SessionGetBool(parname, &SWICMS_DEBUG);
196      if (rc != ESR_SUCCESS && rc != ESR_NO_MATCH_ERROR) {
197        PLOG_DBG_ERROR((L("Error reading %s from session: %s"), parname, ESR_rc2str(rc)));
198        return rc;
199      }
200    }
201
202    rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.forget_factor"),
203			   &swicms->forget_factor, 1);
204    if(rc != ESR_SUCCESS) return rc;
205
206    rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.sbindex"),
207			   &swicms->sbindex, 1);
208    if(rc != ESR_SUCCESS) return rc;
209
210    rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.cmn"),
211			   &swicms->cmn[0], MAX_CHAN_DIM);
212    if(rc != ESR_SUCCESS) return rc;
213
214    if ( sample_rate == 8000 )
215    {
216      rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.cmn8"), &swicms->cmn[0], MAX_CHAN_DIM);
217
218      if(rc != ESR_SUCCESS)
219        return rc;
220    }
221    else
222    {
223      rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.cmn11"), &swicms->cmn[0], MAX_CHAN_DIM);
224
225      if(rc != ESR_SUCCESS)
226        return rc;
227    }
228
229    rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.tmn"),
230			   &swicms->tmn[0], MAX_CHAN_DIM);
231    if(rc != ESR_SUCCESS) return rc;
232  }
233
234  swicms->is_valid = 0;
235  for (i = 0; i < MAX_CHAN_DIM; i++)
236    swicms->adjust[i] = 255;
237
238#ifdef SREC_ENGINE_VERBOSE_LOGGING
239  PLogMessage("swicms->forget_factor    = %d\n", swicms->forget_factor);
240  PLogMessage("swicms->cache_resolution = %d\n", swicms->cache_resolution);
241  PLogMessage("swicms->sbindex          = %d\n", swicms->sbindex);
242#endif
243
244  /* in-utt cms parameters */
245  swicms->inutt.forget_factor2 = SWICMS_INUTT_FORGET_FACTOR2_DEFAULT;
246  swicms->inutt.disable_after  = 200;
247  swicms->inutt.enable_after   = 10;    /* in-utt is less reliable       */
248  swicms->inutt.num_bou_frames_to_skip = 20; /* silence frames! see windback */
249  swicms->inutt.num_frames_since_bou = 0;
250  swicms->inutt.num_frames_in_accum = 0;
251  for(i=0; i<MAX_CHAN_DIM; i++) swicms->inutt.accum[i] = 0;
252
253  if (sessionExists) {
254    rc = GetSomeIntsIfAny(L("CREC.Frontend.swicms.inutt.forget_factor2"),
255			  &swicms->inutt.forget_factor2, 1);
256    if(rc != ESR_SUCCESS) return rc;
257
258    rc = GetSomeIntsIfAny(L("CREC.Frontend.swicms.inutt.disable_after"),
259			  &swicms->inutt.disable_after, 1);
260    if(rc != ESR_SUCCESS) return rc;
261
262    rc = GetSomeIntsIfAny(L("CREC.Frontend.swicms.inutt.enable_after"),
263			  &swicms->inutt.enable_after, 1);
264    if(rc != ESR_SUCCESS) return rc;
265
266    /* we need to estimate the in-utt cmn from speech frames only! so let's
267       make sure to skip some frames before collecting data, */
268    ESR_SessionContains(L("CREC.Frontend.start_windback"), &exists);
269    if (exists) {
270      ESR_BOOL do_skip_even_frames = ESR_TRUE;
271      ESR_SessionGetBool(L("CREC.Frontend.do_skip_even_frames"), &do_skip_even_frames);
272      ESR_SessionGetInt(L("CREC.Frontend.start_windback"), &swicms->inutt.num_bou_frames_to_skip);
273      if( do_skip_even_frames)
274	swicms->inutt.num_bou_frames_to_skip /= 2;
275      swicms->inutt.num_bou_frames_to_skip -= 5; /* ensure spch frames only */
276    }
277  }
278
279  return 0;
280 CLEANUP:
281  return rc;
282}
283
284
285ESR_ReturnCode swicms_get_cmn ( swicms_norm_info* swicms, LCHAR *cmn_params, size_t* len )
286{
287  int dim_count;
288  int i;
289  imeldata temp[MAX_CHAN_DIM];
290  const size_t INT_LENGTH = 12;
291
292  if (  swicms->_prep != NULL )	/* lda exists give them transformed lda. */
293  {
294    for ( dim_count = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
295      temp [dim_count] = swicms->lda_cmn [dim_count];
296    inverse_transform_frame( swicms->_prep, temp, 1 /*do_shift*/);
297  }
298  else	/* lda does not exist give them raw cmn values */
299  {
300    for ( dim_count = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
301      temp [dim_count] = swicms->cmn [dim_count];
302  }
303
304  for ( dim_count = 0, i = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
305  {
306    i += sprintf( cmn_params + i, dim_count==0 ? "%d" : ",%d", temp [dim_count] );
307    if (i + INT_LENGTH >= *len) {
308        *len = MAX_CHAN_DIM * (INT_LENGTH + 2) * sizeof(LCHAR);
309        return ESR_BUFFER_OVERFLOW;
310    }
311  }
312
313  return ESR_SUCCESS;
314}
315
316
317ESR_ReturnCode swicms_set_cmn ( swicms_norm_info* swicms, const char *cmn_params )
318{
319  ESR_ReturnCode    set_status;
320  int               length_of_params;
321  int               dim_count;
322  int               got_word;
323  int               current_position;
324  char              *copy_of_params;
325  char              *parsed_strings [MAX_CHAN_DIM];
326  int               temp_cmn [MAX_CHAN_DIM];
327
328  length_of_params = strlen ( cmn_params ) + 1;
329  copy_of_params = (char*)MALLOC ( length_of_params, NULL );
330
331  if ( copy_of_params != NULL )
332  {
333    set_status = ESR_SUCCESS;
334    memcpy ( copy_of_params, cmn_params, length_of_params );
335    dim_count = 0;
336    current_position = 0;
337    got_word = 0;
338    parsed_strings [dim_count] = copy_of_params + current_position;
339
340    while ( ( dim_count < MAX_CHAN_DIM ) && ( set_status == ESR_SUCCESS ) )
341    {
342      switch ( *( copy_of_params + current_position ) )
343      {
344        case '\0':
345          if ( got_word == 1 )
346          {
347            if ( dim_count == ( MAX_CHAN_DIM - 1 ) )
348              dim_count++;
349            else
350            {
351              PLogError ( "Channel Normalization : Missing Params Must Contain %d Params\n", MAX_CHAN_DIM );
352              set_status = ESR_INVALID_ARGUMENT;
353            }
354          }
355          else
356          {
357            PLogError ( "Channel Normalization : Missing Params Mus Contain %d Params\n", MAX_CHAN_DIM );
358            set_status = ESR_INVALID_ARGUMENT;
359          }
360          break;
361
362        case ',':
363          if ( got_word == 1 )
364          {
365            if ( dim_count < ( MAX_CHAN_DIM - 1 ) )
366            {
367              dim_count++;
368              *( copy_of_params + current_position) = '\0';
369              current_position++;
370
371              if ( current_position == length_of_params )
372              {
373                PLogError ( "Channel Normalization : Delimiter At End Of Param String\n" );
374                set_status = ESR_INVALID_ARGUMENT;
375              }
376              parsed_strings [dim_count] = copy_of_params + current_position;
377              got_word = 0;
378            }
379            else
380            {
381              PLogError ( "Channel Normalization : Too Many Params Must Contain %d Params\n", MAX_CHAN_DIM );
382              set_status = ESR_INVALID_ARGUMENT;
383            }
384          }
385          else
386          {
387            PLogError ( "Channel Normalization : Too Many Params Must Contain %d Params\n", MAX_CHAN_DIM );
388            set_status = ESR_INVALID_ARGUMENT;
389          }
390          break;
391
392        case '0':
393        case '1':
394        case '2':
395        case '3':
396        case '4':
397        case '5':
398        case '6':
399        case '7':
400        case '8':
401        case '9':
402          got_word = 1;
403          current_position++;
404
405          if ( current_position == length_of_params )
406          {
407            PLogError ( "Channel Normalization : Too Many Params Must Contain %d Params\n", MAX_CHAN_DIM );
408            set_status = ESR_INVALID_ARGUMENT;
409          }
410          break;
411
412        default:
413          PLogError ( "Channel Normalization : Invalid Param : %c : Params Must Contain Only Digits\n" );
414          set_status = ESR_INVALID_ARGUMENT;
415          break;
416      }
417    }
418    if ( set_status == ESR_SUCCESS )
419    {
420      dim_count = 0;
421
422      while ( ( dim_count < MAX_CHAN_DIM ) && (  set_status == ESR_SUCCESS ) )
423      {
424        temp_cmn [dim_count] = atoi ( parsed_strings [dim_count] );
425
426        if ( ( temp_cmn [dim_count] < 0 ) || ( temp_cmn [dim_count] > 255 ) )
427        {
428          set_status = ESR_INVALID_ARGUMENT;
429        }
430
431        dim_count++;
432      }
433      if ( set_status == ESR_SUCCESS )
434      {
435        for ( dim_count = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
436          swicms->cmn [dim_count] = temp_cmn [dim_count];
437        if ( swicms->_prep != NULL )	/* Set now if NULL it will automatically be set on first utterance */
438          linear_transform_frame(swicms->_prep, swicms->lda_cmn, 1 /*do_shift*/);
439      }
440    }
441    FREE ( copy_of_params );
442  }
443  else
444  {
445    PLogError ( "Channel Normalization Out Of Memory Error\n" );
446    set_status = ESR_OUT_OF_MEMORY;
447  }
448  swicms->num_frames_in_cmn = 0;
449  return ( set_status );
450}
451
452
453int swicms_cache_frame(swicms_norm_info* swicms, imeldata* frame, int dimen)
454{
455  int i;
456  imeldata *pcache, *pframe;
457
458  ASSERT(dimen == MAX_CHAN_DIM);
459  i = swicms->cached_num_frames / swicms->cache_resolution;
460  if (i < SWICMS_CACHE_SIZE_DEFAULT)
461  {
462    pcache = swicms->cached_sections[ i];
463    if (swicms->cached_num_frames % swicms->cache_resolution == 0)
464    {
465      for (i = 0; i < MAX_CHAN_DIM; i++) *pcache++ = 0;
466      pcache -= MAX_CHAN_DIM;
467    }
468    pframe = frame;
469    for (i = 0; i < MAX_CHAN_DIM; i++) *pcache++ += *pframe++;
470    swicms->cached_num_frames++;
471  }
472
473  return 0;
474}
475
476int apply_channel_normalization_in_swicms(swicms_norm_info *swicms,
477    imeldata* oframe,
478    imeldata* iframe, int dimen)
479{
480  int ii;
481  ASSERT(dimen == MAX_CHAN_DIM);
482
483  /* IF inutt is activated at all */
484  if(swicms->inutt.forget_factor2 != SWICMS_INUTT_FORGET_FACTOR2_DISABLE) {
485    /* AND IF we have not disabled it (due to x-utt more reliable) */
486    if(swicms->inutt.num_frames_in_accum < swicms->inutt.disable_after) {
487      /* AND IF we have skipped past the silence frames */
488      if( swicms->inutt.num_frames_since_bou >= swicms->inutt.num_bou_frames_to_skip){
489	swicms->inutt.num_frames_in_accum++;
490	for(ii=0;ii<dimen;ii++) swicms->inutt.accum[ii] += iframe[ii];
491	/* AND IF we've already seen at least 10 frames (presumably) of speech */
492	if(swicms->inutt.num_frames_in_accum>swicms->inutt.enable_after) {
493	  /* THEN we update the adjustment in-line with the current utterance! */
494	  for(ii=0;ii<dimen;ii++) {
495	    imeldata denom = ( swicms->inutt.forget_factor2
496			       + swicms->inutt.num_frames_in_accum );
497	    /* tmp: weighted average of the old lda_cmn and the new accum */
498	    imeldata tmp=(swicms->lda_cmn[ii]*swicms->inutt.forget_factor2
499			  + swicms->inutt.accum[ii] + denom/2) / denom;
500	    swicms->adjust[ii] = swicms->lda_tmn[ii] - tmp;
501	  }
502	  //printf_vector("swicms->adjust2 "," %d",swicms->adjust, dimen);
503	}
504      }
505    }
506    swicms->inutt.num_frames_since_bou++;
507  }
508
509  for (ii = 0; ii < dimen; ii++)
510    oframe[ii] = MAKEBYTE(iframe[ii] + swicms->adjust[ii]);
511  return 0;
512}
513
514int swicms_update(swicms_norm_info* swicms, int speech_start, int speech_end)
515{
516  int i, j;
517  asr_int32_t speech_avg[MAX_CHAN_DIM], backgr_avg[MAX_CHAN_DIM], avg[MAX_CHAN_DIM];
518  int ff;
519  int nn, speech_nn, backgr_nn;
520  int num_frames = swicms->cached_num_frames;
521  int cache_start, cache_end, backgr_cache_end;
522  int sbindex = swicms->sbindex;
523
524  /* init for utterance */
525  swicms->inutt.num_frames_since_bou = 0;
526
527  swicms->cached_num_frames = 0;
528  cache_start = speech_start;
529  cache_start -= (cache_start % swicms->cache_resolution);
530  cache_start /= swicms->cache_resolution;
531
532  if (speech_end == MAXframeID)
533  {
534    cache_end = SWICMS_CACHE_SIZE_DEFAULT;
535  }
536  else
537  {
538    if (speech_end < num_frames)
539      cache_end = speech_end;
540    else
541      cache_end = num_frames;
542    cache_end -= (cache_end % swicms->cache_resolution);
543    cache_end /= swicms->cache_resolution;
544  }
545
546  if (num_frames == 0 || speech_end == 0 || speech_start == speech_end || speech_end == MAXframeID)
547  {
548    if (speech_end != 0 || speech_start != 0)
549      PLogError("Warning: speech_bounds (%d,%d) swicms->cached_num_frames (%d)\n",
550                speech_start, speech_end, num_frames);
551	if (SWICMS_DEBUG) {
552      //printf_vector("swicms->adjust.rep", " %d", swicms->adjust, MAX_CHAN_DIM);
553    }
554    return 1;
555  }
556
557  backgr_cache_end = (num_frames - num_frames % swicms->cache_resolution) / swicms->cache_resolution;
558
559  speech_nn = (cache_end - cache_start) * swicms->cache_resolution;
560  backgr_nn = backgr_cache_end * swicms->cache_resolution - speech_nn;
561
562  for (i = 0; i < MAX_CHAN_DIM; i++)
563  {
564    speech_avg[i] = 0;
565    backgr_avg[i] = 0;
566    for (j = cache_start; j < cache_end; j++)
567      speech_avg[i] += swicms->cached_sections[j][i];
568    for (j = 0; j < cache_start; j++)
569      backgr_avg[i] += swicms->cached_sections[j][i];
570    for (j = cache_end; j < backgr_cache_end; j++)
571      backgr_avg[i] += swicms->cached_sections[j][i];
572    if (speech_nn == 0 && backgr_nn > 0)
573    {
574      backgr_avg[i] /= backgr_nn;
575      speech_avg[i] = backgr_avg[i];
576      speech_nn = backgr_nn;
577    }
578    else if (speech_nn > 0 && backgr_nn == 0)
579    {
580      speech_avg[i] /= speech_nn;
581      backgr_avg[i] = speech_avg[i];
582      backgr_nn = speech_nn;
583    }
584    else if (speech_nn > 0 && backgr_nn > 0)
585    {
586      speech_avg[i] /= speech_nn;
587      backgr_avg[i] /= backgr_nn;
588    }
589    else
590    {
591      return 0;
592    }
593
594    avg[i] = (sbindex * speech_avg[i] + (100 - sbindex) * backgr_avg[i] + 50) / 100;
595  }
596  nn = (sbindex * speech_nn + (100 - sbindex) * backgr_nn + 50) / 100;
597
598  for (i = 0, ff = 0; i < MAX_CHAN_DIM; i++)
599  {
600    ff += (swicms->lda_tmn[i] - avg[i]);
601  }
602  ff /= MAX_CHAN_DIM; /* sum is now the average offset from TMN */
603  if (ff > 5)
604  {
605    PLogError("Warning: bad utt mean during swicms_update() (moffs=%d)\n", ff);
606    //printf_vector("swicms->adjust.rep", " %d", swicms->adjust, MAX_CHAN_DIM);
607    return 1;
608  }
609  ff = swicms->forget_factor;
610  if (ff < 9999)
611  {
612    for (i = 0; i < MAX_CHAN_DIM; i++)
613    {
614      swicms->lda_cmn[i] = (swicms->lda_cmn[i] * ff + avg[i] * nn + (ff + nn) / 2)  / (ff + nn);
615      swicms->adjust[i] = swicms->lda_tmn[i] - swicms->lda_cmn[i];
616    }
617  }
618
619  if (SWICMS_DEBUG)
620    {
621      imeldata temp[MAX_CHAN_DIM];
622      PLogMessage("swicms_update() used %d frames (%d-%d)", nn, speech_start, speech_end);
623
624      for(i=0;i<MAX_CHAN_DIM;i++) temp[i]=swicms->lda_cmn[i];
625      inverse_transform_frame( swicms->_prep, temp, 1 /*do_shift*/);
626      /* use this dump, to put back into CREC.Frontend.swicms.cmn */
627      printf_vector("swicms.cmn(r)  ", " %d", temp, MAX_CHAN_DIM);
628
629      //printf_vector("swicms.lda_cmn   ", " %d", &swicms.lda_cmn [0], MAX_CHAN_DIM);
630      //printf_vector("swicms.lda_tmn   ", " %d", &swicms.lda_tmn [0], MAX_CHAN_DIM);
631      //printf_vector("swicms->adjust", " %d", swicms->adjust, MAX_CHAN_DIM);
632      //printf_vector("avg.speech    ", " %d", avg, MAX_CHAN_DIM);
633    }
634  else
635    {
636#ifndef NDEBUG
637      //printf_vector("swicms->adjust", " %d", swicms->adjust, MAX_CHAN_DIM);
638#endif
639    }
640  swicms->num_frames_in_cmn += nn;
641  return 0;
642}
643
644int swicms_lda_process(swicms_norm_info* swicms, preprocessed* prep)
645{
646  int i;
647
648  for (i = 0; i < MAX_CHAN_DIM; i++) swicms->lda_tmn[i] = swicms->tmn[i];
649  for (i = 0; i < MAX_CHAN_DIM; i++) swicms->lda_cmn[i] = swicms->cmn[i];
650  linear_transform_frame(prep, swicms->lda_tmn, 1 /*do_shift*/);
651  linear_transform_frame(prep, swicms->lda_cmn, 1 /*do_shift*/);
652
653  for (i = 0; i < MAX_CHAN_DIM; i++)
654  {
655    swicms->adjust[i] = swicms->lda_tmn[i] - swicms->lda_cmn[i];
656  }
657
658#ifndef NDEBUG
659  //printf_vector("swicms->adjust", " %d", swicms->adjust, MAX_CHAN_DIM);
660#endif
661  swicms->is_valid = 1;
662  swicms->_prep = prep;
663
664  if(SWICMS_DEBUG) {
665    imeldata temp[MAX_CHAN_DIM];
666    printf_vector("swicms->cmn     ", " %d", swicms->cmn,     MAX_CHAN_DIM);
667    printf_vector("swicms->lda_cmn ", " %d", swicms->lda_cmn, MAX_CHAN_DIM);
668    //printf_vector("swicms->tmn     ", " %d", swicms->tmn,     MAX_CHAN_DIM);
669    //printf_vector("swicms->lda_tmn ", " %d", swicms->lda_tmn, MAX_CHAN_DIM);
670    //printf_vector("swicms->adjust  ", " %d", swicms->adjust,  MAX_CHAN_DIM);
671
672    //for(i=0;i<MAX_CHAN_DIM;i++) temp[i]=swicms->lda_tmn[i];
673    //inverse_transform_frame( swicms->_prep, temp, 1 /*do_shift*/);
674    //printf_vector("swicms->tmn(r)  ", " %d", temp, MAX_CHAN_DIM);
675
676    for(i=0;i<MAX_CHAN_DIM;i++) temp[i]=swicms->lda_cmn[i];
677    inverse_transform_frame( swicms->_prep, temp, 1 /*do_shift*/);
678    printf_vector("swicms->cmn(r)  ", " %d", temp, MAX_CHAN_DIM);
679  }
680  return 0;
681}
682
683
684
685