1/*---------------------------------------------------------------------------*
2 *  swicms.h                                                                 *
3 *                                                                           *
4 *  Copyright 2007, 2008 Nuance Communciations, Inc.                         *
5 *                                                                           *
6 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7 *  you may not use this file except in compliance with the License.         *
8 *                                                                           *
9 *  You may obtain a copy of the License at                                  *
10 *      http://www.apache.org/licenses/LICENSE-2.0                           *
11 *                                                                           *
12 *  Unless required by applicable law or agreed to in writing, software      *
13 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 *  See the License for the specific language governing permissions and      *
16 *  limitations under the License.                                           *
17 *                                                                           *
18 *---------------------------------------------------------------------------*/
19
20#ifndef __SWICMS_H__
21#define __SWICMS_H__
22
23#include"all_defs.h"
24#include"sizes.h"
25#include"fronttyp.h"
26#include"pre_desc.h"
27
/*
 * Non-zero declares swicms_compare() / swicms_dump_stats() as real functions;
 * zero turns both into no-op macros.
 */
#define DEBUG_SWICMS        0

/* Frame capacity that the default cache geometry is sized for. */
#define MAX_CACHED_FRAMES 800

/* Default number of frames averaged into one cached section. */
#define SWICMS_CACHE_RESOLUTION_DEFAULT   8

/*
 * The section count below is derived by integer division, so the frame
 * capacity must divide evenly or the cache would silently lose capacity.
 */
#if (MAX_CACHED_FRAMES % SWICMS_CACHE_RESOLUTION_DEFAULT) != 0
#error "MAX_CACHED_FRAMES must be a multiple of SWICMS_CACHE_RESOLUTION_DEFAULT"
#endif

/*
 * Default number of cached sections: #frames / resolution.  Derived rather
 * than hard-coded so the three constants cannot drift apart; parenthesized so
 * it stays safe inside larger expressions and remains usable as an array bound.
 */
#define SWICMS_CACHE_SIZE_DEFAULT \
  (MAX_CACHED_FRAMES / SWICMS_CACHE_RESOLUTION_DEFAULT)
32
33/**
34 * This is used for casting in debugger, just type (imelvec*)tmn.
35 */
36typedef struct
37{
38  imeldata vec[MAX_CHAN_DIM];
39}
40imelvec;
41
42/**
43 * Does channel normalization without using fine recognition segmenation.  It remembers the
44 * frames of speech and uses that as a channel mean for the next utterance.  A forget_factor
45 * is used to weigh the new speech mean estimate with an older one.
46 */
47typedef struct
48{
49  imeldata tmn [MAX_CHAN_DIM];                 /* target mean */
50  imeldata cmn [MAX_CHAN_DIM];                 /* channel mean */
51
52  imeldata lda_tmn [MAX_CHAN_DIM];                 /* target mean */
53  imeldata lda_cmn [MAX_CHAN_DIM];                 /* channel mean */
54
55  imeldata adjust[MAX_CHAN_DIM]; /* target less channel */
56
57  int is_valid;
58  int forget_factor;           /* in frames, mass of cmn average */
59  int sbindex;                 /* speech to background index
60        100 -> use only speech to calculate CMN
61        000 -> use only background to calculate CMN
62        050 -> use half/half ..
63        all numbers in between are acceptable */
64
65  int num_frames_in_cmn; /* num frames used to estimate cmn (or lda_cmn) */
66
67  /* for in-utterance channel normalization */
68  struct {
69    int forget_factor2;     /* cmn is given this weight to start off */
70    int disable_after;      /* we disable in-utt cms after this many fr*/
71    int enable_after;       /* we enable in-utt cms after this many fr*/
72    int num_bou_frames_to_skip;   /* don't start accum 'til this many frames */
73    int num_frames_since_bou;     /* counter for above, bou=begin-of-utt     */
74    int num_frames_in_accum;      /* number of frames in accum */
75    imeldata accum[MAX_CHAN_DIM]; /* accumulates frames of the current utt */
76  } inutt;
77
78  int cached_num_frames;       /* we cache frames, until recognition is done
79        and can calculate speech mean from these */
80  int cache_resolution;        /* we'll avg this many frames per section */
81  imeldata cached_sections[SWICMS_CACHE_SIZE_DEFAULT][MAX_CHAN_DIM];
82  /*const*/ preprocessed* _prep;
83}
84swicms_norm_info;
85
86int swicms_init(swicms_norm_info* swicms);
87int swicms_cache_frame(swicms_norm_info* swicms, imeldata* frame, int dimen);
88int apply_channel_normalization_in_swicms(swicms_norm_info *swicms,
89    imeldata* oframe, imeldata* iframe,
90    int dimen);
91int swicms_lda_process(swicms_norm_info* swicms, preprocessed* prep);
92
93int swicms_update(swicms_norm_info* swicms, int speech_start_frame, int speech_end_frame);
94
95ESR_ReturnCode swicms_set_cmn(swicms_norm_info *swicms, const LCHAR *new_cmn_params );
96ESR_ReturnCode swicms_get_cmn(swicms_norm_info *swicms, LCHAR *cmn_params, size_t* len );
97
/*
 * Debug-only helpers.  When DEBUG_SWICMS is non-zero they are ordinary
 * functions; otherwise they are compiled out.
 *
 * The disabled forms expand to ((void)0) rather than to nothing, so a call
 * is still a well-formed void expression: it needs its trailing semicolon,
 * works inside a comma expression, and does not leave an empty if/else body
 * behind.  The arguments are NOT evaluated when the helpers are disabled, so
 * callers must not rely on side effects in them.
 */
#if DEBUG_SWICMS
int swicms_compare(swicms_norm_info *swicms, imeldata *imelda_adjust);
int swicms_dump_stats(swicms_norm_info *swicms);
#else
#define swicms_compare(swicms, ia)  ((void)0)
#define swicms_dump_stats(swicms)   ((void)0)
#endif
105
106#endif
107
108