1/*---------------------------------------------------------------------------*
2 *  utteranc.h  *
3 *                                                                           *
 *  Copyright 2007, 2008 Nuance Communications, Inc.                               *
5 *                                                                           *
6 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7 *  you may not use this file except in compliance with the License.         *
8 *                                                                           *
9 *  You may obtain a copy of the License at                                  *
10 *      http://www.apache.org/licenses/LICENSE-2.0                           *
11 *                                                                           *
12 *  Unless required by applicable law or agreed to in writing, software      *
13 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 *  See the License for the specific language governing permissions and      *
16 *  limitations under the License.                                           *
17 *                                                                           *
18 *---------------------------------------------------------------------------*/
19
20
21
22#ifndef _h_utteranc_
23#define _h_utteranc_
24
#ifdef SET_RCSID
/* RCS identification string for this header; only compiled in when
 * SET_RCSID is defined. */
static const char utteranc_h[] =
  "$Id: utteranc.h,v 1.3.6.7 2007/08/31 17:44:53 dahan Exp $";
#endif
28
29
30
31#include "all_defs.h"
32#include "hmm_type.h"
33#include "fpi_tgt.h"
34#include "voicing.h"
35#include "specnorm.h"
36#include "channorm.h"
37#include "swicms.h"
38#ifndef _RTT
39#include "duk_io.h"
40#endif
41
/* Default buffer size, in frames. */
#define DEFAULT_BUFFER_SIZE  100
/* Number of past frames kept, in frames. */
#define KEEP_FRAMES          40
44
45/*  Functions supported are
46**  new, delete (by source)
47**  open file/device, close file/device
48**  attach and detach sink
49**  read/store samples - including the header
50*/
51
/**
 * Label structure.
 *
 * NOTE(review): the units of begin/end and the meaning of extra and flag
 * are not stated in this header -- confirm against the code that fills
 * these records in before relying on them.
 */
typedef struct
{
  char          *label;
  long           begin;  /* presumably the start of the labelled span */
  long           end;    /* presumably the end of the labelled span */
  char          *extra;
  unsigned char  flag;
} annotate;
64
65
66/**
67 * @todo document
68 */
69typedef struct
70{
71  int   utt_type;
72  int   dim;
73  fepFramePkt  *frame;
74  int   num_chan;
75  int   do_channorm;
76  spect_dist_info **spchchan; /*  Mirrored from the Wave object */
77  norm_info   *channorm; /*  Mirrored from the Wave object */
78  swicms_norm_info     *swicms;    /* copy of wave obj pointer */
79  spect_dist_info *backchan[MAX_CHAN_DIM];
80  featdata  *last_push;
81  int   voice_duration;
82  int   quiet_duration;
83  int   unsure_duration;
84  int   start_windback;
85}
86utt_generic_info;
87
88#ifndef _RTT
89/**
90 * @todo document
91 */
92typedef struct
93{
94  char  typ;  /* s (16 bit), c (8 bit), u (newton .utb) */
95  int   endian;  /* 0 is little 1 is big */
96  int   do_skip; /* skip every other frame */
97  unsigned long len;  /* length of file/utterance */
98  PFile* file;  /* pointer to file */
99  char  name[MAX_LABEL]; /* file name */
100  /*    int   op;  read or write */
101  int   num_utts; /* no. of utterances in utb file */
102  annotate  *utb_table; /* utb file header information */
103}
104utt_file_info;
105
106/**
107 * @todo document
108 */
109typedef struct
110{
111  int   utt_type;
112  int   dim;
113  fepFramePkt  *frame;
114  int   num_chan;
115  int   do_channorm;
116  spect_dist_info **spchchan; /*  Mirrored from the Wave object */
117  norm_info   *channorm; /*  Mirrored from the Wave object */
118  swicms_norm_info    *swicms;          /* copy of wave obj pointer */
119  spect_dist_info *backchan[MAX_CHAN_DIM];
120  featdata  *last_push;
121  int   voice_duration;
122  int   quiet_duration;
123  int   unsure_duration;
124  int   start_windback;
125  /*    voicing_info voice; */
126  utt_file_info file;
127}
128file_utterance_info;
129#endif
130
131/**
132 * @todo document
133 */
134typedef struct
135{
136  int   utt_type;
137  int   dim;
138  fepFramePkt  *frame;
139  int   num_chan;
140  int   do_channorm;
141  spect_dist_info **spchchan; /*  Mirrored from the Wave object */
142  norm_info   *channorm; /*  Mirrored from the Wave object */
143  swicms_norm_info    *swicms;        /* copy of wave obj pointer */
144  spect_dist_info *backchan[MAX_CHAN_DIM];
145  featdata  *last_push;
146  int   voice_duration;
147  int   quiet_duration;
148  int   unsure_duration;
149  int   start_windback;
150}
151live_utterance_info;
152
153/**
154 * @todo document
155 */
156typedef union
157{
158  int   utt_type; /* live or from file */
159  utt_generic_info    gen_utt; /* generic one */
160#ifndef _RTT
161  file_utterance_info file_utt;
162#endif
163  live_utterance_info live_utt;
164} utterance_info;
165
166
167/*
168**  Size of the utb file headers and details
169*/
170
171#ifndef _RTT
#define UTT_VERSION      2
/* The three sizes below are sizes on disk. */
#define UTT_HEADER_SIZE  16
#define UTB_HEADER_SIZE  32
#define UTB_HEADER_USED  16  /* SAL */
176
/**
 * UTB file header.
 *
 * NOTE(review): the member order and types are left exactly as they were;
 * whether this structure is read/written directly or field by field is
 * decided in read_utt_head()/write_utt_head(), which are not visible here.
 */
typedef struct _UttHeader
{
  /** The size of the header in bytes. */
  unsigned short headerSize;

  /** The version of the file format. */
  unsigned short version;

  /** The size of the payload in bytes. */
  unsigned long nBytes;

  /** The number of parameters per frame. */
  unsigned short nParametersPerFrame;

  /** 0=unknown, 1=none, 2=amp-based, 3=harmonicity-based, 4=mrec style */
  unsigned short channelNormalization;

  /** 0=unknown, 1=no, 2=yes */
  unsigned short speakerNormalization;

  /** 0=unknown, 1=no, 2=yes */
  unsigned short imeldaization;

  /** Before imelda truncation. */
  unsigned short nOriginalParameters;

  /** The number of samples per frame. */
  unsigned short samplesPerFrame;

  /** The audio sample rate. */
  unsigned long sampleRate;

  /** not used in version 5. */
  unsigned long checksum;
} UttHeader;
228
229int    update_utb_header(file_utterance_info *utt, int frames, int samplerate,
230                         int framerate);
231void    init_utt_v5_header(UttHeader *uhead, int dim, int samplerate, int framerate);
232int init_data_file(char *filename, file_utterance_info *utt, int dimen,
233                   char typ, int endian, int do_skip);
234int new_data_file(char *filename, file_utterance_info *utt, int dimen,
235                  char typ, int endian);
236int set_data_frame(file_utterance_info *utt, long begin);
237int buffer_data_frames(file_utterance_info *utt, long f_begin, long f_end);
238void more_data_frames(file_utterance_info *utt);
239int save_data_frames(file_utterance_info *utt);
240void close_data_stream(file_utterance_info *utt);
241int init_utb_file(file_utterance_info *utt, annotate **table);
242int position_utb_file(file_utterance_info *utt, long position, annotate *table);
243int load_utb_data(file_utterance_info *utt, int num_frames, int do_skip);
244int load_short_data(file_utterance_info *utt, int num_frames, int do_skip);
245int save_utb_data(file_utterance_info *utt, int num_frames);
246int save_short_data(file_utterance_info *utt, int num_frames);
247int read_utt_head(UttHeader *head, PFile* datafile);
248int write_utt_head(UttHeader *head, PFile* datafile);
249int check_for_utb(char* filename);
250
251/*  TCP reading routines
252*/
253int     read_tcp(char *filename, annotate **tag_base);
254int     read_lst(char *filename, annotate *tag_base, int ntags);
255int     read_utb_table(char *filename, annotate **tag_base);
256void    save_tcp(char *tcpnam, annotate *tag, int ntags);
257void compose_tcp_name_of_utt(char* uttname , char* tcpname);
258
259#endif
260
261void init_utterance(utterance_info *utt, int utt_type, int dimen,
262                    int buffer_size, int keep_frames, int num_chan, int do_voicing);
263void set_voicing_durations(utterance_info *utt, int voice_duration,
264                           int quiet_duration, int unsure_duration,
265                           int start_windback);
266void free_utterance(utterance_info *utt);
267int utterance_started(utterance_info *utt);
268int utterance_ended(utterance_info *utt);
269int load_utterance_frame(utterance_info *utt, unsigned char* pUttFrame, int voicing);
270int copy_utterance_frame(utterance_info *oututt, utterance_info *inutt);
271
272#endif /* _h_utteranc_ */
273