1/*
2 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16/**
17 * @file picoacph.h
18 *
19 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
20 * All rights reserved.
21 *
22 * History:
23 * - 2009-04-20 -- initial version
24 *
25 */
26
27
28/**
29 * @addtogroup picoacph
30 *
31itemtype, iteminfo1, iteminfo2, content -> TYPE(INFO1,INFO2)content
32in the following
33
34items input
35===========
36
37processed by sa (POS disambiguation):
38- WORDGRAPH(POSes,NA)graph
39- WORDINDEX(POSes,NA)POS1|ind1...POSN|indN
40- CMD(PICODATA_ITEMINFO1_CMD_FLUSH,PICODATA_ITEMINFO2_NA)
41
42processed by sa (Phrasing, Accentuation):
43- PUNC(PUNCtype,PUNCsubtype)
44
45unprocessed:
46- all other item types are forwarded through the PU without modification:
47  CMD
48
49
50minimal input size (before processing starts)
51==================
52
53processing (POS disambiguation, g2p, lexind, phrasing, accentuation)
54is possible with
55
56- one punctuation-phrase, consisting of a sequence (see below for
57  limits) of items terminated by a PUNC item.
58
59(possible but not implemented: as long as the internal buffer is
60empty, non-processed item types can be processed immediately)
61
62Ensuring terminal PUNC item:
63- when reading items from the external buffer a CMD(...FLUSH...) is
64  converted to a PUNC(...FLUSH...) item
65- If needed, a PUNC(PHRASE) is artificially added to ensure a phrase
66  fits in the PU's memory and processing can start.
67
68
69items processed and output
70==========================
71
72precondition:
73CMD(...FLUSH...) already converted to PUNC(...FLUSH...) and trailing
74PUNC item enforced if necessary.
75
76----
77-# PROCESS_POSD: processing input WORDGRAPH or WORDINDEX items, after
78POS disambiguation (POSes -> POS), results in a sequence of:
79  -
80  - WORDGRAPH(POS,NA)graph
81  - WORDINDEX(POS,NA)POS|ind
82  -
83  .
84-# PROCESS_WPHO: then, after lex-index lookup and G2P in a
85sequence of:
86  - WORDPHON(POS,NA)phon
87
88(phon containing primary and secondary word-level stress)
89
90----
913. PROCESS_PHR: then, after processing these WORDPHON items,
92together with the trailing PUNC item results in:
93
94-> BOUND(BOUNDstrength,BOUNDtype)
95
96being added in the sequence of WORDPHON (respectively inserted instead
97of the PUNC). All PUNC, incl PUNC(...FLUSH...) now gone.
98
99----
1004. PROCESS_ACC: then, after processing the WORDPHON and BOUND items
101results in:
102
103-> WORDPHON(POS,ACC)phon
104
105A postprocessing step of accentuation is hard-coded in the
106accentuation module: if the whole word carries no stress at all
107(neither primary nor secondary), the following mapping is applied:
108
109  ACC0 nostress -> ACC0
110  ACC1 nostress -> ACC3
111  ACC2 nostress -> ACC3
112  ACC3 nostress -> ACC3
113
114----
115- POS
116  a single, unambiguous POS
117
118cf. picodata.h for
119- ACC    (sentence-level accent (aka prominence)) %d
120  - PICODATA_ACC0
121  - PICODATA_ACC1
122  - PICODATA_ACC2  (<- maybe mapped to ACC1, ie. no ACC2 in output)
123  - PICODATA_ACC3
124
125- BOUNDstrength %d
126  - PICODATA_ITEMINFO1_BOUND_SBEG (at sentence start)
127  - PICODATA_ITEMINFO1_BOUND_SEND (at sentence end)
128  - PICODATA_ITEMINFO1_BOUND_TERM (replaces a flush)
129  - PICODATA_ITEMINFO1_BOUND_PHR1 (primary boundary)
130  - PICODATA_ITEMINFO1_BOUND_PHR2 (short break)
131  - PICODATA_ITEMINFO1_BOUND_PHR3 (secondary phrase boundary, no break)
132  - PICODATA_ITEMINFO1_BOUND_PHR0 (no break; not produced by sa: the absence of
133          a BOUND item in the item sequence is equivalent to PHR0 bound strength)
134
135- BOUNDtype    (created in sa based on punctuation, indicates type of phrase
136                following the boundary) %d
137  - PICODATA_ITEMINFO2_BOUNDTYPE_P
138  - PICODATA_ITEMINFO2_BOUNDTYPE_T
139  - PICODATA_ITEMINFO2_BOUNDTYPE_Q
140  - PICODATA_ITEMINFO2_BOUNDTYPE_E
141
142
143output sequence (without CMDs):
144
145<output> = { BOUND(BOUND_SBEG,PHRASEtype) <sentence> BOUND(BOUND_SEND,..)} BOUND(BOUND_TERM,..)
146
147<sentence> =   <phrase> { BOUND(BOUND_PHR1|2|3,BOUNDtype) <phrase> }
148
149<phrase> = WORDPHON(POS,ACC)phon { WORDPHON(POS,ACC)phon }
150
151Done in later PU: mapping ACC & word-level stress to syllable accent value
152  - ACC0 prim -> 0
153  - ACC1 prim -> 1
154  - ACC2 prim -> 2
155  - ACC3 prim -> 3
156  - ACC0 sec  -> 0
157  - ACC1 sec  -> 4
158  - ACC2 sec  -> 4
159  - ACC3 sec  -> 4
160
161other limitations
162=================
163
164- item size: header plus len=256 (valid for Pico in general)
165- see defines below for max nr of items. Item heads plus ref. to contents
166  buffer are stored in array with fixed size elements. Two restrictions:
167  - PICOACPH_MAXNR_HEADX (max nr of elements == items in the headx array)
168  - PICOACPH_MAXSIZE_CBUF (max size of all item contents together)
169 */
170
171
172#ifndef PICOACPH_H_
173#define PICOACPH_H_
174
175#include "picoos.h"
176#include "picodata.h"
177#include "picorsrc.h"
178
/* Give the declarations that follow C linkage when this header is included
 * from C++ code. */
179#ifdef __cplusplus
180extern "C" {
181#endif
/* Never compiled (#if 0): the lone brace only balances the one opened by
 * extern "C" above -- presumably so that brace-matching editors/indenters
 * are not confused by the unbalanced '{'. */
182#if 0
183}
184#endif
185
186/* item-count limit: maximum number of extended item heads (one fixed-size element per item) held in the headx array */
187#define PICOACPH_MAXNR_HEADX    60
188
189/* size limit: maximum total size of all item contents stored together in the cont buffer (presumably bytes -- TODO confirm) */
190#define PICOACPH_MAXSIZE_CBUF 7680
191
192
193
/**
 * Constructor declaration for the processing unit (PU) documented in the
 * file header comment: phrasing and accentuation, turning WORDPHON and PUNC
 * items into BOUND and WORDPHON(POS,ACC) items.
 *
 * NOTE(review): the implementation is not visible from this header; the
 * parameter roles below are inferred from names and types only and should
 * be confirmed against picoacph.c.
 *
 * @param mm      memory manager handle (presumably used to allocate the PU)
 * @param common  common-services handle (exact use not visible here)
 * @param cbIn    presumably the buffer that input items are read from
 * @param cbOut   presumably the buffer that processed items are written to
 * @param voice   voice resource handle (presumably supplies the knowledge
 *                the PU needs -- TODO confirm)
 * @return a picodata_ProcessingUnit; the value returned on failure is not
 *         visible from this header -- TODO confirm
 */
194picodata_ProcessingUnit picoacph_newAccPhrUnit(
195        picoos_MemoryManager mm,
196        picoos_Common common,
197        picodata_CharBuffer cbIn,
198        picodata_CharBuffer cbOut,
199        picorsrc_Voice voice);
200
201#ifdef __cplusplus
202}
203#endif
204
205#endif /*PICOACPH_H_*/
206