1b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/*
2b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *
4b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * Licensed under the Apache License, Version 2.0 (the "License");
5b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * you may not use this file except in compliance with the License.
6b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * You may obtain a copy of the License at
7b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *
8b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *     http://www.apache.org/licenses/LICENSE-2.0
9b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *
10b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * Unless required by applicable law or agreed to in writing, software
11b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * distributed under the License is distributed on an "AS IS" BASIS,
12b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * See the License for the specific language governing permissions and
14b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * limitations under the License.
15b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen */
16b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/**
17b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * @file picoacph.h
18b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *
19b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
20b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * All rights reserved.
21b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *
22b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * History:
23b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * - 2009-04-20 -- initial version
24b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *
25b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen */
26b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
27b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
28b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/**
29b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * @addtogroup picoacph
30b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *
31b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenitemtype, iteminfo1, iteminfo2, content -> TYPE(INFO1,INFO2)content
32b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenin the following
33b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
34b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenitems input
35b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen===========
36b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
37b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenprocessed by sa (POS disambiguation):
38b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- WORDGRAPH(POSes,NA)graph
39b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- WORDINDEX(POSes,NA)POS1|ind1...POSN|indN
40b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- CMD(PICODATA_ITEMINFO1_CMD_FLUSH,PICODATA_ITEMINFO2_NA)
41b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
42b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenprocessed by sa (Phrasing, Accentuation):
43b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- PUNC(PUNCtype,PUNCsubtype)
44b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
45b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenunprocessed:
46b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- all other item types are forwarded through the PU without modification:
47b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  CMD
48b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
49b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
50b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenminimal input size (before processing starts)
51b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen==================
52b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
53b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenprocessing (POS disambiguation, g2p, lexind, phrasing, accentuation)
54b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenis possible with
55b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
56b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- one punctuation-phrase, consisting of a sequence (see below for
57b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  limits) of items terminated by a PUNC item.
58b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
59b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen(possible but not implemented: as long as the internal buffer is
60b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenempty, non-processed item types can be processed immediately)
61b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
62b190149a69b110e6719ce0a41877a683f8db7ae7Charles ChenEnsuring terminal PUNC item:
63b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- when reading items from the external buffer a CMD(...FLUSH...) is
64b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  converted to a PUNC(...FLUSH...) item
65b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- If needed, a PUNC(PHRASE) is artificially added to ensure a phrase
66b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  fits in the PU's memory and processing can start.
67b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
68b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
69b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenitems processed and output
70b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen==========================
71b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
72b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenprecondition:
73b190149a69b110e6719ce0a41877a683f8db7ae7Charles ChenCMD(...FLUSH...) already converted to PUNC(...FLUSH...) and trailing
74b190149a69b110e6719ce0a41877a683f8db7ae7Charles ChenPUNC item enforced if necessary.
75b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
76b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen----
77b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen-# PROCESS_POSD: processing input WORDGRAPH or WORDINDEX items, after
78b190149a69b110e6719ce0a41877a683f8db7ae7Charles ChenPOS disambiguation (POSes -> POS), results in a sequence of:
79b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  -
80b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - WORDGRAPH(POS,NA)graph
81b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - WORDINDEX(POS,NA)POS|ind
82b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  -
83b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  .
84b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen-# PROCESS_WPHO: then, after lex-index lookup and G2P in a
85b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chensequence of:
86b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - WORDPHON(POS,NA)phon
87b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
88b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen(phon containing primary and secondary word-level stress)
89b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
90b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen----
91b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen3. PROCESS_PHR: then, after processing these WORDPHON items,
92b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chentogether with the trailing PUNC item results in:
93b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
94b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen-> BOUND(BOUNDstrength,BOUNDtype)
95b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
96b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenbeing added in the sequence of WORDPHON (respectively inserted instead
97b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenof the PUNC). All PUNC, incl PUNC(...FLUSH...) now gone.
98b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
99b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen----
100b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen4. PROCESS_ACC: then, after processing the WORDPHON and BOUND items
101b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenresults in:
102b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
103b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen-> WORDPHON(POS,ACC)phon
104b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
105b190149a69b110e6719ce0a41877a683f8db7ae7Charles ChenA postprocessing step of accentuation is hard-coded in the
106b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenaccentuation module: In case the whole word does not have any stress
107b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenat all (primary or secondary or both) then do the following mapping:
108b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
109b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  ACC0 nostress -> ACC0
110b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  ACC1 nostress -> ACC3
111b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  ACC2 nostress -> ACC3
112b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  ACC3 nostress -> ACC3
113b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
114b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen----
115b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- POS
116b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  a single, unambiguous POS
117b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
118b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chencf. picodata.h for
119b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- ACC    (sentence-level accent (aka prominence)) %d
120b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - PICODATA_ACC0
121b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - PICODATA_ACC1
122b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - PICODATA_ACC2  (<- may be mapped to ACC1, i.e. no ACC2 in output)
123b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - PICODATA_ACC3
124b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
125b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- BOUNDstrength %d
126b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - PICODATA_ITEMINFO1_BOUND_SBEG (at sentence start)
127b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - PICODATA_ITEMINFO1_BOUND_SEND (at sentence end)
128b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - PICODATA_ITEMINFO1_BOUND_TERM (replaces a flush)
129b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - PICODATA_ITEMINFO1_BOUND_PHR1 (primary boundary)
130b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - PICODATA_ITEMINFO1_BOUND_PHR2 (short break)
131b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - PICODATA_ITEMINFO1_BOUND_PHR3 (secondary phrase boundary, no break)
132b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - PICODATA_ITEMINFO1_BOUND_PHR0 (no break; not produced by sa; the absence
133b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen          of a BOUND in the item sequence is equivalent to PHR0 bound strength)
134b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
135b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- BOUNDtype    (created in sa based on punctuation, indicates type of phrase
136b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                following the boundary) %d
137b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - PICODATA_ITEMINFO2_BOUNDTYPE_P
138b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - PICODATA_ITEMINFO2_BOUNDTYPE_T
139b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - PICODATA_ITEMINFO2_BOUNDTYPE_Q
140b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - PICODATA_ITEMINFO2_BOUNDTYPE_E
141b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
142b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
143b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenoutput sequence (without CMDs):
144b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
145b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen<output> = { BOUND(BOUND_SBEG,PHRASEtype) <sentence> BOUND(BOUND_SEND,..)} BOUND(BOUND_TERM,..)
146b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
147b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen<sentence> =   <phrase> { BOUND(BOUND_PHR1|2|3,BOUNDtype) <phrase> }
148b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
149b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen<phrase> = WORDPHON(POS,ACC)phon { WORDPHON(POS,ACC)phon }
150b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
151b190149a69b110e6719ce0a41877a683f8db7ae7Charles ChenDone in later PU: mapping ACC & word-level stress to syllable accent value
152b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - ACC0 prim -> 0
153b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - ACC1 prim -> 1
154b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - ACC2 prim -> 2
155b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - ACC3 prim -> 3
156b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - ACC0 sec  -> 0
157b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - ACC1 sec  -> 4
158b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - ACC2 sec  -> 4
159b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - ACC3 sec  -> 4
160b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
161b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenother limitations
162b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen=================
163b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
164b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- item size: header plus len=256 (valid for Pico in general)
165b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- see defines below for max nr of items. Item heads plus ref. to contents
166b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  buffer are stored in array with fixed size elements. Two restrictions:
167b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - MAXNR_HEADX (PICOACPH_MAXNR_HEADX: max nr elements==items in headx array)
168b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - CONTENTSSIZE (PICOACPH_MAXSIZE_CBUF: max size of all contents together)
169b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen */
170b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
171b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
172b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#ifndef PICOACPH_H_
173b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#define PICOACPH_H_
174b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
175b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#include "picoos.h"
176b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#include "picodata.h"
177b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#include "picorsrc.h"
178b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
179b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#ifdef __cplusplus
180b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenextern "C" {
181b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif
182b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#if 0
183b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
184b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif
185b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
186b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* Item-count restriction: maximum number of extended item heads (one per item) that the headx array can hold. */
187b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#define PICOACPH_MAXNR_HEADX    60
188b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
189b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* Content-size restriction: maximum combined size of all item contents stored together in the contents buffer (unit not stated in this header; presumably bytes -- TODO confirm). */
190b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#define PICOACPH_MAXSIZE_CBUF 7680
191b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
192b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
193b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
/**
 * Declaration of the constructor-style entry point for the processing unit
 * (PU) whose item input/output contract is described in the file comment
 * above; by its name, the accentuation/phrasing unit.
 *
 * NOTE(review): only the prototype is visible in this header. The parameter
 * roles below are inferred from their names and types -- confirm against the
 * implementation before relying on them.
 *
 * @param mm     memory manager handle (presumably used for the PU's allocations)
 * @param common common handle of type picoos_Common
 * @param cbIn   character buffer; presumably the buffer the PU reads input items from
 * @param cbOut  character buffer; presumably the buffer the PU writes output items to
 * @param voice  voice handle of type picorsrc_Voice
 * @return a picodata_ProcessingUnit; the failure convention (e.g. NULL) is not
 *         visible here -- TODO confirm
 */
194b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenpicodata_ProcessingUnit picoacph_newAccPhrUnit(
195b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        picoos_MemoryManager mm,
196b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        picoos_Common common,
197b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        picodata_CharBuffer cbIn,
198b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        picodata_CharBuffer cbOut,
199b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        picorsrc_Voice voice);
200b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
201b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#ifdef __cplusplus
202b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
203b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif
204b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
205b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif /*PICOACPH_H_*/
206