1b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* 2b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland 3b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * 4b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * Licensed under the Apache License, Version 2.0 (the "License"); 5b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * you may not use this file except in compliance with the License. 6b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * You may obtain a copy of the License at 7b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * 8b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * http://www.apache.org/licenses/LICENSE-2.0 9b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * 10b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * Unless required by applicable law or agreed to in writing, software 11b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * distributed under the License is distributed on an "AS IS" BASIS, 12b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * See the License for the specific language governing permissions and 14b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * limitations under the License. 15b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen */ 16b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/** 17b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * @file picoacph.h 18b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * 19b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland 20b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * All rights reserved. 21b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * 22b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * History: 23b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * - 2009-04-20 -- initial version 24b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * 25b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen */ 26b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 27b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 28b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/** 29b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * @addtogroup picoacph 30b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * 31b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenitemtype, iteminfo1, iteminfo2, content -> TYPE(INFO1,INFO2)content 32b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenin the following 33b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 34b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenitems input 35b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen=========== 36b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 37b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenprocessed by sa (POS disambiguation): 38b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- WORDGRAPH(POSes,NA)graph 39b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- WORDINDEX(POSes,NA)POS|1ind1...POSN|indN 40b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- CMD(PICODATA_ITEMINFO1_CMD_FLUSH,PICODATA_ITEMINFO2_NA) 41b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 42b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenprocessed by sa (Phrasing, Accentuation): 43b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- PUNC(PUNCtype,PUNCsubtype) 44b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 45b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenunprocessed: 46b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- all other item types are forwarded through the PU without modification: 47b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen CMD 48b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 49b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 50b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenminimal input size (before processing starts) 51b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen================== 52b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 53b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenprocessing (POS disambiguation, g2p, lexind, phrasing, accentuation) 54b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenis possible with 55b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 56b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- one punctuation-phrase, consisting of a sequence (see below for 57b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen limits) of items terminated by a PUNC item. 58b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 59b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen(possible but not implemented: as long as the internal buffer is 60b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenempty, non-processed item types can be processed immediately) 61b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 62b190149a69b110e6719ce0a41877a683f8db7ae7Charles ChenEnsuring terminal PUNC item: 63b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- when reading items from the external buffer a CMD(...FLUSH...) is 64b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen converted to a PUNC(...FLUSH...) item 65b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- If needed, a PUNC(PHRASE) is artificially added to ensure a phrase 66b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen fits in the PUs memory and processing can start. 67b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 68b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 69b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenitems processed and output 70b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen========================== 71b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 72b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenprecondition: 73b190149a69b110e6719ce0a41877a683f8db7ae7Charles ChenCMD(...FLUSH...) already converted to PUNC(...FLUSH...) and trailing 74b190149a69b110e6719ce0a41877a683f8db7ae7Charles ChenPUNC item enforced if necessary. 75b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 76b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen---- 77b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen-# PROCESS_POSD: processing input WORDGRAPH or WORDINDEX items, after 78b190149a69b110e6719ce0a41877a683f8db7ae7Charles ChenPOS disambiguation (POSes -> POS), results in a sequence of: 79b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - 80b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - WORDGRAPH(POS,NA)graph 81b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - WORDINDEX(POS,NA)POS|ind 82b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - 83b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen . 84b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen-# PROCESS_WPHO: then, after lex-index lookup and G2P in a 85b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chensequence of: 86b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - WORDPHON(POS,NA)phon 87b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 88b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen(phon containing primary and secondary word-level stress) 89b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 90b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen---- 91b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen3. PROCESS_PHR: then, after processing these WORDPHON items, 92b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chentogether with the trailing PUNC item results in: 93b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 94b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen-> BOUND(BOUNDstrength,BOUNDtype) 95b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 96b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenbeing added in the sequence of WORDPHON (respectively inserted instead 97b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenof the PUNC). All PUNC, incl PUNC(...FLUSH...) now gone. 98b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 99b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen---- 100b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen4. PROCESS_ACC: then, after processing the WORDPHON and BOUND items 101b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenresults in: 102b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 103b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen-> WORDPHON(POS,ACC)phon 104b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 105b190149a69b110e6719ce0a41877a683f8db7ae7Charles ChenA postprocessing step of accentuation is hard-coded in the 106b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenaccentuation module: In case the whole word does not have any stress 107b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenat all (primary or secondary or both) then do the following mapping: 108b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 109b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen ACC0 nostress -> ACC0 110b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen ACC1 nostress -> ACC3 111b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen ACC2 nostress -> ACC3 112b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen ACC3 nostress -> ACC3 113b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 114b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen---- 115b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- POS 116b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen a single, unambiguous POS 117b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 118b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chencf. picodata.h for 119b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- ACC (sentence-level accent (aka prominence)) %d 120b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - PICODATA_ACC0 121b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - PICODATA_ACC1 122b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - PICODATA_ACC2 (<- maybe mapped to ACC1, ie. no ACC2 in output) 123b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - PICODATA_ACC3 124b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 125b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- BOUNDstrength %d 126b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - PICODATA_ITEMINFO1_BOUND_SBEG (at sentence start) 127b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - PICODATA_ITEMINFO1_BOUND_SEND (at sentence end) 128b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - PICODATA_ITEMINFO1_BOUND_TERM (replaces a flush) 129b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - PICODATA_ITEMINFO1_BOUND_PHR1 (primary boundary) 130b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - PICODATA_ITEMINFO1_BOUND_PHR2 (short break) 131b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - PICODATA_ITEMINFO1_BOUND_PHR3 (secondary phrase boundary, no break) 132b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - PICODATA_ITEMINFO1_BOUND_PHR0 (no break, not produced by sa, not existing 133b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen BOUND in item sequence equals PHR0 bound strength) 134b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 135b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- BOUNDtype (created in sa base on punctuation, indicates type of phrase 136b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen following the boundary) %d 137b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - PICODATA_ITEMINFO2_BOUNDTYPE_P 138b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - PICODATA_ITEMINFO2_BOUNDTYPE_T 139b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - PICODATA_ITEMINFO2_BOUNDTYPE_Q 140b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - PICODATA_ITEMINFO2_BOUNDTYPE_E 141b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 142b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 143b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenoutput sequence (without CMDs): 144b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 145b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen<output> = { BOUND(BOUND_SBEG,PHRASEtype) <sentence> BOUND(BOUND_SEND,..)} BOUND(BOUND_TERM,..) 146b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 147b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen<sentence> = <phrase> { BOUND(BOUND_PHR1|2|3,BOUNDtype) <phrase> } 148b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 149b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen<phrase> = WORDPHON(POS,ACC)phon { WORDPHON(POS,ACC)phon } 150b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 151b190149a69b110e6719ce0a41877a683f8db7ae7Charles ChenDone in later PU: mapping ACC & word-level stress to syllable accent value 152b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - ACC0 prim -> 0 153b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - ACC1 prim -> 1 154b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - ACC2 prim -> 2 155b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - ACC3 prim -> 3 156b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - ACC0 sec -> 0 157b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - ACC1 sec -> 4 158b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - ACC2 sec -> 4 159b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - ACC3 sec -> 4 160b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 161b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenother limitations 162b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen================= 163b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 164b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- item size: header plus len=256 (valid for Pico in general) 165b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- see defines below for max nr of items. Item heads plus ref. to contents 166b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen buffer are stored in array with fixed size elements. Two restrictions: 167b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - MAXNR_HEADX (max nr elements==items in headx array) 168b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - CONTENTSSIZE (max size of all contents together 169b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen */ 170b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 171b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 172b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#ifndef PICOACPH_H_ 173b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#define PICOACPH_H_ 174b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 175b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#include "picoos.h" 176b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#include "picodata.h" 177b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#include "picorsrc.h" 178b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 179b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#ifdef __cplusplus 180b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenextern "C" { 181b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif 182b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#if 0 183b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen} 184b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif 185b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 186b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* nr item restriction: maximum number of extended item heads in headx */ 187b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#define PICOACPH_MAXNR_HEADX 60 188b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 189b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* nr item restriction: maximum size of all item contents together in cont */ 190b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#define PICOACPH_MAXSIZE_CBUF 7680 191b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 192b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 193b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 194b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenpicodata_ProcessingUnit picoacph_newAccPhrUnit( 195b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_MemoryManager mm, 196b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_Common common, 197b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picodata_CharBuffer cbIn, 198b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picodata_CharBuffer cbOut, 199b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picorsrc_Voice voice); 200b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 201b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#ifdef __cplusplus 202b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen} 203b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif 204b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 205b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif /*PICOACPH_H_*/ 206