/*
 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * @file picowa.h
 *
 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
 * All rights reserved.
 *
 * History:
 * - 2009-04-20 -- initial version
 *
 */


/**
 * @addtogroup picowa
 * ---------------------------------------------------\n
 * <b> Pico Word Analysis </b>\n
 * ---------------------------------------------------\n
itemtype, iteminfo1, iteminfo2, content -> TYPE(INFO1,INFO2)content
in the following

items input\n
===========

processed by wa:
- WORDGRAPH(NA,NA)graph
- OTHER(NA,NA)string

unprocessed:
- all other item types are forwarded through the PU without modification:
  - PUNC
  - CMD


minimal input size (before processing starts)\n
==================

processing (i.e. lex lookup and POS prediction) is possible with
- one item


items processed and output\n
==========================

processing an input WORDGRAPH results in one of the following items:
- WORDGRAPH(POSes,NA)graph
   - graph not in lex, POSes determined with dtree, or
   - graph in lex - single entry without phone (:G2P), POSes from lex
- WORDINDEX(POSes,NA)pos1|ind1...posN|indN
   - graph in lex - {1,4} entries with phone, pos1...posN from lex,
     {1,4} lexentries indices in content, POSes combined with map table
     in klex

processing an input OTHER results in the item being skipped (in the
future this can be extended to e.g. spelling)

see picotok.h for PUNC and CMD

- POSes %d
  - is the superset of all single POS and POS combinations defined
    in the lingware as unique symbol
- graph, len>0, utf8 graphemes, %s
- pos1|ind1, pos2|ind2, ..., posN|indN
  - pos? are the single, unambiguous POS only, one byte %d
  - ind? are the lexentry indices, three bytes %d %d %d


lexicon (system lexicon, but must also be ensured for user lexica)\n
=======

- POS GRAPH PHON, all mandatory, but
  - * PHON can be an empty string -> no pronunciation in the resulting TTS output
  - * PHON can be :G2P -> use G2P later to add pronunciation
- (POS,GRAPH) is a unique key (only one entry allowed)
- (GRAPH) is almost a unique key (2-4 entries with the same GRAPH, and
  differing POS and differing PHON possible)
  - for one graph we can have 2-4 solutions from the lex which all
    need to be passed on to the next PU
  - in this case GRAPH, POS, and PHON all must be available in lex
  - in this case for each entry only a non-ambiguous, unique POS ID
    is possible)

other limitations\n
=================

- item size: header plus len=256 (valid for Pico in general)
- wa uses one item context only -> internal buffer set to 256+4
 */


#ifndef PICOWA_H_
#define PICOWA_H_

#include "picoos.h"
#include "picodata.h"
#include "picorsrc.h"

#ifdef __cplusplus
extern "C" {
#endif
/* The "#if 0" stanza below is never compiled. NOTE(review): it presumably
 * exists only to balance the brace opened by extern "C" for editors that
 * auto-indent on braces -- confirm before removing. */
#if 0
}
#endif


/* maximum length of an item incl. head for input and output buffers
 * (260 = 256 + 4, matching the "256+4" internal buffer size stated under
 * "other limitations" in the file comment above) */
#define PICOWA_MAXITEMSIZE 260


/**
 * Creates a new word analysis (wa) processing unit.
 *
 * As described in the file comment above, the wa unit processes WORDGRAPH
 * and OTHER items (lex lookup and POS prediction) and forwards all other
 * item types unmodified.
 *
 * NOTE(review): the parameter roles below are inferred from the parameter
 * names and types only; the implementation is not visible from this
 * header -- confirm against the corresponding source file.
 *
 * @param mm     memory manager, presumably used to allocate the unit
 * @param common common object, presumably shared Pico state
 * @param cbIn   char buffer, presumably the source of input items
 * @param cbOut  char buffer, presumably the destination of output items
 * @param voice  voice, presumably the source of the lingware resources
 *               (lexica, dtree) used by the unit
 * @return the new processing unit; the value returned on failure is not
 *         visible from this header
 */
picodata_ProcessingUnit picowa_newWordAnaUnit(
        picoos_MemoryManager mm,
        picoos_Common common,
        picodata_CharBuffer cbIn,
        picodata_CharBuffer cbOut,
        picorsrc_Voice voice);

#ifdef __cplusplus
}
#endif

#endif /*PICOWA_H_*/