1/*
2 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16/**
17 * @file picowa.h
18 *
19 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
20 * All rights reserved.
21 *
22 * History:
23 * - 2009-04-20 -- initial version
24 *
25 */
26
27
28/**
29 * @addtogroup picowa
30 * ---------------------------------------------------\n
31 * <b> Pico Word Analysis </b>\n
32 * ---------------------------------------------------\n
33itemtype, iteminfo1, iteminfo2, content -> TYPE(INFO1,INFO2)content
34in the following
35
36items input\n
37===========
38
39processed by wa:
40- WORDGRAPH(NA,NA)graph
41- OTHER(NA,NA)string
42
43unprocessed:
44- all other item types are forwarded through the PU without modification:
45  - PUNC
46  - CMD
47
48
49minimal input size (before processing starts)\n
50==================
51
52processing (i.e. lex lookup and POS prediction) is possible with
53- one item
54
55
56items processed and output\n
57==========================
58
59processing an input WORDGRAPH results in one of the following items:
60- WORDGRAPH(POSes,NA)graph
61   - graph not in lex, POSes determined with dtree, or
62   - graph in lex - single entry without phone (:G2P), POSes from lex
63- WORDINDEX(POSes,NA)pos1|ind1...posN|indN
64   - graph in lex - {1,4} entries with phone, pos1...posN from lex,
65     {1,4} lexentries indices in content, POSes combined with map table
66     in klex
67
68processing an input OTHER results in the item being skipped (in the
69future this can be extended to e.g. spelling)
70
71see picotok.h for PUNC and CMD
72
73- POSes %d
74  - is the superset of all single POS and POS combinations defined
75  in the lingware as unique symbol
76- graph, len>0, utf8 graphemes, %s
77- pos1|ind1, pos2|ind2, ..., posN|indN
78  - pos? are the single, unambiguous POS only, one byte %d
79  - ind? are the lexentry indices, three bytes %d %d %d
80
81
82lexicon (system lexicon, but must also be ensured for user lexica)\n
83=======
84
85- POS GRAPH PHON, all mandatory, but
86  - * PHON can be an empty string -> no pronunciation in the resulting TTS output
87  - * PHON can be :G2P -> use G2P later to add pronunciation
88- (POS,GRAPH) is a unique key (only one entry allowed)
89- (GRAPH) is almost a unique key (2-4 entries with the same GRAPH, and
90  differing POS and differing PHON possible)
91  - for one graph we can have 2-4 solutions from the lex which all
92     need to be passed on to the next PU
93  - in this case GRAPH, POS, and PHON all must be available in lex
94  - in this case for each entry only a non-ambiguous, unique POS ID
95     is possible
96
97other limitations\n
98=================
99
100- item size: header plus len=256 (valid for Pico in general)
101- wa uses one item context only -> internal buffer set to 256+4
102 */
103
104
105#ifndef PICOWA_H_
106#define PICOWA_H_
107
108#include "picoos.h"
109#include "picodata.h"
110#include "picorsrc.h"
111
/* Give the declarations in this header C linkage when included from C++.
 * The matching closing brace is inside the #ifdef __cplusplus block near
 * the end of this header. */
112#ifdef __cplusplus
113extern "C" {
114#endif
/* The block below is never compiled (#if 0).
 * NOTE(review): it looks like it exists only so that editors see a closing
 * brace balancing the extern "C" opener above (avoiding auto-indentation of
 * the rest of the file) -- confirm before removing. */
115#if 0
116}
117#endif
118
119
120/* maximum length of an item incl. head for input and output buffers;
 * per the "other limitations" notes in the file header this is the item
 * content length of 256 plus 4 (presumably the size of the item head --
 * confirm against picodata.h) */
121#define PICOWA_MAXITEMSIZE 260
122
123
/**
 * Entry point of the word analysis (wa) processing unit declared by this
 * header.
 *
 * NOTE(review): judging by its name this creates a new word analysis
 * processing unit; the definition is not visible in this header, so
 * allocation, ownership and failure behavior (e.g. a NULL return) must be
 * confirmed against the implementation.
 *
 * @param mm      memory manager (presumably used to allocate the unit --
 *                confirm)
 * @param common  picoos common object (its role is not visible from this
 *                header)
 * @param cbIn    input character buffer (presumably the source of the items
 *                listed under "items input" in the file header -- confirm)
 * @param cbOut   output character buffer (presumably the destination of the
 *                processed and forwarded items -- confirm)
 * @param voice   voice resource (presumably provides the lexicon and POS
 *                prediction knowledge mentioned in the file header --
 *                confirm)
 * @return a picodata_ProcessingUnit
 */
124picodata_ProcessingUnit picowa_newWordAnaUnit(
125        picoos_MemoryManager mm,
126    picoos_Common common,
127        picodata_CharBuffer cbIn,
128        picodata_CharBuffer cbOut,
129        picorsrc_Voice voice);
130
131#ifdef __cplusplus
132}
133#endif
134
135#endif /*PICOWA_H_*/
136