1b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/*
2b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *
4b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * Licensed under the Apache License, Version 2.0 (the "License");
5b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * you may not use this file except in compliance with the License.
6b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * You may obtain a copy of the License at
7b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *
8b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *     http://www.apache.org/licenses/LICENSE-2.0
9b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *
10b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * Unless required by applicable law or agreed to in writing, software
11b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * distributed under the License is distributed on an "AS IS" BASIS,
12b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * See the License for the specific language governing permissions and
14b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * limitations under the License.
15b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen */
16b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/**
17b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * @file picotok.h
18b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *
19b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
20b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * All rights reserved.
21b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *
22b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * History:
23b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * - 2009-04-20 -- initial version
24b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *
25b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen */
26b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
27b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
28b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/** @addtogroup picotok
29b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenitemtype, iteminfo1, iteminfo2, content -> TYPE(INFO1,INFO2)content
30b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenin the following
31b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
32b190149a69b110e6719ce0a41877a683f8db7ae7Charles Cheninput
33b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen=====
34b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
35b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- UTF8 text
36b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
37b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenlimitations: currently only German umlauts in addition to ASCII
38b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
39b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
40b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenminimal input size (before processing starts)
41b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen==================
42b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
43b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenprocessing (i.e. tokenization) starts when
44b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- 'PICO_EOF' char received (which happens whenever the cbIn buffer is empty)
45b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- tok-internal buffer is full
46b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
47b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
48b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenitems output
49b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen============
50b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
51b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenprocessing the character stream can result in one of the
52b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenfollowing items:
53b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen-> WORDGRAPH(NA,NA)graph    <- mapped to lower case; incl. 1-2 digit nrs (0-99)
54b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen-> OTHER(NA,NA)string       <- skip or spell
55b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen-> PUNC(PUNCtype,PUNCsubtype)
56b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen-> CMD(CMDtype,CMDsubtype)args
57b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
58b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenwith
59b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- PUNCtype %d
60b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    PICODATA_ITEMINFO1_PUNC_SENTEND
61b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    PICODATA_ITEMINFO1_PUNC_PHRASEEND
62b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- PUNCsubtype %d
63b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    PICODATA_ITEMINFO2_PUNC_SENT_T
64b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    PICODATA_ITEMINFO2_PUNC_SENT_Q
65b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    PICODATA_ITEMINFO2_PUNC_SENT_E
66b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    PICODATA_ITEMINFO2_PUNC_PHRASE
67b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    (used later: PICODATA_ITEMINFO2_PUNC_PHRASE_FORCED)
68b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- CMDtype %d
69b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    PICODATA_ITEMINFO1_CMD_FLUSH    (no args)
70b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    ? PICODATA_ITEMINFO1_CMD_PLAY ? (not yet)
71b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- CMDsubtype %d
72b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    PICODATA_ITEMINFO2_NA
73b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    ? PICODATA_ITEMINFO2_CMD_PLAY_G2P ? (not yet)
74b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- graph, len>0, utf8 graphemes, %s
75b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- string, len>0, can be any string with printable ascii characters, %s
76b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
77b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
78b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenother limitations
79b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen=================
80b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
81b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- item size: header plus len=256 (valid for Pico in general)
82b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen */
83b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
84b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
85b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#ifndef PICOTOK_H_
86b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#define PICOTOK_H_
87b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
88b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#include "picoos.h"
89b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#include "picodata.h"
90b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#include "picorsrc.h"
91b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
92b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#ifdef __cplusplus
93b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenextern "C" {
94b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif
95b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#if 0
96b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
97b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif
98b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
99b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
100b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
/**
 * Create a tokenizer processing unit.
 *
 * Per the module description in this header, the tokenizer takes UTF8 text
 * as input (cbIn is the input buffer) and turns the character stream into
 * WORDGRAPH, OTHER, PUNC and CMD items.
 *
 * @param mm      memory manager
 *                (NOTE(review): presumably used to allocate the unit --
 *                confirm against the implementation)
 * @param common  shared picoos_Common object
 *                (NOTE(review): usage not visible in this header)
 * @param cbIn    character buffer the tokenizer reads its input from
 * @param cbOut   character buffer
 *                (NOTE(review): presumably receives the output items --
 *                confirm against the implementation)
 * @param voice   voice resource
 *                (NOTE(review): usage not visible in this header)
 * @return a picodata_ProcessingUnit
 *         (NOTE(review): the value returned on failure is not visible in
 *         this header -- confirm against the implementation)
 */
101b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenpicodata_ProcessingUnit picotok_newTokenizeUnit(
102b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        picoos_MemoryManager mm,
103b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        picoos_Common common,
104b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        picodata_CharBuffer cbIn,
105b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        picodata_CharBuffer cbOut,
106b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        picorsrc_Voice voice);
107b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
/* NOTE(review): presumably the size of the tokenizer's output buffer. The
 * module description in this header gives the item size limit as
 * "header plus len=256"; confirm the unit and the exact usage against the
 * implementation. */
108b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#define PICOTOK_OUTBUF_SIZE 256
109b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
110b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#ifdef __cplusplus
111b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
112b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif
113b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
114b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
115b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif /*PICOTOK_H_*/
116