1b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* 2b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland 3b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * 4b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * Licensed under the Apache License, Version 2.0 (the "License"); 5b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * you may not use this file except in compliance with the License. 6b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * You may obtain a copy of the License at 7b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * 8b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * http://www.apache.org/licenses/LICENSE-2.0 9b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * 10b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * Unless required by applicable law or agreed to in writing, software 11b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * distributed under the License is distributed on an "AS IS" BASIS, 12b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * See the License for the specific language governing permissions and 14b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * limitations under the License. 15b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen */ 16b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/** 17b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * @file picotok.h 18b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * 19b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland 20b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * All rights reserved. 21b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * 22b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * History: 23b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * - 2009-04-20 -- initial version 24b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * 25b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen */ 26b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 27b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 28b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/** @addtogroup picotok 29b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenitemtype, iteminfo1, iteminfo2, content -> TYPE(INFO1,INFO2)content 30b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenin the following 31b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 32b190149a69b110e6719ce0a41877a683f8db7ae7Charles Cheninput 33b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen===== 34b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 35b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- UTF8 text 36b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 37b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenlimitations: currently only german umlauts in addition to ASCII 38b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 39b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 40b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenminimal input size (before processing starts) 41b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen================== 42b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 43b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenprocessing (ie. tokenization) starts when 44b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- 'PICO_EOF' char received (which happens whenever the cbIn buffer is empty) 45b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- tok-internal buffer is full 46b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 47b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 48b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenitems output 49b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen============ 50b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 51b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenprocessing the character stream can result in one of the 52b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenfollowing items: 53b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen-> WORDGRAPH(NA,NA)graph <- mapped to lower case; incl. 1-2 digit nrs (0-99) 54b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen-> OTHER(NA,NA)string <- skip or spell 55b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen-> PUNC(PUNCtype,PUNCsubtype) 56b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen-> CMD(CMDtype,CMDsubtype)args 57b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 58b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenwith 59b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- PUNCtype %d 60b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen PICODATA_ITEMINFO1_PUNC_SENTEND 61b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen PICODATA_ITEMINFO1_PUNC_PHRASEEND 62b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- PUNCsubtype %d 63b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen PICODATA_ITEMINFO2_PUNC_SENT_T 64b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen PICODATA_ITEMINFO2_PUNC_SENT_Q 65b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen PICODATA_ITEMINFO2_PUNC_SENT_E 66b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen PICODATA_ITEMINFO2_PUNC_PHRASE 67b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen (used later: PICODATA_ITEMINFO2_PUNC_PHRASE_FORCED) 68b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- CMDtype %d 69b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen PICODATA_ITEMINFO1_CMD_FLUSH (no args) 70b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen ? PICODATA_ITEMINFO1_CMD_PLAY ? (not yet) 71b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- CMDsubtype %d 72b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen PICODATA_ITEMINFO2_NA 73b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen ? PICODATA_ITEMINFO2_CMD_PLAY_G2P ? (not yet) 74b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- graph, len>0, utf8 graphemes, %s 75b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- string, len>0, can be any string with printable ascii characters, %s 76b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 77b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 78b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenother limitations 79b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen================= 80b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 81b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen- item size: header plus len=256 (valid for Pico in general) 82b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen */ 83b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 84b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 85b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#ifndef PICOTOK_H_ 86b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#define PICOTOK_H_ 87b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 88b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#include "picoos.h" 89b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#include "picodata.h" 90b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#include "picorsrc.h" 91b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 92b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#ifdef __cplusplus 93b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenextern "C" { 94b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif 95b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#if 0 96b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen} 97b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif 98b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 99b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 100b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 101b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenpicodata_ProcessingUnit picotok_newTokenizeUnit( 102b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_MemoryManager mm, 103b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_Common common, 104b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picodata_CharBuffer cbIn, 105b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picodata_CharBuffer cbOut, 106b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picorsrc_Voice voice); 107b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 108b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#define PICOTOK_OUTBUF_SIZE 256 109b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 110b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#ifdef __cplusplus 111b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen} 112b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif 113b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 114b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 115b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif /*PICOTOK_H_*/ 116