1/*
2 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16/**
17 * @file picotok.c
18 *
19 * tokenizer
20 *
21 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
22 * All rights reserved.
23 *
24 * History:
25 * - 2009-04-20 -- initial version
26 *
27 */
28
29
30/* ************************************************************/
31/* tokenisation and markup handling */
32/* ************************************************************/
33
34/** @addtogroup picotok
35  @b tokenisation_overview
36
37  markup handling overview:
38
39  The following markups are recognized
40     - ignore
41     - speed
42     - pitch
43     - volume
44     - voice
45     - preproccontext
46     - mark
47     - play
48     - usesig
49     - genfile
50     - sentence
51     - s
52     - paragraph
53     - p
54     - break
     - spell            (pauses between letters)
56     - phoneme
57
58  All markups which are recognized but are not yet implemented in pico
59  system have the mark.
60*/
61
62
63#include "picodefs.h"
64#include "picoos.h"
65#include "picobase.h"
66#include "picodbg.h"
67#include "picodata.h"
68#include "picotok.h"
69#include "picoktab.h"
70
71#ifdef __cplusplus
72extern "C" {
73#endif
74#if 0
75}
76#endif
77
78/* *****************************************************************************/
79
/* Size of the tokenizer's text buffers (token string, tag name, file name). */
#define IN_BUF_SIZE   255
/* Size of the internal output buffer. The replacement list is parenthesized
   so that the macro keeps its value when used inside a larger expression
   (e.g. 2 * OUT_BUF_SIZE). */
#define OUT_BUF_SIZE  (IN_BUF_SIZE + 3 * PICODATA_ITEM_HEADSIZE + 3)

/* Size of a 'Word' buffer (markup string, attribute name or value). */
#define MARKUP_STRING_BUF_SIZE (IN_BUF_SIZE*5)
/* Number of attribute slots kept per markup tag. */
#define MAX_NR_MARKUP_PARAMS 6
#define MARKUP_HANDLING_DISABLED  0
#define MARKUP_HANDLING_ENABLED 1
#define EOL '\n'
88
89
/* Token classification stored in the tokenizer state (tokenType and
   tokenSubType members of tok_subobj_t). */
typedef picoos_int8 pico_tokenSubType;
typedef picoos_uint8 pico_tokenType;

/** @todo : consider adding these specialized exception codes: */

/* Until dedicated codes exist, all three tokenizer-specific error conditions
   are reported with the generic PICO_ERR_OTHER code. */
#define PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE PICO_ERR_OTHER
#define PICO_ERR_INVALID_MARKUP_TAG        PICO_ERR_OTHER
#define PICO_ERR_INTERNAL_LIMIT            PICO_ERR_OTHER
98
/* Identifier of a recognized markup tag. MIDummyStart and MIDummyEnd bracket
   the valid ids; tok_markupTagId returns MIDummyEnd for an unknown tag name
   and the markupLevel array of tok_subobj_t is sized MIDummyEnd+1. */
typedef enum {MIDummyStart, MIIgnore,
              MIPitch, MISpeed, MIVolume,
              MIVoice, MIPreprocContext, MIMarker,
              MIPlay, MIUseSig, MIGenFile, MIParagraph,
              MISentence, MIBreak, MISpell, MIPhoneme, MIItem, MISpeaker, MIDummyEnd
             }  MarkupId;
/* State of the markup parser (kept in tok_subobj_t.markupState).
   NOTE(review): judging by the names, the states follow the parts of a tag
   (start, tag name, attribute name, '=', attribute value, end) plus error
   states -- confirm against the parser code. */
typedef enum {MSNotInMarkup, MSGotStart, MSExpectingmarkupTagName, MSInmarkupTagName,
              MSGotmarkupTagName, MSInAttrName, MSGotAttrName, MSGotEqual, MSInAttrValue,
              MSInAttrValueEscaped, MSGotAttrValue, MSGotEndSlash, MSGotEnd,
              MSError, MSErrorTooLong, MSErrorSyntax
             }  MarkupState;
/* Kind of error found while parsing a tag (kept in tok_subobj_t.markupTagErr);
   MENone means no error. */
typedef enum {MENone, MEMissingStart, MEUnknownTag, MEIdent, MEMissingEqual,
              MEMissingQuote, MEMissingEnd, MEUnexpectedChar, MEInterprete
             }  MarkupParseError;

/* Form of a parsed tag (kept in tok_subobj_t.markupTagType).
   NOTE(review): presumably start tag, end tag or empty-element tag -- confirm. */
typedef enum {MTNone, MTStart, MTEnd, MTEmpty} MarkupTagType;
115
/* Result codes of tok_putToUtf. */
#define UTF_CHAR_COMPLETE   2
#define UTF_CHAR_INCOMPLETE 1
#define UTF_CHAR_MALFORMED  0

/* Tag names recognized by tok_markupTagId (an optional "svox:" prefix is
   stripped before the comparison). "sentence"/"s" and "paragraph"/"p" are
   synonyms that map to the same markup id. */
#define TOK_MARKUP_KW_IGNORE     (picoos_uchar*)"ignore"
#define TOK_MARKUP_KW_SPEED      (picoos_uchar*)"speed"
#define TOK_MARKUP_KW_PITCH      (picoos_uchar*)"pitch"
#define TOK_MARKUP_KW_VOLUME     (picoos_uchar*)"volume"
#define TOK_MARKUP_KW_VOICE      (picoos_uchar*)"voice"
#define TOK_MARKUP_KW_CONTEXT    (picoos_uchar*)"preproccontext"
#define TOK_MARKUP_KW_MARK       (picoos_uchar*)"mark"
#define TOK_MARKUP_KW_PLAY       (picoos_uchar*)"play"
#define TOK_MARKUP_KW_USESIG     (picoos_uchar*)"usesig"
#define TOK_MARKUP_KW_GENFILE    (picoos_uchar*)"genfile"
#define TOK_MARKUP_KW_SENTENCE   (picoos_uchar*)"sentence"
#define TOK_MARKUP_KW_S          (picoos_uchar*)"s"
#define TOK_MARKUP_KW_PARAGRAPH  (picoos_uchar*)"paragraph"
#define TOK_MARKUP_KW_P          (picoos_uchar*)"p"
#define TOK_MARKUP_KW_BREAK      (picoos_uchar*)"break"
#define TOK_MARKUP_KW_SPELL      (picoos_uchar*)"spell"
#define TOK_MARKUP_KW_PHONEME    (picoos_uchar*)"phoneme"
#define TOK_MARKUP_KW_ITEM       (picoos_uchar*)"item"
#define TOK_MARKUP_KW_SPEAKER    (picoos_uchar*)"speaker"

/* Attribute names looked up in the parsed markup parameters. */
#define KWLevel (picoos_uchar *)"level"
#define KWName (picoos_uchar *)"name"
#define KWProsDomain (picoos_uchar *)"prosodydomain"
#define KWTime (picoos_uchar *)"time"
#define KWMode (picoos_uchar *)"mode"
#define KWSB (picoos_uchar *)"sb"
#define KWPB (picoos_uchar *)"pb"
#define KWFile (picoos_uchar *)"file"
#define KWType (picoos_uchar *)"type"
#define KWF0Beg (picoos_uchar *)"f0beg"
#define KWF0End (picoos_uchar *)"f0end"
#define KWXFadeBeg (picoos_uchar *)"xfadebeg"
#define KWXFadeEnd (picoos_uchar *)"xfadeend"
#define KWAlphabet (picoos_uchar *)"alphabet"
#define KWPH (picoos_uchar *)"ph"
#define KWOrthMode (picoos_uchar *)"orthmode"
#define KWIgnorePunct (picoos_uchar *)"ignorepunct"
#define KWInfo1 (picoos_uchar *)"info1"
#define KWInfo2 (picoos_uchar *)"info2"
#define KWDATA (picoos_uchar *)"data"
160
/* Limits and defaults for the prosody markups.
   - *_MIN / *_MAX bound an absolute "level" value; tok_interpretMarkup passes
     them to tok_checkLimits, which clamps out-of-range values.
   - *_FACTOR_MIN / *_FACTOR_MAX bound a relative "level" value ("NN%").
     tok_isRelative scales the percentage by 10, so 1000 corresponds to 100%.
   - *_DEFAULT is the absolute value sent when the corresponding end tag is
     interpreted. */
#define PICO_SPEED_MIN           20
#define PICO_SPEED_MAX          500
#define PICO_SPEED_DEFAULT      100
#define PICO_SPEED_FACTOR_MIN   500
#define PICO_SPEED_FACTOR_MAX  2000

#define PICO_PITCH_MIN           50
#define PICO_PITCH_MAX          200
#define PICO_PITCH_DEFAULT      100
#define PICO_PITCH_FACTOR_MIN   500
#define PICO_PITCH_FACTOR_MAX  2000
#define PICO_PITCH_ADD_MIN     -100
#define PICO_PITCH_ADD_MAX      100
#define PICO_PITCH_ADD_DEFAULT    0

#define PICO_VOLUME_MIN           0
#define PICO_VOLUME_MAX         500
#define PICO_VOLUME_DEFAULT     100
#define PICO_VOLUME_FACTOR_MIN  500
#define PICO_VOLUME_FACTOR_MAX 2000

#define PICO_SPEAKER_MIN          20
#define PICO_SPEAKER_MAX         180
#define PICO_SPEAKER_DEFAULT     100
#define PICO_SPEAKER_FACTOR_MIN  500
#define PICO_SPEAKER_FACTOR_MAX 2000

/* Context name sent when a preproccontext end tag is interpreted. */
#define PICO_CONTEXT_DEFAULT   (picoos_uchar*)"DEFAULT"

/* Value of the silence command emitted at a paragraph end tag.
   NOTE(review): presumably milliseconds, like the durations produced by
   tok_getDur -- confirm. */
#define PARAGRAPH_PAUSE_DUR 500
#define SPELL_WITH_PHRASE_BREAK  1
#define SPELL_WITH_SENTENCE_BREAK  2
193
194/* *****************************************************************************/
195
#define TOK_PUNC_FLUSH  (picoos_char) '\0'

/* Fixed-size string buffer of MARKUP_STRING_BUF_SIZE bytes. */
typedef picoos_uchar Word[MARKUP_STRING_BUF_SIZE];


/* One markup attribute: its name (paramId) and its value (paramVal), both
   stored as NUL-terminated strings. An empty paramId marks a cleared slot
   (see tok_clearMarkupParams). */
struct MarkupParam {
    Word paramId;
    Word paramVal;
};

/* Attribute table of one markup tag, MAX_NR_MARKUP_PARAMS slots. */
typedef struct MarkupParam MarkupParams[MAX_NR_MARKUP_PARAMS];

typedef picoos_uchar utf8char0c[5]; /* one more than needed so it is ended always with 0c*/
209
/** subobject : TokenizeUnit
 *  shortcut  : tok
 *
 *  Working state of one tokenizer instance: ignore nesting, the UTF-8
 *  character being assembled, markup parser state, the current token,
 *  the internal output buffer and the knowledge bases in use.
 */
typedef struct tok_subobj
{
    /* nesting depth of ignore sections: raised by tok_startIgnore, lowered
       (never below 0) by tok_endIgnore; while > 0, tok_putItem emits only
       flush commands */
    picoos_int32 ignLevel;

    /* UTF-8 character currently being assembled by tok_putToUtf */
    utf8char0c   utf;
    picoos_int32 utfpos;  /* number of bytes stored in utf so far */
    picoos_int32 utflen;  /* expected byte count of the character; set to 0
                             by tok_putToUtf on an invalid follow-up byte */

    /* attributes of the markup tag being processed */
    MarkupParams markupParams;
    picoos_int32 nrMarkupParams;
    MarkupState markupState;
    picoos_uchar markupStr[MARKUP_STRING_BUF_SIZE];
    picoos_int32 markupPos;
    picoos_int32 markupLevel[MIDummyEnd+1];  /* one entry per MarkupId */
    picoos_uchar markupTagName[IN_BUF_SIZE];
    MarkupTagType markupTagType;
    MarkupParseError markupTagErr;

    picoos_int32 strPos;
    picoos_uchar strDelim;
    picoos_bool isFileAttr;

    pico_tokenType tokenType;
    pico_tokenSubType tokenSubType;

    picoos_int32 tokenPos;
    picoos_uchar tokenStr[IN_BUF_SIZE];

    picoos_int32 nrEOL;

    picoos_bool markupHandlingMode;       /* to be moved ??? */
    picoos_bool aborted;                  /* to be moved ??? */

    picoos_bool start;

    picoos_uint8 outBuf[OUT_BUF_SIZE]; /* internal output buffer */
    picoos_uint16 outReadPos; /* next pos to read from outBuf */
    picoos_uint16 outWritePos; /* next pos to write to outBuf */

    picoos_uchar saveFile[IN_BUF_SIZE];
    Word phonemes;

    picotrns_SimpleTransducer transducer;

    /* kbs */

    picoktab_Graphs graphTab;
    picokfst_FST xsampa_parser;
    picokfst_FST svoxpa_parser;
    picokfst_FST xsampa2svoxpa_mapper;



} tok_subobj_t;
267
268/* *****************************************************************************/
269
/* Forward declarations of file-local functions that are referenced before
   their definitions. */
static void tok_treatMarkupAsSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok);
static void tok_treatChar (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar ch, picoos_bool markupHandling);
static void tok_treatMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok);
static void tok_putToMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar str[]);
static void tok_treatSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok);
static MarkupId tok_markupTagId (picoos_uchar tagId[]);
276
277/* *****************************************************************************/
278
279static picoos_bool tok_strEqual(picoos_uchar * str1, picoos_uchar * str2)
280{
281   return (picoos_strcmp((picoos_char*)str1, (picoos_char*)str2) == 0);
282}
283
284static void tok_reduceBlanks(picoos_uchar * str)
285            /* Remove leading and trailing blanks of 'str' and reduce
286               groups of blanks within string to exactly one blank. */
287
288{
289    int i = 0;
290    int j = 0;
291
292     while (str[j] != 0) {
293        if (str[j] == (picoos_uchar)' ') {
294            /* note one blank except at the beginning of string */
295            if (i > 0) {
296                str[i] = (picoos_uchar)' ';
297                i++;
298            }
299            j++;
300            while (str[j] == (picoos_uchar)' ') {
301                j++;
302            }
303        } else {
304            str[i] = str[j];
305            j++;
306            i++;
307        }
308    }
309
310    /* remove blanks at end of string */
311    if ((i > 0) && (str[i - 1] == ' ')) {
312        i--;
313    }
314    str[i] = 0;
315}
316
317
318static void tok_startIgnore (tok_subobj_t * tok)
319{
320    tok->ignLevel++;
321}
322
323
324static void tok_endIgnore (tok_subobj_t * tok)
325{
326    if (tok->ignLevel > 0) {
327        tok->ignLevel--;
328    }
329}
330
331
332static void tok_getParamIntVal (MarkupParams params, picoos_uchar paramId[], picoos_int32 * paramVal, picoos_bool * paramFound)
333{
334    int i=0;
335
336    while ((i < MAX_NR_MARKUP_PARAMS) && !tok_strEqual(paramId,params[i].paramId)) {
337        i++;
338    }
339    if ((i < MAX_NR_MARKUP_PARAMS)) {
340        (*paramVal) = picoos_atoi((picoos_char*)params[i].paramVal);
341        (*paramFound) = TRUE;
342    } else {
343        (*paramVal) =  -1;
344        (*paramFound) = FALSE;
345    }
346}
347
348
349
350static void tok_getParamStrVal (MarkupParams params, picoos_uchar paramId[], picoos_uchar paramStrVal[], picoos_bool * paramFound)
351{
352    int i=0;
353
354    while ((i < MAX_NR_MARKUP_PARAMS) &&  !tok_strEqual(paramId,params[i].paramId)) {
355        i++;
356    }
357    if (i < MAX_NR_MARKUP_PARAMS) {
358        picoos_strcpy((picoos_char*)paramStrVal, (picoos_char*)params[i].paramVal);
359        (*paramFound) = TRUE;
360    } else {
361        paramStrVal[0] = 0;
362        (*paramFound) = FALSE;
363    }
364}
365
366
367static void tok_getParamPhonesStr (MarkupParams params, picoos_uchar paramId[], picoos_uchar alphabet[], picoos_uchar phones[], picoos_int32 phoneslen, picoos_bool * paramFound)
368{
369
370    int i;
371    picoos_bool done;
372
373    i = 0;
374    while ((i < MAX_NR_MARKUP_PARAMS) &&  !tok_strEqual(paramId, params[i].paramId)) {
375        i++;
376    }
377    if (i < MAX_NR_MARKUP_PARAMS) {
378        if (tok_strEqual(alphabet, PICODATA_XSAMPA) || tok_strEqual(alphabet, (picoos_uchar*)"")) {
379            picoos_strlcpy((picoos_char*)phones, (picoos_char*)params[i].paramVal, phoneslen);
380            done = TRUE;
381        } else {
382            done = FALSE;
383        }
384        (*paramFound) = TRUE;
385    } else {
386        done = FALSE;
387        (*paramFound) = FALSE;
388    }
389    if (!done) {
390        phones[0] = 0;
391    }
392}
393
394
395static void tok_clearMarkupParams (MarkupParams params)
396{
397    int i;
398
399    for (i = 0; i<MAX_NR_MARKUP_PARAMS; i++) {
400        params[i].paramId[0] = 0;
401        params[i].paramVal[0] = 0;
402    }
403}
404
405
406static void tok_getDur (picoos_uchar durStr[], picoos_uint32 * dur, picoos_bool * done)
407{
408
409    int num=0;
410    int i=0;
411    picoos_uchar tmpWord[IN_BUF_SIZE];
412
413    picoos_strlcpy((picoos_char*)tmpWord, (picoos_char*)durStr, sizeof(tmpWord));
414    tok_reduceBlanks(tmpWord);
415    while ((durStr[i] >= '0') && (durStr[i] <= '9')) {
416        num = 10 * num + (int)durStr[i] - (int)'0';
417        tmpWord[i] = ' ';
418        i++;
419    }
420    tok_reduceBlanks(tmpWord);
421    if (tok_strEqual(tmpWord, (picoos_uchar*)"s")) {
422        (*dur) = (1000 * num);
423        (*done) = TRUE;
424    } else if (tok_strEqual(tmpWord,(picoos_uchar*)"ms")) {
425        (*dur) = num;
426        (*done) = TRUE;
427    } else {
428        (*dur) = 0;
429        (*done) = FALSE;
430    }
431}
432
433
434static picoos_int32 tok_putToUtf (tok_subobj_t * tok, picoos_uchar ch)
435{
436    if (tok->utfpos < PICOBASE_UTF8_MAXLEN) {
437        tok->utf[tok->utfpos] = ch;
438        if (tok->utfpos == 0) {
439            tok->utflen = picobase_det_utf8_length(ch);
440        } else if (((ch < (picoos_uchar)'\200') || (ch >= (picoos_uchar)'\300'))) {
441            tok->utflen = 0;
442        }
443        (tok->utfpos)++;
444        if ((tok->utfpos == tok->utflen)) {
445            if ((tok->utfpos < PICOBASE_UTF8_MAXLEN)) {
446                tok->utf[tok->utfpos] = 0;
447            }
448            return UTF_CHAR_COMPLETE;
449        } else if (tok->utfpos < tok->utflen) {
450            return UTF_CHAR_INCOMPLETE;
451        } else {
452            return UTF_CHAR_MALFORMED;
453        }
454    } else {
455        return UTF_CHAR_MALFORMED;
456    }
457}
458
459
460static picoos_bool tok_isRelative (picoos_uchar strval[], picoos_uint32 * val)
461{
462    picoos_int32 len;
463    picoos_bool rel;
464
465    rel = FALSE;
466    len = picoos_strlen((picoos_char*)strval);
467    if (len > 0) {
468        if (strval[len - 1] == '%') {
469            strval[len - 1] = 0;
470            if ((strval[0] == '+') || (strval[0] == '-')) {
471                (*val) = 1000 + (picoos_atoi((picoos_char*)strval) * 10);
472            } else {
473                (*val) = picoos_atoi((picoos_char*)strval) * 10;
474            }
475            rel = TRUE;
476        }
477    }
478    return rel;
479}
480
481
482static void tok_putItem (picodata_ProcessingUnit this,  tok_subobj_t * tok,
483                         picoos_uint8 itemType, picoos_uint8 info1, picoos_uint8 info2,
484                         picoos_uint16 val,
485                         picoos_uchar str[])
486{
487    picoos_int32 len, i;
488
489    if ((itemType == PICODATA_ITEM_CMD) && (info1 == PICODATA_ITEMINFO1_CMD_FLUSH)) {
490        tok->outBuf[tok->outWritePos++] = itemType;
491        tok->outBuf[tok->outWritePos++] = info1;
492        tok->outBuf[tok->outWritePos++] = info2;
493        tok->outBuf[tok->outWritePos++] = 0;
494    }
495    else if (tok->ignLevel <= 0) {
496        switch (itemType) {
497        case PICODATA_ITEM_CMD:
498            switch (info1) {
499            case PICODATA_ITEMINFO1_CMD_CONTEXT:
500            case PICODATA_ITEMINFO1_CMD_VOICE:
501            case PICODATA_ITEMINFO1_CMD_MARKER:
502            case PICODATA_ITEMINFO1_CMD_PLAY:
503            case PICODATA_ITEMINFO1_CMD_SAVE:
504            case PICODATA_ITEMINFO1_CMD_UNSAVE:
505            case PICODATA_ITEMINFO1_CMD_PROSDOMAIN:
506            case PICODATA_ITEMINFO1_CMD_PHONEME:
507                len = picoos_strlen((picoos_char*)str);
508                if (tok->outWritePos + 4 + len < OUT_BUF_SIZE) {
509                    tok->outBuf[tok->outWritePos++] = itemType;
510                    tok->outBuf[tok->outWritePos++] = info1;
511                    tok->outBuf[tok->outWritePos++] = info2;
512                    tok->outBuf[tok->outWritePos++] = len;
513                    for (i=0; i<len; i++) {
514                        tok->outBuf[tok->outWritePos++] = str[i];
515                    }
516                }
517                else {
518                    PICODBG_WARN(("tok_putItem: output buffer too small"));
519                }
520                break;
521            case PICODATA_ITEMINFO1_CMD_IGNSIG:
522            case PICODATA_ITEMINFO1_CMD_IGNORE:
523                if (tok->outWritePos + 4 < OUT_BUF_SIZE) {
524                    tok->outBuf[tok->outWritePos++] = itemType;
525                    tok->outBuf[tok->outWritePos++] = info1;
526                    tok->outBuf[tok->outWritePos++] = info2;
527                    tok->outBuf[tok->outWritePos++] = 0;
528                }
529                else {
530                    PICODBG_WARN(("tok_putItem: output buffer too small"));
531                }
532                break;
533            case PICODATA_ITEMINFO1_CMD_SPEED:
534            case PICODATA_ITEMINFO1_CMD_PITCH:
535            case PICODATA_ITEMINFO1_CMD_VOLUME:
536            case PICODATA_ITEMINFO1_CMD_SPELL:
537            case PICODATA_ITEMINFO1_CMD_SIL:
538            case PICODATA_ITEMINFO1_CMD_SPEAKER:
539                if (tok->outWritePos + 4 + 2 < OUT_BUF_SIZE) {
540                    tok->outBuf[tok->outWritePos++] = itemType;
541                    tok->outBuf[tok->outWritePos++] = info1;
542                    tok->outBuf[tok->outWritePos++] = info2;
543                    tok->outBuf[tok->outWritePos++] = 2;
544                    tok->outBuf[tok->outWritePos++] = val % 256;
545                    tok->outBuf[tok->outWritePos++] = val / 256;
546                }
547                else {
548                    PICODBG_WARN(("tok_putItem: output buffer too small"));
549                }
550                break;
551            default:
552                PICODBG_WARN(("tok_putItem: unknown command type"));
553            }
554            break;
555        case PICODATA_ITEM_TOKEN:
556            len = picoos_strlen((picoos_char*)str);
557            if (tok->outWritePos + 4 + len < OUT_BUF_SIZE) {
558                tok->outBuf[tok->outWritePos++] = itemType;
559                tok->outBuf[tok->outWritePos++] = info1;
560                tok->outBuf[tok->outWritePos++] = info2;
561                tok->outBuf[tok->outWritePos++] = len;
562                for (i=0; i<len; i++) {
563                    tok->outBuf[tok->outWritePos++] = str[i];
564                }
565            }
566            else {
567                PICODBG_WARN(("tok_putItem: output buffer too small"));
568            }
569            break;
570        default:
571            PICODBG_WARN(("tok_putItem: unknown item type"));
572        }
573    }
574}
575
576
577static void tok_putItem2 (picodata_ProcessingUnit this,  tok_subobj_t * tok,
578                          picoos_uint8 type,
579                          picoos_uint8 info1, picoos_uint8 info2,
580                          picoos_uint8 len,
581                          picoos_uint8 data[])
582{
583    picoos_int32 i;
584
585    if (is_valid_itemtype(type)) {
586        tok->outBuf[tok->outWritePos++] = type;
587        tok->outBuf[tok->outWritePos++] = info1;
588        tok->outBuf[tok->outWritePos++] = info2;
589        tok->outBuf[tok->outWritePos++] = len;
590        for (i=0; i<len; i++) {
591            tok->outBuf[tok->outWritePos++] = data[i];
592        }
593    }
594}
595
596
597static MarkupId tok_markupTagId (picoos_uchar tagId[])
598{
599    if (picoos_strstr(tagId,(picoos_char *)"svox:") == (picoos_char *)tagId) {
600        tagId+=5;
601    }
602    if (tok_strEqual(tagId, TOK_MARKUP_KW_IGNORE)) {
603        return MIIgnore;
604    } else if (tok_strEqual(tagId, TOK_MARKUP_KW_SPEED)) {
605        return MISpeed;
606    } else if (tok_strEqual(tagId, TOK_MARKUP_KW_PITCH)) {
607        return MIPitch;
608    } else if (tok_strEqual(tagId, TOK_MARKUP_KW_VOLUME)) {
609        return MIVolume;
610    } else if (tok_strEqual(tagId, TOK_MARKUP_KW_SPEAKER)) {
611        return MISpeaker;
612    } else if (tok_strEqual(tagId, TOK_MARKUP_KW_VOICE)) {
613        return MIVoice;
614    } else if (tok_strEqual(tagId, TOK_MARKUP_KW_CONTEXT)) {
615        return MIPreprocContext;
616    } else if (tok_strEqual(tagId, TOK_MARKUP_KW_MARK)) {
617        return MIMarker;
618    } else if (tok_strEqual(tagId, TOK_MARKUP_KW_PLAY)) {
619        return MIPlay;
620    } else if (tok_strEqual(tagId, TOK_MARKUP_KW_USESIG)) {
621        return MIUseSig;
622    } else if (tok_strEqual(tagId, TOK_MARKUP_KW_GENFILE)) {
623        return MIGenFile;
624    } else if (tok_strEqual(tagId, TOK_MARKUP_KW_SENTENCE) || tok_strEqual(tagId, TOK_MARKUP_KW_S)) {
625        return MISentence;
626    } else if (tok_strEqual(tagId, TOK_MARKUP_KW_PARAGRAPH) || tok_strEqual(tagId, TOK_MARKUP_KW_P)) {
627        return MIParagraph;
628    } else if (tok_strEqual(tagId, TOK_MARKUP_KW_BREAK)) {
629        return MIBreak;
630    } else if (tok_strEqual(tagId, TOK_MARKUP_KW_SPELL)) {
631        return MISpell;
632    } else if (tok_strEqual(tagId, TOK_MARKUP_KW_PHONEME)) {
633        return MIPhoneme;
634    } else if (tok_strEqual(tagId, TOK_MARKUP_KW_ITEM)) {
635        return MIItem;
636    } else {
637        return MIDummyEnd;
638    }
639}
640
641
642static void tok_checkLimits (picodata_ProcessingUnit this, picoos_uint32 * value, picoos_uint32 min, picoos_uint32 max, picoos_uchar valueType[])
643{
644    if ((((*value) < min) || ((*value) > max))) {
645        picoos_emRaiseWarning(this->common->em, PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE, (picoos_char*)"", (picoos_char*)"attempt to set illegal value %i for %s", *value, valueType);
646        if (((*value) < min)) {
647            (*value) = min;
648        } else if (((*value) > max)) {
649            (*value) = max;
650        }
651    }
652}
653
654
655
656/*
657
658static void tok_checkRealLimits (picodata_ProcessingUnit this, picoos_single * value, picoos_single min, picoos_single max, picoos_uchar valueType[])
659{
660    if ((((*value) < min) || ((*value) > max))) {
661          picoos_emRaiseWarning(this->common->em, PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE, (picoos_char*)"", (picoos_char*)"attempt to set illegal value %f for %s", *value, valueType);
662        if (((*value) < min)) {
663            (*value) = min;
664        } else if (((*value) > max)) {
665            (*value) = max;
666        }
667    }
668}
669*/
670
/* Size of the local value string buffers (valStr, valStr2, valStr3) used by
   tok_interpretMarkup. */
#define VAL_STR_LEN 21
672
673static void tok_interpretMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_bool isStartTag, MarkupId mId)
674{
675    picoos_bool done;
676    picoos_int32 ival;
677    picoos_uint32 uval;
678    picoos_int32 ival2;
679    picoos_uchar valStr[VAL_STR_LEN];
680    picoos_uchar valStr2[VAL_STR_LEN];
681    picoos_uchar valStr3[VAL_STR_LEN];
682    picoos_int32 i2;
683    picoos_uint32 dur;
684    picoos_bool done1;
685    picoos_bool paramFound;
686    picoos_uint8 type, info1, info2;
687    picoos_uint8 data[256];
688    picoos_int32 pos, n, len;
689    picoos_uchar part[10];
690
691    done = FALSE;
692    switch (mId) {
693        case MIIgnore:
694            if (isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) {
695                tok_startIgnore(tok);
696                done = TRUE;
697            } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
698                tok_endIgnore(tok);
699                done = TRUE;
700            }
701            break;
702        case MISpeed:
703            if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWLevel)) {
704                if (tok_isRelative(tok->markupParams[0].paramVal, & uval)) {
705                    tok_checkLimits(this, & uval, PICO_SPEED_FACTOR_MIN, PICO_SPEED_FACTOR_MAX,(picoos_uchar*)"relative speed factor");
706                    tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEED, PICODATA_ITEMINFO2_CMD_RELATIVE, uval, (picoos_uchar*)"");
707                } else {
708                    uval = picoos_atoi((picoos_char*)tok->markupParams[0].paramVal);
709                    tok_checkLimits(this, & uval, PICO_SPEED_MIN, PICO_SPEED_MAX,(picoos_uchar*)"speed");
710                    tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEED, PICODATA_ITEMINFO2_CMD_ABSOLUTE, uval, (picoos_uchar*)"");
711                }
712                done = TRUE;
713            } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
714                tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEED, PICODATA_ITEMINFO2_CMD_ABSOLUTE, PICO_SPEED_DEFAULT, (picoos_uchar*)"");
715                done = TRUE;
716            }
717            break;
718        case MIPitch:
719            if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWLevel)) {
720                if (tok_isRelative(tok->markupParams[0].paramVal, & uval)) {
721                    tok_checkLimits(this, & uval,PICO_PITCH_FACTOR_MIN,PICO_PITCH_FACTOR_MAX, (picoos_uchar*)"relative pitch factor");
722                    tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PITCH, PICODATA_ITEMINFO2_CMD_RELATIVE, uval, (picoos_uchar*)"");
723                } else {
724                    uval = picoos_atoi((picoos_char*)tok->markupParams[0].paramVal);
725                    tok_checkLimits(this, & uval,PICO_PITCH_MIN,PICO_PITCH_MAX, (picoos_uchar*)"pitch");
726                    tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PITCH,PICODATA_ITEMINFO2_CMD_ABSOLUTE, uval, (picoos_uchar*)"");
727                }
728                done = TRUE;
729            } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
730                tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PITCH,PICODATA_ITEMINFO2_CMD_ABSOLUTE, PICO_PITCH_DEFAULT, (picoos_uchar*)"");
731                done = TRUE;
732            }
733            break;
734        case MIVolume:
735            if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWLevel)) {
736                if (tok_isRelative(tok->markupParams[0].paramVal, & uval)) {
737                    tok_checkLimits(this, & uval, PICO_VOLUME_FACTOR_MIN, PICO_VOLUME_FACTOR_MAX, (picoos_uchar*)"relative volume factor");
738                    tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOLUME, PICODATA_ITEMINFO2_CMD_RELATIVE, uval, (picoos_uchar*)"");
739                } else {
740                    uval = picoos_atoi((picoos_char*)tok->markupParams[0].paramVal);
741                    tok_checkLimits(this, & uval, PICO_VOLUME_MIN, PICO_VOLUME_MAX, (picoos_uchar*)"volume");
742                    tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOLUME, PICODATA_ITEMINFO2_CMD_ABSOLUTE, uval, (picoos_uchar*)"");
743                }
744                done = TRUE;
745            } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
746                tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOLUME, PICODATA_ITEMINFO2_CMD_ABSOLUTE, PICO_VOLUME_DEFAULT, (picoos_uchar*)"");
747                done = TRUE;
748            }
749            break;
750        case MISpeaker:
751            if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWLevel)) {
752                if (tok_isRelative(tok->markupParams[0].paramVal, & uval)) {
753                    tok_checkLimits(this, & uval, PICO_SPEAKER_FACTOR_MIN, PICO_SPEAKER_FACTOR_MAX, (picoos_uchar*)"relative speaker factor");
754                    tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEAKER, PICODATA_ITEMINFO2_CMD_RELATIVE, uval, (picoos_uchar*)"");
755                } else {
756                    uval = picoos_atoi((picoos_char*)tok->markupParams[0].paramVal);
757                    tok_checkLimits(this, & uval, PICO_SPEAKER_MIN, PICO_SPEAKER_MAX, (picoos_uchar*)"volume");
758                    tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEAKER, PICODATA_ITEMINFO2_CMD_ABSOLUTE, uval, (picoos_uchar*)"");
759                }
760                done = TRUE;
761            } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
762                tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEAKER, PICODATA_ITEMINFO2_CMD_ABSOLUTE, PICO_SPEAKER_DEFAULT, (picoos_uchar*)"");
763                done = TRUE;
764            }
765            break;
766
767        case MIVoice:
768            if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWName)) {
769                tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOICE, PICODATA_ITEMINFO2_NA, 0, tok->markupParams[0].paramVal);
770                tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
771                tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 0, 0, (picoos_uchar*)"");
772                done = TRUE;
773            } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) {
774                tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOICE, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
775                tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
776                tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 0, 0, (picoos_uchar*)"");
777                done = TRUE;
778            }
779            break;
780        case MIPreprocContext:
781            if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWName)) {
782                tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_CONTEXT, PICODATA_ITEMINFO2_NA, 0, tok->markupParams[0].paramVal);
783                done = TRUE;
784            } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) {
785                tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_CONTEXT, PICODATA_ITEMINFO2_NA, 0, PICO_CONTEXT_DEFAULT);
786                done = TRUE;
787            }
788            break;
789        case MIMarker:
790            if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWName)) {
791                tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_MARKER, PICODATA_ITEMINFO2_NA, 0, tok->markupParams[0].paramVal);
792                done = TRUE;
793            } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) {
794                done = TRUE;
795            }
796            break;
797        case MISentence:
798            if (isStartTag) {
799                tok_getParamStrVal(tok->markupParams, KWProsDomain, (picoos_uchar*)valStr, & paramFound);
800                tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
801                tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 2, 0, valStr);
802                done = TRUE;
803            } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
804                tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
805                tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 2, 0, (picoos_uchar*)"");
806                done = TRUE;
807            }
808            break;
809        case MIParagraph:
810            if (isStartTag) {
811                tok_getParamStrVal(tok->markupParams, KWProsDomain, (picoos_uchar*)valStr, & paramFound);
812                tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
813                tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 1, 0, valStr);
814                done = TRUE;
815            } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
816                tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
817                tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SIL, PICODATA_ITEMINFO2_NA, PARAGRAPH_PAUSE_DUR, (picoos_uchar*)"");
818                tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 1, 0, (picoos_uchar*)"");
819                done = TRUE;
820            }
821            break;
822        case MIBreak:
823            if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWTime)) {
824                tok_getDur(tok->markupParams[0].paramVal, & dur, & done1);
825                tok_checkLimits (this, &dur, 0, 65535, (picoos_uchar*)"time");
826                if (done1) {
827                    tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SIL, PICODATA_ITEMINFO2_NA, dur, (picoos_uchar*)"");
828                    done = TRUE;
829                }
830            } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
831                done = TRUE;
832            }
833            break;
834        case MISpell:
835            if (isStartTag) {
836                if (tok_strEqual(tok->markupParams[0].paramId, KWMode)) {
837                    if (tok_strEqual(tok->markupParams[0].paramVal, KWPB)) {
838                        uval = SPELL_WITH_PHRASE_BREAK;
839                    } else if (tok_strEqual(tok->markupParams[0].paramVal, KWSB)) {
840                        uval = SPELL_WITH_SENTENCE_BREAK;
841                    } else {
842                        tok_getDur(tok->markupParams[0].paramVal, & uval, & done1);
843                        tok_checkLimits (this, & uval, 0, 65535, (picoos_uchar*)"time");
844                        if (done1) {
845                            done = TRUE;
846                        }
847                    }
848                } else {
849                    uval = SPELL_WITH_PHRASE_BREAK;
850                }
851                tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPELL, PICODATA_ITEMINFO2_CMD_START, uval, (picoos_uchar*)"");
852                done = TRUE;
853            } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
854                tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPELL, PICODATA_ITEMINFO2_CMD_END, 0, (picoos_uchar*)"");
855                done = TRUE;
856            }
857            break;
858        case MIGenFile:
859            if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWFile)) {
860                if (tok->saveFile[0] != 0) {
861                   tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_UNSAVE,
862                               picodata_getPuTypeFromExtension(tok->saveFile, /*input*/FALSE), 0, tok->saveFile);
863                   tok->saveFile[0] = 0;
864                }
865                tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SAVE,
866                            picodata_getPuTypeFromExtension(tok->markupParams[0].paramVal,  /*input*/FALSE), 0, tok->markupParams[0].paramVal);
867                picoos_strcpy((picoos_char*)tok->saveFile, (picoos_char*)tok->markupParams[0].paramVal);
868                done = TRUE;
869            } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
870                if (tok->saveFile[0] != 0) {
871                    tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_UNSAVE,
872                                picodata_getPuTypeFromExtension(tok->saveFile, /*input*/FALSE), 0, (picoos_uchar*)"");
873                    tok->saveFile[0] = 0;
874                }
875                done = TRUE;
876            }
877            break;
878        case MIPlay:
879            if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWFile)) {
880                if (picoos_FileExists(this->common, (picoos_char*)tok->markupParams[0].paramVal)) {
881                    tok_getParamIntVal(tok->markupParams,KWF0Beg,& ival,& paramFound);
882                    tok_getParamIntVal(tok->markupParams,KWF0End,& ival2,& paramFound);
883                    tok_getParamStrVal(tok->markupParams,KWAlphabet,valStr3,& paramFound);
884                    tok_getParamPhonesStr(tok->markupParams,KWXFadeBeg,valStr3,valStr,VAL_STR_LEN,& paramFound);
885                    tok_getParamPhonesStr(tok->markupParams,KWXFadeEnd,valStr3,valStr2,VAL_STR_LEN,& paramFound);
886                    tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PLAY,
887                                picodata_getPuTypeFromExtension(tok->markupParams[0].paramVal, /*input*/TRUE), 0, tok->markupParams[0].paramVal);
888                    tok_startIgnore(tok);
889                } else {
890                    if (tok->ignLevel > 0) {
891                        tok_startIgnore(tok);
892                    } else {
893                       picoos_emRaiseWarning(this->common->em, PICO_EXC_CANT_OPEN_FILE, (picoos_char*)"", (picoos_char*)"file '%s' not found; synthesizing enclosed text instead\n", tok->markupParams[0].paramVal);
894                    }
895                }
896                done = TRUE;
897            } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
898                tok_endIgnore(tok);
899                done = TRUE;
900            }
901            break;
902        case MIUseSig:
903            if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWFile)) {
904                if (picoos_FileExists(this->common, (picoos_char*)tok->markupParams[0].paramVal)) {
905                    tok_getParamIntVal(tok->markupParams,KWF0Beg,& ival,& paramFound);
906                    tok_getParamIntVal(tok->markupParams,KWF0End,& ival2,& paramFound);
907                    tok_getParamStrVal(tok->markupParams,KWAlphabet,valStr3, & paramFound);
908                    tok_getParamPhonesStr(tok->markupParams,KWXFadeBeg,valStr3,valStr,VAL_STR_LEN,& paramFound);
909                    tok_getParamPhonesStr(tok->markupParams,KWXFadeEnd,valStr3,valStr2,VAL_STR_LEN,& paramFound);
910                    tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PLAY,
911                                picodata_getPuTypeFromExtension(tok->markupParams[0].paramVal, /*input*/TRUE), 0, tok->markupParams[0].paramVal);
912                    tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_IGNSIG, PICODATA_ITEMINFO2_CMD_START, 0, (picoos_uchar*)"");
913                } else {
914                    if (tok->ignLevel <= 0) {
915                        picoos_emRaiseWarning(this->common->em, PICO_EXC_CANT_OPEN_FILE, (picoos_char*)"", (picoos_char*)"file '%s' not found; synthesizing enclosed text instead", tok->markupParams[0].paramVal);
916                    }
917                }
918                done = TRUE;
919            } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
920                tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_IGNSIG, PICODATA_ITEMINFO2_CMD_END, 0, (picoos_uchar*)"");
921                done = TRUE;
922            }
923            break;
924        case MIPhoneme:
925            i2 = 0;
926            if (isStartTag) {
927                if (tok_strEqual(tok->markupParams[0].paramId, KWAlphabet) && tok_strEqual(tok->markupParams[1].paramId, KWPH)) {
928                    if (tok_strEqual(tok->markupParams[2].paramId, KWOrthMode)
929                        && tok_strEqual(tok->markupParams[2].paramVal, KWIgnorePunct)) {
930                        i2 = 1;
931                    }
932                    if (picodata_mapPAStrToPAIds(tok->transducer, this->common, tok->xsampa_parser, tok->svoxpa_parser, tok->xsampa2svoxpa_mapper, tok->markupParams[1].paramVal, tok->markupParams[0].paramVal, tok->phonemes, sizeof(tok->phonemes)-1) == PICO_OK) {
933                        tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PHONEME,
934                            PICODATA_ITEMINFO2_CMD_START, i2, tok->phonemes);
935                        done = TRUE;
936                    } else {
937                        PICODBG_WARN(("cannot map phonetic string '%s'; synthesizeing text instead", tok->markupParams[1].paramVal));
938                        picoos_emRaiseWarning(this->common->em, PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE,(picoos_char*)"", (picoos_char*)"cannot map phonetic string '%s'; synthesizeing text instead", tok->markupParams[1].paramVal);
939                        done = TRUE;
940                    }
941                } else if (tok_strEqual(tok->markupParams[0].paramId, KWPH)) {
942                    if (tok_strEqual(tok->markupParams[1].paramId, KWOrthMode)
943                        && tok_strEqual(tok->markupParams[1].paramVal, KWIgnorePunct)) {
944                        i2 = 1;
945                    }
946                    if (picodata_mapPAStrToPAIds(tok->transducer, this->common, tok->xsampa_parser, tok->svoxpa_parser, tok->xsampa2svoxpa_mapper, tok->markupParams[0].paramVal, PICODATA_XSAMPA, tok->phonemes, sizeof(tok->phonemes)) == PICO_OK) {
947                        tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PHONEME,
948                            PICODATA_ITEMINFO2_CMD_START, i2, tok->phonemes);
949                        done = TRUE;
950                    }
951                    else {
952                        PICODBG_WARN(("cannot map phonetic string '%s'; synthesizeing text instead", tok->markupParams[1].paramVal));
953                        picoos_emRaiseWarning(this->common->em, PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE,(picoos_char*)"", (picoos_char*)"cannot map phonetic string '%s'; synthesizing text instead", tok->markupParams[0].paramVal);
954                        done = TRUE;
955                    }
956                }
957            } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
958                tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PHONEME,
959                    PICODATA_ITEMINFO2_CMD_END, i2, (picoos_uchar*)"");
960                done = TRUE;
961            }
962            break;
963        case MIItem:
964            if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWType) &&
965                              tok_strEqual(tok->markupParams[1].paramId, KWInfo1)&&
966                              tok_strEqual(tok->markupParams[2].paramId, KWInfo2)&&
967                              tok_strEqual(tok->markupParams[3].paramId, KWDATA)) {
968                  picoos_int32 len2, n2;
969                  type = picoos_atoi(tok->markupParams[0].paramVal);
970                  info1 = picoos_atoi(tok->markupParams[1].paramVal);
971                  info2 = picoos_atoi(tok->markupParams[2].paramVal);
972                  n = 0; n2 = 0;
973                  len2 = (picoos_int32)picoos_strlen(tok->markupParams[3].paramVal);
974                  while (n<len2) {
975                      while ((tok->markupParams[3].paramVal[n] != 0) && (tok->markupParams[3].paramVal[n] <= 32)) {
976                          n++;
977                      }
978                      tok->markupParams[3].paramVal[n2] = tok->markupParams[3].paramVal[n];
979                      n++;
980                      n2++;
981                  }
982                  if (is_valid_itemtype(type)) {
983                      done = TRUE;
984                      len = 0;
985                      pos = 0;
986                      picoos_get_sep_part_str(tok->markupParams[3].paramVal, picoos_strlen(tok->markupParams[3].paramVal),
987                                          &pos, ',', part, 10, &done1);
988                      while (done && done1) {
989                          n = picoos_atoi(part);
990                          if ((n>=0) && (n<256) && (len<256)) {
991                              data[len++] = n;
992                          }
993                          else {
994                              done = FALSE;
995                          }
996                          picoos_get_sep_part_str(tok->markupParams[3].paramVal, picoos_strlen(tok->markupParams[3].paramVal),
997                                          &pos, ',', part, 10, &done1);
998                      }
999                      if (done) {
1000                          tok_putItem2(this, tok, type, info1, info2, len, data);
1001                      }
1002                  }
1003                  else {
1004                      done = FALSE;
1005                  }
1006            } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) {
1007                done = TRUE;
1008            }
1009            break;
1010    default:
1011        break;
1012    }
1013    if (!done) {
1014        tok->markupTagErr = MEInterprete;
1015    }
1016    if (isStartTag) {
1017        tok->markupLevel[mId]++;
1018    } else if ((tok->markupLevel[mId] > 0)) {
1019        tok->markupLevel[mId]--;
1020    }
1021}
1022
1023
1024static picoos_bool tok_attrChar (picoos_uchar ch, picoos_bool first)
1025{
1026    return ((((ch >= (picoos_uchar)'A') && (ch <= (picoos_uchar)'Z')) ||
1027             ((ch >= (picoos_uchar)'a') && (ch <= (picoos_uchar)'z'))) ||
1028             ( !(first) && ((ch >= (picoos_uchar)'0') && (ch <= (picoos_uchar)'9'))));
1029}
1030
1031
1032
1033static picoos_bool tok_idChar (picoos_uchar ch, picoos_bool first)
1034{
1035    return tok_attrChar(ch, first) || ( !(first) && (ch == (picoos_uchar)':'));
1036}
1037
1038
1039static void tok_setIsFileAttr (picoos_uchar name[], picoos_bool * isFile)
1040{
1041    (*isFile) = tok_strEqual(name, KWFile);
1042}
1043
1044/* *****************************************************************************/
1045
1046static void tok_putToSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar str[], pico_tokenType type, pico_tokenSubType subtype)
1047{
1048    int i, len;
1049
1050    if (str[0] != 0) {
1051        len = picoos_strlen((picoos_char*)str);
1052        for (i = 0; i < len; i++) {
1053            if (tok->tokenPos >= IN_BUF_SIZE) {
1054                picoos_emRaiseWarning(this->common->em, PICO_ERR_INTERNAL_LIMIT, (picoos_char*)"", (picoos_char*)"simple token too long; forced treatment");
1055                tok_treatSimpleToken(this, tok);
1056            }
1057            tok->tokenStr[tok->tokenPos] = str[i];
1058            tok->tokenPos++;
1059        }
1060    }
1061    tok->tokenType = type;
1062    tok->tokenSubType = subtype;
1063}
1064
1065
1066static void tok_putToMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar str[])
1067{
1068    picoos_int32 i, len;
1069    picoos_uint8 ok;
1070
1071    tok->markupTagErr = MENone;
1072    len = picoos_strlen((picoos_char*)str);
1073    for (i = 0; i< len; i++) {
1074        if (tok->markupPos >= (MARKUP_STRING_BUF_SIZE - 1)) {
1075            if ((tok->markupPos == (MARKUP_STRING_BUF_SIZE - 1)) && (tok_markupTagId(tok->markupTagName) != MIDummyEnd)) {
1076                picoos_emRaiseWarning(this->common->em, PICO_ERR_INTERNAL_LIMIT ,(picoos_char*)"", (picoos_char*)"markup tag too long");
1077            }
1078            tok->markupState = MSErrorTooLong;
1079        } else if ((str[i] == (picoos_uchar)' ') && ((tok->markupState == MSExpectingmarkupTagName) || (tok->markupState == MSGotmarkupTagName) || (tok->markupState == MSGotAttrName) || (tok->markupState == MSGotEqual) || (tok->markupState == MSGotAttrValue))) {
1080        } else if ((str[i] == (picoos_uchar)'>') && ((tok->markupState == MSGotmarkupTagName) || (tok->markupState == MSInmarkupTagName) || (tok->markupState == MSGotAttrValue))) {
1081            tok->markupState = MSGotEnd;
1082        } else if ((str[i] == (picoos_uchar)'/') && ((tok->markupState == MSGotmarkupTagName) || (tok->markupState == MSInmarkupTagName) || (tok->markupState == MSGotAttrValue))) {
1083            if (tok->markupTagType == MTEnd) {
1084                tok->markupTagErr = MEUnexpectedChar;
1085                tok->markupState = MSError;
1086            } else {
1087                tok->markupTagType = MTEmpty;
1088                tok->markupState = MSGotEndSlash;
1089            }
1090        } else {
1091            switch (tok->markupState) {
1092                case MSNotInMarkup:
1093                    if (str[i] == (picoos_uchar)'<') {
1094                        tok_clearMarkupParams(tok->markupParams);
1095                        tok->nrMarkupParams = 0;
1096                        tok->strPos = 0;
1097                        tok->markupTagType = MTStart;
1098                        tok->markupState = MSGotStart;
1099                    } else {
1100                        tok->markupTagErr = MEMissingStart;
1101                        tok->markupState = MSError;
1102                    }
1103                    break;
1104                case MSGotStart:
1105                    if (str[i] == (picoos_uchar)'/') {
1106                        tok->markupTagType = MTEnd;
1107                        tok->markupState = MSExpectingmarkupTagName;
1108                    } else if (str[i] == (picoos_uchar)' ') {
1109                        tok->markupState = MSExpectingmarkupTagName;
1110                    } else if (tok_idChar(str[i],TRUE)) {
1111                        tok->markupTagType = MTStart;
1112                        tok->markupTagName[tok->strPos] = str[i];
1113                        tok->strPos++;
1114                        tok->markupTagName[tok->strPos] = 0;
1115                        tok->markupState = MSInmarkupTagName;
1116                    } else {
1117                        tok->markupTagErr = MEUnexpectedChar;
1118                        tok->markupState = MSError;
1119                    }
1120                    break;
1121                case MSInmarkupTagName:   case MSExpectingmarkupTagName:
1122                    if (tok_idChar(str[i],tok->markupState == MSExpectingmarkupTagName)) {
1123                        tok->markupTagName[tok->strPos] = str[i];
1124                        tok->strPos++;
1125                        tok->markupTagName[(tok->strPos)] = 0;
1126                        tok->markupState = MSInmarkupTagName;
1127                    } else if ((tok->markupState == MSInmarkupTagName) && (str[i] == (picoos_uchar)' ')) {
1128                        tok->markupState = MSGotmarkupTagName;
1129                        picobase_lowercase_utf8_str(tok->markupTagName, (picoos_char*)tok->markupTagName, IN_BUF_SIZE, &ok);
1130                        tok->strPos = 0;
1131                    } else {
1132                        tok->markupTagErr = MEIdent;
1133                        tok->markupState = MSError;
1134                    }
1135                    break;
1136                case MSGotmarkupTagName:   case MSGotAttrValue:
1137                    if (tok_attrChar(str[i], TRUE)) {
1138                        if (tok->markupTagType == MTEnd) {
1139                            tok->markupTagErr = MEUnexpectedChar;
1140                            tok->markupState = MSError;
1141                        } else {
1142                            if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) {
1143                                tok->markupParams[tok->nrMarkupParams].paramId[tok->strPos] = str[i];
1144                                tok->strPos++;
1145                                tok->markupParams[tok->nrMarkupParams].paramId[tok->strPos] = 0;
1146                            } else {
1147                                picoos_emRaiseWarning(this->common->em, PICO_ERR_INTERNAL_LIMIT ,(picoos_char*)"", (picoos_char*)"too many attributes in markup; ignoring");
1148                            }
1149                            tok->markupState = MSInAttrName;
1150                        }
1151                    } else {
1152                        tok->markupTagErr = MEUnexpectedChar;
1153                        tok->markupState = MSError;
1154                    }
1155                    break;
1156                case MSInAttrName:
1157                    if (tok_attrChar(str[i], FALSE)) {
1158                        if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) {
1159                            tok->markupParams[tok->nrMarkupParams].paramId[tok->strPos] = str[i];
1160                            tok->strPos++;
1161                            tok->markupParams[tok->nrMarkupParams].paramId[tok->strPos] = 0;
1162                        }
1163                        tok->markupState = MSInAttrName;
1164                    } else if (str[i] == (picoos_uchar)' ') {
1165                        picobase_lowercase_utf8_str(tok->markupParams[tok->nrMarkupParams].paramId, (picoos_char*)tok->markupParams[tok->nrMarkupParams].paramId, IN_BUF_SIZE, &ok);
1166                        tok_setIsFileAttr(tok->markupParams[tok->nrMarkupParams].paramId, & tok->isFileAttr);
1167                        tok->markupState = MSGotAttrName;
1168                    } else if (str[i] == (picoos_uchar)'=') {
1169                        picobase_lowercase_utf8_str(tok->markupParams[tok->nrMarkupParams].paramId, (picoos_char*)tok->markupParams[tok->nrMarkupParams].paramId, IN_BUF_SIZE, &ok);
1170                        tok_setIsFileAttr(tok->markupParams[tok->nrMarkupParams].paramId, & tok->isFileAttr);
1171                        tok->markupState = MSGotEqual;
1172                    } else {
1173                        tok->markupTagErr = MEMissingEqual;
1174                        tok->markupState = MSError;
1175                    }
1176                    break;
1177                case MSGotAttrName:
1178                    if (str[i] == (picoos_uchar)'=') {
1179                        tok->markupState = MSGotEqual;
1180                    } else {
1181                        tok->markupTagErr = MEMissingEqual;
1182                        tok->markupState = MSError;
1183                    }
1184                    break;
1185                case MSGotEqual:
1186                    if ((str[i] == (picoos_uchar)'"') || (str[i] == (picoos_uchar)'\'')) {
1187                        tok->strDelim = str[i];
1188                        tok->strPos = 0;
1189                        tok->markupState = MSInAttrValue;
1190                    } else {
1191                        tok->markupTagErr = MEMissingQuote;
1192                        tok->markupState = MSError;
1193                    }
1194                    break;
1195                case MSInAttrValue:
1196                    if (!(tok->isFileAttr) && (str[i] == (picoos_uchar)'\\')) {
1197                        tok->markupState = MSInAttrValueEscaped;
1198                    } else if (str[i] == tok->strDelim) {
1199                        if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) {
1200                            tok->nrMarkupParams++;
1201                        }
1202                        tok->strPos = 0;
1203                        tok->markupState = MSGotAttrValue;
1204                    } else {
1205                        if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) {
1206                            tok->markupParams[tok->nrMarkupParams].paramVal[tok->strPos] = str[i];
1207                            tok->strPos++;
1208                            tok->markupParams[tok->nrMarkupParams].paramVal[tok->strPos] = 0;
1209                        }
1210                        tok->markupState = MSInAttrValue;
1211                    }
1212                    break;
1213                case MSInAttrValueEscaped:
1214                    if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) {
1215                        tok->markupParams[tok->nrMarkupParams].paramVal[tok->strPos] = str[i];
1216                        tok->strPos++;
1217                        tok->markupParams[tok->nrMarkupParams].paramVal[tok->strPos] = 0;
1218                    }
1219                    tok->markupState = MSInAttrValue;
1220                    break;
1221                case MSGotEndSlash:
1222                    if (str[i] == (picoos_uchar)'>') {
1223                        tok->markupState = MSGotEnd;
1224                    } else {
1225                        tok->markupTagErr = MEUnexpectedChar;
1226                        tok->markupState = MSError;
1227                    }
1228                    break;
1229            default:
1230                tok->markupTagErr = MEUnexpectedChar;
1231                tok->markupState = MSError;
1232                break;
1233            }
1234        }
1235        if (tok->markupTagErr == MENone) {
1236            tok->markupStr[tok->markupPos] = str[i];
1237            tok->markupPos++;
1238        } /* else restart parsing at current char */
1239        tok->markupStr[tok->markupPos] = 0;
1240    }
1241    /*
1242    PICODBG_DEBUG(("putToMarkup %s", tok->markupStr));
1243    */
1244}
1245
1246/* *****************************************************************************/
1247
1248static void tok_treatMarkupAsSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok)
1249{
1250    picoos_int32 i;
1251
1252    tok->utfpos = 0;
1253    tok->utflen = 0;
1254    tok->markupState = MSNotInMarkup;
1255    for (i = 0; i < tok->markupPos; i++) {
1256        tok_treatChar(this, tok, tok->markupStr[i], FALSE);
1257    }
1258    tok->markupPos = 0;
1259    tok->strPos = 0;
1260}
1261
1262
1263static void tok_treatMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok)
1264{
1265    MarkupId mId;
1266
1267    if (tok_markupTagId(tok->markupTagName) != MIDummyEnd) {
1268        if (tok->markupTagErr == MENone) {
1269            tok->markupState = MSNotInMarkup;
1270            if ((tok->tokenType != PICODATA_ITEMINFO1_TOKTYPE_SPACE) && (tok->tokenType != PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED)) {
1271                tok_treatSimpleToken(this, tok);
1272            }
1273            tok_putToSimpleToken(this, tok, (picoos_uchar*)" ", PICODATA_ITEMINFO1_TOKTYPE_SPACE, -1);
1274            mId = tok_markupTagId(tok->markupTagName);
1275            if ((tok->markupTagType == MTStart) || (tok->markupTagType == MTEmpty)) {
1276                tok_interpretMarkup(this, tok, TRUE, mId);
1277            }
1278            if (((tok->markupTagType == MTEnd) || (tok->markupTagType == MTEmpty))) {
1279                tok_clearMarkupParams(tok->markupParams);
1280                tok->nrMarkupParams = 0;
1281                tok_interpretMarkup(this, tok, FALSE,mId);
1282            }
1283        }
1284        if (tok->markupTagErr != MENone) {
1285            if (!tok->aborted) {
1286              picoos_emRaiseWarning(this->common->em, PICO_ERR_INVALID_MARKUP_TAG, (picoos_char*)"", (picoos_char*)"syntax error in markup token '%s'",tok->markupStr);
1287            }
1288            tok_treatMarkupAsSimpleToken(this, tok);
1289        }
1290    } else {
1291        tok_treatMarkupAsSimpleToken(this, tok);
1292    }
1293    tok->markupState = MSNotInMarkup;
1294    tok->markupPos = 0;
1295    tok->strPos = 0;
1296}
1297
1298
1299
1300static void tok_treatChar (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar ch, picoos_bool markupHandling)
1301{
1302    picoos_int32 i, id;
1303    picoos_uint8 uval8;
1304    pico_tokenType type = PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED;
1305    pico_tokenSubType subtype = -1;
1306    picoos_bool dummy;
1307    utf8char0c utf2;
1308    picoos_int32 utf2pos;
1309
1310    if (ch == NULLC) {
1311      tok_treatSimpleToken(this, tok);
1312      tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
1313    }
1314    else {
1315      switch (tok_putToUtf(tok, ch)) {
1316        case UTF_CHAR_MALFORMED:
1317            tok->utfpos = 0;
1318            tok->utflen = 0;
1319            break;
1320        case UTF_CHAR_INCOMPLETE:
1321            break;
1322        case UTF_CHAR_COMPLETE:
1323            markupHandling = (markupHandling && (tok->markupHandlingMode == MARKUP_HANDLING_ENABLED));
1324            id = picoktab_graphOffset(tok->graphTab, tok->utf);
1325            if (id > 0) {
1326                if (picoktab_getIntPropTokenType(tok->graphTab, id, &uval8)) {
1327                    type = (pico_tokenType)uval8;
1328                    if (type == PICODATA_ITEMINFO1_TOKTYPE_LETTERV) {
1329                        type = PICODATA_ITEMINFO1_TOKTYPE_LETTER;
1330                    }
1331                }
1332                dummy = picoktab_getIntPropTokenSubType(tok->graphTab, id, &subtype);
1333            } else if (tok->utf[tok->utfpos-1] <= (picoos_uchar)' ') {
1334                type = PICODATA_ITEMINFO1_TOKTYPE_SPACE;
1335                subtype =  -1;
1336            } else {
1337                type = PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED;
1338                subtype =  -1;
1339            }
1340            if ((tok->utf[tok->utfpos-1] > (picoos_uchar)' ')) {
1341                tok->nrEOL = 0;
1342            } else if ((tok->utf[tok->utfpos-1] == EOL)) {
1343                tok->nrEOL++;
1344            }
1345            if (markupHandling && (tok->markupState != MSNotInMarkup)) {
1346                tok_putToMarkup(this, tok, tok->utf);
1347                if (tok->markupState >= MSError) {
1348                    picoos_strlcpy(utf2, tok->utf, 5);
1349                    utf2pos = tok->utfpos;
1350                    /* treat string up to (but not including) current char as simple
1351                       token and restart markup tag parsing with current char */
1352                    tok_treatMarkupAsSimpleToken(this, tok);
1353                    for (i = 0; i < utf2pos; i++) {
1354                        tok_treatChar(this, tok, utf2[i], markupHandling);
1355                    }
1356                } else if (tok->markupState == MSGotEnd) {
1357                    tok_treatMarkup(this, tok);
1358                }
1359            } else if ((markupHandling && (tok->utf[tok->utfpos-1] == (picoos_uchar)'<'))) {
1360                tok_putToMarkup(this, tok, tok->utf);
1361            } else if (type != PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED) {
1362                if ((type != tok->tokenType) || (type == PICODATA_ITEMINFO1_TOKTYPE_CHAR) || (subtype != tok->tokenSubType)) {
1363                    tok_treatSimpleToken(this, tok);
1364                } else if ((tok->utf[tok->utfpos-1] == EOL) && (tok->nrEOL == 2)) {
1365                    tok_treatSimpleToken(this, tok);
1366                    tok_putToSimpleToken(this, tok, (picoos_uchar*)".", PICODATA_ITEMINFO1_TOKTYPE_CHAR, -1);
1367                    tok_treatSimpleToken(this, tok);
1368                }
1369                tok_putToSimpleToken(this, tok, tok->utf, type, subtype);
1370            } else {
1371                tok_treatSimpleToken(this, tok);
1372            }
1373            tok->utfpos = 0;
1374            tok->utflen = 0;
1375            break;
1376      }
1377    }
1378}
1379
1380
1381static void tok_treatSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok)
1382{
1383    if (tok->tokenPos < IN_BUF_SIZE) {
1384        tok->tokenStr[tok->tokenPos] = 0;
1385    }
1386    if (tok->markupState != MSNotInMarkup) {
1387        if (!(tok->aborted) && (tok->markupState >= MSGotmarkupTagName) && (tok_markupTagId(tok->markupTagName) != MIDummyEnd)) {
1388            picoos_emRaiseWarning(this->common->em, PICO_ERR_INVALID_MARKUP_TAG, (picoos_char*)"", (picoos_char*)"unfinished markup tag '%s'",tok->markupStr);
1389        }
1390        tok_treatMarkupAsSimpleToken(this, tok);
1391        tok_treatSimpleToken(this, tok);
1392    } else if ((tok->tokenPos > 0) && ((tok->ignLevel <= 0) || (tok->tokenType == PICODATA_ITEMINFO1_TOKTYPE_SPACE))) {
1393        tok_putItem(this, tok, PICODATA_ITEM_TOKEN, tok->tokenType, (picoos_uint8)tok->tokenSubType, 0, tok->tokenStr);
1394    }
1395    tok->tokenPos = 0;
1396    tok->tokenType = PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED;
1397    tok->tokenSubType =  -1;
1398}
1399
1400/* *****************************************************************************/
1401
1402static pico_status_t tokReset(register picodata_ProcessingUnit this, picoos_int32 resetMode)
1403{
1404    tok_subobj_t * tok;
1405    MarkupId mId;
1406
1407    if (NULL == this || NULL == this->subObj) {
1408        return PICO_ERR_OTHER;
1409    }
1410    tok = (tok_subobj_t *) this->subObj;
1411
1412    tok->ignLevel = 0;
1413
1414    tok->utfpos = 0;
1415    tok->utflen = 0;
1416
1417    tok_clearMarkupParams(tok->markupParams);
1418    tok->nrMarkupParams = 0;
1419    tok->markupState = MSNotInMarkup;
1420    tok->markupPos = 0;
1421    for (mId = MIDummyStart; mId <= MIDummyEnd; mId++) {
1422        tok->markupLevel[mId] = 0;
1423    }
1424    tok->markupTagName[0] = 0;
1425    tok->markupTagType = MTNone;
1426    tok->markupTagErr = MENone;
1427
1428    tok->strPos = 0;
1429    tok->strDelim = 0;
1430    tok->isFileAttr = FALSE;
1431
1432    tok->tokenType = PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED;
1433    tok->tokenSubType =  -1;
1434    tok->tokenPos = 0;
1435
1436    tok->nrEOL = 0;
1437
1438
1439    tok->markupHandlingMode = TRUE;
1440    tok->aborted = FALSE;
1441
1442    tok->start = TRUE;
1443
1444    tok->outReadPos = 0;
1445    tok->outWritePos = 0;
1446
1447    tok->saveFile[0] = 0;
1448
1449
1450    tok->graphTab = picoktab_getGraphs(this->voice->kbArray[PICOKNOW_KBID_TAB_GRAPHS]);
1451
1452    tok->xsampa_parser = picokfst_getFST(this->voice->kbArray[PICOKNOW_KBID_FST_XSAMPA_PARSE]);
1453    PICODBG_TRACE(("got xsampa_parser @ %i",tok->xsampa_parser));
1454
1455    tok->svoxpa_parser = picokfst_getFST(this->voice->kbArray[PICOKNOW_KBID_FST_SVOXPA_PARSE]);
1456    PICODBG_TRACE(("got svoxpa_parser @ %i",tok->svoxpa_parser));
1457
1458    tok->xsampa2svoxpa_mapper = picokfst_getFST(this->voice->kbArray[PICOKNOW_KBID_FST_XSAMPA2SVOXPA]);
1459    PICODBG_TRACE(("got xsampa2svoxpa_mapper @ %i",tok->xsampa2svoxpa_mapper));
1460
1461
1462
1463    return PICO_OK;
1464}
1465
1466static pico_status_t tokInitialize(register picodata_ProcessingUnit this, picoos_int32 resetMode)
1467{
1468/*
1469
1470    tok_subobj_t * tok;
1471
1472    if (NULL == this || NULL == this->subObj) {
1473        return PICO_ERR_OTHER;
1474    }
1475    tok = (tok_subobj_t *) this->subObj;
1476*/
1477    return tokReset(this, resetMode);
1478}
1479
1480
1481static pico_status_t tokTerminate(register picodata_ProcessingUnit this)
1482{
1483    return PICO_OK;
1484}
1485
1486static picodata_step_result_t tokStep(register picodata_ProcessingUnit this, picoos_int16 mode, picoos_uint16 * numBytesOutput);
1487
1488static pico_status_t tokSubObjDeallocate(register picodata_ProcessingUnit this,
1489        picoos_MemoryManager mm)
1490{
1491
1492    if (NULL != this) {
1493        picoos_deallocate(this->common->mm, (void *) &this->subObj);
1494    }
1495    mm = mm;        /* avoid warning "var not used in this function"*/
1496    return PICO_OK;
1497}
1498
1499picodata_ProcessingUnit picotok_newTokenizeUnit(picoos_MemoryManager mm, picoos_Common common,
1500        picodata_CharBuffer cbIn, picodata_CharBuffer cbOut,
1501        picorsrc_Voice voice)
1502{
1503    tok_subobj_t * tok;
1504    picodata_ProcessingUnit this = picodata_newProcessingUnit(mm, common, cbIn, cbOut, voice);
1505    if (this == NULL) {
1506        return NULL;
1507    }
1508    this->initialize = tokInitialize;
1509    PICODBG_DEBUG(("set this->step to tokStep"));
1510    this->step = tokStep;
1511    this->terminate = tokTerminate;
1512    this->subDeallocate = tokSubObjDeallocate;
1513    this->subObj = picoos_allocate(mm, sizeof(tok_subobj_t));
1514    if (this->subObj == NULL) {
1515        picoos_deallocate(mm, (void *)&this);
1516        return NULL;
1517    }
1518    tok = (tok_subobj_t *) this->subObj;
1519    tok->transducer = picotrns_newSimpleTransducer(mm, common, 10*(PICOTRNS_MAX_NUM_POSSYM+2));
1520    if (NULL == tok->transducer) {
1521        tokSubObjDeallocate(this,mm);
1522        picoos_deallocate(mm, (void *)&this);
1523        return NULL;
1524    }
1525    tokInitialize(this, PICO_RESET_FULL);
1526    return this;
1527}
1528
1529/**
1530 * fill up internal buffer, try to locate token, write token to output
1531 */
1532picodata_step_result_t tokStep(register picodata_ProcessingUnit this,
1533        picoos_int16 mode, picoos_uint16 * numBytesOutput)
1534{
1535    register tok_subobj_t * tok;
1536
1537    if (NULL == this || NULL == this->subObj) {
1538        return PICODATA_PU_ERROR;
1539    }
1540    tok = (tok_subobj_t *) this->subObj;
1541
1542    mode = mode;        /* avoid warning "var not used in this function"*/
1543
1544    *numBytesOutput = 0;
1545    while (1) { /* exit via return */
1546        picoos_int16 ch;
1547
1548        if ((tok->outWritePos - tok->outReadPos) > 0) {
1549            if (picodata_cbPutItem(this->cbOut, &tok->outBuf[tok->outReadPos], tok->outWritePos - tok->outReadPos, numBytesOutput) == PICO_OK) {
1550                PICODATA_INFO_ITEM(this->voice->kbArray[PICOKNOW_KBID_DBG],
1551                    (picoos_uint8 *)"tok:", &tok->outBuf[tok->outReadPos], tok->outWritePos - tok->outReadPos);
1552                tok->outReadPos += *numBytesOutput;
1553                if (tok->outWritePos == tok->outReadPos) {
1554                    tok->outWritePos = 0;
1555                    tok->outReadPos = 0;
1556                }
1557            }
1558            else {
1559                return PICODATA_PU_OUT_FULL;
1560            }
1561
1562        }
1563        else if (PICO_EOF != (ch = picodata_cbGetCh(this->cbIn))) {
1564            PICODBG_DEBUG(("read in %c", (picoos_char) ch));
1565            tok_treatChar(this, tok, (picoos_uchar) ch, /*markupHandling*/TRUE);
1566        }
1567        else {
1568            return PICODATA_PU_IDLE;
1569        }
1570    }
1571}
1572
1573#ifdef __cplusplus
1574}
1575#endif
1576
1577/* end */
1578