1b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/*
2b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *
4b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * Licensed under the Apache License, Version 2.0 (the "License");
5b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * you may not use this file except in compliance with the License.
6b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * You may obtain a copy of the License at
7b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *
8b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *     http://www.apache.org/licenses/LICENSE-2.0
9b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *
10b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * Unless required by applicable law or agreed to in writing, software
11b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * distributed under the License is distributed on an "AS IS" BASIS,
12b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * See the License for the specific language governing permissions and
14b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * limitations under the License.
15b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen */
16b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/**
17b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * @file picosa.c
18b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *
19b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * sentence analysis - POS disambiguation
20b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *
21b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
22b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * All rights reserved.
23b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *
24b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * History:
25b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * - 2009-04-20 -- initial version
26b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *
27b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen */
28b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
29b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#include "picoos.h"
30b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#include "picodbg.h"
31b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#include "picobase.h"
32b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#include "picokdt.h"
33b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#include "picoklex.h"
34b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#include "picoktab.h"
35b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#include "picokfst.h"
36b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#include "picotrns.h"
37b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#include "picodata.h"
38b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#include "picosa.h"
39b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
40b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#ifdef __cplusplus
41b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenextern "C" {
42b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif
43b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#if 0
44b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
45b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif
46b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
47b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
/*
 * PU saStep states.
 *
 * COLLECT and FEED keep their original values 0 and 2; the PROCESS_*
 * sub-states keep 10..13.
 */
#define SA_STEPSTATE_COLLECT             0
#define SA_STEPSTATE_PROCESS_POSD       10
#define SA_STEPSTATE_PROCESS_WPHO       11
#define SA_STEPSTATE_PROCESS_TRNS_PARSE 12
#define SA_STEPSTATE_PROCESS_TRNS_FST   13
#define SA_STEPSTATE_FEED                2

/*
 * Size derived from the transducer symbol limit PICOTRNS_MAX_NUM_POSSYM.
 * NOTE(review): presumably the capacity used for the AltDesc buffer of the
 * sub-object; the use site is not visible in this part of the file.
 */
#define SA_MAX_ALTDESC_SIZE (30*(PICOTRNS_MAX_NUM_POSSYM + 2))

/* NOTE(review): size of a message string buffer; use site not visible here. */
#define SA_MSGSTR_SIZE 32
59b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
60b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/*  subobject    : SentAnaUnit
61b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *  shortcut     : sa
62b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *  context size : one phrase, max. 30 non-PUNC items, for non-processed items
63b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *                 one item if internal input empty
64b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen */
65b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
66b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/** @addtogroup picosa
67b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
68b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  internal buffers:
69b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
70b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - headx: array for extended item heads of fixed size (head plus
71b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    index for content, plus two fields for boundary strength/type)
72b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
73b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - cbuf1, cbuf2: buffers for item contents (referenced by index in
74b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    headx). Future: replace these two buffers by a single double-sided
75b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    buffer (double shrink-grow type)
76b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
77b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  0. bottom up filling of items in headx and cbuf1
78b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
79b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  1. POS disambiguation (right-to-left, top-to-bottom):
80b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - number and sequence of items unchanged
81b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - item content can only get smaller (reducing nr of results in WORDINDEX)
82b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  -> info stays in "headx, cbuf1" and changed in place                      \n
83b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen     WORDGRAPH(POSes,NA)graph             -> WORDGRAPH(POS,NA)graph         \n
84b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen     WORDINDEX(POSes,NA)POS1ind1...POSNindN  -> WORDINDEX(POS,NA)POS|ind    \n
85b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
86b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  2. lex-index lookup and G2P (both directions possible, left-to-right done):
87b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - number and sequence of items unchanged, item head info and content
88b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    changes
89b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  -> headx changed in place; cbuf1 to cbuf2                                 \n
90b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen     WORDGRAPH(POS,NA)graph    -> WORDPHON(POS,NA)phon                      \n
91b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen     WORDINDEX(POS,NA)POS|ind  -> WORDPHON(POS,NA)phon                      \n
92b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
93b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  3. phrasing (right-to-left):
94b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
95b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen     Previous (before introducing SBEG)\n
96b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen     ----------------------------------
97b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                           1|          2|             3|    4|    \n
98b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen     e.g. from      WP WP WP       WP WP PUNC  WP WP PUNC  WP WP WP PUNC FLUSH    \n
99b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen     e.g. to  BINIT WP WP WP BPHR3 WP WP BPHR1 WP WP BSEND WP WP WP BSEND BTERM   \n
100b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen              |1                         |2          |3             |4            \n
101b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
102b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen     3-level bound state: to keep track of bound strength from end of
103b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen     previous punc-phrase, then BOUND item output as first item
104b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen     (strength from prev punc-phrase and type from current
105b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen     punc-phrase).
106b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
107b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen     trailing PUNC item       bound states
108b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                              INIT         SEND         PHR1
109b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen       PUNC(SENTEND, T)       B(I,T)>SEND  B(S,T)>SEND  B(P1,T)>SEND
110b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen       PUNC(SENTEND, Q)       B(I,Q)>SEND  B(S,Q)>SEND  B(P1,Q)>SEND
111b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen       PUNC(SENTEND, E)       B(I,E)>SEND  B(S,E)>SEND  B(P1,E)>SEND
112b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen       PUNC(PHRASEEND, P)     B(I,P)>PHR1  B(S,P)>PHR1  B(P1,P)>PHR1
113b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen       PUNC(PHRASEEND, FORC)  B(I,P)>PHR1  B(S,P)>PHR1  B(P1,P)>PHR1
114b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen       PUNC(FLUSH, T)         B(I,T)..     B(S,T)..     B(P1,T)..
115b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                B(T,NA)      B(T,NA)      B(T,NA)
116b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                >INIT        >INIT        >INIT
117b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
118b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen     PHR2/3 case:
119b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen     trailing PUNC item       bound states
120b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                          INIT              SEND              PHR1
121b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen       PUNC(SENTEND, T)   B(I,P)B(P,T)>SEND B(S,P)B(P,T)>SEND B(P1,P)B(P,T)>SEND
122b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen       PUNC(SENTEND, Q)   B(I,P)B(P,Q)>SEND B(S,P)B(P,Q)>SEND B(P1,P)B(P,Q)>SEND
123b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen       PUNC(SENTEND, E)   B(I,P)B(P,E)>SEND B(S,P)B(P,E)>SEND B(P1,P)B(P,E)>SEND
124b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen       PUNC(PHRASEEND, P) B(I,P)B(P,P)>PHR1 B(S,P)B(P,P)>PHR1 B(P1,P)B(P,P)>PHR1
125b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen       PUNC(PHREND, FORC) B(I,P)B(P,P)>PHR1 B(S,P)B(P,P)>PHR1 B(P1,P)B(P,P)>PHR1
126b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen       PUNC(FLUSH, T)     B(I,P)B(P,T)..    B(S,T)B(P,T)..    B(P1,T)B(P,T)..
127b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            B(T,NA)             B(T,NA)             B(T,NA)
128b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            >INIT               >INIT               >INIT
129b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
130b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen     Current
131b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen     --------
132b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen     e.g. from      WP WP WP       WP WP PUNC  WP WP PUNC        WP WP WP PUNC  FLUSH
133b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen     e.g. to  BSBEG WP WP WP BPHR3 WP WP BPHR1 WP WP BSEND BSBEG WP WP WP BSEND BTERM
134b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen              |1                         |2                |3                   |4
135b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
     2-level bound state: The internal buffer contains one primary phrase (sometimes forced, if buffer
     almost full), with the trailing PUNCT item included (last item).
     If the trailing PUNC is a primary phrase separator, the
       item is not output, but instead, the bound state is set to PPHR, so that the correct BOUND can
       be output at the start of the next primary phrase.
     Otherwise,
       the item is converted to the corresponding BOUND and output. The bound state is set to SSEP,
       so that a BOUND of type SBEG is output at the start of the next primary phrase.
144b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
145b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen     trailing PUNC item       bound states
146b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                              SSEP           PPHR
147b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen       PUNC(SENTEND, X)       B(B,X)>SSEP    B(P1,X)>SSEP  (X = T | Q | E)
148b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen       PUNC(FLUSH, T)         B(B,T)>SSEP*    B(P1,T)>SSEP
149b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen       PUNC(PHRASEEND, P)     B(B,P)>PPHR    B(P1,P)>PPHR
150b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen       PUNC(PHRASEEND, FORC)  B(B,P)>PPHR    B(P1,P)>PPHR
151b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
*    If more than one sentence separator follows another (e.g. SEND-FLUSH, SEND-SEND) then
     all but the first will be treated as an (empty) phrase containing just this item.
     If this (single) item is a flush, creation of SBEG is suppressed.
155b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
156b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
157b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - dtphr phrasing tree (rather subphrasing tree it should be called)
158b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    determines
159b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen      BOUND_PHR2
160b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen      BOUND_PHR3
  - boundary strengths are determined for every word (except the
    first one) from right-to-left. The boundary types mark the phrase
    type of the phrase following the boundary.
164b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - number of items actually changed (new BOUND items added): because
165b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    of fixed size without content, two fields are contained in headx
166b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    to indicate if a BOUND needs to be added to the LEFT of the item.
167b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    -> headx further extended with boundary strength and type info to
168b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    indicate that to the left of the headx ele a BOUND needs to be
169b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    inserted when outputting.
170b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
171b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  4. accentuation:
172b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - number of items unchanged, content unchanged, only head info changes
173b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  -> changed in place in headx
174b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen*/
175b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
176b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
177b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chentypedef struct {
178b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picodata_itemhead_t head;
179b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 cind;
180b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen} picosa_headx_t;
181b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
182b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
183b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chentypedef struct sa_subobj {
184b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint8 procState; /* for next processing step decision */
185b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
186b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint8 inspaceok;      /* flag: headx/cbuf1 has space for an item */
187b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint8 needsmoreitems; /* flag: need more items */
188b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint8 phonesTransduced; /* flag: */
189b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
190b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint8 tmpbuf[PICODATA_MAX_ITEMSIZE];  /* tmp. location for an item */
191b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
192b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picosa_headx_t headx[PICOSA_MAXNR_HEADX];
193b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 headxBottom; /* bottom */
194b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 headxLen;    /* length, 0 if empty */
195b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
196b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint8 cbuf1[PICOSA_MAXSIZE_CBUF];
197b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 cbuf1BufSize; /* actually allocated size */
198b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 cbuf1Len;     /* length, 0 if empty */
199b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
200b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint8 cbuf2[PICOSA_MAXSIZE_CBUF];
201b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 cbuf2BufSize; /* actually allocated size */
202b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 cbuf2Len;     /* length, 0 if empty */
203b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
204b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picotrns_possym_t phonBufA[PICOTRNS_MAX_NUM_POSSYM+1];
205b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picotrns_possym_t phonBufB[PICOTRNS_MAX_NUM_POSSYM+1];
206b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picotrns_possym_t * phonBuf;
207b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picotrns_possym_t * phonBufOut;
208b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 phonReadPos, phonWritePos; /* next pos to read from phonBufIn, next pos to write to phonBufIn */
209b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 nextReadPos; /* position of (potential) next item to read from */
210b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
211b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
212b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* buffer for internal calculation of transducer */
213b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picotrns_AltDesc altDescBuf;
214b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* the number of AltDesc in the buffer */
215b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 maxAltDescLen;
216b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
217b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* tab knowledge base */
218b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoktab_Graphs tabgraphs;
219b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoktab_Phones tabphones;
220b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoktab_Pos tabpos;
221b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoktab_FixedIds fixedIds;
222b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
223b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* dtposd knowledge base */
224b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picokdt_DtPosD dtposd;
225b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
226b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* dtg2p knowledge base */
227b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picokdt_DtG2P dtg2p;
228b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
229b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* lex knowledge base */
230b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoklex_Lex lex;
231b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
232b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* ulex knowledge bases */
233b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint8 numUlex;
234b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoklex_Lex ulex[PICOKNOW_MAX_NUM_ULEX];
235b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
236b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* fst knowledge bases */
237b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint8 numFsts;
238b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picokfst_FST fst[PICOKNOW_MAX_NUM_WPHO_FSTS];
239b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint8 curFst; /* the fst to be applied next */
240b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
241b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
242b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen} sa_subobj_t;
243b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
244b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
245e9f72c8954f29f10cb4feb16d328a1b5c1fd7169Jean-Michel Trivistatic pico_status_t saInitialize(register picodata_ProcessingUnit this, picoos_int32 resetMode) {
246b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa_subobj_t * sa;
247b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 i;
248b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picokfst_FST fst;
249b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoknow_kb_id_t fstKbIds[PICOKNOW_MAX_NUM_WPHO_FSTS] = PICOKNOW_KBID_WPHO_ARRAY;
250b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoklex_Lex ulex;
251b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoknow_kb_id_t ulexKbIds[PICOKNOW_MAX_NUM_ULEX] = PICOKNOW_KBID_ULEX_ARRAY;
252b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
253b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    PICODBG_DEBUG(("calling"));
254b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
255b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if (NULL == this || NULL == this->subObj) {
256b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        return picoos_emRaiseException(this->common->em,
257b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                       PICO_ERR_NULLPTR_ACCESS, NULL, NULL);
258b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
259b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa = (sa_subobj_t *) this->subObj;
260b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
261b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /*  sa->common = this->common; */
262b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
263b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa->procState = SA_STEPSTATE_COLLECT;
264b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
265b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa->inspaceok = TRUE;
266b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa->needsmoreitems = TRUE;
267b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
268b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa->headxBottom = 0;
269b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa->headxLen = 0;
270b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa->cbuf1BufSize = PICOSA_MAXSIZE_CBUF;
271b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa->cbuf2BufSize = PICOSA_MAXSIZE_CBUF;
272b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa->cbuf1Len = 0;
273b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa->cbuf2Len = 0;
274b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
275b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* init headx, cbuf1, cbuf2 */
276b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    for (i = 0; i < PICOSA_MAXNR_HEADX; i++){
277b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        sa->headx[i].head.type = 0;
278b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        sa->headx[i].head.info1 = PICODATA_ITEMINFO1_NA;
279b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        sa->headx[i].head.info2 = PICODATA_ITEMINFO2_NA;
280b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        sa->headx[i].head.len = 0;
281b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        sa->headx[i].cind = 0;
282b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
283b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    for (i = 0; i < PICOSA_MAXSIZE_CBUF; i++) {
284b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        sa->cbuf1[i] = 0;
285b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        sa->cbuf2[i] = 0;
286b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
287b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
288b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
289b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* possym buffer */
290b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa->phonesTransduced = FALSE;
291b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa->phonBuf = sa->phonBufA;
292b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa->phonBufOut = sa->phonBufB;
293b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa->phonReadPos = 0;
294b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa->phonWritePos = 0;
295b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa->nextReadPos = 0;
296b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
297e9f72c8954f29f10cb4feb16d328a1b5c1fd7169Jean-Michel Trivi    if (resetMode == PICO_RESET_SOFT) {
2987bc39b0d41efe0d8733490d54e14bc392d9f0b6dJean-Michel Trivi        /*following initializations needed only at startup or after a full reset*/
2997bc39b0d41efe0d8733490d54e14bc392d9f0b6dJean-Michel Trivi        return PICO_OK;
3007bc39b0d41efe0d8733490d54e14bc392d9f0b6dJean-Michel Trivi    }
3017bc39b0d41efe0d8733490d54e14bc392d9f0b6dJean-Michel Trivi
302b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* kb fst[] */
303b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa->numFsts = 0;
304b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    for (i = 0; i<PICOKNOW_MAX_NUM_WPHO_FSTS; i++) {
305b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        fst = picokfst_getFST(this->voice->kbArray[fstKbIds[i]]);
306b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        if (NULL != fst) {
307b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            sa->fst[sa->numFsts++] = fst;
308b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
309b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
310b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa->curFst = 0;
311b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    PICODBG_DEBUG(("got %i fsts", sa->numFsts));
312b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* kb fixedIds */
313b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa->fixedIds = picoktab_getFixedIds(this->voice->kbArray[PICOKNOW_KBID_FIXED_IDS]);
314b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
315b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* kb tabgraphs */
316b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa->tabgraphs =
317b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        picoktab_getGraphs(this->voice->kbArray[PICOKNOW_KBID_TAB_GRAPHS]);
318b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if (sa->tabgraphs == NULL) {
319b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
320b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                       NULL, NULL);
321b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
322b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    PICODBG_DEBUG(("got tabgraphs"));
323b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
324b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* kb tabphones */
325b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa->tabphones =
326b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        picoktab_getPhones(this->voice->kbArray[PICOKNOW_KBID_TAB_PHONES]);
327b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if (sa->tabphones == NULL) {
328b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
329b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                       NULL, NULL);
330b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
331b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    PICODBG_DEBUG(("got tabphones"));
332b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
333b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#ifdef PICO_DEBU
334b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    {
335b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        picoos_uint16 itmp;
336b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        for (itmp = 0; itmp < 256; itmp++) {
337b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            if (picoktab_hasVowelProp(sa->tabphones, itmp)) {
338b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                PICODBG_DEBUG(("tabphones hasVowel: %d", itmp));
339b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            }
340b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            if (picoktab_hasDiphthProp(sa->tabphones, itmp)) {
341b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                PICODBG_DEBUG(("tabphones hasDiphth: %d", itmp));
342b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            }
343b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            if (picoktab_hasGlottProp(sa->tabphones, itmp)) {
344b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                PICODBG_DEBUG(("tabphones hasGlott: %d", itmp));
345b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            }
346b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            if (picoktab_hasNonsyllvowelProp(sa->tabphones, itmp)) {
347b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                PICODBG_DEBUG(("tabphones hasNonsyllvowel: %d", itmp));
348b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            }
349b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            if (picoktab_hasSyllconsProp(sa->tabphones, itmp)) {
350b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                PICODBG_DEBUG(("tabphones hasSyllcons: %d", itmp));
351b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            }
352b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            if (picoktab_isPrimstress(sa->tabphones, itmp)) {
353b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                PICODBG_DEBUG(("tabphones isPrimstress: %d", itmp));
354b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            }
355b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            if (picoktab_isSecstress(sa->tabphones, itmp)) {
356b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                PICODBG_DEBUG(("tabphones isSecstress: %d", itmp));
357b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            }
358b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            if (picoktab_isSyllbound(sa->tabphones, itmp)) {
359b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                PICODBG_DEBUG(("tabphones isSyllbound: %d", itmp));
360b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            }
361b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            if (picoktab_isPause(sa->tabphones, itmp)) {
362b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                PICODBG_DEBUG(("tabphones isPause: %d", itmp));
363b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            }
364b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
365b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
366b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        PICODBG_DEBUG(("tabphones primstressID: %d",
367b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       picoktab_getPrimstressID(sa->tabphones)));
368b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        PICODBG_DEBUG(("tabphones secstressID: %d",
369b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       picoktab_getSecstressID(sa->tabphones)));
370b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        PICODBG_DEBUG(("tabphones syllboundID: %d",
371b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       picoktab_getSyllboundID(sa->tabphones)));
372b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        PICODBG_DEBUG(("tabphones pauseID: %d",
373b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       picoktab_getPauseID(sa->tabphones)));
374b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
375b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif
376b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
377b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* kb tabpos */
378b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa->tabpos =
379b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        picoktab_getPos(this->voice->kbArray[PICOKNOW_KBID_TAB_POS]);
380b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if (sa->tabpos == NULL) {
381b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
382b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                       NULL, NULL);
383b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
384b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    PICODBG_DEBUG(("got tabpos"));
385b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
386b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* kb dtposd */
387b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa->dtposd = picokdt_getDtPosD(this->voice->kbArray[PICOKNOW_KBID_DT_POSD]);
388b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if (sa->dtposd == NULL) {
389b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
390b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                       NULL, NULL);
391b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
392b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    PICODBG_DEBUG(("got dtposd"));
393b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
394b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* kb dtg2p */
395b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa->dtg2p = picokdt_getDtG2P(this->voice->kbArray[PICOKNOW_KBID_DT_G2P]);
396b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if (sa->dtg2p == NULL) {
397b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
398b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                       NULL, NULL);
399b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
400b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    PICODBG_DEBUG(("got dtg2p"));
401b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
402b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* kb lex */
403b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa->lex = picoklex_getLex(this->voice->kbArray[PICOKNOW_KBID_LEX_MAIN]);
404b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if (sa->lex == NULL) {
405b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
406b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                       NULL, NULL);
407b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
408b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    PICODBG_DEBUG(("got lex"));
409b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
410b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* kb ulex[] */
411b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa->numUlex = 0;
412b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    for (i = 0; i<PICOKNOW_MAX_NUM_ULEX; i++) {
413b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        ulex = picoklex_getLex(this->voice->kbArray[ulexKbIds[i]]);
414b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        if (NULL != ulex) {
415b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            sa->ulex[sa->numUlex++] = ulex;
416b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
417b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
418b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    PICODBG_DEBUG(("got %i user lexica", sa->numUlex));
419b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
420b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    return PICO_OK;
421b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
422b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
423b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstatic picodata_step_result_t saStep(register picodata_ProcessingUnit this,
424b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                     picoos_int16 mode,
425b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                     picoos_uint16 *numBytesOutput);
426b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
427b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstatic pico_status_t saTerminate(register picodata_ProcessingUnit this) {
428b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    return PICO_OK;
429b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
430b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
431b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstatic pico_status_t saSubObjDeallocate(register picodata_ProcessingUnit this,
432b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                        picoos_MemoryManager mm) {
433b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa_subobj_t * sa;
434b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if (NULL != this) {
435b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        sa = (sa_subobj_t *) this->subObj;
436b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        picotrns_deallocate_alt_desc_buf(mm,&sa->altDescBuf);
437b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        picoos_deallocate(mm, (void *) &this->subObj);
438b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
439b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    return PICO_OK;
440b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
441b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
442b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
443b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenpicodata_ProcessingUnit picosa_newSentAnaUnit(picoos_MemoryManager mm,
444b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                              picoos_Common common,
445b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                              picodata_CharBuffer cbIn,
446b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                              picodata_CharBuffer cbOut,
447b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                              picorsrc_Voice voice) {
448b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picodata_ProcessingUnit this;
449b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa_subobj_t * sa;
450b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    this = picodata_newProcessingUnit(mm, common, cbIn, cbOut, voice);
451b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if (this == NULL) {
452b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        return NULL;
453b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
454b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
455b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    this->initialize = saInitialize;
456b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    PICODBG_DEBUG(("set this->step to saStep"));
457b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    this->step = saStep;
458b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    this->terminate = saTerminate;
459b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    this->subDeallocate = saSubObjDeallocate;
460b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
461b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    this->subObj = picoos_allocate(mm, sizeof(sa_subobj_t));
462b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if (this->subObj == NULL) {
463b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        picoos_deallocate(mm, (void *)&this);
464b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, NULL, NULL);
465b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        return NULL;
466b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
467b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
468b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa = (sa_subobj_t *) this->subObj;
469b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
470b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa->altDescBuf = picotrns_allocate_alt_desc_buf(mm, SA_MAX_ALTDESC_SIZE, &sa->maxAltDescLen);
471b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if (NULL == sa->altDescBuf) {
472b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        picotrns_deallocate_alt_desc_buf(mm,&sa->altDescBuf);
473b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        picoos_deallocate(mm, (void *)&sa);
474b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        picoos_deallocate(mm, (void *)&this);
475b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        picoos_emRaiseException(common->em,PICO_EXC_OUT_OF_MEM, NULL, NULL);
476b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
477b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
478b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
4797bc39b0d41efe0d8733490d54e14bc392d9f0b6dJean-Michel Trivi    saInitialize(this, PICO_RESET_FULL);
480b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    return this;
481b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
482b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
483b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
484b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ***********************************************************************/
485b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* PROCESS_POSD disambiguation functions */
486b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ***********************************************************************/
487b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
488b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* find next POS to the right of 'ind' and return its POS and index */
489b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstatic picoos_uint8 saPosDItemSeqGetPosRight(register picodata_ProcessingUnit this,
490b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                            register sa_subobj_t *sa,
491b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                            const picoos_uint16 ind,
492b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                            const picoos_uint16 top,
493b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                            picoos_uint16 *rightind) {
494b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint8 val;
495b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_int32 i;
496b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
497b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    val = PICOKDT_EPSILON;
498b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    for (i = ind + 1; ((val == PICOKDT_EPSILON) && (i < top)); i++) {
499b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        if ((sa->headx[i].head.type == PICODATA_ITEM_WORDGRAPH) ||
500b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                (sa->headx[i].head.type == PICODATA_ITEM_WORDINDEX)  ||
501b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                (sa->headx[i].head.type == PICODATA_ITEM_WORDPHON) ) {
502b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            val = sa->headx[i].head.info1;
503b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
504b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
505b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    *rightind = i - 1;
506b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    return val;
507b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
508b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
509b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
510b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* left-to-right, for each WORDGRAPH/WORDINDEX/WORDPHON do posd */
511b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstatic pico_status_t saDisambPos(register picodata_ProcessingUnit this,
512b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                 register sa_subobj_t *sa) {
513b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picokdt_classify_result_t dtres;
514b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint8 half_nratt_posd = PICOKDT_NRATT_POSD >> 1;
515b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 valbuf[PICOKDT_NRATT_POSD]; /* only [0..half_nratt_posd] can be >2^8 */
516b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 prevout;   /* direct dt output (hist.) or POS of prev word */
517b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 lastprev3; /* last index of POS(es) found to the left */
518b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 curPOS;     /* POS(es) of current word */
519b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_int32 first;    /* index of first item with POS(es) */
520b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_int32 ci;
521b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint8 okay;       /* two uses: processing okay and lexind resovled */
522b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint8 i;
523b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 inval;
524b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 fallback;
525b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
526b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* set initial values */
527b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    okay = TRUE;
528b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    prevout = PICOKDT_HISTORY_ZERO;
529b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    curPOS = PICODATA_ITEMINFO1_ERR;
530b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    first = 0;
531b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
532b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    while ((first < sa->headxLen) &&
533b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen           (sa->headx[first].head.type != PICODATA_ITEM_WORDGRAPH) &&
534b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen           (sa->headx[first].head.type != PICODATA_ITEM_WORDINDEX) &&
535b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen           (sa->headx[first].head.type != PICODATA_ITEM_WORDPHON)) {
536b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        first++;
537b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
538b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if (first >= sa->headxLen) {
539b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* phrase not containing an item with POSes info, e.g. single flush */
540b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        PICODBG_DEBUG(("no item with POSes found"));
541b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        return PICO_OK;
542b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
543b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
544b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    lastprev3 = first;
545b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
546b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    for (i = 0; i <= half_nratt_posd; i++) {
547b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        valbuf[i] = PICOKDT_HISTORY_ZERO;
548b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
549b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* set POS(es) of current word, will be shifted afterwards */
550b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    valbuf[half_nratt_posd+1] = sa->headx[first].head.info1;
551b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    for (i = half_nratt_posd+2; i < PICOKDT_NRATT_POSD; i++) {
552b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* find next POS to the right and set valbuf[i] */
553b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        valbuf[i] = saPosDItemSeqGetPosRight(this, sa, lastprev3, sa->headxLen, &lastprev3);
554b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
555b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
556b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    PICODBG_TRACE(("headxLen: %d", sa->headxLen));
557b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
558b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* process from left to right all items in headx */
559b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    for (ci = first; ci < sa->headxLen; ci++) {
560b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        okay = TRUE;
561b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
562b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        PICODBG_TRACE(("iter: %d, type: %c", ci, sa->headx[ci].head.type));
563b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
564b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* if not (WORDGRAPH or WORDINDEX) */
565b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        if ((sa->headx[ci].head.type != PICODATA_ITEM_WORDGRAPH) &&
566b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                (sa->headx[ci].head.type != PICODATA_ITEM_WORDINDEX)  &&
567b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                (sa->headx[ci].head.type != PICODATA_ITEM_WORDPHON)) {
568b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            continue;
569b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
570b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
571b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        PICODBG_TRACE(("iter: %d, curPOS: %d", ci, sa->headx[ci].head.info1));
572b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
573b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* no continue so far => at [ci] we have a WORDGRAPH / WORDINDEX item */
574b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* shift all elements one position to the left */
575b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* shift predicted values (history) */
576b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        for (i=1; i<half_nratt_posd; i++) {
577b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            valbuf[i-1] = valbuf[i];
578b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
579b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* insert previously predicted value (now history) */
580b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        valbuf[half_nratt_posd-1] = prevout;
581b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* shift not yet predicted values */
582b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        for (i=half_nratt_posd+1; i<PICOKDT_NRATT_POSD; i++) {
583b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            valbuf[i-1] = valbuf[i];
584b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
585b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* find next POS to the right and set valbuf[PICOKDT_NRATT_POSD-1] */
586b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        valbuf[PICOKDT_NRATT_POSD-1] = saPosDItemSeqGetPosRight(this, sa, lastprev3, sa->headxLen, &lastprev3);
587b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
588b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* just to be on the safe side; the following should never happen */
589b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        if (sa->headx[ci].head.info1 != valbuf[half_nratt_posd]) {
590b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            PICODBG_WARN(("syncing POS"));
591b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
592b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                  NULL, NULL);
593b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            valbuf[half_nratt_posd] = sa->headx[ci].head.info1;
594b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
595b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
596b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        curPOS = valbuf[half_nratt_posd];
597b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
598b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* Check if POS disambiguation not needed */
599b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        if (picoktab_isUniquePos(sa->tabpos, (picoos_uint8) curPOS)) {
600b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            /* not needed */
601b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            inval = 0;
602b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            fallback = 0;
603b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            if (!picokdt_dtPosDreverseMapOutFixed(sa->dtposd, curPOS,
604b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                       &prevout, &fallback)) {
605b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                if (fallback) {
606b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    prevout = fallback;
607b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
608b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                } else {
609b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    PICODBG_ERROR(("problem doing reverse output mapping"));
610b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    prevout = curPOS;
611b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                }
612b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            }
613b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            PICODBG_DEBUG(("keeping: %d", sa->headx[ci].head.info1));
614b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            continue;
615b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
616b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
617b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* assuming PICOKDT_NRATT_POSD == 7 */
618b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        PICODBG_DEBUG(("%d: [%d %d %d %d %d %d %d]",
619b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       ci, valbuf[0], valbuf[1], valbuf[2],
620b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       valbuf[3], valbuf[4], valbuf[5], valbuf[6]));
621b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
622b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* no continue so far => POS disambiguation needed */
623b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* construct input vector, which is set in dtposd */
624b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        if (!picokdt_dtPosDconstructInVec(sa->dtposd, valbuf)) {
625b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            /* error constructing invec */
626b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            PICODBG_WARN(("problem with invec"));
627b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
628b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                  NULL, NULL);
629b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            okay = FALSE;
630b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
631b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* classify */
632b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        if (okay && (!picokdt_dtPosDclassify(sa->dtposd, &prevout))) {
633b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            /* error doing classification */
634b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            PICODBG_WARN(("problem classifying"));
635b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            picoos_emRaiseWarning(this->common->em, PICO_WARN_CLASSIFICATION,
636b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                  NULL, NULL);
637b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            okay = FALSE;
638b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
639b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* decompose */
640b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        if (okay && (!picokdt_dtPosDdecomposeOutClass(sa->dtposd, &dtres))) {
641b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            /* error decomposing */
642b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            PICODBG_WARN(("problem decomposing"));
643b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            picoos_emRaiseWarning(this->common->em, PICO_WARN_OUTVECTOR,
644b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                  NULL, NULL);
645b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            okay = FALSE;
646b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
647b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        if (okay && dtres.set) {
648b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            PICODBG_DEBUG(("in: %d, out: %d", valbuf[3], dtres.class));
649b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        } else {
650b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            PICODBG_WARN(("problem disambiguating POS"));
651b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            dtres.class = PICODATA_ITEMINFO1_ERR;
652b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
653b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
654b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        if (dtres.class > 255) {
655b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            PICODBG_WARN(("dt result outside valid range, setting pos to ERR"));
656b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            dtres.class = PICODATA_ITEMINFO1_ERR;
657b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
658b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
659b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        sa->headx[ci].head.info1 = (picoos_uint8)dtres.class;
660b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        if (sa->headx[ci].head.type == PICODATA_ITEM_WORDINDEX) {
661b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            /* find pos/ind entry in cbuf matching unique,
662b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen               disambiguated POS, adapt current headx cind/len
663b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen               accordingly */
664b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            PICODBG_DEBUG(("select phon based on POS disambiguation"));
665b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            okay = FALSE;
666b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            for (i = 0; i < sa->headx[ci].head.len; i += PICOKLEX_POSIND_SIZE) {
667b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                PICODBG_DEBUG(("comparing POS at cind + %d", i));
668b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                if (picoktab_isPartOfPosGroup(sa->tabpos,
669b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            (picoos_uint8)dtres.class,
670b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            sa->cbuf1[sa->headx[ci].cind + i])) {
671b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    PICODBG_DEBUG(("found match for entry %d",
672b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                   i/PICOKLEX_POSIND_SIZE + 1));
673b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    sa->headx[ci].cind += i;
674b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    okay = TRUE;
675b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    break;
676b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                }
677b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            }
678b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            /* not finding a match is possible if posd predicts a POS that
679b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen               is not part of any of the input POSes -> no warning */
680b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#if defined(PICO_DEBUG)
681b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            if (!okay) {
682b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                PICODBG_DEBUG(("no match found, selecting 1st entry"));
683b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            }
684b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif
685b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            sa->headx[ci].head.len = PICOKLEX_POSIND_SIZE;
686b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
687b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
688b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    return PICO_OK;
689b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
690b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
691b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
692b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ***********************************************************************/
693b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* PROCESS_WPHO functions, copy, lexindex, and g2p */
694b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ***********************************************************************/
695b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
696b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ************** copy ***************/
697b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
698b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstatic pico_status_t saCopyItemContent1to2(register picodata_ProcessingUnit this,
699b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                           register sa_subobj_t *sa,
700b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                           picoos_uint16 ind) {
701b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 i;
702b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 cind1;
703b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
704b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* set headx.cind, and copy content, head unchanged */
705b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    cind1 = sa->headx[ind].cind;
706b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa->headx[ind].cind = sa->cbuf2Len;
707b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
708b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* check cbufLen */
709b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if (sa->headx[ind].head.len > (sa->cbuf2BufSize - sa->cbuf2Len)) {
710b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        sa->headx[ind].head.len = sa->cbuf2BufSize - sa->cbuf2Len;
711b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        PICODBG_WARN(("phones skipped"));
712b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        picoos_emRaiseWarning(this->common->em,
713b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                              PICO_WARN_INCOMPLETE, NULL, NULL);
714b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        if (sa->headx[ind].head.len == 0) {
715b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            sa->headx[ind].cind = 0;
716b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
717b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
718b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
719b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    for (i = 0; i < sa->headx[ind].head.len; i++) {
720b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        sa->cbuf2[sa->cbuf2Len] = sa->cbuf1[cind1 + i];
721b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        sa->cbuf2Len++;
722b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
723b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
724b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    PICODBG_DEBUG(("%c item, len: %d",
725b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   sa->headx[ind].head.type, sa->headx[ind].head.len));
726b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
727b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    return PICO_OK;
728b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
729b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
730b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
731b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ************** lexindex ***************/
732b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
733b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstatic pico_status_t saLexIndLookup(register picodata_ProcessingUnit this,
734b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                    register sa_subobj_t *sa,
735b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                    picoklex_Lex lex,
736b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                    picoos_uint16 ind) {
737b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint8 pos;
738b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint8 *phones;
739b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint8 plen;
740b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 i;
741b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
742b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if (picoklex_lexIndLookup(lex, &(sa->cbuf1[sa->headx[ind].cind + 1]),
743b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                              PICOKLEX_IND_SIZE, &pos, &phones, &plen)) {
744b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        sa->headx[ind].cind = sa->cbuf2Len;
745b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
746b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* check cbufLen */
747b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        if (plen > (sa->cbuf2BufSize - sa->cbuf2Len)) {
748b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            plen = sa->cbuf2BufSize - sa->cbuf2Len;
749b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            PICODBG_WARN(("phones skipped"));
750b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            picoos_emRaiseWarning(this->common->em,
751b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                  PICO_WARN_INCOMPLETE, NULL, NULL);
752b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            if (plen == 0) {
753b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                sa->headx[ind].cind = 0;
754b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            }
755b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
756b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
757b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* set item head, info1, info2 unchanged */
758b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        sa->headx[ind].head.type = PICODATA_ITEM_WORDPHON;
759b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        sa->headx[ind].head.len = plen;
760b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
761b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        for (i = 0; i < plen; i++) {
762b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            sa->cbuf2[sa->cbuf2Len] = phones[i];
763b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            sa->cbuf2Len++;
764b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
765b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
766b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        PICODBG_DEBUG(("%c item, pos: %d, plen: %d",
767b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       PICODATA_ITEM_WORDPHON, pos, plen));
768b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
769b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    } else {
770b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        PICODBG_WARN(("lexIndLookup problem"));
771b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        picoos_emRaiseWarning(this->common->em, PICO_WARN_PU_IRREG_ITEM,
772b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                              NULL, NULL);
773b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
774b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    return PICO_OK;
775b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
776b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
777b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
778b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
779b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ************** g2p ***************/
780b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
781b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
782b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* Name    :   saGetNvowel
783b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen   Function:   returns vowel info in a word or word seq
784b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen   Input   :   sInChar         the grapheme string to be converted in phoneme
785b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen               inLen           number of bytes in grapheme buffer
786b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen               inPos           start position of current grapheme (0..inLen-1)
787b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen   Output  :   nVow            number of vowels in the word
788b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen               nVord           vowel order in the word
789b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen   Returns :   TRUE: processing successful;  FALSE: errors
790b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen*/
791b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstatic picoos_uint8 saGetNrVowel(register picodata_ProcessingUnit this,
792b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                 register sa_subobj_t *sa,
793b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                 const picoos_uint8 *sInChar,
794b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                 const picoos_uint16 inLen,
795b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                 const picoos_uint8 inPos,
796b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                 picoos_uint8 *nVow,
797b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                 picoos_uint8 *nVord) {
798b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint32 nCount;
799b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint32 pos;
800b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint8 cstr[PICOBASE_UTF8_MAXLEN + 1];
801b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
802b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /*defaults*/
803b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    *nVow = 0;
804b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    *nVord = 0;
805b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /*1:check wether the current char is a vowel*/
806b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    pos = inPos;
807b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if (!picobase_get_next_utf8char(sInChar, inLen, &pos, cstr) ||
808b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        !picoktab_hasVowellikeProp(sa->tabgraphs, cstr, PICOBASE_UTF8_MAXLEN)) {
809b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        return FALSE;
810b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
811b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /*2:count number of vowels in current word and find vowel order*/
812b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    for (nCount = 0; nCount < inLen; ) {
813b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen      if (!picobase_get_next_utf8char(sInChar, inLen, &nCount, cstr)) {
814b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            return FALSE;
815b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen      }
816b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        if (picoktab_hasVowellikeProp(sa->tabgraphs, cstr,
817b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                      PICOBASE_UTF8_MAXLEN)) {
818b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            (*nVow)++;
819b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            if (nCount == pos) {
820b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                (*nVord) = (*nVow);
821b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
822b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
823b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
824b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    return TRUE;
825b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
826b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
827b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
828b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* do g2p for a full word, right-to-left */
829b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstatic picoos_uint8 saDoG2P(register picodata_ProcessingUnit this,
830b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            register sa_subobj_t *sa,
831b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            const picoos_uint8 *graph,
832b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            const picoos_uint8 graphlen,
833b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            const picoos_uint8 pos,
834b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            picoos_uint8 *phones,
835b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            const picoos_uint16 phonesmaxlen,
836b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            picoos_uint16 *plen) {
837b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 outNp1Ch; /*last 3 outputs produced*/
838b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 outNp2Ch;
839b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 outNp3Ch;
840b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint8 nPrimary;
841b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint8 nCount;
842b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint32 utfpos;
843b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 nOutVal;
844b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint8 okay;
845b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 phonesind;
846b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint8 nrvow;
847b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint8 ordvow;
848b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picokdt_classify_vecresult_t dtresv;
849b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 i;
850b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
851b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    *plen = 0;
852b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    okay = TRUE;
853b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
854b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* use sa->tmpbuf[PICOSA_MAXITEMSIZE] to temporarly store the
855b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen       phones which are predicted in reverse order. Once all are
856b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen       available put them in phones in usuable order. phonesind is
857b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen       used to fille item in reverse order starting at the end of
858b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen       tmpbuf. */
859b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    phonesind = PICOSA_MAXITEMSIZE - 1;
860b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
861b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* prepare the data for loop operations */
862b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    outNp1Ch = PICOKDT_HISTORY_ZERO;
863b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    outNp2Ch = PICOKDT_HISTORY_ZERO;
864b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    outNp3Ch = PICOKDT_HISTORY_ZERO;
865b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
866b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* inner loop */
867b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    nPrimary = 0;
868b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
869b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* ************************************************/
870b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* go backward grapheme by grapheme, it's utf8... */
871b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* ************************************************/
872b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
873b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* set start nCount to position of start of last utfchar */
874b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* ! watch out! somethimes starting at 1, sometimes at 0,
875b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen       ! sometimes counting per byte, sometimes per UTF8 char */
876b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* nCount is (start position + 1) of utf8 char */
877b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    utfpos = graphlen;
878b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if (picobase_get_prev_utf8charpos(graph, 0, &utfpos)) {
879b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        nCount = utfpos + 1;
880b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    } else {
881b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* should not occurr */
882b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        PICODBG_ERROR(("invalid utf8 string, graphlen: %d", graphlen));
883b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        return FALSE;
884b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
885b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
886b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    while (nCount > 0) {
887b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        PICODBG_TRACE(("right-to-left g2p, count: %d", nCount));
888b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        okay = TRUE;
889b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
890b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        if (!saGetNrVowel(this, sa, graph, graphlen, nCount-1, &nrvow,
891b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                          &ordvow)) {
892b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            nrvow = 0;
893b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            ordvow = 0;
894b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
895b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
896b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* prepare input vector, set inside tree object invec,
897b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen         * g2pBuildVector will call the constructInVec tree method */
898b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        if (!picokdt_dtG2PconstructInVec(sa->dtg2p,
899b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                         graph, /*grapheme start*/
900b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                         graphlen, /*grapheme length*/
901b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                         nCount-1, /*grapheme current position*/
902b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                         pos, /*Word POS*/
903b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                         nrvow, /*nr vowels if vowel, 0 else */
904b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                         ordvow, /*ord of vowel if vowel, 0 el*/
905b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                         &nPrimary,  /*primary stress flag*/
906b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                         outNp1Ch, /*Right phoneme context +1*/
907b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                         outNp2Ch, /*Right phoneme context +2*/
908b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                         outNp3Ch)) { /*Right phon context +3*/
909b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            /*Errors in preparing the input vector : skip processing*/
910b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            PICODBG_WARN(("problem with invec"));
911b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
912b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                  NULL, NULL);
913b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            okay = FALSE;
914b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
915b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
916b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* classify using the invec in the tree object and save the direct
917b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen           tree output also in the tree object */
918b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        if (okay && (!picokdt_dtG2Pclassify(sa->dtg2p, &nOutVal))) {
919b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            /* error doing classification */
920b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            PICODBG_WARN(("problem classifying"));
921b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            picoos_emRaiseWarning(this->common->em, PICO_WARN_CLASSIFICATION,
922b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                  NULL, NULL);
923b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            okay = FALSE;
924b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
925b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
926b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* decompose the invec in the tree object and return result in dtresv */
927b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        if (okay && (!picokdt_dtG2PdecomposeOutClass(sa->dtg2p, &dtresv))) {
928b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            /* error decomposing */
929b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            PICODBG_WARN(("problem decomposing"));
930b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            picoos_emRaiseWarning(this->common->em, PICO_WARN_OUTVECTOR,
931b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                  NULL, NULL);
932b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            okay = FALSE;
933b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
934b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
935b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        if (okay) {
936b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            if ((dtresv.nr == 0) || (dtresv.classvec[0] == PICOKDT_EPSILON)) {
937b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                /* no phones to be added */
938b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                PICODBG_TRACE(("epsilon, no phone added %c", graph[nCount-1]));
939b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                ;
940b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            } else {
941b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                /* add decomposed output to tmpbuf, reverse order */
942b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                for (i = dtresv.nr; ((((PICOSA_MAXITEMSIZE - 1) -
943b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                       phonesind)<phonesmaxlen) &&
944b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                     (i > 0)); ) {
945b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    i--;
946b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    PICODBG_TRACE(("%c %d",graph[nCount-1],dtresv.classvec[i]));
947b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    if (dtresv.classvec[i] > 255) {
948b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        PICODBG_WARN(("dt result outside valid range, "
949b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                      "skipping phone"));
950b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        continue;
951b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    }
952b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    sa->tmpbuf[phonesind--] = (picoos_uint8)dtresv.classvec[i];
953b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    if (!nPrimary) {
954b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        if (picoktab_isPrimstress(sa->tabphones,
955b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                          (picoos_uint8)dtresv.classvec[i])) {
956b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            nPrimary = 1;
957b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            }
958b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    }
959b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    (*plen)++;
960b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                }
961b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                if (i > 0) {
962b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    PICODBG_WARN(("phones skipped"));
963b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    picoos_emRaiseWarning(this->common->em,
964b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                          PICO_WARN_INCOMPLETE, NULL, NULL);
965b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                }
966b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            }
967b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
968b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
969b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /*shift tree output history and update*/
970b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        outNp3Ch = outNp2Ch;
971b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        outNp2Ch = outNp1Ch;
972b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        outNp1Ch = nOutVal;
973b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
974b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* go backward one utf8 char */
975b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* nCount is in +1 domain */
976b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        if (nCount <= 1) {
977b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            /* end of str */
978b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            nCount = 0;
979b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        } else {
980b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            utfpos = nCount - 1;
981b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            if (!picobase_get_prev_utf8charpos(graph, 0, &utfpos)) {
982b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                /* should not occur */
983b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                PICODBG_ERROR(("invalid utf8 string, utfpos: %d", utfpos));
984b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                return FALSE;
985b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            } else {
986b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                nCount = utfpos + 1;
987b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            }
988b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
989b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
990b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
991b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* a must be: (PICOSA_MAXITEMSIZE-1) - phonesind == *plen */
992b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* now that we have all phone IDs, copy in correct order to phones */
993b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* phonesind point to next free slot in the reverse domainn,
994b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen       ie. inc first */
995b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    phonesind++;
996b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    for (i = 0; i < *plen; i++, phonesind++) {
997b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        phones[i] = sa->tmpbuf[phonesind];
998b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
999b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    return TRUE;
1000b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
1001b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1002b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1003b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* item in headx[ind]/cbuf1, out: modified headx and cbuf2 */
1004b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1005b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstatic pico_status_t saGraphemeToPhoneme(register picodata_ProcessingUnit this,
1006b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                         register sa_subobj_t *sa,
1007b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                         picoos_uint16 ind) {
1008b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 plen;
1009b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1010b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    PICODBG_TRACE(("starting g2p"));
1011b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1012b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if (saDoG2P(this, sa, &(sa->cbuf1[sa->headx[ind].cind]),
1013b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                sa->headx[ind].head.len, sa->headx[ind].head.info1,
1014b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                &(sa->cbuf2[sa->cbuf2Len]), (sa->cbuf2BufSize - sa->cbuf2Len),
1015b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                &plen)) {
1016b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1017b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* check of cbuf2Len done in saDoG2P, phones skipped if needed */
1018b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        if (plen > 255) {
1019b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            PICODBG_WARN(("maximum number of phones exceeded (%d), skipping",
1020b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                          plen));
1021b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            plen = 255;
1022b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
1023b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1024b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* set item head, info1, info2 unchanged */
1025b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        sa->headx[ind].head.type = PICODATA_ITEM_WORDPHON;
1026b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        sa->headx[ind].head.len = (picoos_uint8)plen;
1027b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        sa->headx[ind].cind = sa->cbuf2Len;
1028b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        sa->cbuf2Len += plen;
1029b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        PICODBG_DEBUG(("%c item, plen: %d",
1030b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       PICODATA_ITEM_WORDPHON, plen));
1031b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    } else {
1032b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        PICODBG_WARN(("problem doing g2p"));
1033b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        picoos_emRaiseWarning(this->common->em, PICO_WARN_PU_IRREG_ITEM,
1034b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                              NULL, NULL);
1035b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
1036b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    return PICO_OK;
1037b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
1038b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1039b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1040b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ***********************************************************************/
1041b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/*                          extract phonemes of an item into a phonBuf   */
1042b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ***********************************************************************/
1043b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1044b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstatic pico_status_t saAddPhoneme(register sa_subobj_t *sa, picoos_uint16 pos, picoos_uint16 sym) {
1045b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* picoos_uint8 plane, unshifted; */
1046b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1047b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* just for debuging */
1048b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /*
1049b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    unshifted = picotrns_unplane(sym,&plane);
1050b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    PICODBG_DEBUG(("adding %i/%i (%c on plane %i) at phonBuf[%i]",pos,sym,unshifted,plane,sa->phonWritePos));
1051b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    */
1052b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if (PICOTRNS_MAX_NUM_POSSYM <= sa->phonWritePos) {
1053b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* not an error! */
1054b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        PICODBG_DEBUG(("couldn't add because phon buffer full"));
1055b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        return PICO_EXC_BUF_OVERFLOW;
1056b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    } else {
1057b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        sa->phonBuf[sa->phonWritePos].pos = pos;
1058b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        sa->phonBuf[sa->phonWritePos].sym = sym;
1059b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        sa->phonWritePos++;
1060b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        return PICO_OK;
1061b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
1062b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
1063b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1064b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/*
1065b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstatic pico_status_t saAddStartPhoneme(register sa_subobj_t *sa) {
1066b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    return saAddPhoneme(sa, PICOTRNS_POS_IGNORE,
1067b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            (PICOKFST_PLANE_INTERN << 8) + sa->fixedIds->phonStartId);
1068b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
1069b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1070b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1071b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstatic pico_status_t saAddTermPhoneme(register sa_subobj_t *sa) {
1072b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    return saAddPhoneme(sa, PICOTRNS_POS_IGNORE,
1073b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            (PICOKFST_PLANE_INTERN << 8) + sa->fixedIds->phonTermId);
1074b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
1075b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1076b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen*/
1077b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1078b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstatic pico_status_t saExtractPhonemes(register picodata_ProcessingUnit this,
1079b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        register sa_subobj_t *sa, picoos_uint16 pos,
1080b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        picodata_itemhead_t* head, const picoos_uint8* content)
1081b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen{
1082b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    pico_status_t rv= PICO_OK;
1083b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint8 i;
1084b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_int16 fstSymbol;
1085b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#if defined(PICO_DEBUG)
1086b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_char msgstr[SA_MSGSTR_SIZE];
1087b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif
1088b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1089b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    PICODBG_TRACE(("doing item %s",
1090b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    picodata_head_to_string(head,msgstr,SA_MSGSTR_SIZE)));
1091b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /*
1092b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen     Items  considered in a transduction are WORDPHON item. its starting offset within the inBuf is given as
1093b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen     'pos'.
1094b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen     Elements that go into the transduction receive "their" position in the buffer.
1095b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen     */
1096b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa->phonWritePos = 0;
1097b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* WORDPHON(POS,WACC)phon */
1098b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    rv = saAddPhoneme(sa, PICOTRNS_POS_IGNORE,
1099b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                (PICOKFST_PLANE_INTERN << 8) + sa->fixedIds->phonStartId);
1100b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    for (i = 0; i < head->len; i++) {
1101b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        fstSymbol = /* (PICOKFST_PLANE_PHONEMES << 8) + */content[i];
1102b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /*  */
1103b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        PICODBG_TRACE(("adding phoneme %c",fstSymbol));
1104b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        rv = saAddPhoneme(sa, pos+PICODATA_ITEM_HEADSIZE+i, fstSymbol);
1105b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
1106b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    rv = saAddPhoneme(sa, PICOTRNS_POS_IGNORE,
1107b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                (PICOKFST_PLANE_INTERN << 8) + sa->fixedIds->phonTermId);
1108b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa->nextReadPos = pos + PICODATA_ITEM_HEADSIZE +  head->len;
1109b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    return rv;
1110b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
1111b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1112b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1113b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#define SA_POSSYM_OK           0
1114b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#define SA_POSSYM_OUT_OF_RANGE 1
1115b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#define SA_POSSYM_END          2
1116b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#define SA_POSSYM_INVALID     -3
1117b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* *readPos is the next position in phonBuf to be read, and *writePos is the first position not to be read (may be outside
1118b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * buf).
1119b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * 'rangeEnd' is the first possym position outside the desired range.
1120b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * Possible return values:
1121b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * SA_POSSYM_OK            : 'pos' and 'sym' are set to the read possym, *readPos is advanced
1122b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * SA_POSSYM_OUT_OF_RANGE  : pos is out of range. 'pos' is set to that of the read possym, 'sym' is undefined
1123b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * SA_POSSYM_UNDERFLOW     : no more data in buf. 'pos' is set to PICOTRNS_POS_INVALID,    'sym' is undefined
1124b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * SA_POSSYM_INVALID       : "strange" pos.       'pos' is set to PICOTRNS_POS_INVALID,    'sym' is undefined
1125b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen */
1126b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstatic pico_status_t getNextPosSym(sa_subobj_t * sa, picoos_int16 * pos, picoos_int16 * sym,
1127b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        picoos_int16 rangeEnd) {
1128b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* skip POS_IGNORE */
1129b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    while ((sa->phonReadPos < sa->phonWritePos) && (PICOTRNS_POS_IGNORE == sa->phonBuf[sa->phonReadPos].pos))  {
1130b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        PICODBG_DEBUG(("ignoring phone at sa->phonBuf[%i] because it has pos==IGNORE",sa->phonReadPos));
1131b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        sa->phonReadPos++;
1132b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
1133b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if ((sa->phonReadPos < sa->phonWritePos)) {
1134b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        *pos = sa->phonBuf[sa->phonReadPos].pos;
1135b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        if ((PICOTRNS_POS_INSERT == *pos) || ((0 <= *pos) && (*pos < rangeEnd))) {
1136b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            *sym = sa->phonBuf[sa->phonReadPos++].sym;
1137b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            return SA_POSSYM_OK;
1138b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        } else if (*pos < 0){ /* *pos is "strange" (e.g. POS_INVALID) */
1139b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            return SA_POSSYM_INVALID;
1140b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        } else {
1141b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            return SA_POSSYM_OUT_OF_RANGE;
1142b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
1143b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    } else {
1144b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* no more possyms to read */
1145b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        *pos = PICOTRNS_POS_INVALID;
1146b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        return SA_POSSYM_END;
1147b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
1148b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
1149b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1150b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1151b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1152b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1153b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ***********************************************************************/
1154b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/*                          saStep function                              */
1155b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ***********************************************************************/
1156b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1157b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/*
1158b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chencomplete phrase processed in one step, if not fast enough -> rework
1159b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1160b190149a69b110e6719ce0a41877a683f8db7ae7Charles Cheninit, collect into internal buffer, process, and then feed to
1161b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenoutput buffer
1162b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1163b190149a69b110e6719ce0a41877a683f8db7ae7Charles Cheninit state: INIT ext           ext
1164b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstate trans:     in hc1  hc2   out
1165b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1166b190149a69b110e6719ce0a41877a683f8db7ae7Charles ChenINIT | putItem   =  0    0    +1      | BUSY  -> COLL (put B-SBEG item,
1167b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                                   set do-init to false)
1168b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1169b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                    inspace-ok-hc1
1170b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                  needs-more-items-(phrase-or-flush)
1171b190149a69b110e6719ce0a41877a683f8db7ae7Charles ChenCOLL1 |getItems -n +n             0 1 | ATOMIC -> PPOSD     (got items,
1172b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                                      if flush set do-init)
1173b190149a69b110e6719ce0a41877a683f8db7ae7Charles ChenCOLL2 |getItems -n +n             1 0 | ATOMIC -> PPOSD (got items, forced)
1174b190149a69b110e6719ce0a41877a683f8db7ae7Charles ChenCOLL3 |getItems -n +n             1 1 | IDLE          (got items, need more)
1175b190149a69b110e6719ce0a41877a683f8db7ae7Charles ChenCOLL4 |getItems  =  =             1 1 | IDLE             (got no items)
1176b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1177b190149a69b110e6719ce0a41877a683f8db7ae7Charles ChenPPOSD | posd     = ~n~n               | BUSY     -> PWP     (posd done)
1178b190149a69b110e6719ce0a41877a683f8db7ae7Charles ChenPWP   | lex/g2p  = ~n-n  0+n          | BUSY     -> PPHR    (lex/g2p done)
1179b190149a69b110e6719ce0a41877a683f8db7ae7Charles ChenPPHR  | phr      = -n 0 +m=n          | BUSY     -> PACC    (phr done, m>=n)
1180b190149a69b110e6719ce0a41877a683f8db7ae7Charles ChenPACC  | acc      =  0 0 ~m=n          | BUSY     -> FEED    (acc done)
1181b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1182b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                  doinit-flag
1183b190149a69b110e6719ce0a41877a683f8db7ae7Charles ChenFEED | putItems  0  0 0 -m-n  +m  0   | BUSY -> COLL    (put items)
1184b190149a69b110e6719ce0a41877a683f8db7ae7Charles ChenFEED | putItems  0  0 0 -m-n  +m  1   | BUSY -> INIT    (put items)
1185b190149a69b110e6719ce0a41877a683f8db7ae7Charles ChenFEED | putItems  0  0 0 -d-d  +d      | OUT_FULL        (put some items)
1186b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen*/
1187b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1188b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstatic picodata_step_result_t saStep(register picodata_ProcessingUnit this,
1189b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                     picoos_int16 mode,
1190b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                     picoos_uint16 *numBytesOutput) {
1191b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    register sa_subobj_t *sa;
1192b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    pico_status_t rv = PICO_OK;
1193b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    pico_status_t rvP = PICO_OK;
1194b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 blen = 0;
1195b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 clen = 0;
1196b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 i;
1197b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoklex_Lex lex;
1198b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1199b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1200b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if (NULL == this || NULL == this->subObj) {
1201b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        return PICODATA_PU_ERROR;
1202b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
1203b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sa = (sa_subobj_t *) this->subObj;
1204b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    mode = mode;        /* avoid warning "var not used in this function"*/
1205b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    *numBytesOutput = 0;
1206b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    while (1) { /* exit via return */
1207b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        PICODBG_DEBUG(("doing state %i, hLen|c1Len|c2Len: %d|%d|%d",
1208b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       sa->procState, sa->headxLen, sa->cbuf1Len,
1209b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       sa->cbuf2Len));
1210b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1211b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        switch (sa->procState) {
1212b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1213b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            /* *********************************************************/
1214b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            /* collect state: get item(s) from charBuf and store in
1215b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen             * internal buffers, need a complete punctuation-phrase
1216b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen             */
1217b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            case SA_STEPSTATE_COLLECT:
1218b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1219b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                while (sa->inspaceok && sa->needsmoreitems
1220b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       && (PICO_OK ==
1221b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                           (rv = picodata_cbGetItem(this->cbIn, sa->tmpbuf,
1222b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                            PICOSA_MAXITEMSIZE, &blen)))) {
1223b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    rvP = picodata_get_itemparts(sa->tmpbuf,
1224b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                            PICOSA_MAXITEMSIZE,
1225b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                            &(sa->headx[sa->headxLen].head),
1226b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                            &(sa->cbuf1[sa->cbuf1Len]),
1227b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                            sa->cbuf1BufSize-sa->cbuf1Len,
1228b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                            &clen);
1229b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    if (rvP != PICO_OK) {
1230b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        PICODBG_ERROR(("problem getting item parts"));
1231b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        picoos_emRaiseException(this->common->em, rvP,
1232b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                                NULL, NULL);
1233b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        return PICODATA_PU_ERROR;
1234b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    }
1235b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1236b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    /* if CMD(...FLUSH...) -> PUNC(...FLUSH...),
1237b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       construct PUNC-FLUSH item in headx */
1238b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    if ((sa->headx[sa->headxLen].head.type ==
1239b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                         PICODATA_ITEM_CMD) &&
1240b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        (sa->headx[sa->headxLen].head.info1 ==
1241b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                         PICODATA_ITEMINFO1_CMD_FLUSH)) {
1242b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        sa->headx[sa->headxLen].head.type =
1243b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            PICODATA_ITEM_PUNC;
1244b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        sa->headx[sa->headxLen].head.info1 =
1245b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            PICODATA_ITEMINFO1_PUNC_FLUSH;
1246b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        sa->headx[sa->headxLen].head.info2 =
1247b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            PICODATA_ITEMINFO2_PUNC_SENT_T;
1248b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        sa->headx[sa->headxLen].head.len = 0;
1249b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    }
1250b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1251b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    /* convert opening phoneme command to WORDPHON
1252b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                     * and assign user-POS XX to it (Bug 432) */
1253b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    sa->headx[sa->headxLen].cind = sa->cbuf1Len;
1254b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    /* maybe overwritten later */
1255b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    if ((sa->headx[sa->headxLen].head.type ==
1256b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        PICODATA_ITEM_CMD) &&
1257b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       (sa->headx[sa->headxLen].head.info1 ==
1258b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        PICODATA_ITEMINFO1_CMD_PHONEME)&&
1259b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        (sa->headx[sa->headxLen].head.info2 ==
1260b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                         PICODATA_ITEMINFO2_CMD_START)) {
1261b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        picoos_uint8 i;
1262b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        picoos_uint8 wordsep = picoktab_getWordboundID(sa->tabphones);
1263b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        PICODBG_INFO(("wordsep id is %i",wordsep));
1264b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        sa->headx[sa->headxLen].head.type = PICODATA_ITEM_WORDPHON;
1265b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        sa->headx[sa->headxLen].head.info1 = PICODATA_POS_XX;
1266b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        sa->headx[sa->headxLen].head.info2 = PICODATA_ITEMINFO2_NA;
1267b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        /* cut off additional words */
1268b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        i = 0;
1269b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        while ((i < sa->headx[sa->headxLen].head.len) && (wordsep != sa->cbuf1[sa->headx[sa->headxLen].cind+i])) {
1270b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            PICODBG_INFO(("accepting phoneme %i",sa->cbuf1[sa->headx[sa->headxLen].cind+i]));
1271b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1272b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            i++;
1273b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        }
1274b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        if (i < sa->headx[sa->headxLen].head.len) {
1275b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            PICODBG_INFO(("cutting off superfluous phonetic words at %i",i));
1276b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            sa->headx[sa->headxLen].head.len = i;
1277b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        }
1278b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    }
1279b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1280b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    /* check/set needsmoreitems */
1281b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    if (sa->headx[sa->headxLen].head.type ==
1282b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        PICODATA_ITEM_PUNC) {
1283b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        sa->needsmoreitems = FALSE;
1284b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    }
1285b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1286b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    /* check/set inspaceok, keep spare slot for forcing */
1287b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    if ((sa->headxLen >= (PICOSA_MAXNR_HEADX - 2)) ||
1288b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        ((sa->cbuf1BufSize - sa->cbuf1Len) <
1289b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                         PICOSA_MAXITEMSIZE)) {
1290b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        sa->inspaceok = FALSE;
1291b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    }
1292b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1293b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    if (clen > 0) {
1294b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        sa->headx[sa->headxLen].cind = sa->cbuf1Len;
1295b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        sa->cbuf1Len += clen;
1296b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    } else {
1297b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        sa->headx[sa->headxLen].cind = 0;
1298b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    }
1299b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    sa->headxLen++;
1300b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                }
1301b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1302b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                if (!sa->needsmoreitems) {
1303b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    /* 1, phrase buffered */
1304b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    sa->procState = SA_STEPSTATE_PROCESS_POSD;
1305b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    return PICODATA_PU_ATOMIC;
1306b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                } else if (!sa->inspaceok) {
1307b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    /* 2, forced phrase end */
1308b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    /* at least one slot is still free, use it to
1309b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       force a trailing PUNC item */
1310b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    sa->headx[sa->headxLen].head.type = PICODATA_ITEM_PUNC;
1311b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    sa->headx[sa->headxLen].head.info1 =
1312b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        PICODATA_ITEMINFO1_PUNC_PHRASEEND;
1313b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    sa->headx[sa->headxLen].head.info2 =
1314b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        PICODATA_ITEMINFO2_PUNC_PHRASE_FORCED;
1315b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    sa->headx[sa->headxLen].head.len = 0;
1316b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    sa->needsmoreitems = FALSE; /* not really needed for now */
1317b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    sa->headxLen++;
1318b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    PICODBG_WARN(("forcing phrase end, added PUNC_PHRASEEND"));
1319b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    picoos_emRaiseWarning(this->common->em,
1320b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                          PICO_WARN_FALLBACK, NULL,
1321b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                          (picoos_char *)"forced phrase end");
1322b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    sa->procState = SA_STEPSTATE_PROCESS_POSD;
1323b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    return PICODATA_PU_ATOMIC;
1324b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                } else if (rv == PICO_EOF) {
1325b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    /* 3, 4 */
1326b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    return PICODATA_PU_IDLE;
1327b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                } else if ((rv == PICO_EXC_BUF_UNDERFLOW) ||
1328b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                           (rv == PICO_EXC_BUF_OVERFLOW)) {
1329b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    /* error, no valid item in cb (UNDER) */
1330b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    /*        or tmpbuf not large enough, not possible (OVER) */
1331b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    /* no exception raised, left for ctrl to handle */
1332b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    PICODBG_ERROR(("buffer under/overflow, rv: %d", rv));
1333b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    return PICODATA_PU_ERROR;
1334b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                } else {
1335b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    /* error, only possible if cbGetItem implementation
1336b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       changes without this function being adapted*/
1337b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    PICODBG_ERROR(("untreated return value, rv: %d", rv));
1338b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    return PICODATA_PU_ERROR;
1339b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                }
1340b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                break;
1341b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1342b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1343b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            /* *********************************************************/
1344b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            /* process posd state: process items in headx/cbuf1
1345b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen             * and change in place
1346b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen             */
1347b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            case SA_STEPSTATE_PROCESS_POSD:
1348b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                /* ensure there is an item in inBuf */
1349b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                if (sa->headxLen > 0) {
1350b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    /* we have a phrase in headx, cbuf1 (can be
1351b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       single PUNC item without POS), do pos disamb */
1352b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    if (PICO_OK != saDisambPos(this, sa)) {
1353b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        picoos_emRaiseException(this->common->em,
1354b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                                PICO_ERR_OTHER, NULL, NULL);
1355b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        return PICODATA_PU_ERROR;
1356b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    }
1357b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    sa->procState = SA_STEPSTATE_PROCESS_WPHO;
1358b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1359b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                } else if (sa->headxLen == 0) {    /* no items in inBuf */
1360b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    PICODBG_WARN(("no items in inBuf"));
1361b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    sa->procState = SA_STEPSTATE_COLLECT;
1362b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    return PICODATA_PU_BUSY;
1363b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                }
1364b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1365b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#if defined (PICO_DEBUG)
1366b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                if (1) {
1367b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    picoos_uint8 i, j, ittype;
1368b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    for (i = 0; i < sa->headxLen; i++) {
1369b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        ittype = sa->headx[i].head.type;
1370b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        PICODBG_INFO_CTX();
1371b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        PICODBG_INFO_MSG(("sa-d: ("));
1372b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        PICODBG_INFO_MSG(("'%c',", ittype));
1373b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        if ((32 <= sa->headx[i].head.info1) &&
1374b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            (sa->headx[i].head.info1 < 127) &&
1375b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            (ittype != PICODATA_ITEM_WORDGRAPH) &&
1376b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            (ittype != PICODATA_ITEM_WORDINDEX)) {
1377b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            PICODBG_INFO_MSG(("'%c',",sa->headx[i].head.info1));
1378b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        } else {
1379b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            PICODBG_INFO_MSG(("%3d,", sa->headx[i].head.info1));
1380b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        }
1381b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        if ((32 <= sa->headx[i].head.info2) &&
1382b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            (sa->headx[i].head.info2 < 127)) {
1383b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            PICODBG_INFO_MSG(("'%c',",sa->headx[i].head.info2));
1384b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        } else {
1385b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            PICODBG_INFO_MSG(("%3d,", sa->headx[i].head.info2));
1386b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        }
1387b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        PICODBG_INFO_MSG(("%3d)", sa->headx[i].head.len));
1388b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1389b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        for (j = 0; j < sa->headx[i].head.len; j++) {
1390b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            if ((ittype == PICODATA_ITEM_WORDGRAPH) ||
1391b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                (ittype == PICODATA_ITEM_CMD)) {
1392b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                PICODBG_INFO_MSG(("%c",
1393b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                        sa->cbuf1[sa->headx[i].cind+j]));
1394b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            } else {
1395b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                PICODBG_INFO_MSG(("%4d",
1396b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                        sa->cbuf1[sa->headx[i].cind+j]));
1397b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            }
1398b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        }
1399b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        PICODBG_INFO_MSG(("\n"));
1400b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    }
1401b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                }
1402b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif
1403b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1404b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                break;
1405b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1406b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1407b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            /* *********************************************************/
1408b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            /* process wpho state: process items in headx/cbuf1 and modify
1409b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen             * headx in place and fill cbuf2
1410b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen             */
1411b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            case SA_STEPSTATE_PROCESS_WPHO:
1412b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                /* ensure there is an item in inBuf */
1413b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                if (sa->headxLen > 0) {
1414b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    /* we have a phrase in headx, cbuf1 (can be single
1415b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       PUNC item), do lex lookup, g2p, or copy */
1416b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1417b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    /* check if cbuf2 is empty as it should be */
1418b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    if (sa->cbuf2Len > 0) {
1419b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        /* enforce emptiness */
1420b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        PICODBG_WARN(("forcing empty cbuf2, discarding buf"));
1421b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        picoos_emRaiseWarning(this->common->em,
1422b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                              PICO_WARN_PU_DISCARD_BUF,
1423b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                              NULL, NULL);
1424b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    }
1425b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1426b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    /* cbuf2 overflow avoided in saGrapheme*, saLexInd*,
1427b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       saCopyItem*, phones skipped if needed */
1428b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    for (i = 0; i < sa->headxLen; i++) {
1429b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        switch (sa->headx[i].head.type) {
1430b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            case PICODATA_ITEM_WORDGRAPH:
1431b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                if (PICO_OK != saGraphemeToPhoneme(this, sa,
1432b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                                                   i)) {
1433b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                    /* not possible, phones skipped if needed */
1434b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                    picoos_emRaiseException(this->common->em,
1435b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                                            PICO_ERR_OTHER,
1436b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                                            NULL, NULL);
1437b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                    return PICODATA_PU_ERROR;
1438b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                }
1439b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                break;
1440b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            case PICODATA_ITEM_WORDINDEX:
1441b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                if (0 == sa->headx[i].head.info2) {
1442b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                  lex = sa->lex;
1443b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                } else {
1444b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                    lex = sa->ulex[sa->headx[i].head.info2-1];
1445b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                }
1446b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                if (PICO_OK != saLexIndLookup(this, sa, lex, i)) {
1447b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                    /* not possible, phones skipped if needed */
1448b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                    picoos_emRaiseException(this->common->em,
1449b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                                            PICO_ERR_OTHER,
1450b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                                            NULL, NULL);
1451b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                    return PICODATA_PU_ERROR;
1452b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                }
1453b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                break;
1454b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            default:
1455b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                /* copy item unmodified, ie. headx untouched,
1456b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                   content from cbuf1 to cbuf2 */
1457b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                if (PICO_OK != saCopyItemContent1to2(this, sa,
1458b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                                                     i)) {
1459b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                    /* not possible, phones skipped if needed */
1460b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                    picoos_emRaiseException(this->common->em,
1461b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                                            PICO_ERR_OTHER,
1462b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                                            NULL, NULL);
1463b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                    return PICODATA_PU_ERROR;
1464b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                }
1465b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                break;
1466b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        }
1467b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    }
1468b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    /* set cbuf1 to empty */
1469b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    sa->cbuf1Len = 0;
1470b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    sa->procState = SA_STEPSTATE_PROCESS_TRNS_PARSE;
1471b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1472b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                } else if (sa->headxLen == 0) {    /* no items in inBuf */
1473b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    PICODBG_WARN(("no items in inBuf"));
1474b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    sa->procState = SA_STEPSTATE_COLLECT;
1475b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    return PICODATA_PU_BUSY;
1476b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                }
1477b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1478b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#if defined (PICO_DEBUG)
1479b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                if (1) {
1480b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    picoos_uint8 i, j, ittype;
1481b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    for (i = 0; i < sa->headxLen; i++) {
1482b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        ittype = sa->headx[i].head.type;
1483b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        PICODBG_INFO_CTX();
1484b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        PICODBG_INFO_MSG(("sa-g: ("));
1485b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        PICODBG_INFO_MSG(("'%c',", ittype));
1486b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        if ((32 <= sa->headx[i].head.info1) &&
1487b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            (sa->headx[i].head.info1 < 127) &&
1488b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            (ittype != PICODATA_ITEM_WORDPHON)) {
1489b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            PICODBG_INFO_MSG(("'%c',",sa->headx[i].head.info1));
1490b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        } else {
1491b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            PICODBG_INFO_MSG(("%3d,", sa->headx[i].head.info1));
1492b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        }
1493b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        if ((32 <= sa->headx[i].head.info2) &&
1494b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            (sa->headx[i].head.info2 < 127)) {
1495b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            PICODBG_INFO_MSG(("'%c',",sa->headx[i].head.info2));
1496b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        } else {
1497b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            PICODBG_INFO_MSG(("%3d,", sa->headx[i].head.info2));
1498b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        }
1499b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        PICODBG_INFO_MSG(("%3d)", sa->headx[i].head.len));
1500b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1501b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        for (j = 0; j < sa->headx[i].head.len; j++) {
1502b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            if ((ittype == PICODATA_ITEM_CMD)) {
1503b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                PICODBG_INFO_MSG(("%c",
1504b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                        sa->cbuf2[sa->headx[i].cind+j]));
1505b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            } else {
1506b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                PICODBG_INFO_MSG(("%4d",
1507b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                        sa->cbuf2[sa->headx[i].cind+j]));
1508b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            }
1509b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        }
1510b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        PICODBG_INFO_MSG(("\n"));
1511b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    }
1512b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                }
1513b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif
1514b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1515b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                break;
1516b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1517b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1518b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                /* *********************************************************/
1519b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                /* transduction parse state: extract phonemes of item in internal outBuf */
1520b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen           case SA_STEPSTATE_PROCESS_TRNS_PARSE:
1521b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1522b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                PICODBG_DEBUG(("transduce item (bot, remain): (%d, %d)",
1523b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                sa->headxBottom, sa->headxLen));
1524b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1525b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                /* check for termination condition first */
1526b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                if (0 == sa->headxLen) {
1527b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    /* reset headx, cbuf2 */
1528b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    sa->headxBottom = 0;
1529b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    sa->cbuf2Len = 0;
1530b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    /* reset collect state support variables */
1531b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    sa->inspaceok = TRUE;
1532b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    sa->needsmoreitems = TRUE;
1533b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1534b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    sa->procState = SA_STEPSTATE_COLLECT;
1535b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    return PICODATA_PU_BUSY;
1536b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                }
1537b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1538b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                sa->procState = SA_STEPSTATE_FEED;
1539b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                /* copy item unmodified */
1540b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                rv = picodata_put_itemparts(
1541b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        &(sa->headx[sa->headxBottom].head),
1542b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        &(sa->cbuf2[sa->headx[sa->headxBottom].cind]),
1543b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        sa->headx[sa->headxBottom].head.len, sa->tmpbuf,
1544b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        PICOSA_MAXITEMSIZE, &blen);
1545b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1546b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                if (PICODATA_ITEM_WORDPHON == sa->headx[sa->headxBottom].head.type) {
1547b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   PICODBG_DEBUG(("PARSE found WORDPHON"));
1548b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   rv = saExtractPhonemes(this, sa, 0, &(sa->headx[sa->headxBottom].head),
1549b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                           &(sa->cbuf2[sa->headx[sa->headxBottom].cind]));
1550b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   if (PICO_OK == rv) {
1551b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       PICODBG_DEBUG(("PARSE successfully returned from phoneme extraction"));
1552b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       sa->procState = SA_STEPSTATE_PROCESS_TRNS_FST;
1553b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   } else {
1554b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       PICODBG_WARN(("PARSE phone extraction returned exception %i, output WORDPHON untransduced",rv));
1555b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   }
1556b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen               } else {
1557b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   PICODBG_DEBUG(("PARSE found other item, just copying"));
1558b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen               }
1559b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen               if (SA_STEPSTATE_FEED == sa->procState) {
1560b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    PICODATA_INFO_ITEM(this->voice->kbArray[PICOKNOW_KBID_DBG],
1561b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            (picoos_uint8 *)"sa-p: ",
1562b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            sa->tmpbuf, PICOSA_MAXITEMSIZE);
1563b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1564b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                }
1565b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1566b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                /* consume item */
1567b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                sa->headxBottom++;
1568b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                sa->headxLen--;
1569b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1570b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                break;
1571b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1572b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                /* *********************************************************/
1573b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                /* transduce state: copy item in internal outBuf to tmpBuf and transduce */
1574b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen           case SA_STEPSTATE_PROCESS_TRNS_FST:
1575b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1576b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1577b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1578b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1579b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1580b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen               /* if no word-level FSTs: doing trivial syllabification instead */
1581b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen               if (0 == sa->numFsts) {
1582b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   PICODBG_DEBUG(("doing trivial sylabification with %i phones", sa->phonWritePos));
1583b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#if defined(PICO_DEBUG)
1584b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   {
1585b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       PICODBG_INFO_CTX();
1586b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       PICODBG_INFO_MSG(("sa trying to trivially syllabify: "));
1587b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], sa->phonBuf, sa->phonWritePos);
1588b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       PICODBG_INFO_MSG(("\n"));
1589b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   }
1590b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif
1591b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1592b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   picotrns_trivial_syllabify(sa->tabphones, sa->phonBuf,
1593b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                           sa->phonWritePos, sa->phonBufOut,
1594b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                           &sa->phonWritePos,PICOTRNS_MAX_NUM_POSSYM);
1595b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   PICODBG_DEBUG(("returned from trivial sylabification with %i phones", sa->phonWritePos));
1596b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#if defined(PICO_DEBUG)
1597b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   {
1598b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       PICODBG_INFO_CTX();
1599b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       PICODBG_INFO_MSG(("sa returned from syllabification: "));
1600b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], sa->phonBufOut, sa->phonWritePos);
1601b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       PICODBG_INFO_MSG(("\n"));
1602b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   }
1603b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif
1604b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1605b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   /* eliminate deep epsilons */
1606b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   PICODBG_DEBUG(("doing epsilon elimination with %i phones", sa->phonWritePos));
1607b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   picotrns_eliminate_epsilons(sa->phonBufOut,
1608b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                           sa->phonWritePos, sa->phonBuf,
1609b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                           &sa->phonWritePos,PICOTRNS_MAX_NUM_POSSYM);
1610b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   PICODBG_DEBUG(("returning from epsilon elimination with %i phones", sa->phonWritePos));
1611b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   sa->phonReadPos = 0;
1612b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   sa->phonesTransduced = 1;
1613b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   sa->procState = SA_STEPSTATE_FEED;
1614b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   break;
1615b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen               }
1616b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1617b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen               /* there are word-level FSTs */
1618b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen               /* termination condition first */
1619b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen               if (sa->curFst >= sa->numFsts) {
1620b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   /* reset for next transduction */
1621b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   sa->curFst = 0;
1622b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   sa->phonReadPos = 0;
1623b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   sa->phonesTransduced = 1;
1624b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   sa->procState = SA_STEPSTATE_FEED;
1625b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   break;
1626b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen               }
1627b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1628b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen               /* transduce from phonBuf to phonBufOut */
1629b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen               {
1630b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1631b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   picoos_uint32 nrSteps;
1632b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#if defined(PICO_DEBUG)
1633b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   {
1634b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       PICODBG_INFO_CTX();
1635b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       PICODBG_INFO_MSG(("sa trying to transduce: "));
1636b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], sa->phonBuf, sa->phonWritePos);
1637b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       PICODBG_INFO_MSG(("\n"));
1638b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   }
1639b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif
1640b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   picotrns_transduce(sa->fst[sa->curFst], FALSE,
1641b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                           picotrns_printSolution, sa->phonBuf, sa->phonWritePos, sa->phonBufOut,
1642b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                           &sa->phonWritePos,
1643b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                           PICOTRNS_MAX_NUM_POSSYM, sa->altDescBuf,
1644b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                           sa->maxAltDescLen, &nrSteps);
1645b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#if defined(PICO_DEBUG)
1646b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   {
1647b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       PICODBG_INFO_CTX();
1648b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       PICODBG_INFO_MSG(("sa returned from transduction: "));
1649b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], sa->phonBufOut, sa->phonWritePos);
1650b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       PICODBG_INFO_MSG(("\n"));
1651b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   }
1652b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif
1653b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen               }
1654b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1655b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1656b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1657b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen               /*
1658b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                The transduction output will contain equivalent items, i.e. (x,y') for each (x,y), plus inserted deep symbols (-1,d).
1659b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                In case of deletions, (x,0) might also be omitted...
1660b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                */
1661b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen               /* eliminate deep epsilons */
1662b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen               picotrns_eliminate_epsilons(sa->phonBufOut,
1663b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       sa->phonWritePos, sa->phonBuf, &sa->phonWritePos,PICOTRNS_MAX_NUM_POSSYM);
1664b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen               sa->phonesTransduced = 1;
1665b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1666b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen               sa->curFst++;
1667b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1668b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen               return PICODATA_PU_ATOMIC;
1669b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen               /* break; */
1670b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1671b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                /* *********************************************************/
1672b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                /* feed state: copy item in internal outBuf to output charBuf */
1673b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1674b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen           case SA_STEPSTATE_FEED:
1675b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1676b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen               PICODBG_DEBUG(("FEED"));
1677b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1678b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen               if (sa->phonesTransduced) {
1679b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   /* replace original phones by transduced */
1680b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   picoos_uint16 phonWritePos = PICODATA_ITEM_HEADSIZE;
1681b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   picoos_uint8 plane;
1682b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   picoos_int16 sym, pos;
1683b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   while (SA_POSSYM_OK == (rv = getNextPosSym(sa,&pos,&sym,sa->nextReadPos))) {
1684b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       PICODBG_TRACE(("FEED inserting phoneme %c into inBuf[%i]",sym,phonWritePos));
1685b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       sym = picotrns_unplane(sym, &plane);
1686b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       PICODBG_ASSERT((PICOKFST_PLANE_PHONEMES == plane));
1687b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       sa->tmpbuf[phonWritePos++] = (picoos_uint8) sym;
1688b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   }
1689b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   PICODBG_DEBUG(("FEED setting item length to %i",phonWritePos - PICODATA_ITEM_HEADSIZE));
1690b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   picodata_set_itemlen(sa->tmpbuf,PICODATA_ITEM_HEADSIZE,phonWritePos - PICODATA_ITEM_HEADSIZE);
1691b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   if (SA_POSSYM_INVALID == rv) {
1692b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       PICODBG_ERROR(("FEED unexpected symbol or unexpected end of phoneme list"));
1693b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       return (picodata_step_result_t)picoos_emRaiseException(this->common->em, PICO_WARN_INCOMPLETE, NULL, NULL);
1694b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   }
1695b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   sa->phonesTransduced = 0;
1696b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1697b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen               } /* if (sa->phonesTransduced) */
1698b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1699b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1700b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                rvP = picodata_cbPutItem(this->cbOut, sa->tmpbuf,
1701b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                PICOSA_MAXITEMSIZE, &clen);
1702b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1703b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                *numBytesOutput += clen;
1704b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1705b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                PICODBG_DEBUG(("put item, status: %d", rvP));
1706b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1707b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                if (rvP == PICO_OK) {
1708b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                } else if (rvP == PICO_EXC_BUF_OVERFLOW) {
1709b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    /* try again next time */
1710b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    PICODBG_DEBUG(("feeding overflow"));
1711b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    return PICODATA_PU_OUT_FULL;
1712b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                } else {
1713b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    /* error, should never happen */
1714b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    PICODBG_ERROR(("untreated return value, rvP: %d", rvP));
1715b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    return PICODATA_PU_ERROR;
1716b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                }
1717b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1718b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                PICODATA_INFO_ITEM(this->voice->kbArray[PICOKNOW_KBID_DBG],
1719b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        (picoos_uint8 *)"sana: ",
1720b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        sa->tmpbuf, PICOSA_MAXITEMSIZE);
1721b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1722b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                sa->procState = SA_STEPSTATE_PROCESS_TRNS_PARSE;
1723b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                /* return PICODATA_PU_BUSY; */
1724b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                break;
1725b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1726b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            default:
1727b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                break;
1728b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        } /* switch */
1729b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1730b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    } /* while */
1731b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1732b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* should never be reached */
1733b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    PICODBG_ERROR(("reached end of function"));
1734b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_emRaiseException(this->common->em, PICO_ERR_OTHER, NULL, NULL);
1735b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    return PICODATA_PU_ERROR;
1736b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
1737b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1738b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#ifdef __cplusplus
1739b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
1740b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif
1741b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1742b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
1743b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* end */
1744