1/*
2 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16/**
17 * @file picosa.c
18 *
19 * sentence analysis - POS disambiguation
20 *
21 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
22 * All rights reserved.
23 *
24 * History:
25 * - 2009-04-20 -- initial version
26 *
27 */
28
29#include "picoos.h"
30#include "picodbg.h"
31#include "picobase.h"
32#include "picokdt.h"
33#include "picoklex.h"
34#include "picoktab.h"
35#include "picokfst.h"
36#include "picotrns.h"
37#include "picodata.h"
38#include "picosa.h"
39
40#ifdef __cplusplus
41extern "C" {
42#endif
43#if 0
44}
45#endif
46
47
/* PU saStep states, stored in sa_subobj_t.procState */
#define SA_STEPSTATE_COLLECT             0
#define SA_STEPSTATE_FEED                2
#define SA_STEPSTATE_PROCESS_POSD       10
#define SA_STEPSTATE_PROCESS_WPHO       11
#define SA_STEPSTATE_PROCESS_TRNS_PARSE 12
#define SA_STEPSTATE_PROCESS_TRNS_FST   13

/* size requested for the transducer's alternative-description buffer */
#define SA_MAX_ALTDESC_SIZE (30*(PICOTRNS_MAX_NUM_POSSYM + 2))

/* size of message strings */
#define SA_MSGSTR_SIZE 32
59
60/*  subobject    : SentAnaUnit
61 *  shortcut     : sa
62 *  context size : one phrase, max. 30 non-PUNC items, for non-processed items
63 *                 one item if internal input empty
64 */
65
66/** @addtogroup picosa
67
68  internal buffers:
69
70  - headx: array for extended item heads of fixed size (head plus
71    index for content, plus two fields for boundary strength/type)
72
73  - cbuf1, cbuf2: buffers for item contents (referenced by index in
74    headx). Future: replace these two buffers by a single double-sided
75    buffer (double shrink-grow type)
76
77  0. bottom up filling of items in headx and cbuf1
78
79  1. POS disambiguation (right-to-left, top-to-bottom):
80  - number and sequence of items unchanged
81  - item content can only get smaller (reducing nr of results in WORDINDEX)
82  -> info stays in "headx, cbuf1" and changed in place                      \n
83     WORDGRAPH(POSes,NA)graph             -> WORDGRAPH(POS,NA)graph         \n
84     WORDINDEX(POSes,NA)POS1ind1...POSNindN  -> WORDINDEX(POS,NA)POS|ind    \n
85
86  2. lex-index lookup and G2P (both directions possible, left-to-right done):
87  - number and sequence of items unchanged, item head info and content
88    changes
89  -> headx changed in place; cbuf1 to cbuf2                                 \n
90     WORDGRAPH(POS,NA)graph    -> WORDPHON(POS,NA)phon                      \n
91     WORDINDEX(POS,NA)POS|ind  -> WORDPHON(POS,NA)phon                      \n
92
93  3. phrasing (right-to-left):
94
95     Previous (before introducing SBEG)\n
96     ----------------------------------
97                                           1|          2|             3|    4|    \n
98     e.g. from      WP WP WP       WP WP PUNC  WP WP PUNC  WP WP WP PUNC FLUSH    \n
99     e.g. to  BINIT WP WP WP BPHR3 WP WP BPHR1 WP WP BSEND WP WP WP BSEND BTERM   \n
100              |1                         |2          |3             |4            \n
101
102     3-level bound state: to keep track of bound strength from end of
103     previous punc-phrase, then BOUND item output as first item
104     (strength from prev punc-phrase and type from current
105     punc-phrase).
106
107     trailing PUNC item       bound states
108                              INIT         SEND         PHR1
109       PUNC(SENTEND, T)       B(I,T)>SEND  B(S,T)>SEND  B(P1,T)>SEND
110       PUNC(SENTEND, Q)       B(I,Q)>SEND  B(S,Q)>SEND  B(P1,Q)>SEND
111       PUNC(SENTEND, E)       B(I,E)>SEND  B(S,E)>SEND  B(P1,E)>SEND
112       PUNC(PHRASEEND, P)     B(I,P)>PHR1  B(S,P)>PHR1  B(P1,P)>PHR1
113       PUNC(PHRASEEND, FORC)  B(I,P)>PHR1  B(S,P)>PHR1  B(P1,P)>PHR1
114       PUNC(FLUSH, T)         B(I,T)..     B(S,T)..     B(P1,T)..
115                                B(T,NA)      B(T,NA)      B(T,NA)
116                                >INIT        >INIT        >INIT
117
118     PHR2/3 case:
119     trailing PUNC item       bound states
120                          INIT              SEND              PHR1
121       PUNC(SENTEND, T)   B(I,P)B(P,T)>SEND B(S,P)B(P,T)>SEND B(P1,P)B(P,T)>SEND
122       PUNC(SENTEND, Q)   B(I,P)B(P,Q)>SEND B(S,P)B(P,Q)>SEND B(P1,P)B(P,Q)>SEND
123       PUNC(SENTEND, E)   B(I,P)B(P,E)>SEND B(S,P)B(P,E)>SEND B(P1,P)B(P,E)>SEND
124       PUNC(PHRASEEND, P) B(I,P)B(P,P)>PHR1 B(S,P)B(P,P)>PHR1 B(P1,P)B(P,P)>PHR1
125       PUNC(PHREND, FORC) B(I,P)B(P,P)>PHR1 B(S,P)B(P,P)>PHR1 B(P1,P)B(P,P)>PHR1
126       PUNC(FLUSH, T)     B(I,P)B(P,T)..    B(S,T)B(P,T)..    B(P1,T)B(P,T)..
127                            B(T,NA)             B(T,NA)             B(T,NA)
128                            >INIT               >INIT               >INIT
129
130     Current
131     --------
132     e.g. from      WP WP WP       WP WP PUNC  WP WP PUNC        WP WP WP PUNC  FLUSH
133     e.g. to  BSBEG WP WP WP BPHR3 WP WP BPHR1 WP WP BSEND BSBEG WP WP WP BSEND BTERM
134              |1                         |2                |3                   |4
135
     2-level bound state: The internal buffer contains one primary phrase (sometimes forced, if the
     buffer is almost full), with the trailing PUNC item included (last item).
     If the trailing PUNC is a primary phrase separator, the
139       item is not output, but instead, the bound state is set to PPHR, so that the correct BOUND can
140       be output at the start of the next primary phrase.
141     Otherwise,
142       the item is converted to the corresponding BOUND and output. the bound state is set to SSEP,
143       so that a BOUND of type SBEG is output at the start of the next primary phrase.
144
145     trailing PUNC item       bound states
146                              SSEP           PPHR
147       PUNC(SENTEND, X)       B(B,X)>SSEP    B(P1,X)>SSEP  (X = T | Q | E)
148       PUNC(FLUSH, T)         B(B,T)>SSEP*    B(P1,T)>SSEP
149       PUNC(PHRASEEND, P)     B(B,P)>PPHR    B(P1,P)>PPHR
150       PUNC(PHRASEEND, FORC)  B(B,P)>PPHR    B(P1,P)>PPHR
151
*    If several sentence separators follow each other (e.g. SEND-FLUSH, SEND-SEND) then
153     all but the first will be treated as an (empty) phrase containing just this item.
154     If this (single) item is a flush, creation of SBEG is suppressed.
155
156
157  - dtphr phrasing tree (rather subphrasing tree it should be called)
158    determines
159      BOUND_PHR2
160      BOUND_PHR3
  - boundary strengths are determined for every word (except the
162    first one) from right-to-left. The boundary types mark the phrase
163    type of the phrase following the boundary.
164  - number of items actually changed (new BOUND items added): because
165    of fixed size without content, two fields are contained in headx
166    to indicate if a BOUND needs to be added to the LEFT of the item.
167    -> headx further extended with boundary strength and type info to
168    indicate that to the left of the headx ele a BOUND needs to be
169    inserted when outputting.
170
171  4. accentuation:
172  - number of items unchanged, content unchanged, only head info changes
173  -> changed in place in headx
174*/
175
176
177typedef struct {
178    picodata_itemhead_t head;
179    picoos_uint16 cind;
180} picosa_headx_t;
181
182
183typedef struct sa_subobj {
184    picoos_uint8 procState; /* for next processing step decision */
185
186    picoos_uint8 inspaceok;      /* flag: headx/cbuf1 has space for an item */
187    picoos_uint8 needsmoreitems; /* flag: need more items */
188    picoos_uint8 phonesTransduced; /* flag: */
189
190    picoos_uint8 tmpbuf[PICODATA_MAX_ITEMSIZE];  /* tmp. location for an item */
191
192    picosa_headx_t headx[PICOSA_MAXNR_HEADX];
193    picoos_uint16 headxBottom; /* bottom */
194    picoos_uint16 headxLen;    /* length, 0 if empty */
195
196    picoos_uint8 cbuf1[PICOSA_MAXSIZE_CBUF];
197    picoos_uint16 cbuf1BufSize; /* actually allocated size */
198    picoos_uint16 cbuf1Len;     /* length, 0 if empty */
199
200    picoos_uint8 cbuf2[PICOSA_MAXSIZE_CBUF];
201    picoos_uint16 cbuf2BufSize; /* actually allocated size */
202    picoos_uint16 cbuf2Len;     /* length, 0 if empty */
203
204    picotrns_possym_t phonBufA[PICOTRNS_MAX_NUM_POSSYM+1];
205    picotrns_possym_t phonBufB[PICOTRNS_MAX_NUM_POSSYM+1];
206    picotrns_possym_t * phonBuf;
207    picotrns_possym_t * phonBufOut;
208    picoos_uint16 phonReadPos, phonWritePos; /* next pos to read from phonBufIn, next pos to write to phonBufIn */
209    picoos_uint16 nextReadPos; /* position of (potential) next item to read from */
210
211
212    /* buffer for internal calculation of transducer */
213    picotrns_AltDesc altDescBuf;
214    /* the number of AltDesc in the buffer */
215    picoos_uint16 maxAltDescLen;
216
217    /* tab knowledge base */
218    picoktab_Graphs tabgraphs;
219    picoktab_Phones tabphones;
220    picoktab_Pos tabpos;
221    picoktab_FixedIds fixedIds;
222
223    /* dtposd knowledge base */
224    picokdt_DtPosD dtposd;
225
226    /* dtg2p knowledge base */
227    picokdt_DtG2P dtg2p;
228
229    /* lex knowledge base */
230    picoklex_Lex lex;
231
232    /* ulex knowledge bases */
233    picoos_uint8 numUlex;
234    picoklex_Lex ulex[PICOKNOW_MAX_NUM_ULEX];
235
236    /* fst knowledge bases */
237    picoos_uint8 numFsts;
238    picokfst_FST fst[PICOKNOW_MAX_NUM_WPHO_FSTS];
239    picoos_uint8 curFst; /* the fst to be applied next */
240
241
242} sa_subobj_t;
243
244
245static pico_status_t saInitialize(register picodata_ProcessingUnit this, picoos_int32 resetMode) {
246    sa_subobj_t * sa;
247    picoos_uint16 i;
248    picokfst_FST fst;
249    picoknow_kb_id_t fstKbIds[PICOKNOW_MAX_NUM_WPHO_FSTS] = PICOKNOW_KBID_WPHO_ARRAY;
250    picoklex_Lex ulex;
251    picoknow_kb_id_t ulexKbIds[PICOKNOW_MAX_NUM_ULEX] = PICOKNOW_KBID_ULEX_ARRAY;
252
253    PICODBG_DEBUG(("calling"));
254
255    if (NULL == this || NULL == this->subObj) {
256        return picoos_emRaiseException(this->common->em,
257                                       PICO_ERR_NULLPTR_ACCESS, NULL, NULL);
258    }
259    sa = (sa_subobj_t *) this->subObj;
260
261    /*  sa->common = this->common; */
262
263    sa->procState = SA_STEPSTATE_COLLECT;
264
265    sa->inspaceok = TRUE;
266    sa->needsmoreitems = TRUE;
267
268    sa->headxBottom = 0;
269    sa->headxLen = 0;
270    sa->cbuf1BufSize = PICOSA_MAXSIZE_CBUF;
271    sa->cbuf2BufSize = PICOSA_MAXSIZE_CBUF;
272    sa->cbuf1Len = 0;
273    sa->cbuf2Len = 0;
274
275    /* init headx, cbuf1, cbuf2 */
276    for (i = 0; i < PICOSA_MAXNR_HEADX; i++){
277        sa->headx[i].head.type = 0;
278        sa->headx[i].head.info1 = PICODATA_ITEMINFO1_NA;
279        sa->headx[i].head.info2 = PICODATA_ITEMINFO2_NA;
280        sa->headx[i].head.len = 0;
281        sa->headx[i].cind = 0;
282    }
283    for (i = 0; i < PICOSA_MAXSIZE_CBUF; i++) {
284        sa->cbuf1[i] = 0;
285        sa->cbuf2[i] = 0;
286    }
287
288
289    /* possym buffer */
290    sa->phonesTransduced = FALSE;
291    sa->phonBuf = sa->phonBufA;
292    sa->phonBufOut = sa->phonBufB;
293    sa->phonReadPos = 0;
294    sa->phonWritePos = 0;
295    sa->nextReadPos = 0;
296
297    if (resetMode == PICO_RESET_SOFT) {
298        /*following initializations needed only at startup or after a full reset*/
299        return PICO_OK;
300    }
301
302    /* kb fst[] */
303    sa->numFsts = 0;
304    for (i = 0; i<PICOKNOW_MAX_NUM_WPHO_FSTS; i++) {
305        fst = picokfst_getFST(this->voice->kbArray[fstKbIds[i]]);
306        if (NULL != fst) {
307            sa->fst[sa->numFsts++] = fst;
308        }
309    }
310    sa->curFst = 0;
311    PICODBG_DEBUG(("got %i fsts", sa->numFsts));
312    /* kb fixedIds */
313    sa->fixedIds = picoktab_getFixedIds(this->voice->kbArray[PICOKNOW_KBID_FIXED_IDS]);
314
315    /* kb tabgraphs */
316    sa->tabgraphs =
317        picoktab_getGraphs(this->voice->kbArray[PICOKNOW_KBID_TAB_GRAPHS]);
318    if (sa->tabgraphs == NULL) {
319        return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
320                                       NULL, NULL);
321    }
322    PICODBG_DEBUG(("got tabgraphs"));
323
324    /* kb tabphones */
325    sa->tabphones =
326        picoktab_getPhones(this->voice->kbArray[PICOKNOW_KBID_TAB_PHONES]);
327    if (sa->tabphones == NULL) {
328        return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
329                                       NULL, NULL);
330    }
331    PICODBG_DEBUG(("got tabphones"));
332
333#ifdef PICO_DEBU
334    {
335        picoos_uint16 itmp;
336        for (itmp = 0; itmp < 256; itmp++) {
337            if (picoktab_hasVowelProp(sa->tabphones, itmp)) {
338                PICODBG_DEBUG(("tabphones hasVowel: %d", itmp));
339            }
340            if (picoktab_hasDiphthProp(sa->tabphones, itmp)) {
341                PICODBG_DEBUG(("tabphones hasDiphth: %d", itmp));
342            }
343            if (picoktab_hasGlottProp(sa->tabphones, itmp)) {
344                PICODBG_DEBUG(("tabphones hasGlott: %d", itmp));
345            }
346            if (picoktab_hasNonsyllvowelProp(sa->tabphones, itmp)) {
347                PICODBG_DEBUG(("tabphones hasNonsyllvowel: %d", itmp));
348            }
349            if (picoktab_hasSyllconsProp(sa->tabphones, itmp)) {
350                PICODBG_DEBUG(("tabphones hasSyllcons: %d", itmp));
351            }
352            if (picoktab_isPrimstress(sa->tabphones, itmp)) {
353                PICODBG_DEBUG(("tabphones isPrimstress: %d", itmp));
354            }
355            if (picoktab_isSecstress(sa->tabphones, itmp)) {
356                PICODBG_DEBUG(("tabphones isSecstress: %d", itmp));
357            }
358            if (picoktab_isSyllbound(sa->tabphones, itmp)) {
359                PICODBG_DEBUG(("tabphones isSyllbound: %d", itmp));
360            }
361            if (picoktab_isPause(sa->tabphones, itmp)) {
362                PICODBG_DEBUG(("tabphones isPause: %d", itmp));
363            }
364        }
365
366        PICODBG_DEBUG(("tabphones primstressID: %d",
367                       picoktab_getPrimstressID(sa->tabphones)));
368        PICODBG_DEBUG(("tabphones secstressID: %d",
369                       picoktab_getSecstressID(sa->tabphones)));
370        PICODBG_DEBUG(("tabphones syllboundID: %d",
371                       picoktab_getSyllboundID(sa->tabphones)));
372        PICODBG_DEBUG(("tabphones pauseID: %d",
373                       picoktab_getPauseID(sa->tabphones)));
374    }
375#endif
376
377    /* kb tabpos */
378    sa->tabpos =
379        picoktab_getPos(this->voice->kbArray[PICOKNOW_KBID_TAB_POS]);
380    if (sa->tabpos == NULL) {
381        return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
382                                       NULL, NULL);
383    }
384    PICODBG_DEBUG(("got tabpos"));
385
386    /* kb dtposd */
387    sa->dtposd = picokdt_getDtPosD(this->voice->kbArray[PICOKNOW_KBID_DT_POSD]);
388    if (sa->dtposd == NULL) {
389        return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
390                                       NULL, NULL);
391    }
392    PICODBG_DEBUG(("got dtposd"));
393
394    /* kb dtg2p */
395    sa->dtg2p = picokdt_getDtG2P(this->voice->kbArray[PICOKNOW_KBID_DT_G2P]);
396    if (sa->dtg2p == NULL) {
397        return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
398                                       NULL, NULL);
399    }
400    PICODBG_DEBUG(("got dtg2p"));
401
402    /* kb lex */
403    sa->lex = picoklex_getLex(this->voice->kbArray[PICOKNOW_KBID_LEX_MAIN]);
404    if (sa->lex == NULL) {
405        return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
406                                       NULL, NULL);
407    }
408    PICODBG_DEBUG(("got lex"));
409
410    /* kb ulex[] */
411    sa->numUlex = 0;
412    for (i = 0; i<PICOKNOW_MAX_NUM_ULEX; i++) {
413        ulex = picoklex_getLex(this->voice->kbArray[ulexKbIds[i]]);
414        if (NULL != ulex) {
415            sa->ulex[sa->numUlex++] = ulex;
416        }
417    }
418    PICODBG_DEBUG(("got %i user lexica", sa->numUlex));
419
420    return PICO_OK;
421}
422
423static picodata_step_result_t saStep(register picodata_ProcessingUnit this,
424                                     picoos_int16 mode,
425                                     picoos_uint16 *numBytesOutput);
426
427static pico_status_t saTerminate(register picodata_ProcessingUnit this) {
428    return PICO_OK;
429}
430
431static pico_status_t saSubObjDeallocate(register picodata_ProcessingUnit this,
432                                        picoos_MemoryManager mm) {
433    sa_subobj_t * sa;
434    if (NULL != this) {
435        sa = (sa_subobj_t *) this->subObj;
436        picotrns_deallocate_alt_desc_buf(mm,&sa->altDescBuf);
437        picoos_deallocate(mm, (void *) &this->subObj);
438    }
439    return PICO_OK;
440}
441
442
443picodata_ProcessingUnit picosa_newSentAnaUnit(picoos_MemoryManager mm,
444                                              picoos_Common common,
445                                              picodata_CharBuffer cbIn,
446                                              picodata_CharBuffer cbOut,
447                                              picorsrc_Voice voice) {
448    picodata_ProcessingUnit this;
449    sa_subobj_t * sa;
450    this = picodata_newProcessingUnit(mm, common, cbIn, cbOut, voice);
451    if (this == NULL) {
452        return NULL;
453    }
454
455    this->initialize = saInitialize;
456    PICODBG_DEBUG(("set this->step to saStep"));
457    this->step = saStep;
458    this->terminate = saTerminate;
459    this->subDeallocate = saSubObjDeallocate;
460
461    this->subObj = picoos_allocate(mm, sizeof(sa_subobj_t));
462    if (this->subObj == NULL) {
463        picoos_deallocate(mm, (void *)&this);
464        picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, NULL, NULL);
465        return NULL;
466    }
467
468    sa = (sa_subobj_t *) this->subObj;
469
470    sa->altDescBuf = picotrns_allocate_alt_desc_buf(mm, SA_MAX_ALTDESC_SIZE, &sa->maxAltDescLen);
471    if (NULL == sa->altDescBuf) {
472        picotrns_deallocate_alt_desc_buf(mm,&sa->altDescBuf);
473        picoos_deallocate(mm, (void *)&sa);
474        picoos_deallocate(mm, (void *)&this);
475        picoos_emRaiseException(common->em,PICO_EXC_OUT_OF_MEM, NULL, NULL);
476    }
477
478
479    saInitialize(this, PICO_RESET_FULL);
480    return this;
481}
482
483
484/* ***********************************************************************/
485/* PROCESS_POSD disambiguation functions */
486/* ***********************************************************************/
487
488/* find next POS to the right of 'ind' and return its POS and index */
489static picoos_uint8 saPosDItemSeqGetPosRight(register picodata_ProcessingUnit this,
490                                            register sa_subobj_t *sa,
491                                            const picoos_uint16 ind,
492                                            const picoos_uint16 top,
493                                            picoos_uint16 *rightind) {
494    picoos_uint8 val;
495    picoos_int32 i;
496
497    val = PICOKDT_EPSILON;
498    for (i = ind + 1; ((val == PICOKDT_EPSILON) && (i < top)); i++) {
499        if ((sa->headx[i].head.type == PICODATA_ITEM_WORDGRAPH) ||
500                (sa->headx[i].head.type == PICODATA_ITEM_WORDINDEX)  ||
501                (sa->headx[i].head.type == PICODATA_ITEM_WORDPHON) ) {
502            val = sa->headx[i].head.info1;
503        }
504    }
505    *rightind = i - 1;
506    return val;
507}
508
509
510/* left-to-right, for each WORDGRAPH/WORDINDEX/WORDPHON do posd */
511static pico_status_t saDisambPos(register picodata_ProcessingUnit this,
512                                 register sa_subobj_t *sa) {
513    picokdt_classify_result_t dtres;
514    picoos_uint8 half_nratt_posd = PICOKDT_NRATT_POSD >> 1;
515    picoos_uint16 valbuf[PICOKDT_NRATT_POSD]; /* only [0..half_nratt_posd] can be >2^8 */
516    picoos_uint16 prevout;   /* direct dt output (hist.) or POS of prev word */
517    picoos_uint16 lastprev3; /* last index of POS(es) found to the left */
518    picoos_uint16 curPOS;     /* POS(es) of current word */
519    picoos_int32 first;    /* index of first item with POS(es) */
520    picoos_int32 ci;
521    picoos_uint8 okay;       /* two uses: processing okay and lexind resovled */
522    picoos_uint8 i;
523    picoos_uint16 inval;
524    picoos_uint16 fallback;
525
526    /* set initial values */
527    okay = TRUE;
528    prevout = PICOKDT_HISTORY_ZERO;
529    curPOS = PICODATA_ITEMINFO1_ERR;
530    first = 0;
531
532    while ((first < sa->headxLen) &&
533           (sa->headx[first].head.type != PICODATA_ITEM_WORDGRAPH) &&
534           (sa->headx[first].head.type != PICODATA_ITEM_WORDINDEX) &&
535           (sa->headx[first].head.type != PICODATA_ITEM_WORDPHON)) {
536        first++;
537    }
538    if (first >= sa->headxLen) {
539        /* phrase not containing an item with POSes info, e.g. single flush */
540        PICODBG_DEBUG(("no item with POSes found"));
541        return PICO_OK;
542    }
543
544    lastprev3 = first;
545
546    for (i = 0; i <= half_nratt_posd; i++) {
547        valbuf[i] = PICOKDT_HISTORY_ZERO;
548    }
549    /* set POS(es) of current word, will be shifted afterwards */
550    valbuf[half_nratt_posd+1] = sa->headx[first].head.info1;
551    for (i = half_nratt_posd+2; i < PICOKDT_NRATT_POSD; i++) {
552    /* find next POS to the right and set valbuf[i] */
553        valbuf[i] = saPosDItemSeqGetPosRight(this, sa, lastprev3, sa->headxLen, &lastprev3);
554    }
555
556    PICODBG_TRACE(("headxLen: %d", sa->headxLen));
557
558    /* process from left to right all items in headx */
559    for (ci = first; ci < sa->headxLen; ci++) {
560        okay = TRUE;
561
562        PICODBG_TRACE(("iter: %d, type: %c", ci, sa->headx[ci].head.type));
563
564        /* if not (WORDGRAPH or WORDINDEX) */
565        if ((sa->headx[ci].head.type != PICODATA_ITEM_WORDGRAPH) &&
566                (sa->headx[ci].head.type != PICODATA_ITEM_WORDINDEX)  &&
567                (sa->headx[ci].head.type != PICODATA_ITEM_WORDPHON)) {
568            continue;
569        }
570
571        PICODBG_TRACE(("iter: %d, curPOS: %d", ci, sa->headx[ci].head.info1));
572
573        /* no continue so far => at [ci] we have a WORDGRAPH / WORDINDEX item */
574        /* shift all elements one position to the left */
575        /* shift predicted values (history) */
576        for (i=1; i<half_nratt_posd; i++) {
577            valbuf[i-1] = valbuf[i];
578        }
579        /* insert previously predicted value (now history) */
580        valbuf[half_nratt_posd-1] = prevout;
581        /* shift not yet predicted values */
582        for (i=half_nratt_posd+1; i<PICOKDT_NRATT_POSD; i++) {
583            valbuf[i-1] = valbuf[i];
584        }
585        /* find next POS to the right and set valbuf[PICOKDT_NRATT_POSD-1] */
586        valbuf[PICOKDT_NRATT_POSD-1] = saPosDItemSeqGetPosRight(this, sa, lastprev3, sa->headxLen, &lastprev3);
587
588        /* just to be on the safe side; the following should never happen */
589        if (sa->headx[ci].head.info1 != valbuf[half_nratt_posd]) {
590            PICODBG_WARN(("syncing POS"));
591            picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
592                                  NULL, NULL);
593            valbuf[half_nratt_posd] = sa->headx[ci].head.info1;
594        }
595
596        curPOS = valbuf[half_nratt_posd];
597
598        /* Check if POS disambiguation not needed */
599        if (picoktab_isUniquePos(sa->tabpos, (picoos_uint8) curPOS)) {
600            /* not needed */
601            inval = 0;
602            fallback = 0;
603            if (!picokdt_dtPosDreverseMapOutFixed(sa->dtposd, curPOS,
604                                       &prevout, &fallback)) {
605                if (fallback) {
606                    prevout = fallback;
607
608                } else {
609                    PICODBG_ERROR(("problem doing reverse output mapping"));
610                    prevout = curPOS;
611                }
612            }
613            PICODBG_DEBUG(("keeping: %d", sa->headx[ci].head.info1));
614            continue;
615        }
616
617        /* assuming PICOKDT_NRATT_POSD == 7 */
618        PICODBG_DEBUG(("%d: [%d %d %d %d %d %d %d]",
619                       ci, valbuf[0], valbuf[1], valbuf[2],
620                       valbuf[3], valbuf[4], valbuf[5], valbuf[6]));
621
622        /* no continue so far => POS disambiguation needed */
623        /* construct input vector, which is set in dtposd */
624        if (!picokdt_dtPosDconstructInVec(sa->dtposd, valbuf)) {
625            /* error constructing invec */
626            PICODBG_WARN(("problem with invec"));
627            picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
628                                  NULL, NULL);
629            okay = FALSE;
630        }
631        /* classify */
632        if (okay && (!picokdt_dtPosDclassify(sa->dtposd, &prevout))) {
633            /* error doing classification */
634            PICODBG_WARN(("problem classifying"));
635            picoos_emRaiseWarning(this->common->em, PICO_WARN_CLASSIFICATION,
636                                  NULL, NULL);
637            okay = FALSE;
638        }
639        /* decompose */
640        if (okay && (!picokdt_dtPosDdecomposeOutClass(sa->dtposd, &dtres))) {
641            /* error decomposing */
642            PICODBG_WARN(("problem decomposing"));
643            picoos_emRaiseWarning(this->common->em, PICO_WARN_OUTVECTOR,
644                                  NULL, NULL);
645            okay = FALSE;
646        }
647        if (okay && dtres.set) {
648            PICODBG_DEBUG(("in: %d, out: %d", valbuf[3], dtres.class));
649        } else {
650            PICODBG_WARN(("problem disambiguating POS"));
651            dtres.class = PICODATA_ITEMINFO1_ERR;
652        }
653
654        if (dtres.class > 255) {
655            PICODBG_WARN(("dt result outside valid range, setting pos to ERR"));
656            dtres.class = PICODATA_ITEMINFO1_ERR;
657        }
658
659        sa->headx[ci].head.info1 = (picoos_uint8)dtres.class;
660        if (sa->headx[ci].head.type == PICODATA_ITEM_WORDINDEX) {
661            /* find pos/ind entry in cbuf matching unique,
662               disambiguated POS, adapt current headx cind/len
663               accordingly */
664            PICODBG_DEBUG(("select phon based on POS disambiguation"));
665            okay = FALSE;
666            for (i = 0; i < sa->headx[ci].head.len; i += PICOKLEX_POSIND_SIZE) {
667                PICODBG_DEBUG(("comparing POS at cind + %d", i));
668                if (picoktab_isPartOfPosGroup(sa->tabpos,
669                            (picoos_uint8)dtres.class,
670                            sa->cbuf1[sa->headx[ci].cind + i])) {
671                    PICODBG_DEBUG(("found match for entry %d",
672                                   i/PICOKLEX_POSIND_SIZE + 1));
673                    sa->headx[ci].cind += i;
674                    okay = TRUE;
675                    break;
676                }
677            }
678            /* not finding a match is possible if posd predicts a POS that
679               is not part of any of the input POSes -> no warning */
680#if defined(PICO_DEBUG)
681            if (!okay) {
682                PICODBG_DEBUG(("no match found, selecting 1st entry"));
683            }
684#endif
685            sa->headx[ci].head.len = PICOKLEX_POSIND_SIZE;
686        }
687    }
688    return PICO_OK;
689}
690
691
692/* ***********************************************************************/
693/* PROCESS_WPHO functions, copy, lexindex, and g2p */
694/* ***********************************************************************/
695
696/* ************** copy ***************/
697
698static pico_status_t saCopyItemContent1to2(register picodata_ProcessingUnit this,
699                                           register sa_subobj_t *sa,
700                                           picoos_uint16 ind) {
701    picoos_uint16 i;
702    picoos_uint16 cind1;
703
704    /* set headx.cind, and copy content, head unchanged */
705    cind1 = sa->headx[ind].cind;
706    sa->headx[ind].cind = sa->cbuf2Len;
707
708    /* check cbufLen */
709    if (sa->headx[ind].head.len > (sa->cbuf2BufSize - sa->cbuf2Len)) {
710        sa->headx[ind].head.len = sa->cbuf2BufSize - sa->cbuf2Len;
711        PICODBG_WARN(("phones skipped"));
712        picoos_emRaiseWarning(this->common->em,
713                              PICO_WARN_INCOMPLETE, NULL, NULL);
714        if (sa->headx[ind].head.len == 0) {
715            sa->headx[ind].cind = 0;
716        }
717    }
718
719    for (i = 0; i < sa->headx[ind].head.len; i++) {
720        sa->cbuf2[sa->cbuf2Len] = sa->cbuf1[cind1 + i];
721        sa->cbuf2Len++;
722    }
723
724    PICODBG_DEBUG(("%c item, len: %d",
725                   sa->headx[ind].head.type, sa->headx[ind].head.len));
726
727    return PICO_OK;
728}
729
730
731/* ************** lexindex ***************/
732
733static pico_status_t saLexIndLookup(register picodata_ProcessingUnit this,
734                                    register sa_subobj_t *sa,
735                                    picoklex_Lex lex,
736                                    picoos_uint16 ind) {
737    picoos_uint8 pos;
738    picoos_uint8 *phones;
739    picoos_uint8 plen;
740    picoos_uint16 i;
741
742    if (picoklex_lexIndLookup(lex, &(sa->cbuf1[sa->headx[ind].cind + 1]),
743                              PICOKLEX_IND_SIZE, &pos, &phones, &plen)) {
744        sa->headx[ind].cind = sa->cbuf2Len;
745
746        /* check cbufLen */
747        if (plen > (sa->cbuf2BufSize - sa->cbuf2Len)) {
748            plen = sa->cbuf2BufSize - sa->cbuf2Len;
749            PICODBG_WARN(("phones skipped"));
750            picoos_emRaiseWarning(this->common->em,
751                                  PICO_WARN_INCOMPLETE, NULL, NULL);
752            if (plen == 0) {
753                sa->headx[ind].cind = 0;
754            }
755        }
756
757        /* set item head, info1, info2 unchanged */
758        sa->headx[ind].head.type = PICODATA_ITEM_WORDPHON;
759        sa->headx[ind].head.len = plen;
760
761        for (i = 0; i < plen; i++) {
762            sa->cbuf2[sa->cbuf2Len] = phones[i];
763            sa->cbuf2Len++;
764        }
765
766        PICODBG_DEBUG(("%c item, pos: %d, plen: %d",
767                       PICODATA_ITEM_WORDPHON, pos, plen));
768
769    } else {
770        PICODBG_WARN(("lexIndLookup problem"));
771        picoos_emRaiseWarning(this->common->em, PICO_WARN_PU_IRREG_ITEM,
772                              NULL, NULL);
773    }
774    return PICO_OK;
775}
776
777
778
779/* ************** g2p ***************/
780
781
782/* Name    :   saGetNvowel
783   Function:   returns vowel info in a word or word seq
784   Input   :   sInChar         the grapheme string to be converted in phoneme
785               inLen           number of bytes in grapheme buffer
786               inPos           start position of current grapheme (0..inLen-1)
787   Output  :   nVow            number of vowels in the word
788               nVord           vowel order in the word
789   Returns :   TRUE: processing successful;  FALSE: errors
790*/
791static picoos_uint8 saGetNrVowel(register picodata_ProcessingUnit this,
792                                 register sa_subobj_t *sa,
793                                 const picoos_uint8 *sInChar,
794                                 const picoos_uint16 inLen,
795                                 const picoos_uint8 inPos,
796                                 picoos_uint8 *nVow,
797                                 picoos_uint8 *nVord) {
798    picoos_uint32 nCount;
799    picoos_uint32 pos;
800    picoos_uint8 cstr[PICOBASE_UTF8_MAXLEN + 1];
801
802    /*defaults*/
803    *nVow = 0;
804    *nVord = 0;
805    /*1:check wether the current char is a vowel*/
806    pos = inPos;
807    if (!picobase_get_next_utf8char(sInChar, inLen, &pos, cstr) ||
808        !picoktab_hasVowellikeProp(sa->tabgraphs, cstr, PICOBASE_UTF8_MAXLEN)) {
809        return FALSE;
810    }
811    /*2:count number of vowels in current word and find vowel order*/
812    for (nCount = 0; nCount < inLen; ) {
813      if (!picobase_get_next_utf8char(sInChar, inLen, &nCount, cstr)) {
814            return FALSE;
815      }
816        if (picoktab_hasVowellikeProp(sa->tabgraphs, cstr,
817                                      PICOBASE_UTF8_MAXLEN)) {
818            (*nVow)++;
819            if (nCount == pos) {
820                (*nVord) = (*nVow);
821        }
822        }
823    }
824    return TRUE;
825}
826
827
828/* do g2p for a full word, right-to-left */
829static picoos_uint8 saDoG2P(register picodata_ProcessingUnit this,
830                            register sa_subobj_t *sa,
831                            const picoos_uint8 *graph,
832                            const picoos_uint8 graphlen,
833                            const picoos_uint8 pos,
834                            picoos_uint8 *phones,
835                            const picoos_uint16 phonesmaxlen,
836                            picoos_uint16 *plen) {
837    picoos_uint16 outNp1Ch; /*last 3 outputs produced*/
838    picoos_uint16 outNp2Ch;
839    picoos_uint16 outNp3Ch;
840    picoos_uint8 nPrimary;
841    picoos_uint8 nCount;
842    picoos_uint32 utfpos;
843    picoos_uint16 nOutVal;
844    picoos_uint8 okay;
845    picoos_uint16 phonesind;
846    picoos_uint8 nrvow;
847    picoos_uint8 ordvow;
848    picokdt_classify_vecresult_t dtresv;
849    picoos_uint16 i;
850
851    *plen = 0;
852    okay = TRUE;
853
854    /* use sa->tmpbuf[PICOSA_MAXITEMSIZE] to temporarly store the
855       phones which are predicted in reverse order. Once all are
856       available put them in phones in usuable order. phonesind is
857       used to fille item in reverse order starting at the end of
858       tmpbuf. */
859    phonesind = PICOSA_MAXITEMSIZE - 1;
860
861    /* prepare the data for loop operations */
862    outNp1Ch = PICOKDT_HISTORY_ZERO;
863    outNp2Ch = PICOKDT_HISTORY_ZERO;
864    outNp3Ch = PICOKDT_HISTORY_ZERO;
865
866    /* inner loop */
867    nPrimary = 0;
868
869    /* ************************************************/
870    /* go backward grapheme by grapheme, it's utf8... */
871    /* ************************************************/
872
873    /* set start nCount to position of start of last utfchar */
874    /* ! watch out! somethimes starting at 1, sometimes at 0,
875       ! sometimes counting per byte, sometimes per UTF8 char */
876    /* nCount is (start position + 1) of utf8 char */
877    utfpos = graphlen;
878    if (picobase_get_prev_utf8charpos(graph, 0, &utfpos)) {
879        nCount = utfpos + 1;
880    } else {
881        /* should not occurr */
882        PICODBG_ERROR(("invalid utf8 string, graphlen: %d", graphlen));
883        return FALSE;
884    }
885
886    while (nCount > 0) {
887        PICODBG_TRACE(("right-to-left g2p, count: %d", nCount));
888        okay = TRUE;
889
890        if (!saGetNrVowel(this, sa, graph, graphlen, nCount-1, &nrvow,
891                          &ordvow)) {
892            nrvow = 0;
893            ordvow = 0;
894        }
895
896        /* prepare input vector, set inside tree object invec,
897         * g2pBuildVector will call the constructInVec tree method */
898        if (!picokdt_dtG2PconstructInVec(sa->dtg2p,
899                                         graph, /*grapheme start*/
900                                         graphlen, /*grapheme length*/
901                                         nCount-1, /*grapheme current position*/
902                                         pos, /*Word POS*/
903                                         nrvow, /*nr vowels if vowel, 0 else */
904                                         ordvow, /*ord of vowel if vowel, 0 el*/
905                                         &nPrimary,  /*primary stress flag*/
906                                         outNp1Ch, /*Right phoneme context +1*/
907                                         outNp2Ch, /*Right phoneme context +2*/
908                                         outNp3Ch)) { /*Right phon context +3*/
909            /*Errors in preparing the input vector : skip processing*/
910            PICODBG_WARN(("problem with invec"));
911            picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
912                                  NULL, NULL);
913            okay = FALSE;
914        }
915
916        /* classify using the invec in the tree object and save the direct
917           tree output also in the tree object */
918        if (okay && (!picokdt_dtG2Pclassify(sa->dtg2p, &nOutVal))) {
919            /* error doing classification */
920            PICODBG_WARN(("problem classifying"));
921            picoos_emRaiseWarning(this->common->em, PICO_WARN_CLASSIFICATION,
922                                  NULL, NULL);
923            okay = FALSE;
924        }
925
926        /* decompose the invec in the tree object and return result in dtresv */
927        if (okay && (!picokdt_dtG2PdecomposeOutClass(sa->dtg2p, &dtresv))) {
928            /* error decomposing */
929            PICODBG_WARN(("problem decomposing"));
930            picoos_emRaiseWarning(this->common->em, PICO_WARN_OUTVECTOR,
931                                  NULL, NULL);
932            okay = FALSE;
933        }
934
935        if (okay) {
936            if ((dtresv.nr == 0) || (dtresv.classvec[0] == PICOKDT_EPSILON)) {
937                /* no phones to be added */
938                PICODBG_TRACE(("epsilon, no phone added %c", graph[nCount-1]));
939                ;
940            } else {
941                /* add decomposed output to tmpbuf, reverse order */
942                for (i = dtresv.nr; ((((PICOSA_MAXITEMSIZE - 1) -
943                                       phonesind)<phonesmaxlen) &&
944                                     (i > 0)); ) {
945                    i--;
946                    PICODBG_TRACE(("%c %d",graph[nCount-1],dtresv.classvec[i]));
947                    if (dtresv.classvec[i] > 255) {
948                        PICODBG_WARN(("dt result outside valid range, "
949                                      "skipping phone"));
950                        continue;
951                    }
952                    sa->tmpbuf[phonesind--] = (picoos_uint8)dtresv.classvec[i];
953                    if (!nPrimary) {
954                        if (picoktab_isPrimstress(sa->tabphones,
955                          (picoos_uint8)dtresv.classvec[i])) {
956                            nPrimary = 1;
957            }
958                    }
959                    (*plen)++;
960                }
961                if (i > 0) {
962                    PICODBG_WARN(("phones skipped"));
963                    picoos_emRaiseWarning(this->common->em,
964                                          PICO_WARN_INCOMPLETE, NULL, NULL);
965                }
966            }
967        }
968
969        /*shift tree output history and update*/
970        outNp3Ch = outNp2Ch;
971        outNp2Ch = outNp1Ch;
972        outNp1Ch = nOutVal;
973
974        /* go backward one utf8 char */
975        /* nCount is in +1 domain */
976        if (nCount <= 1) {
977            /* end of str */
978            nCount = 0;
979        } else {
980            utfpos = nCount - 1;
981            if (!picobase_get_prev_utf8charpos(graph, 0, &utfpos)) {
982                /* should not occur */
983                PICODBG_ERROR(("invalid utf8 string, utfpos: %d", utfpos));
984                return FALSE;
985            } else {
986                nCount = utfpos + 1;
987            }
988        }
989    }
990
991    /* a must be: (PICOSA_MAXITEMSIZE-1) - phonesind == *plen */
992    /* now that we have all phone IDs, copy in correct order to phones */
993    /* phonesind point to next free slot in the reverse domainn,
994       ie. inc first */
995    phonesind++;
996    for (i = 0; i < *plen; i++, phonesind++) {
997        phones[i] = sa->tmpbuf[phonesind];
998    }
999    return TRUE;
1000}
1001
1002
1003/* item in headx[ind]/cbuf1, out: modified headx and cbuf2 */
1004
1005static pico_status_t saGraphemeToPhoneme(register picodata_ProcessingUnit this,
1006                                         register sa_subobj_t *sa,
1007                                         picoos_uint16 ind) {
1008    picoos_uint16 plen;
1009
1010    PICODBG_TRACE(("starting g2p"));
1011
1012    if (saDoG2P(this, sa, &(sa->cbuf1[sa->headx[ind].cind]),
1013                sa->headx[ind].head.len, sa->headx[ind].head.info1,
1014                &(sa->cbuf2[sa->cbuf2Len]), (sa->cbuf2BufSize - sa->cbuf2Len),
1015                &plen)) {
1016
1017        /* check of cbuf2Len done in saDoG2P, phones skipped if needed */
1018        if (plen > 255) {
1019            PICODBG_WARN(("maximum number of phones exceeded (%d), skipping",
1020                          plen));
1021            plen = 255;
1022        }
1023
1024        /* set item head, info1, info2 unchanged */
1025        sa->headx[ind].head.type = PICODATA_ITEM_WORDPHON;
1026        sa->headx[ind].head.len = (picoos_uint8)plen;
1027        sa->headx[ind].cind = sa->cbuf2Len;
1028        sa->cbuf2Len += plen;
1029        PICODBG_DEBUG(("%c item, plen: %d",
1030                       PICODATA_ITEM_WORDPHON, plen));
1031    } else {
1032        PICODBG_WARN(("problem doing g2p"));
1033        picoos_emRaiseWarning(this->common->em, PICO_WARN_PU_IRREG_ITEM,
1034                              NULL, NULL);
1035    }
1036    return PICO_OK;
1037}
1038
1039
1040/* ***********************************************************************/
1041/*                          extract phonemes of an item into a phonBuf   */
1042/* ***********************************************************************/
1043
1044static pico_status_t saAddPhoneme(register sa_subobj_t *sa, picoos_uint16 pos, picoos_uint16 sym) {
1045    /* picoos_uint8 plane, unshifted; */
1046
1047    /* just for debuging */
1048    /*
1049    unshifted = picotrns_unplane(sym,&plane);
1050    PICODBG_DEBUG(("adding %i/%i (%c on plane %i) at phonBuf[%i]",pos,sym,unshifted,plane,sa->phonWritePos));
1051    */
1052    if (PICOTRNS_MAX_NUM_POSSYM <= sa->phonWritePos) {
1053        /* not an error! */
1054        PICODBG_DEBUG(("couldn't add because phon buffer full"));
1055        return PICO_EXC_BUF_OVERFLOW;
1056    } else {
1057        sa->phonBuf[sa->phonWritePos].pos = pos;
1058        sa->phonBuf[sa->phonWritePos].sym = sym;
1059        sa->phonWritePos++;
1060        return PICO_OK;
1061    }
1062}
1063
1064/*
1065static pico_status_t saAddStartPhoneme(register sa_subobj_t *sa) {
1066    return saAddPhoneme(sa, PICOTRNS_POS_IGNORE,
1067            (PICOKFST_PLANE_INTERN << 8) + sa->fixedIds->phonStartId);
1068}
1069
1070
1071static pico_status_t saAddTermPhoneme(register sa_subobj_t *sa) {
1072    return saAddPhoneme(sa, PICOTRNS_POS_IGNORE,
1073            (PICOKFST_PLANE_INTERN << 8) + sa->fixedIds->phonTermId);
1074}
1075
1076*/
1077
1078static pico_status_t saExtractPhonemes(register picodata_ProcessingUnit this,
1079        register sa_subobj_t *sa, picoos_uint16 pos,
1080        picodata_itemhead_t* head, const picoos_uint8* content)
1081{
1082    pico_status_t rv= PICO_OK;
1083    picoos_uint8 i;
1084    picoos_int16 fstSymbol;
1085#if defined(PICO_DEBUG)
1086    picoos_char msgstr[SA_MSGSTR_SIZE];
1087#endif
1088
1089    PICODBG_TRACE(("doing item %s",
1090                    picodata_head_to_string(head,msgstr,SA_MSGSTR_SIZE)));
1091    /*
1092     Items  considered in a transduction are WORDPHON item. its starting offset within the inBuf is given as
1093     'pos'.
1094     Elements that go into the transduction receive "their" position in the buffer.
1095     */
1096    sa->phonWritePos = 0;
1097    /* WORDPHON(POS,WACC)phon */
1098    rv = saAddPhoneme(sa, PICOTRNS_POS_IGNORE,
1099                (PICOKFST_PLANE_INTERN << 8) + sa->fixedIds->phonStartId);
1100    for (i = 0; i < head->len; i++) {
1101        fstSymbol = /* (PICOKFST_PLANE_PHONEMES << 8) + */content[i];
1102        /*  */
1103        PICODBG_TRACE(("adding phoneme %c",fstSymbol));
1104        rv = saAddPhoneme(sa, pos+PICODATA_ITEM_HEADSIZE+i, fstSymbol);
1105    }
1106    rv = saAddPhoneme(sa, PICOTRNS_POS_IGNORE,
1107                (PICOKFST_PLANE_INTERN << 8) + sa->fixedIds->phonTermId);
1108    sa->nextReadPos = pos + PICODATA_ITEM_HEADSIZE +  head->len;
1109    return rv;
1110}
1111
1112
1113#define SA_POSSYM_OK           0
1114#define SA_POSSYM_OUT_OF_RANGE 1
1115#define SA_POSSYM_END          2
1116#define SA_POSSYM_INVALID     -3
1117/* *readPos is the next position in phonBuf to be read, and *writePos is the first position not to be read (may be outside
1118 * buf).
1119 * 'rangeEnd' is the first possym position outside the desired range.
1120 * Possible return values:
1121 * SA_POSSYM_OK            : 'pos' and 'sym' are set to the read possym, *readPos is advanced
1122 * SA_POSSYM_OUT_OF_RANGE  : pos is out of range. 'pos' is set to that of the read possym, 'sym' is undefined
1123 * SA_POSSYM_UNDERFLOW     : no more data in buf. 'pos' is set to PICOTRNS_POS_INVALID,    'sym' is undefined
1124 * SA_POSSYM_INVALID       : "strange" pos.       'pos' is set to PICOTRNS_POS_INVALID,    'sym' is undefined
1125 */
1126static pico_status_t getNextPosSym(sa_subobj_t * sa, picoos_int16 * pos, picoos_int16 * sym,
1127        picoos_int16 rangeEnd) {
1128    /* skip POS_IGNORE */
1129    while ((sa->phonReadPos < sa->phonWritePos) && (PICOTRNS_POS_IGNORE == sa->phonBuf[sa->phonReadPos].pos))  {
1130        PICODBG_DEBUG(("ignoring phone at sa->phonBuf[%i] because it has pos==IGNORE",sa->phonReadPos));
1131        sa->phonReadPos++;
1132    }
1133    if ((sa->phonReadPos < sa->phonWritePos)) {
1134        *pos = sa->phonBuf[sa->phonReadPos].pos;
1135        if ((PICOTRNS_POS_INSERT == *pos) || ((0 <= *pos) && (*pos < rangeEnd))) {
1136            *sym = sa->phonBuf[sa->phonReadPos++].sym;
1137            return SA_POSSYM_OK;
1138        } else if (*pos < 0){ /* *pos is "strange" (e.g. POS_INVALID) */
1139            return SA_POSSYM_INVALID;
1140        } else {
1141            return SA_POSSYM_OUT_OF_RANGE;
1142        }
1143    } else {
1144        /* no more possyms to read */
1145        *pos = PICOTRNS_POS_INVALID;
1146        return SA_POSSYM_END;
1147    }
1148}
1149
1150
1151
1152
1153/* ***********************************************************************/
1154/*                          saStep function                              */
1155/* ***********************************************************************/
1156
1157/*
1158complete phrase processed in one step, if not fast enough -> rework
1159
1160init, collect into internal buffer, process, and then feed to
1161output buffer
1162
1163init state: INIT ext           ext
1164state trans:     in hc1  hc2   out
1165
1166INIT | putItem   =  0    0    +1      | BUSY  -> COLL (put B-SBEG item,
1167                                                   set do-init to false)
1168
1169                                    inspace-ok-hc1
1170                                  needs-more-items-(phrase-or-flush)
1171COLL1 |getItems -n +n             0 1 | ATOMIC -> PPOSD     (got items,
1172                                                      if flush set do-init)
1173COLL2 |getItems -n +n             1 0 | ATOMIC -> PPOSD (got items, forced)
1174COLL3 |getItems -n +n             1 1 | IDLE          (got items, need more)
1175COLL4 |getItems  =  =             1 1 | IDLE             (got no items)
1176
1177PPOSD | posd     = ~n~n               | BUSY     -> PWP     (posd done)
1178PWP   | lex/g2p  = ~n-n  0+n          | BUSY     -> PPHR    (lex/g2p done)
1179PPHR  | phr      = -n 0 +m=n          | BUSY     -> PACC    (phr done, m>=n)
1180PACC  | acc      =  0 0 ~m=n          | BUSY     -> FEED    (acc done)
1181
1182                                  doinit-flag
1183FEED | putItems  0  0 0 -m-n  +m  0   | BUSY -> COLL    (put items)
1184FEED | putItems  0  0 0 -m-n  +m  1   | BUSY -> INIT    (put items)
1185FEED | putItems  0  0 0 -d-d  +d      | OUT_FULL        (put some items)
1186*/
1187
1188static picodata_step_result_t saStep(register picodata_ProcessingUnit this,
1189                                     picoos_int16 mode,
1190                                     picoos_uint16 *numBytesOutput) {
1191    register sa_subobj_t *sa;
1192    pico_status_t rv = PICO_OK;
1193    pico_status_t rvP = PICO_OK;
1194    picoos_uint16 blen = 0;
1195    picoos_uint16 clen = 0;
1196    picoos_uint16 i;
1197    picoklex_Lex lex;
1198
1199
1200    if (NULL == this || NULL == this->subObj) {
1201        return PICODATA_PU_ERROR;
1202    }
1203    sa = (sa_subobj_t *) this->subObj;
1204    mode = mode;        /* avoid warning "var not used in this function"*/
1205    *numBytesOutput = 0;
1206    while (1) { /* exit via return */
1207        PICODBG_DEBUG(("doing state %i, hLen|c1Len|c2Len: %d|%d|%d",
1208                       sa->procState, sa->headxLen, sa->cbuf1Len,
1209                       sa->cbuf2Len));
1210
1211        switch (sa->procState) {
1212
1213            /* *********************************************************/
1214            /* collect state: get item(s) from charBuf and store in
1215             * internal buffers, need a complete punctuation-phrase
1216             */
1217            case SA_STEPSTATE_COLLECT:
1218
1219                while (sa->inspaceok && sa->needsmoreitems
1220                       && (PICO_OK ==
1221                           (rv = picodata_cbGetItem(this->cbIn, sa->tmpbuf,
1222                                            PICOSA_MAXITEMSIZE, &blen)))) {
1223                    rvP = picodata_get_itemparts(sa->tmpbuf,
1224                                            PICOSA_MAXITEMSIZE,
1225                                            &(sa->headx[sa->headxLen].head),
1226                                            &(sa->cbuf1[sa->cbuf1Len]),
1227                                            sa->cbuf1BufSize-sa->cbuf1Len,
1228                                            &clen);
1229                    if (rvP != PICO_OK) {
1230                        PICODBG_ERROR(("problem getting item parts"));
1231                        picoos_emRaiseException(this->common->em, rvP,
1232                                                NULL, NULL);
1233                        return PICODATA_PU_ERROR;
1234                    }
1235
1236                    /* if CMD(...FLUSH...) -> PUNC(...FLUSH...),
1237                       construct PUNC-FLUSH item in headx */
1238                    if ((sa->headx[sa->headxLen].head.type ==
1239                         PICODATA_ITEM_CMD) &&
1240                        (sa->headx[sa->headxLen].head.info1 ==
1241                         PICODATA_ITEMINFO1_CMD_FLUSH)) {
1242                        sa->headx[sa->headxLen].head.type =
1243                            PICODATA_ITEM_PUNC;
1244                        sa->headx[sa->headxLen].head.info1 =
1245                            PICODATA_ITEMINFO1_PUNC_FLUSH;
1246                        sa->headx[sa->headxLen].head.info2 =
1247                            PICODATA_ITEMINFO2_PUNC_SENT_T;
1248                        sa->headx[sa->headxLen].head.len = 0;
1249                    }
1250
1251                    /* convert opening phoneme command to WORDPHON
1252                     * and assign user-POS XX to it (Bug 432) */
1253                    sa->headx[sa->headxLen].cind = sa->cbuf1Len;
1254                    /* maybe overwritten later */
1255                    if ((sa->headx[sa->headxLen].head.type ==
1256                        PICODATA_ITEM_CMD) &&
1257                       (sa->headx[sa->headxLen].head.info1 ==
1258                        PICODATA_ITEMINFO1_CMD_PHONEME)&&
1259                        (sa->headx[sa->headxLen].head.info2 ==
1260                         PICODATA_ITEMINFO2_CMD_START)) {
1261                        picoos_uint8 i;
1262                        picoos_uint8 wordsep = picoktab_getWordboundID(sa->tabphones);
1263                        PICODBG_INFO(("wordsep id is %i",wordsep));
1264                        sa->headx[sa->headxLen].head.type = PICODATA_ITEM_WORDPHON;
1265                        sa->headx[sa->headxLen].head.info1 = PICODATA_POS_XX;
1266                        sa->headx[sa->headxLen].head.info2 = PICODATA_ITEMINFO2_NA;
1267                        /* cut off additional words */
1268                        i = 0;
1269                        while ((i < sa->headx[sa->headxLen].head.len) && (wordsep != sa->cbuf1[sa->headx[sa->headxLen].cind+i])) {
1270                            PICODBG_INFO(("accepting phoneme %i",sa->cbuf1[sa->headx[sa->headxLen].cind+i]));
1271
1272                            i++;
1273                        }
1274                        if (i < sa->headx[sa->headxLen].head.len) {
1275                            PICODBG_INFO(("cutting off superfluous phonetic words at %i",i));
1276                            sa->headx[sa->headxLen].head.len = i;
1277                        }
1278                    }
1279
1280                    /* check/set needsmoreitems */
1281                    if (sa->headx[sa->headxLen].head.type ==
1282                        PICODATA_ITEM_PUNC) {
1283                        sa->needsmoreitems = FALSE;
1284                    }
1285
1286                    /* check/set inspaceok, keep spare slot for forcing */
1287                    if ((sa->headxLen >= (PICOSA_MAXNR_HEADX - 2)) ||
1288                        ((sa->cbuf1BufSize - sa->cbuf1Len) <
1289                         PICOSA_MAXITEMSIZE)) {
1290                        sa->inspaceok = FALSE;
1291                    }
1292
1293                    if (clen > 0) {
1294                        sa->headx[sa->headxLen].cind = sa->cbuf1Len;
1295                        sa->cbuf1Len += clen;
1296                    } else {
1297                        sa->headx[sa->headxLen].cind = 0;
1298                    }
1299                    sa->headxLen++;
1300                }
1301
1302                if (!sa->needsmoreitems) {
1303                    /* 1, phrase buffered */
1304                    sa->procState = SA_STEPSTATE_PROCESS_POSD;
1305                    return PICODATA_PU_ATOMIC;
1306                } else if (!sa->inspaceok) {
1307                    /* 2, forced phrase end */
1308                    /* at least one slot is still free, use it to
1309                       force a trailing PUNC item */
1310                    sa->headx[sa->headxLen].head.type = PICODATA_ITEM_PUNC;
1311                    sa->headx[sa->headxLen].head.info1 =
1312                        PICODATA_ITEMINFO1_PUNC_PHRASEEND;
1313                    sa->headx[sa->headxLen].head.info2 =
1314                        PICODATA_ITEMINFO2_PUNC_PHRASE_FORCED;
1315                    sa->headx[sa->headxLen].head.len = 0;
1316                    sa->needsmoreitems = FALSE; /* not really needed for now */
1317                    sa->headxLen++;
1318                    PICODBG_WARN(("forcing phrase end, added PUNC_PHRASEEND"));
1319                    picoos_emRaiseWarning(this->common->em,
1320                                          PICO_WARN_FALLBACK, NULL,
1321                                          (picoos_char *)"forced phrase end");
1322                    sa->procState = SA_STEPSTATE_PROCESS_POSD;
1323                    return PICODATA_PU_ATOMIC;
1324                } else if (rv == PICO_EOF) {
1325                    /* 3, 4 */
1326                    return PICODATA_PU_IDLE;
1327                } else if ((rv == PICO_EXC_BUF_UNDERFLOW) ||
1328                           (rv == PICO_EXC_BUF_OVERFLOW)) {
1329                    /* error, no valid item in cb (UNDER) */
1330                    /*        or tmpbuf not large enough, not possible (OVER) */
1331                    /* no exception raised, left for ctrl to handle */
1332                    PICODBG_ERROR(("buffer under/overflow, rv: %d", rv));
1333                    return PICODATA_PU_ERROR;
1334                } else {
1335                    /* error, only possible if cbGetItem implementation
1336                       changes without this function being adapted*/
1337                    PICODBG_ERROR(("untreated return value, rv: %d", rv));
1338                    return PICODATA_PU_ERROR;
1339                }
1340                break;
1341
1342
1343            /* *********************************************************/
1344            /* process posd state: process items in headx/cbuf1
1345             * and change in place
1346             */
1347            case SA_STEPSTATE_PROCESS_POSD:
1348                /* ensure there is an item in inBuf */
1349                if (sa->headxLen > 0) {
1350                    /* we have a phrase in headx, cbuf1 (can be
1351                       single PUNC item without POS), do pos disamb */
1352                    if (PICO_OK != saDisambPos(this, sa)) {
1353                        picoos_emRaiseException(this->common->em,
1354                                                PICO_ERR_OTHER, NULL, NULL);
1355                        return PICODATA_PU_ERROR;
1356                    }
1357                    sa->procState = SA_STEPSTATE_PROCESS_WPHO;
1358
1359                } else if (sa->headxLen == 0) {    /* no items in inBuf */
1360                    PICODBG_WARN(("no items in inBuf"));
1361                    sa->procState = SA_STEPSTATE_COLLECT;
1362                    return PICODATA_PU_BUSY;
1363                }
1364
1365#if defined (PICO_DEBUG)
1366                if (1) {
1367                    picoos_uint8 i, j, ittype;
1368                    for (i = 0; i < sa->headxLen; i++) {
1369                        ittype = sa->headx[i].head.type;
1370                        PICODBG_INFO_CTX();
1371                        PICODBG_INFO_MSG(("sa-d: ("));
1372                        PICODBG_INFO_MSG(("'%c',", ittype));
1373                        if ((32 <= sa->headx[i].head.info1) &&
1374                            (sa->headx[i].head.info1 < 127) &&
1375                            (ittype != PICODATA_ITEM_WORDGRAPH) &&
1376                            (ittype != PICODATA_ITEM_WORDINDEX)) {
1377                            PICODBG_INFO_MSG(("'%c',",sa->headx[i].head.info1));
1378                        } else {
1379                            PICODBG_INFO_MSG(("%3d,", sa->headx[i].head.info1));
1380                        }
1381                        if ((32 <= sa->headx[i].head.info2) &&
1382                            (sa->headx[i].head.info2 < 127)) {
1383                            PICODBG_INFO_MSG(("'%c',",sa->headx[i].head.info2));
1384                        } else {
1385                            PICODBG_INFO_MSG(("%3d,", sa->headx[i].head.info2));
1386                        }
1387                        PICODBG_INFO_MSG(("%3d)", sa->headx[i].head.len));
1388
1389                        for (j = 0; j < sa->headx[i].head.len; j++) {
1390                            if ((ittype == PICODATA_ITEM_WORDGRAPH) ||
1391                                (ittype == PICODATA_ITEM_CMD)) {
1392                                PICODBG_INFO_MSG(("%c",
1393                                        sa->cbuf1[sa->headx[i].cind+j]));
1394                            } else {
1395                                PICODBG_INFO_MSG(("%4d",
1396                                        sa->cbuf1[sa->headx[i].cind+j]));
1397                            }
1398                        }
1399                        PICODBG_INFO_MSG(("\n"));
1400                    }
1401                }
1402#endif
1403
1404                break;
1405
1406
1407            /* *********************************************************/
1408            /* process wpho state: process items in headx/cbuf1 and modify
1409             * headx in place and fill cbuf2
1410             */
1411            case SA_STEPSTATE_PROCESS_WPHO:
1412                /* ensure there is an item in inBuf */
1413                if (sa->headxLen > 0) {
1414                    /* we have a phrase in headx, cbuf1 (can be single
1415                       PUNC item), do lex lookup, g2p, or copy */
1416
1417                    /* check if cbuf2 is empty as it should be */
1418                    if (sa->cbuf2Len > 0) {
1419                        /* enforce emptyness */
1420                        PICODBG_WARN(("forcing empty cbuf2, discarding buf"));
1421                        picoos_emRaiseWarning(this->common->em,
1422                                              PICO_WARN_PU_DISCARD_BUF,
1423                                              NULL, NULL);
1424                    }
1425
1426                    /* cbuf2 overflow avoided in saGrapheme*, saLexInd*,
1427                       saCopyItem*, phones skipped if needed */
1428                    for (i = 0; i < sa->headxLen; i++) {
1429                        switch (sa->headx[i].head.type) {
1430                            case PICODATA_ITEM_WORDGRAPH:
1431                                if (PICO_OK != saGraphemeToPhoneme(this, sa,
1432                                                                   i)) {
1433                                    /* not possible, phones skipped if needed */
1434                                    picoos_emRaiseException(this->common->em,
1435                                                            PICO_ERR_OTHER,
1436                                                            NULL, NULL);
1437                                    return PICODATA_PU_ERROR;
1438                                }
1439                                break;
1440                            case PICODATA_ITEM_WORDINDEX:
1441                                if (0 == sa->headx[i].head.info2) {
1442                                  lex = sa->lex;
1443                                } else {
1444                                    lex = sa->ulex[sa->headx[i].head.info2-1];
1445                                }
1446                                if (PICO_OK != saLexIndLookup(this, sa, lex, i)) {
1447                                    /* not possible, phones skipped if needed */
1448                                    picoos_emRaiseException(this->common->em,
1449                                                            PICO_ERR_OTHER,
1450                                                            NULL, NULL);
1451                                    return PICODATA_PU_ERROR;
1452                                }
1453                                break;
1454                            default:
1455                                /* copy item unmodified, ie. headx untouched,
1456                                   content from cbuf1 to cbuf2 */
1457                                if (PICO_OK != saCopyItemContent1to2(this, sa,
1458                                                                     i)) {
1459                                    /* not possible, phones skipped if needed */
1460                                    picoos_emRaiseException(this->common->em,
1461                                                            PICO_ERR_OTHER,
1462                                                            NULL, NULL);
1463                                    return PICODATA_PU_ERROR;
1464                                }
1465                                break;
1466                        }
1467                    }
1468                    /* set cbuf1 to empty */
1469                    sa->cbuf1Len = 0;
1470                    sa->procState = SA_STEPSTATE_PROCESS_TRNS_PARSE;
1471
1472                } else if (sa->headxLen == 0) {    /* no items in inBuf */
1473                    PICODBG_WARN(("no items in inBuf"));
1474                    sa->procState = SA_STEPSTATE_COLLECT;
1475                    return PICODATA_PU_BUSY;
1476                }
1477
1478#if defined (PICO_DEBUG)
1479                if (1) {
1480                    picoos_uint8 i, j, ittype;
1481                    for (i = 0; i < sa->headxLen; i++) {
1482                        ittype = sa->headx[i].head.type;
1483                        PICODBG_INFO_CTX();
1484                        PICODBG_INFO_MSG(("sa-g: ("));
1485                        PICODBG_INFO_MSG(("'%c',", ittype));
1486                        if ((32 <= sa->headx[i].head.info1) &&
1487                            (sa->headx[i].head.info1 < 127) &&
1488                            (ittype != PICODATA_ITEM_WORDPHON)) {
1489                            PICODBG_INFO_MSG(("'%c',",sa->headx[i].head.info1));
1490                        } else {
1491                            PICODBG_INFO_MSG(("%3d,", sa->headx[i].head.info1));
1492                        }
1493                        if ((32 <= sa->headx[i].head.info2) &&
1494                            (sa->headx[i].head.info2 < 127)) {
1495                            PICODBG_INFO_MSG(("'%c',",sa->headx[i].head.info2));
1496                        } else {
1497                            PICODBG_INFO_MSG(("%3d,", sa->headx[i].head.info2));
1498                        }
1499                        PICODBG_INFO_MSG(("%3d)", sa->headx[i].head.len));
1500
1501                        for (j = 0; j < sa->headx[i].head.len; j++) {
1502                            if ((ittype == PICODATA_ITEM_CMD)) {
1503                                PICODBG_INFO_MSG(("%c",
1504                                        sa->cbuf2[sa->headx[i].cind+j]));
1505                            } else {
1506                                PICODBG_INFO_MSG(("%4d",
1507                                        sa->cbuf2[sa->headx[i].cind+j]));
1508                            }
1509                        }
1510                        PICODBG_INFO_MSG(("\n"));
1511                    }
1512                }
1513#endif
1514
1515                break;
1516
1517
1518                /* *********************************************************/
1519                /* transduction parse state: extract phonemes of item in internal outBuf */
1520           case SA_STEPSTATE_PROCESS_TRNS_PARSE:
1521
1522                PICODBG_DEBUG(("transduce item (bot, remain): (%d, %d)",
1523                                sa->headxBottom, sa->headxLen));
1524
1525                /* check for termination condition first */
1526                if (0 == sa->headxLen) {
1527                    /* reset headx, cbuf2 */
1528                    sa->headxBottom = 0;
1529                    sa->cbuf2Len = 0;
1530                    /* reset collect state support variables */
1531                    sa->inspaceok = TRUE;
1532                    sa->needsmoreitems = TRUE;
1533
1534                    sa->procState = SA_STEPSTATE_COLLECT;
1535                    return PICODATA_PU_BUSY;
1536                }
1537
1538                sa->procState = SA_STEPSTATE_FEED;
1539                /* copy item unmodified */
1540                rv = picodata_put_itemparts(
1541                        &(sa->headx[sa->headxBottom].head),
1542                        &(sa->cbuf2[sa->headx[sa->headxBottom].cind]),
1543                        sa->headx[sa->headxBottom].head.len, sa->tmpbuf,
1544                        PICOSA_MAXITEMSIZE, &blen);
1545
1546                if (PICODATA_ITEM_WORDPHON == sa->headx[sa->headxBottom].head.type) {
1547                   PICODBG_DEBUG(("PARSE found WORDPHON"));
1548                   rv = saExtractPhonemes(this, sa, 0, &(sa->headx[sa->headxBottom].head),
1549                           &(sa->cbuf2[sa->headx[sa->headxBottom].cind]));
1550                   if (PICO_OK == rv) {
1551                       PICODBG_DEBUG(("PARSE successfully returned from phoneme extraction"));
1552                       sa->procState = SA_STEPSTATE_PROCESS_TRNS_FST;
1553                   } else {
1554                       PICODBG_WARN(("PARSE phone extraction returned exception %i, output WORDPHON untransduced",rv));
1555                   }
1556               } else {
1557                   PICODBG_DEBUG(("PARSE found other item, just copying"));
1558               }
1559               if (SA_STEPSTATE_FEED == sa->procState) {
1560                    PICODATA_INFO_ITEM(this->voice->kbArray[PICOKNOW_KBID_DBG],
1561                            (picoos_uint8 *)"sa-p: ",
1562                            sa->tmpbuf, PICOSA_MAXITEMSIZE);
1563
1564                }
1565
1566                /* consume item */
1567                sa->headxBottom++;
1568                sa->headxLen--;
1569
1570                break;
1571
1572                /* *********************************************************/
1573                /* transduce state: copy item in internal outBuf to tmpBuf and transduce */
1574           case SA_STEPSTATE_PROCESS_TRNS_FST:
1575
1576
1577
1578
1579
1580               /* if no word-level FSTs: doing trivial syllabification instead */
1581               if (0 == sa->numFsts) {
1582                   PICODBG_DEBUG(("doing trivial sylabification with %i phones", sa->phonWritePos));
1583#if defined(PICO_DEBUG)
1584                   {
1585                       PICODBG_INFO_CTX();
1586                       PICODBG_INFO_MSG(("sa trying to trivially syllabify: "));
1587                       PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], sa->phonBuf, sa->phonWritePos);
1588                       PICODBG_INFO_MSG(("\n"));
1589                   }
1590#endif
1591
1592                   picotrns_trivial_syllabify(sa->tabphones, sa->phonBuf,
1593                           sa->phonWritePos, sa->phonBufOut,
1594                           &sa->phonWritePos,PICOTRNS_MAX_NUM_POSSYM);
1595                   PICODBG_DEBUG(("returned from trivial sylabification with %i phones", sa->phonWritePos));
1596#if defined(PICO_DEBUG)
1597                   {
1598                       PICODBG_INFO_CTX();
1599                       PICODBG_INFO_MSG(("sa returned from syllabification: "));
1600                       PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], sa->phonBufOut, sa->phonWritePos);
1601                       PICODBG_INFO_MSG(("\n"));
1602                   }
1603#endif
1604
1605                   /* eliminate deep epsilons */
1606                   PICODBG_DEBUG(("doing epsilon elimination with %i phones", sa->phonWritePos));
1607                   picotrns_eliminate_epsilons(sa->phonBufOut,
1608                           sa->phonWritePos, sa->phonBuf,
1609                           &sa->phonWritePos,PICOTRNS_MAX_NUM_POSSYM);
1610                   PICODBG_DEBUG(("returning from epsilon elimination with %i phones", sa->phonWritePos));
1611                   sa->phonReadPos = 0;
1612                   sa->phonesTransduced = 1;
1613                   sa->procState = SA_STEPSTATE_FEED;
1614                   break;
1615               }
1616
1617               /* there are word-level FSTs */
1618               /* termination condition first */
1619               if (sa->curFst >= sa->numFsts) {
1620                   /* reset for next transduction */
1621                   sa->curFst = 0;
1622                   sa->phonReadPos = 0;
1623                   sa->phonesTransduced = 1;
1624                   sa->procState = SA_STEPSTATE_FEED;
1625                   break;
1626               }
1627
1628               /* transduce from phonBuf to phonBufOut */
1629               {
1630
1631                   picoos_uint32 nrSteps;
1632#if defined(PICO_DEBUG)
1633                   {
1634                       PICODBG_INFO_CTX();
1635                       PICODBG_INFO_MSG(("sa trying to transduce: "));
1636                       PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], sa->phonBuf, sa->phonWritePos);
1637                       PICODBG_INFO_MSG(("\n"));
1638                   }
1639#endif
1640                   picotrns_transduce(sa->fst[sa->curFst], FALSE,
1641                           picotrns_printSolution, sa->phonBuf, sa->phonWritePos, sa->phonBufOut,
1642                           &sa->phonWritePos,
1643                           PICOTRNS_MAX_NUM_POSSYM, sa->altDescBuf,
1644                           sa->maxAltDescLen, &nrSteps);
1645#if defined(PICO_DEBUG)
1646                   {
1647                       PICODBG_INFO_CTX();
1648                       PICODBG_INFO_MSG(("sa returned from transduction: "));
1649                       PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], sa->phonBufOut, sa->phonWritePos);
1650                       PICODBG_INFO_MSG(("\n"));
1651                   }
1652#endif
1653               }
1654
1655
1656
1657               /*
1658                The transduction output will contain equivalent items, i.e. (x,y') for each (x,y), plus inserted deep symbols (-1,d).
1659                In case of deletions, (x,0) might also be omitted...
1660                */
1661               /* eliminate deep epsilons */
1662               picotrns_eliminate_epsilons(sa->phonBufOut,
1663                       sa->phonWritePos, sa->phonBuf, &sa->phonWritePos,PICOTRNS_MAX_NUM_POSSYM);
1664               sa->phonesTransduced = 1;
1665
1666               sa->curFst++;
1667
1668               return PICODATA_PU_ATOMIC;
1669               /* break; */
1670
1671                /* *********************************************************/
1672                /* feed state: copy item in internal outBuf to output charBuf */
1673
1674           case SA_STEPSTATE_FEED:
1675
1676               PICODBG_DEBUG(("FEED"));
1677
1678               if (sa->phonesTransduced) {
1679                   /* replace original phones by transduced */
1680                   picoos_uint16 phonWritePos = PICODATA_ITEM_HEADSIZE;
1681                   picoos_uint8 plane;
1682                   picoos_int16 sym, pos;
1683                   while (SA_POSSYM_OK == (rv = getNextPosSym(sa,&pos,&sym,sa->nextReadPos))) {
1684                       PICODBG_TRACE(("FEED inserting phoneme %c into inBuf[%i]",sym,phonWritePos));
1685                       sym = picotrns_unplane(sym, &plane);
1686                       PICODBG_ASSERT((PICOKFST_PLANE_PHONEMES == plane));
1687                       sa->tmpbuf[phonWritePos++] = (picoos_uint8) sym;
1688                   }
1689                   PICODBG_DEBUG(("FEED setting item length to %i",phonWritePos - PICODATA_ITEM_HEADSIZE));
1690                   picodata_set_itemlen(sa->tmpbuf,PICODATA_ITEM_HEADSIZE,phonWritePos - PICODATA_ITEM_HEADSIZE);
1691                   if (SA_POSSYM_INVALID == rv) {
1692                       PICODBG_ERROR(("FEED unexpected symbol or unexpected end of phoneme list"));
1693                       return (picodata_step_result_t)picoos_emRaiseException(this->common->em, PICO_WARN_INCOMPLETE, NULL, NULL);
1694                   }
1695                   sa->phonesTransduced = 0;
1696
1697               } /* if (sa->phonesTransduced) */
1698
1699
1700                rvP = picodata_cbPutItem(this->cbOut, sa->tmpbuf,
1701                PICOSA_MAXITEMSIZE, &clen);
1702
1703                *numBytesOutput += clen;
1704
1705                PICODBG_DEBUG(("put item, status: %d", rvP));
1706
1707                if (rvP == PICO_OK) {
1708                } else if (rvP == PICO_EXC_BUF_OVERFLOW) {
1709                    /* try again next time */
1710                    PICODBG_DEBUG(("feeding overflow"));
1711                    return PICODATA_PU_OUT_FULL;
1712                } else {
1713                    /* error, should never happen */
1714                    PICODBG_ERROR(("untreated return value, rvP: %d", rvP));
1715                    return PICODATA_PU_ERROR;
1716                }
1717
1718                PICODATA_INFO_ITEM(this->voice->kbArray[PICOKNOW_KBID_DBG],
1719                        (picoos_uint8 *)"sana: ",
1720                        sa->tmpbuf, PICOSA_MAXITEMSIZE);
1721
1722                sa->procState = SA_STEPSTATE_PROCESS_TRNS_PARSE;
1723                /* return PICODATA_PU_BUSY; */
1724                break;
1725
1726            default:
1727                break;
1728        } /* switch */
1729
1730    } /* while */
1731
1732    /* should never be reached */
1733    PICODBG_ERROR(("reached end of function"));
1734    picoos_emRaiseException(this->common->em, PICO_ERR_OTHER, NULL, NULL);
1735    return PICODATA_PU_ERROR;
1736}
1737
1738#ifdef __cplusplus
1739}
1740#endif
1741
1742
1743/* end */
1744