1/*
2 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16/**
17 * @file picoacph.c
18 *
19 * accentuation and phrasing
20 *
21 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
22 * All rights reserved.
23 *
24 * History:
25 * - 2009-04-20 -- initial version
26 *
27 */
28
29#include "picoos.h"
30#include "picodbg.h"
31#include "picobase.h"
32#include "picodata.h"
33#include "picoacph.h"
34#include "picokdt.h"
35#include "picoklex.h"
36#include "picoktab.h"
37
38#ifdef __cplusplus
39extern "C" {
40#endif
41#if 0
42}
43#endif
44
/* Values held in the procState field of the sub-object; they select what
   the acphStep state machine does next. */
#define SA_STEPSTATE_COLLECT        0
#define SA_STEPSTATE_FEED           2
#define SA_STEPSTATE_PROCESS_PHR   12
#define SA_STEPSTATE_PROCESS_ACC   13


/* Values held in the boundStrengthState field of the sub-object; they
   record which kind of separator ended the previous primary phrase. */
#define SA_BOUNDSTRENGTH_SSEP       0   /* sentence separator */
#define SA_BOUNDSTRENGTH_PPHR       1   /* primary phrase separator */
55
56
57/*  subobject    : AccPhrUnit
58 *  shortcut     : acph
59 *  context size : one phrase, max. 30 non-PUNC items, for non-processed items
60 *                 one item if internal input empty
61 */
62
63/**
64 * @addtogroup picoacph
65 *
66 * <b> Pico Accentuation and Phrasing </b>\n
67 *
68  internal buffers:
69
70  - headx : array for extended item heads of fixed size (head plus
71    index for content, plus two fields for boundary strength/type)
72  - cbuf : buffer for item contents (referenced by index in
73    headx).
74
75  0. bottom up filling of items in headx and cbuf
76
77  1. phrasing (right-to-left):
78
79     e.g. from      WP WP WP       WP WP PUNC  WP WP PUNC        WP WP WP PUNC  FLUSH    \n
80     e.g. to  BSBEG WP WP WP BPHR3 WP WP BPHR1 WP WP BSEND BSBEG WP WP WP BSEND BTERM    \n
81              |1                         |2                |3                   |4        \n
82
83     2-level bound state: The internal buffer contains one primary phrase (sometimes forced, if buffer
     almost full), with the trailing PUNC item included (last item).\n
     If the trailing PUNC is a primary phrase separator, the
86       item is not output, but instead, the bound state is set to PPHR, so that the correct BOUND can
87       be output at the start of the next primary phrase.\n
88     Otherwise,
89       the item is converted to the corresponding BOUND and output. the bound state is set to SSEP,
90       so that a BOUND of type SBEG is output at the start of the next primary phrase.
91
92     trailing PUNC item       bound states                                    \n
93                              SSEP           PPHR                            \n
94       PUNC(SENTEND, X)       B(B,X)>SSEP    B(P1,X)>SSEP  (X = T | Q | E)    \n
95       PUNC(FLUSH, T)         B(B,T)>SSEP*    B(P1,T)>SSEP                    \n
96       PUNC(PHRASEEND, P)     B(B,P)>PPHR    B(P1,P)>PPHR                    \n
97       PUNC(PHRASEEND, FORC)  B(B,P)>PPHR    B(P1,P)>PPHR                    \n
98
99    If more than one sentence separators follow each other (e.g. SEND-FLUSH, SEND-SEND) then
100     all but the first will be treated as an (empty) phrase containing just this item.
101     If this (single) item is a flush, creation of SBEG is suppressed.
102
103
104  - dtphr phrasing tree ("subphrasing")
105    determines
106      - BOUND_PHR2
107      - BOUND_PHR3
  - boundary strengths are determined for every word (except the
109    first one) from right-to-left. The boundary types mark the phrase
110    type of the phrase following the boundary.
111  - number of items actually changed (new BOUND items added): because
112    of fixed size without content, two fields are contained in headx
113    to indicate if a BOUND needs to be added to the LEFT of the item.
114    -> headx further extended with boundary strength and type info to
115    indicate that to the left of the headx ele a BOUND needs to be
116    inserted when outputting.
117
118  2. accentuation:
119  - number of items unchanged, content unchanged, only head info changes
120  -> changed in place in headx
121*/
122
123
124typedef struct {
125    picodata_itemhead_t head;
126    picoos_uint16 cind;
127    picoos_uint8 boundstrength;  /* bstrength to the left, 0 if not set */
128    picoos_uint8 boundtype;      /* btype for following phrase, 0 if not set */
129} picoacph_headx_t;
130
131
132typedef struct acph_subobj {
133    picoos_uint8 procState; /* for next processing step decision */
134    picoos_uint8 boundStrengthState;    /* boundary strength state */
135
136    picoos_uint8 inspaceok;      /* flag: headx/cbuf has space for an item */
137    picoos_uint8 needsmoreitems; /* flag: need more items */
138
139    picoos_uint8 tmpbuf[PICODATA_MAX_ITEMSIZE];  /* tmp. location for an item */
140
141    picoacph_headx_t headx[PICOACPH_MAXNR_HEADX];
142    picoos_uint16 headxBottom; /* bottom */
143    picoos_uint16 headxLen;    /* length, 0 if empty */
144
145    picoos_uint8 cbuf[PICOACPH_MAXSIZE_CBUF];
146    picoos_uint16 cbufBufSize; /* actually allocated size */
147    picoos_uint16 cbufLen;     /* length, 0 if empty */
148
149    /* tab knowledge base */
150    picoktab_Phones tabphones;
151
152    /* dtphr knowledge base */
153    picokdt_DtPHR dtphr;
154
155    /* dtacc knowledge base */
156    picokdt_DtACC dtacc;
157} acph_subobj_t;
158
159
160static pico_status_t acphInitialize(register picodata_ProcessingUnit this, picoos_int32 resetMode) {
161    acph_subobj_t * acph;
162    picoos_uint16 i;
163
164    PICODBG_DEBUG(("calling"));
165
166    if (NULL == this || NULL == this->subObj) {
167        return picoos_emRaiseException(this->common->em,
168                                       PICO_ERR_NULLPTR_ACCESS, NULL, NULL);
169    }
170    acph = (acph_subobj_t *) this->subObj;
171    acph->procState = SA_STEPSTATE_COLLECT;
172    acph->boundStrengthState = SA_BOUNDSTRENGTH_SSEP;
173
174    acph->inspaceok = TRUE;
175    acph->needsmoreitems = TRUE;
176
177    acph->headxBottom = 0;
178    acph->headxLen = 0;
179    acph->cbufBufSize = PICOACPH_MAXSIZE_CBUF;
180    acph->cbufLen = 0;
181
182    /* init headx, cbuf */
183    for (i = 0; i < PICOACPH_MAXNR_HEADX; i++){
184        acph->headx[i].head.type = 0;
185        acph->headx[i].head.info1 = 0;
186        acph->headx[i].head.info2 = 0;
187        acph->headx[i].head.len = 0;
188        acph->headx[i].cind = 0;
189        acph->headx[i].boundstrength = 0;
190        acph->headx[i].boundtype = 0;
191    }
192    for (i = 0; i < PICOACPH_MAXSIZE_CBUF; i++) {
193        acph->cbuf[i] = 0;
194    }
195
196    if (resetMode == PICO_RESET_SOFT) {
197        /*following initializations needed only at startup or after a full reset*/
198        return PICO_OK;
199    }
200
201    /* kb tabphones */
202    acph->tabphones =
203        picoktab_getPhones(this->voice->kbArray[PICOKNOW_KBID_TAB_PHONES]);
204    if (acph->tabphones == NULL) {
205        return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
206                                       NULL, NULL);
207    }
208    PICODBG_DEBUG(("got tabphones"));
209
210#ifdef PICO_DEBUG_1
211    {
212        picoos_uint16 itmp;
213        for (itmp = 0; itmp < 256; itmp++) {
214            if (picoktab_hasVowelProp(acph->tabphones, itmp)) {
215                PICODBG_DEBUG(("tabphones hasVowel: %d", itmp));
216            }
217            if (picoktab_hasDiphthProp(acph->tabphones, itmp)) {
218                PICODBG_DEBUG(("tabphones hasDiphth: %d", itmp));
219            }
220            if (picoktab_hasGlottProp(acph->tabphones, itmp)) {
221                PICODBG_DEBUG(("tabphones hasGlott: %d", itmp));
222            }
223            if (picoktab_hasNonsyllvowelProp(acph->tabphones, itmp)) {
224                PICODBG_DEBUG(("tabphones hasNonsyllvowel: %d", itmp));
225            }
226            if (picoktab_hasSyllconsProp(acph->tabphones, itmp)) {
227                PICODBG_DEBUG(("tabphones hasSyllcons: %d", itmp));
228            }
229
230            if (picoktab_isPrimstress(acph->tabphones, itmp)) {
231                PICODBG_DEBUG(("tabphones isPrimstress: %d", itmp));
232            }
233            if (picoktab_isSecstress(acph->tabphones, itmp)) {
234                PICODBG_DEBUG(("tabphones isSecstress: %d", itmp));
235            }
236            if (picoktab_isSyllbound(acph->tabphones, itmp)) {
237                PICODBG_DEBUG(("tabphones isSyllbound: %d", itmp));
238            }
239            if (picoktab_isPause(acph->tabphones, itmp)) {
240                PICODBG_DEBUG(("tabphones isPause: %d", itmp));
241            }
242        }
243
244        PICODBG_DEBUG(("tabphones primstressID: %d",
245                       picoktab_getPrimstressID(acph->tabphones)));
246        PICODBG_DEBUG(("tabphones secstressID: %d",
247                       picoktab_getSecstressID(acph->tabphones)));
248        PICODBG_DEBUG(("tabphones syllboundID: %d",
249                       picoktab_getSyllboundID(acph->tabphones)));
250        PICODBG_DEBUG(("tabphones pauseID: %d",
251                       picoktab_getPauseID(acph->tabphones)));
252    }
253#endif
254
255
256    /* kb dtphr */
257    acph->dtphr = picokdt_getDtPHR(this->voice->kbArray[PICOKNOW_KBID_DT_PHR]);
258    if (acph->dtphr == NULL) {
259        return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
260                                       NULL, NULL);
261    }
262    PICODBG_DEBUG(("got dtphr"));
263
264    /* kb dtacc */
265    acph->dtacc = picokdt_getDtACC(this->voice->kbArray[PICOKNOW_KBID_DT_ACC]);
266    if (acph->dtacc == NULL) {
267        return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
268                                       NULL, NULL);
269    }
270    PICODBG_DEBUG(("got dtacc"));
271
272    return PICO_OK;
273}
274
275static picodata_step_result_t acphStep(register picodata_ProcessingUnit this,
276                                     picoos_int16 mode,
277                                     picoos_uint16 *numBytesOutput);
278
279static pico_status_t acphTerminate(register picodata_ProcessingUnit this)
280{
281    return PICO_OK;
282}
283
284static pico_status_t acphSubObjDeallocate(register picodata_ProcessingUnit this,
285                                        picoos_MemoryManager mm) {
286    mm = mm;        /* avoid warning "var not used in this function"*/
287    if (NULL != this) {
288        picoos_deallocate(this->common->mm, (void *) &this->subObj);
289    }
290    return PICO_OK;
291}
292
293
294picodata_ProcessingUnit picoacph_newAccPhrUnit(picoos_MemoryManager mm,
295                                              picoos_Common common,
296                                              picodata_CharBuffer cbIn,
297                                              picodata_CharBuffer cbOut,
298                                              picorsrc_Voice voice) {
299    picodata_ProcessingUnit this;
300
301    this = picodata_newProcessingUnit(mm, common, cbIn, cbOut, voice);
302    if (this == NULL) {
303        return NULL;
304    }
305
306    this->initialize = acphInitialize;
307    PICODBG_DEBUG(("set this->step to acphStep"));
308    this->step = acphStep;
309    this->terminate = acphTerminate;
310    this->subDeallocate = acphSubObjDeallocate;
311    this->subObj = picoos_allocate(mm, sizeof(acph_subobj_t));
312    if (this->subObj == NULL) {
313        picoos_deallocate(mm, (void *)&this);
314        picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, NULL, NULL);
315        return NULL;
316    }
317
318    acphInitialize(this, PICO_RESET_FULL);
319    return this;
320}
321
322
323/* ***********************************************************************/
324/* PROCESS_PHR/ACC support functions */
325/* ***********************************************************************/
326
327
328static picoos_uint8 acphGetNrSylls(register picodata_ProcessingUnit this,
329                                 register acph_subobj_t *acph,
330                                 const picoos_uint16 ind) {
331    picoos_uint8 i;
332    picoos_uint8 ch;
333    picoos_uint8 count;
334
335    count = 1;
336    for (i = 0; i < acph->headx[ind].head.len; i++) {
337        ch = acph->cbuf[acph->headx[ind].cind + i];
338        if (picoktab_isSyllbound(acph->tabphones, ch)) {
339            count++;
340        }
341    }
342    return count;
343}
344
345
346/* ***********************************************************************/
347/* PROCESS_PHR functions */
348/* ***********************************************************************/
349
350
351/* find next POS to the left of 'ind' and return its POS and index */
352static picoos_uint8 acphPhrItemSeqGetPosLeft(register picodata_ProcessingUnit this,
353                                           register acph_subobj_t *acph,
354                                           const picoos_uint16 ind,
355                                           picoos_uint16 *leftind) {
356    picoos_uint8 val;
357    picoos_int32 i;
358
359    val = PICOKDT_EPSILON;
360    for (i = ind - 1; ((val == PICOKDT_EPSILON) && (i >= 0)); i--) {
361        if ((acph->headx[i].head.type == PICODATA_ITEM_WORDPHON)) {
362            val = acph->headx[i].head.info1;
363        }
364    }
365    *leftind = i + 1;
366    return val;
367}
368
369
370/* right-to-left, for each WORDPHON do phr */
371static pico_status_t acphSubPhrasing(register picodata_ProcessingUnit this,
372                                   register acph_subobj_t *acph) {
373    picokdt_classify_result_t dtres;
374    picoos_uint8 valbuf[5];
375    picoos_uint16 nrwordspre;
376    picoos_uint16 nrwordsfol;
377    picoos_uint16 nrsyllsfol;
378    picoos_uint16 lastprev2; /* last index of POS(es) found to the left */
379    picoos_uint8 curpos;     /* POS(es) of current word */
380    picoos_uint16 upbound;   /* index of last WORDPHON item (with POS) */
381    picoos_uint8 okay;
382    picoos_uint8 nosubphrases;
383    picoos_int32 i;
384
385    /* set initial values */
386    okay = TRUE;
387    nosubphrases = TRUE;
388    curpos = PICOKDT_EPSILON;   /* needs to be in 2^8 */
389
390    /* set upbound to last WORDPHON, don't worry about first one */
391    upbound = acph->headxLen - 1;
392    while ((upbound > 0) &&
393           (acph->headx[upbound].head.type != PICODATA_ITEM_WORDPHON)) {
394        upbound--;
395    }
396
397    /* zero or one WORDPHON, no subphrasing needed, but handling of
398       BOUND strength state is needed */
399    if (upbound <= 0) {
400        /* phrase not containing more than one WORDPHON */
401        PICODBG_DEBUG(("less than two WORDPHON in phrase -> no subphrasing"));
402    }
403
404    lastprev2 = upbound;
405
406    /* set initial nr pre/fol words/sylls, upbound is ind of last WORDPHON */
407    nrwordsfol = 0;
408    nrsyllsfol = 0;
409    nrwordspre = 0;
410    for (i = 0; i < upbound; i++) {
411        if (acph->headx[i].head.type == PICODATA_ITEM_WORDPHON) {
412            nrwordspre++;
413        }
414    }
415
416    nrwordspre++;    /* because we later have a decrement before being used */
417
418
419    /* set POS of current word in valbuf[1], will be shifted right afterwards */
420    valbuf[1] = acph->headx[upbound].head.info1;
421    /* find first POS to the left and set valbuf[0] */
422    valbuf[0] = acphPhrItemSeqGetPosLeft(this, acph, lastprev2, &lastprev2);
423    for (i = 2; i < 5; i++) {
424        valbuf[i] = PICOKDT_EPSILON;
425    }
426
427    PICODBG_TRACE(("headxLen: %d", acph->headxLen));
428
429    /* at least two WORDPHON items */
430    /* process from right-to-left all items in headx, except for 1st WORDPHON */
431    for (i = upbound; (i > 0) && (nrwordspre > 1); i--) {
432        okay = TRUE;
433
434        PICODBG_TRACE(("iter: %d, type: %c", i, acph->headx[i].head.type));
435
436        /* if not (WORDPHON) */
437        if ((acph->headx[i].head.type != PICODATA_ITEM_WORDPHON)) {
438            continue;
439        }
440
441        PICODBG_TRACE(("iter: %d, curpos: %d", i, acph->headx[i].head.info1));
442
443        /* get and set POS of current item, must be WORDPHON */
444        curpos = acph->headx[i].head.info1;
445
446        /* no continue so far => at [i] we have a WORDPHON item */
447        /* shift all POS elements one position to the right */
448        valbuf[4] = valbuf[3];
449        valbuf[3] = valbuf[2];
450        valbuf[2] = valbuf[1];
451        valbuf[1] = valbuf[0];
452        /* find next POS to the left and set valbuf[0] */
453        valbuf[0] = acphPhrItemSeqGetPosLeft(this, acph, lastprev2, &lastprev2);
454
455        /* better check double than never */
456        if (curpos != valbuf[2]) {
457            PICODBG_WARN(("syncing POS"));
458            picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
459                                  NULL, NULL);
460            valbuf[2] = curpos;
461        }
462
463        nrwordsfol++;
464        nrsyllsfol += acphGetNrSylls(this, acph, i);
465        nrwordspre--;
466
467        PICODBG_TRACE(("%d: [%d,%d|%d|%d,%d|%d,%d,%d]",
468                       i, valbuf[0], valbuf[1], valbuf[2], valbuf[3],
469                       valbuf[4], nrwordspre, nrwordsfol, nrsyllsfol));
470
471        /* no continue so far => subphrasing needed */
472        /* construct input vector, which is set in dtphr */
473        if (!picokdt_dtPHRconstructInVec(acph->dtphr, valbuf[0], valbuf[1],
474                                         valbuf[2], valbuf[3], valbuf[4],
475                                         nrwordspre, nrwordsfol, nrsyllsfol)) {
476            /* error constructing invec */
477            PICODBG_WARN(("problem with invec"));
478            picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
479                                  NULL, NULL);
480            okay = FALSE;
481        }
482        /* classify */
483        if (okay && (!picokdt_dtPHRclassify(acph->dtphr))) {
484            /* error doing classification */
485            PICODBG_WARN(("problem classifying"));
486            picoos_emRaiseWarning(this->common->em, PICO_WARN_CLASSIFICATION,
487                                  NULL, NULL);
488            okay = FALSE;
489        }
490        /* decompose */
491        if (okay && (!picokdt_dtPHRdecomposeOutClass(acph->dtphr, &dtres))) {
492            /* error decomposing */
493            PICODBG_WARN(("problem decomposing"));
494            picoos_emRaiseWarning(this->common->em, PICO_WARN_OUTVECTOR,
495                                  NULL, NULL);
496            okay = FALSE;
497        }
498
499        if (okay && dtres.set) {
500            PICODBG_DEBUG(("%d - inpos: %d, out: %d", i,valbuf[2],dtres.class));
501        } else {
502            PICODBG_WARN(("problem determining subphrase boundary strength"));
503            dtres.class = PICODATA_ITEMINFO1_ERR;
504        }
505
506        if (dtres.class > 255) {
507            PICODBG_WARN(("dt class outside valid range, setting to PHR0"));
508            dtres.class = PICODATA_ITEMINFO1_BOUND_PHR0;
509        }
510        acph->headx[i].boundstrength = (picoos_uint8)dtres.class;
511        if ((dtres.class == PICODATA_ITEMINFO1_BOUND_PHR2) ||
512            (dtres.class == PICODATA_ITEMINFO1_BOUND_PHR3)) {
513            if (nosubphrases) {
514                /* it's the last secondary phrase in the primary phrase */
515                /* add type info */
516                switch (acph->headx[acph->headxLen - 1].head.info2) {
517                    case PICODATA_ITEMINFO2_PUNC_SENT_T:
518                        acph->headx[i].boundtype =
519                            PICODATA_ITEMINFO2_BOUNDTYPE_T;
520                        break;
521                    case PICODATA_ITEMINFO2_PUNC_SENT_Q:
522                        acph->headx[i].boundtype =
523                            PICODATA_ITEMINFO2_BOUNDTYPE_Q;
524                        break;
525                    case PICODATA_ITEMINFO2_PUNC_SENT_E:
526                        acph->headx[i].boundtype =
527                            PICODATA_ITEMINFO2_BOUNDTYPE_E;
528                        break;
529                    case PICODATA_ITEMINFO2_PUNC_PHRASE:
530                    case PICODATA_ITEMINFO2_PUNC_PHRASE_FORCED:
531                        acph->headx[i].boundtype =
532                            PICODATA_ITEMINFO2_BOUNDTYPE_P;
533                        break;
534                    default:
535                        PICODBG_WARN(("invalid boundary type, not set"));
536                        break;
537                }
538                nosubphrases = FALSE;
539
540            } else {
541                acph->headx[i].boundtype =
542                    PICODATA_ITEMINFO2_BOUNDTYPE_P;
543            }
544            /* reset nr following words and sylls counters */
545            nrwordsfol = 0;
546            nrsyllsfol = 0;
547        }
548    }
549
550    /* process first item, add bound-info */
551    switch (acph->boundStrengthState) {
552        case SA_BOUNDSTRENGTH_SSEP:
553            acph->headx[0].boundstrength =
554                PICODATA_ITEMINFO1_BOUND_SBEG;
555            break;
556        case SA_BOUNDSTRENGTH_PPHR:
557            acph->headx[0].boundstrength =
558                PICODATA_ITEMINFO1_BOUND_PHR1;
559            break;
560        default:
561            PICODBG_WARN(("invalid boundary strength, not set"));
562            break;
563    }
564
565    /* set boundary strength state */
566    switch (acph->headx[acph->headxLen - 1].head.info1) {
567        case PICODATA_ITEMINFO1_PUNC_SENTEND:
568        case PICODATA_ITEMINFO1_PUNC_FLUSH:
569            acph->boundStrengthState = SA_BOUNDSTRENGTH_SSEP;
570            break;
571        case PICODATA_ITEMINFO1_PUNC_PHRASEEND:
572            acph->boundStrengthState = SA_BOUNDSTRENGTH_PPHR;
573            break;
574        default:
575            PICODBG_WARN(("invalid boundary strength state, not changed"));
576            break;
577    }
578
579    if (nosubphrases) {
580        /* process first item, add type info */
581        switch (acph->headx[acph->headxLen - 1].head.info2) {
582            case PICODATA_ITEMINFO2_PUNC_SENT_T:
583                acph->headx[0].boundtype =
584                    PICODATA_ITEMINFO2_BOUNDTYPE_T;
585                break;
586            case PICODATA_ITEMINFO2_PUNC_SENT_Q:
587                acph->headx[0].boundtype =
588                    PICODATA_ITEMINFO2_BOUNDTYPE_Q;
589                break;
590            case PICODATA_ITEMINFO2_PUNC_SENT_E:
591                acph->headx[0].boundtype =
592                    PICODATA_ITEMINFO2_BOUNDTYPE_E;
593                break;
594            case PICODATA_ITEMINFO2_PUNC_PHRASE:
595            case PICODATA_ITEMINFO2_PUNC_PHRASE_FORCED:
596                acph->headx[0].boundtype =
597                    PICODATA_ITEMINFO2_BOUNDTYPE_P;
598                break;
599            default:
600                PICODBG_WARN(("invalid boundary type, not set"));
601                break;
602        }
603    } else {
604        acph->headx[0].boundtype =
605            PICODATA_ITEMINFO2_BOUNDTYPE_P;
606    }
607
608    return PICO_OK;
609}
610
611
612/* ***********************************************************************/
613/* PROCESS_ACC functions */
614/* ***********************************************************************/
615
616/* find next POS to the left of 'ind' and return its POS and index */
617static picoos_uint8 acphAccItemSeqGetPosLeft(register picodata_ProcessingUnit this,
618                                           register acph_subobj_t *acph,
619                                           const picoos_uint16 ind,
620                                           picoos_uint16 *leftind) {
621    picoos_uint8 val;
622    picoos_int32 i;
623
624    val = PICOKDT_EPSILON;
625    for (i = ind - 1; ((val == PICOKDT_EPSILON) && (i >= 0)); i--) {
626        if ((acph->headx[i].head.type == PICODATA_ITEM_WORDPHON)) {
627            val = acph->headx[i].head.info1;
628        }
629    }
630    *leftind = i + 1;
631    return val;
632}
633
634
635/* s1: nr sylls in word before the first primary stressed syll,
636   s2: nr sylls in word after (but excluding) the first primary stressed syll */
637static picoos_uint8 acphAccNrSyllParts(register picodata_ProcessingUnit this,
638                                     register acph_subobj_t *acph,
639                                     const picoos_uint16 ind,
640                                     picoos_uint8 *s1,
641                                     picoos_uint8 *s2) {
642    picoos_uint16 pind;
643    picoos_uint16 pend;    /* phone string start+len */
644    picoos_uint8 afterprim;
645
646    /* check ind is in valid range */
647    if (ind >= acph->headxLen) {
648        return FALSE;
649    }
650
651    *s1 = 0;
652    *s2 = 0;
653    afterprim = FALSE;
654    pend = acph->headx[ind].cind + acph->headx[ind].head.len;
655    for (pind = acph->headx[ind].cind; pind < pend; pind++) {
656        if (picoktab_isPrimstress(acph->tabphones, acph->cbuf[pind])) {
657            afterprim = TRUE;
658        } else if (picoktab_isSyllbound(acph->tabphones, acph->cbuf[pind])) {
659            if (afterprim) {
660                (*s2)++;
661            } else {
662                (*s1)++;
663            }
664        }
665    }
666    if (afterprim) {
667        (*s2)++;
668    } else {
669        (*s1)++;
670    }
671
672    /* exclude the stressed syllable */
673    if ((*s2) > 0) {
674        (*s2)--;
675    }
676    /* handle the case when there is no primstress */
677    if (!afterprim) {
678        (*s2) = (*s1);
679    }
680    return TRUE;
681}
682
683
684static picoos_uint8 acphAccGetNrsRight(register picodata_ProcessingUnit this,
685                                     register acph_subobj_t *acph,
686                                     const picoos_uint16 ind,
687                                     picoos_uint16 *nrwordsfol,
688                                     picoos_uint16 *nrsyllsfol,
689                                     picoos_uint16 *footwordsfol,
690                                     picoos_uint16 *footsyllsfol) {
691    picoos_uint16 i;
692    picoos_uint8 s1;
693    picoos_uint8 s2;
694
695    if (!acphAccNrSyllParts(this, acph, ind, &s1, &s2)) {
696        return FALSE;
697    }
698
699    *nrwordsfol = 0;
700    *nrsyllsfol = s2;
701    i = ind + 1;
702    while ((i < acph->headxLen) &&
703           (acph->headx[i].boundstrength == PICODATA_ITEMINFO1_BOUND_PHR0)) {
704        if (acph->headx[i].head.type == PICODATA_ITEM_WORDPHON) {
705            (*nrwordsfol)++;
706            *nrsyllsfol += acphGetNrSylls(this, acph, i);
707        }
708        i++;
709    }
710
711    *footwordsfol = 0;
712    *footsyllsfol = s2;
713    i = ind + 1;
714    while ((i < acph->headxLen) &&
715           (acph->headx[i].head.info2 != PICODATA_ACC1)) {
716        if (acph->headx[i].head.type == PICODATA_ITEM_WORDPHON) {
717            (*footwordsfol)++;
718            *footsyllsfol += acphGetNrSylls(this, acph, i);
719        }
720        i++;
721    }
722    if ((i < acph->headxLen) && (acph->headx[i].head.info2 == PICODATA_ACC1)) {
723        if (!acphAccNrSyllParts(this, acph, i, &s1, &s2)) {
724            return FALSE;
725        }
726        *footsyllsfol += s1;
727    }
728    return TRUE;
729}
730
731
732static picoos_uint8 acphAccGetNrsLeft(register picodata_ProcessingUnit this,
733                                    register acph_subobj_t *acph,
734                                    const picoos_uint16 ind,
735                                    picoos_uint16 *nrwordspre,
736                                    picoos_uint16 *nrsyllspre) {
737    picoos_int32 i;
738    picoos_uint8 s1;
739    picoos_uint8 s2;
740
741    if (!acphAccNrSyllParts(this, acph, ind, &s1, &s2)) {
742        return FALSE;
743    }
744
745    *nrwordspre = 0;
746    *nrsyllspre = s1;
747    i = ind - 1;
748    while ((i >= 0) &&
749           (acph->headx[i].boundstrength == PICODATA_ITEMINFO1_BOUND_PHR0)) {
750        if (acph->headx[i].head.type == PICODATA_ITEM_WORDPHON) {
751            (*nrwordspre)++;
752            *nrsyllspre += acphGetNrSylls(this, acph, i);
753        }
754        i--;
755    }
756
757    if ((acph->headx[i].boundstrength != PICODATA_ITEMINFO1_BOUND_PHR0) &&
758        (acph->headx[i].head.type == PICODATA_ITEM_WORDPHON)) {
759        (*nrwordspre)++;
760        *nrsyllspre += acphGetNrSylls(this, acph, i);
761    }
762    return TRUE;
763}
764
765
766/* return TRUE if wordphon contains no stress, FALSE otherwise */
767static picoos_uint8 acphIsWordWithoutStress(register picodata_ProcessingUnit this,
768                                          register acph_subobj_t *acph,
769                                          const picoos_uint16 ind) {
770    picoos_uint8 i;
771    picoos_uint16 pos;
772
773    pos = acph->headx[ind].cind;
774    for (i = 0; i < acph->headx[ind].head.len; i++) {
775        if (picoktab_isPrimstress(acph->tabphones, acph->cbuf[pos + i]) ||
776            picoktab_isSecstress(acph->tabphones, acph->cbuf[pos + i])) {
777            return FALSE;
778        }
779    }
780    return TRUE;
781}
782
783
784/* right-to-left, for each WORDPHON do acc */
785static pico_status_t acphAccentuation(register picodata_ProcessingUnit this,
786                                    register acph_subobj_t *acph) {
787    picokdt_classify_result_t dtres;
788    picoos_uint8 valbuf[5];
789    picoos_uint16 hist1;
790    picoos_uint16 hist2;
791    picoos_uint16 nrwordspre;
792    picoos_uint16 nrsyllspre;
793    picoos_uint16 nrwordsfol;
794    picoos_uint16 nrsyllsfol;
795    picoos_uint16 footwordsfol;
796    picoos_uint16 footsyllsfol;
797    picoos_uint16 lastprev2; /* last index of POS(es) found to the left */
798    picoos_uint8 curpos;     /* POS(es) of current word */
799    picoos_uint16 prevout;
800    picoos_uint8 okay;
801    picoos_int32 upbound;   /* index of last WORDPHON item (with POS) */
802    picoos_uint16 i;
803
804    /* set initial values */
805    okay = TRUE;
806    curpos = PICOKDT_EPSILON;    /* needs to be < 2^8 */
807
808    /* set upbound to last WORDPHON */
809    upbound = acph->headxLen - 1;
810    while ((upbound >= 0) &&
811           (acph->headx[upbound].head.type != PICODATA_ITEM_WORDPHON)) {
812        upbound--;
813    }
814
815    if (upbound < 0) {
816        /* phrase containing zero WORDPHON */
817        PICODBG_DEBUG(("no WORDPHON in phrase -> no accentuation"));
818        return PICO_OK;
819    }
820
821    lastprev2 = upbound;
822
823    /* set initial history values */
824    prevout = PICOKDT_HISTORY_ZERO;
825    hist1 = PICOKDT_HISTORY_ZERO;
826    hist2 = PICOKDT_HISTORY_ZERO;
827
828    /* set initial nr pre/fol words/sylls, upbound is ind of last WORDPHON */
829    nrwordsfol = 0;
830    nrsyllsfol = 0;
831    footwordsfol = 0;
832    footsyllsfol = 0;
833    nrwordspre = 0;
834    nrsyllspre = 0;
835
836    /* set POS of current word in valbuf[1], will be shifted right afterwards */
837    valbuf[1] = acph->headx[upbound].head.info1;
838    /* find first POS to the left and set valbuf[0] */
839    valbuf[0] = acphAccItemSeqGetPosLeft(this, acph, lastprev2, &lastprev2);
840    for (i = 2; i < 5; i++) {
841        valbuf[i] = PICOKDT_EPSILON;
842    }
843
844    PICODBG_TRACE(("headxLen: %d", acph->headxLen));
845
846    /* process from right-to-left all items in headx */
847    for (i = upbound+1; i > 0; ) {
848        i--;
849
850        okay = TRUE;
851
852        PICODBG_TRACE(("iter: %d, type: %c", i, acph->headx[i].head.type));
853
854        /* if not (WORDPHON) */
855        if ((acph->headx[i].head.type != PICODATA_ITEM_WORDPHON)) {
856            continue;
857        }
858
859        PICODBG_TRACE(("iter: %d, curpos: %d", i, acph->headx[i].head.info1));
860
861        /* get and set POS of current item, must be WORDPHON */
862        curpos = acph->headx[i].head.info1;
863
864        /* no continue so far => at [i] we have a WORDPHON item */
865        /* shift all POS elements one position to the right */
866        valbuf[4] = valbuf[3];
867        valbuf[3] = valbuf[2];
868        valbuf[2] = valbuf[1];
869        valbuf[1] = valbuf[0];
870        /* find next POS to the left and set valbuf[0] */
871        valbuf[0] = acphAccItemSeqGetPosLeft(this, acph, lastprev2, &lastprev2);
872
873        /* better check double than never */
874        if (curpos != valbuf[2]) {
875            PICODBG_WARN(("syncing POS"));
876            picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
877                                  NULL, NULL);
878            valbuf[2] = curpos;
879        }
880
881        /* set history values */
882        hist2 = hist1;
883        hist1 = prevout;
884
885        /* ************************************************************ */
        /* many speedups possible by avoiding double calc of attributes */
887        /* ************************************************************ */
888
889        /* get distances */
890        if ((!acphAccGetNrsRight(this, acph, i, &nrwordsfol, &nrsyllsfol,
891                               &footwordsfol, &footsyllsfol)) ||
892            (!acphAccGetNrsLeft(this, acph, i, &nrwordspre, &nrsyllspre))) {
893            PICODBG_WARN(("problem setting distances in invec"));
894            picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
895                                  NULL, NULL);
896            okay = FALSE;
897        }
898
899        PICODBG_TRACE(("%d: [%d,%d,%d,%d,%d|%d,%d|%d,%d,%d,%d|%d,%d]", i,
900                       valbuf[0], valbuf[1], valbuf[2], valbuf[3], valbuf[4],
901                       hist1, hist2, nrwordspre, nrsyllspre,
902                       nrwordsfol, nrsyllsfol, footwordsfol, footsyllsfol));
903
904        /* no continue so far => accentuation needed */
905        /* construct input vector, which is set in dtacc */
906        if (!picokdt_dtACCconstructInVec(acph->dtacc, valbuf[0], valbuf[1],
907                                         valbuf[2], valbuf[3], valbuf[4],
908                                         hist1, hist2, nrwordspre, nrsyllspre,
909                                         nrwordsfol, nrsyllsfol, footwordsfol,
910                                         footsyllsfol)) {
911            /* error constructing invec */
912            PICODBG_WARN(("problem with invec"));
913            picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
914                                  NULL, NULL);
915            okay = FALSE;
916        }
917        /* classify */
918        if (okay && (!picokdt_dtACCclassify(acph->dtacc, &prevout))) {
919            /* error doing classification */
920            PICODBG_WARN(("problem classifying"));
921            picoos_emRaiseWarning(this->common->em, PICO_WARN_CLASSIFICATION,
922                                  NULL, NULL);
923            okay = FALSE;
924        }
925        /* decompose */
926        if (okay && (!picokdt_dtACCdecomposeOutClass(acph->dtacc, &dtres))) {
927            /* error decomposing */
928            PICODBG_WARN(("problem decomposing"));
929            picoos_emRaiseWarning(this->common->em, PICO_WARN_OUTVECTOR,
930                                  NULL, NULL);
931            okay = FALSE;
932        }
933
934        if (dtres.class > 255) {
935            PICODBG_WARN(("dt class outside valid range, setting to ACC0"));
936            dtres.class = PICODATA_ACC0;
937        }
938
939        if (okay && dtres.set) {
940            PICODBG_DEBUG(("%d - inpos: %d, out: %d", i,valbuf[2],dtres.class));
941            if (acphIsWordWithoutStress(this, acph, i)) {
942                if (dtres.class != PICODATA_ACC0) {
943                    acph->headx[i].head.info2 = PICODATA_ACC3;
944                } else {
945                    acph->headx[i].head.info2 = (picoos_uint8)dtres.class;
946                }
947            } else {
948                acph->headx[i].head.info2 = (picoos_uint8)dtres.class;
949            }
950            PICODBG_DEBUG(("%d - after-nostress-corr: %d",
951                           i, acph->headx[i].head.info2));
952        } else {
953            PICODBG_WARN(("problem determining accentuation level"));
954            dtres.class = PICODATA_ITEMINFO1_ERR;
955        }
956    }
957    return PICO_OK;
958}
959
960
961
962/* ***********************************************************************/
963/* acphStep support functions */
964/* ***********************************************************************/
965
966static picoos_uint8 acphPutBoundItem(register picodata_ProcessingUnit this,
967                                   register acph_subobj_t *acph,
968                                   const picoos_uint8 strength,
969                                   const picoos_uint8 type,
970                                   picoos_uint8 *dopuoutfull,
971                                   picoos_uint16 *numBytesOutput) {
972    pico_status_t rv = PICO_OK;
973    picoos_uint16 blen = 0;
974    picodata_itemhead_t tmphead;
975
976    *dopuoutfull = FALSE;
977
978    /* construct BOUND item in tmpbuf and put item */
979    tmphead.type = PICODATA_ITEM_BOUND;
980    tmphead.info1 = strength;
981    tmphead.info2 = type;
982    tmphead.len = 0;
983    rv = picodata_put_itemparts(&tmphead, NULL, 0, acph->tmpbuf,
984                                PICODATA_MAX_ITEMSIZE, &blen);
985    if (rv != PICO_OK) {
986        PICODBG_ERROR(("problem creating BOUND item"));
987        picoos_emRaiseException(this->common->em, rv, NULL, NULL);
988        return FALSE;
989    }
990    /* put constructed item to ext. charbuf */
991    rv = picodata_cbPutItem(this->cbOut, acph->tmpbuf, blen, &blen);
992
993    *numBytesOutput += blen;
994    if (rv == PICO_EXC_BUF_OVERFLOW) {
995        PICODBG_DEBUG(("overflow in cb output buffer"));
996        *dopuoutfull = TRUE;    /* ie. do PU_OUT_FULL later */
997        return FALSE;
998    } else if (rv != PICO_OK) {
999        PICODBG_ERROR(("problem putting BOUND item"));
1000        picoos_emRaiseException(this->common->em, rv, NULL, NULL);
1001        return FALSE;
1002    }
1003
1004    PICODATA_INFO_ITEM(this->voice->kbArray[PICOKNOW_KBID_DBG],
1005                       (picoos_uint8 *)"acph: ", acph->tmpbuf, blen);
1006
1007    return TRUE;
1008}
1009
1010
1011
1012/* ***********************************************************************/
1013/*                          acphStep function                              */
1014/* ***********************************************************************/
1015
1016/*
1017complete phrase processed in one step, if not fast enough -> rework
1018
1019init, collect into internal buffer, process, and then feed to
1020output buffer
1021
1022init state: INIT ext           ext
1023state trans:     in hc1  hc2   out
1024
1025INIT | putItem   =  0    0    +1      | BUSY  -> COLL (put B-SBEG item,
1026                                                   set do-init to false)
1027
1028                                    inspace-ok-hc1
1029                                  needs-more-items-(phrase-or-flush)
1030COLL1 |getItems -n +n             0 1 | ATOMIC -> PPOSD     (got items,
1031                                                      if flush set do-init)
1032COLL2 |getItems -n +n             1 0 | ATOMIC -> PPOSD (got items, forced)
1033COLL3 |getItems -n +n             1 1 | IDLE          (got items, need more)
1034COLL4 |getItems  =  =             1 1 | IDLE             (got no items)
1035
1036PPOSD | posd     = ~n~n               | BUSY     -> PWP     (posd done)
1037PWP   | lex/g2p  = ~n-n  0+n          | BUSY     -> PPHR    (lex/g2p done)
1038PPHR  | phr      = -n 0 +m=n          | BUSY     -> PACC    (phr done, m>=n)
1039PACC  | acc      =  0 0 ~m=n          | BUSY     -> FEED    (acc done)
1040
1041                                  doinit-flag
1042FEED | putItems  0  0 0 -m-n  +m  0   | BUSY -> COLL    (put items)
1043FEED | putItems  0  0 0 -m-n  +m  1   | BUSY -> INIT    (put items)
1044FEED | putItems  0  0 0 -d-d  +d      | OUT_FULL        (put some items)
1045*/
1046
1047static picodata_step_result_t acphStep(register picodata_ProcessingUnit this,
1048                                     picoos_int16 mode,
1049                                     picoos_uint16 *numBytesOutput) {
1050    register acph_subobj_t *acph;
1051    pico_status_t rv = PICO_OK;
1052    pico_status_t rvP = PICO_OK;
1053    picoos_uint16 blen = 0;
1054    picoos_uint16 clen = 0;
1055    picoos_uint16 i;
1056
1057
1058    if (NULL == this || NULL == this->subObj) {
1059        return PICODATA_PU_ERROR;
1060    }
1061    acph = (acph_subobj_t *) this->subObj;
1062    mode = mode;        /* avoid warning "var not used in this function"*/
1063    *numBytesOutput = 0;
1064    while (1) { /* exit via return */
1065        PICODBG_DEBUG(("doing state %i, hLen|c1Len: %d|%d",
1066                       acph->procState, acph->headxLen, acph->cbufLen));
1067
1068        switch (acph->procState) {
1069
1070            /* *********************************************************/
1071            /* collect state: get item(s) from charBuf and store in
1072             * internal buffers, need a complete punctuation-phrase
1073             */
1074            case SA_STEPSTATE_COLLECT:
1075
1076                while (acph->inspaceok && acph->needsmoreitems && (PICO_OK ==
1077                (rv = picodata_cbGetItem(this->cbIn, acph->tmpbuf,
1078                                PICODATA_MAX_ITEMSIZE, &blen)))) {
1079                    rvP = picodata_get_itemparts(acph->tmpbuf,
1080                    PICODATA_MAX_ITEMSIZE, &(acph->headx[acph->headxLen].head),
1081                            &(acph->cbuf[acph->cbufLen]), acph->cbufBufSize
1082                                    - acph->cbufLen, &clen);
1083                    if (rvP != PICO_OK) {
1084                        PICODBG_ERROR(("problem getting item parts"));
1085                        picoos_emRaiseException(this->common->em, rvP,
1086                        NULL, NULL);
1087                        return PICODATA_PU_ERROR;
1088                    }
1089
1090                    /* if CMD(...FLUSH...) -> PUNC(...FLUSH...),
1091                     construct PUNC-FLUSH item in headx */
1092                    if ((acph->headx[acph->headxLen].head.type
1093                            == PICODATA_ITEM_CMD)
1094                            && (acph->headx[acph->headxLen].head.info1
1095                                    == PICODATA_ITEMINFO1_CMD_FLUSH)) {
1096                        acph->headx[acph->headxLen].head.type
1097                                = PICODATA_ITEM_PUNC;
1098                        acph->headx[acph->headxLen].head.info1
1099                                = PICODATA_ITEMINFO1_PUNC_FLUSH;
1100                        acph->headx[acph->headxLen].head.info2
1101                                = PICODATA_ITEMINFO2_PUNC_SENT_T;
1102                        acph->headx[acph->headxLen].head.len = 0;
1103                    }
1104
1105                    /* check/set needsmoreitems */
1106                    if (acph->headx[acph->headxLen].head.type
1107                            == PICODATA_ITEM_PUNC) {
1108                        acph->needsmoreitems = FALSE;
1109                    }
1110
1111                    /* check/set inspaceok, keep spare slot for forcing */
1112                    if ((acph->headxLen >= (PICOACPH_MAXNR_HEADX - 2))
1113                            || ((acph->cbufBufSize - acph->cbufLen)
1114                                    < PICODATA_MAX_ITEMSIZE)) {
1115                        acph->inspaceok = FALSE;
1116                    }
1117
1118                    if (clen > 0) {
1119                        acph->headx[acph->headxLen].cind = acph->cbufLen;
1120                        acph->cbufLen += clen;
1121                    } else {
1122                        acph->headx[acph->headxLen].cind = 0;
1123                    }
1124                    acph->headxLen++;
1125                }
1126
1127                if (!acph->needsmoreitems) {
1128                    /* 1, phrase buffered */
1129                    acph->procState = SA_STEPSTATE_PROCESS_PHR;
1130                    return PICODATA_PU_ATOMIC;
1131                } else if (!acph->inspaceok) {
1132                    /* 2, forced phrase end */
1133                    /* at least one slot is still free, use it to
1134                       force a trailing PUNC item */
1135                    acph->headx[acph->headxLen].head.type = PICODATA_ITEM_PUNC;
1136                    acph->headx[acph->headxLen].head.info1 =
1137                        PICODATA_ITEMINFO1_PUNC_PHRASEEND;
1138                    acph->headx[acph->headxLen].head.info2 =
1139                        PICODATA_ITEMINFO2_PUNC_PHRASE_FORCED;
1140                    acph->headx[acph->headxLen].head.len = 0;
1141                    acph->needsmoreitems = FALSE; /* not really needed for now */
1142                    acph->headxLen++;
1143                    PICODBG_WARN(("forcing phrase end, added PUNC_PHRASEEND"));
1144                    picoos_emRaiseWarning(this->common->em,
1145                                          PICO_WARN_FALLBACK, NULL,
1146                                          (picoos_char *)"forced phrase end");
1147                    acph->procState = SA_STEPSTATE_PROCESS_PHR;
1148                    return PICODATA_PU_ATOMIC;
1149                } else if (rv == PICO_EOF) {
1150                    /* 3, 4 */
1151                    return PICODATA_PU_IDLE;
1152                } else if ((rv == PICO_EXC_BUF_UNDERFLOW) ||
1153                           (rv == PICO_EXC_BUF_OVERFLOW)) {
1154                    /* error, no valid item in cb (UNDER) */
1155                    /*        or tmpbuf not large enough, not possible (OVER) */
1156                    /* no exception raised, left for ctrl to handle */
1157                    PICODBG_ERROR(("buffer under/overflow, rv: %d", rv));
1158                    return PICODATA_PU_ERROR;
1159                } else {
1160                    /* error, only possible if cbGetItem implementation
1161                       changes without this function being adapted*/
1162                    PICODBG_ERROR(("untreated return value, rv: %d", rv));
1163                    return PICODATA_PU_ERROR;
1164                }
1165                break;
1166
1167
1168
1169
1170            /* *********************************************************/
1171            /* process phr state: process items in headx and modify
1172             * headx in place
1173             */
1174            case SA_STEPSTATE_PROCESS_PHR:
1175                /* ensure there is an item in inBuf */
1176                if (acph->headxLen > 0) {
1177                    /* we have a phrase in headx, cbuf1 (can be
1178                       single PUNC item), do phrasing and modify headx */
1179
1180                    if (PICO_OK != acphSubPhrasing(this, acph)) {
1181                        picoos_emRaiseException(this->common->em,
1182                                                PICO_ERR_OTHER, NULL, NULL);
1183                        return PICODATA_PU_ERROR;
1184                    }
1185                    acph->procState = SA_STEPSTATE_PROCESS_ACC;
1186                } else if (acph->headxLen == 0) {    /* no items in inBuf */
1187                    PICODBG_WARN(("no items in inBuf"));
1188                    acph->procState = SA_STEPSTATE_COLLECT;
1189                    return PICODATA_PU_BUSY;
1190                }
1191
1192#if defined (PICO_DEBUG_NOTNEEDED)
1193                if (1) {
1194                    picoos_uint8 i, j, ittype;
1195                    for (i = 0; i < acph->headxLen; i++) {
1196                        if ((acph->headx[i].boundstrength != 0) &&
1197                            (acph->headx[i].boundstrength !=
1198                             PICODATA_ITEMINFO1_BOUND_PHR0)) {
1199                            PICODBG_INFO(("acph-p: boundstrength '%c', "
1200                                          "boundtype '%c'",
1201                                          acph->headx[i].boundstrength,
1202                                          acph->headx[i].boundtype));
1203                        }
1204
1205                        ittype = acph->headx[i].head.type;
1206                        PICODBG_INFO_CTX();
1207                        PICODBG_INFO_MSG(("acph-p: ("));
1208                        PICODBG_INFO_MSG(("'%c',", ittype));
1209                        if ((32 <= acph->headx[i].head.info1) &&
1210                            (acph->headx[i].head.info1 < 127) &&
1211                            (ittype != PICODATA_ITEM_WORDPHON)) {
1212                            PICODBG_INFO_MSG(("'%c',",acph->headx[i].head.info1));
1213                        } else {
1214                            PICODBG_INFO_MSG(("%3d,", acph->headx[i].head.info1));
1215                        }
1216                        if ((32 <= acph->headx[i].head.info2) &&
1217                            (acph->headx[i].head.info2 < 127)) {
1218                            PICODBG_INFO_MSG(("'%c',",acph->headx[i].head.info2));
1219                        } else {
1220                            PICODBG_INFO_MSG(("%3d,", acph->headx[i].head.info2));
1221                        }
1222                        PICODBG_INFO_MSG(("%3d)", acph->headx[i].head.len));
1223
1224                        for (j = 0; j < acph->headx[i].head.len; j++) {
1225                            if ((ittype == PICODATA_ITEM_CMD)) {
1226                                PICODBG_INFO_MSG(("%c",
1227                                        acph->cbuf[acph->headx[i].cind+j]));
1228                            } else {
1229                                PICODBG_INFO_MSG(("%4d",
1230                                        acph->cbuf[acph->headx[i].cind+j]));
1231                            }
1232                        }
1233                        PICODBG_INFO_MSG(("\n"));
1234                    }
1235                }
1236#endif
1237
1238                break;
1239
1240
1241            /* *********************************************************/
1242            /* process acc state: process items in headx and modify
1243             * headx in place
1244             */
1245            case SA_STEPSTATE_PROCESS_ACC:
1246                /* ensure there is an item in inBuf */
1247                if (acph->headxLen > 0) {
1248                    /* we have a phrase in headx, cbuf (can be
1249                       single PUNC item), do accentuation and modify headx */
1250                    if (PICO_OK != acphAccentuation(this, acph)) {
1251                        picoos_emRaiseException(this->common->em,
1252                                                PICO_ERR_OTHER, NULL, NULL);
1253                        return PICODATA_PU_ERROR;
1254                    }
1255                    acph->procState = SA_STEPSTATE_FEED;
1256                } else if (acph->headxLen == 0) {    /* no items in inBuf */
1257                    PICODBG_WARN(("no items in inBuf"));
1258                    acph->procState = SA_STEPSTATE_COLLECT;
1259                    return PICODATA_PU_BUSY;
1260                }
1261                break;
1262
1263
1264            /* *********************************************************/
1265            /* feed state: copy item in internal outBuf to output charBuf */
1266            case SA_STEPSTATE_FEED: {
1267                picoos_uint16 indupbound;
1268                picoos_uint8 dopuoutfull;
1269
1270                PICODBG_DEBUG(("put out items (bot, len): (%d, %d)",
1271                               acph->headxBottom, acph->headxLen));
1272
1273                indupbound = acph->headxBottom + acph->headxLen;
1274                dopuoutfull = FALSE;
1275
1276                if (acph->headxBottom == 0) {
1277                    /* construct first BOUND item in tmpbuf and put item */
1278                    /* produce BOUND unless it is followed by a term/flush) */
1279                    if (acph->headx[0].head.info1
1280                            != PICODATA_ITEMINFO1_PUNC_FLUSH) {
1281                        if (!acphPutBoundItem(this, acph,
1282                                acph->headx[0].boundstrength,
1283                                acph->headx[0].boundtype, &dopuoutfull,
1284                                numBytesOutput)) {
1285                            if (dopuoutfull) {
1286                                PICODBG_DEBUG(("feeding overflow"));
1287                                return PICODATA_PU_OUT_FULL;
1288                            } else {
1289                                /* ERR-msg and exception done in acphPutBoundItem */
1290                                return PICODATA_PU_ERROR;
1291                            }
1292                        }
1293                    }
1294                }
1295
1296                /* for all items in headx, cbuf */
1297                for (i = acph->headxBottom; i < indupbound; i++) {
1298
1299                    switch (acph->headx[i].head.type) {
1300                        case PICODATA_ITEM_PUNC:
1301                            /* if sentence end, put SEND bound */
1302                            if ((acph->headx[i].head.info1 ==
1303                                 PICODATA_ITEMINFO1_PUNC_SENTEND) &&
1304                                (i == (indupbound - 1))) {
1305                                /* construct and put BOUND item */
1306                                if (!acphPutBoundItem(this, acph,
1307                                            PICODATA_ITEMINFO1_BOUND_SEND,
1308                                            PICODATA_ITEMINFO2_NA,
1309                                            &dopuoutfull, numBytesOutput)) {
1310                                    if (dopuoutfull) {
1311                                        PICODBG_DEBUG(("feeding overflow"));
1312                                        return PICODATA_PU_OUT_FULL;
1313                                    } else {
1314                                        /* ERR-msg and exception done
1315                                           in acphPutBoundItem */
1316                                        return PICODATA_PU_ERROR;
1317                                    }
1318                                }
1319                            } else if ((acph->headx[i].head.info1 ==
1320                                 PICODATA_ITEMINFO1_PUNC_FLUSH) &&
1321                                (i == (indupbound - 1))) {
1322                                /* construct and put BOUND item */
1323                                if (!acphPutBoundItem(this, acph,
1324                                            PICODATA_ITEMINFO1_BOUND_TERM,
1325                                            PICODATA_ITEMINFO2_NA,
1326                                            &dopuoutfull, numBytesOutput)) {
1327                                    if (dopuoutfull) {
1328                                        PICODBG_DEBUG(("feeding overflow"));
1329                                        return PICODATA_PU_OUT_FULL;
1330                                    } else {
1331                                        /* ERR-msg and exception done
1332                                           in acphPutBoundItem */
1333                                        return PICODATA_PU_ERROR;
1334                                    }
1335                                }
1336                            }
1337                            /* else, good-bye PUNC, not needed anymore */
1338                            break;
1339                        default:
1340
1341                            /* PHR2/3 maybe existing, check and add
1342                               BOUND item now, if needed */
1343                            if ((acph->headx[i].boundstrength ==
1344                                 PICODATA_ITEMINFO1_BOUND_PHR2) ||
1345                                (acph->headx[i].boundstrength ==
1346                                 PICODATA_ITEMINFO1_BOUND_PHR3)) {
1347                                if (!acphPutBoundItem(this, acph,
1348                                            acph->headx[i].boundstrength,
1349                                            acph->headx[i].boundtype,
1350                                            &dopuoutfull, numBytesOutput)) {
1351                                    if (dopuoutfull) {
1352                                        PICODBG_DEBUG(("feeding overflow"));
1353                                        return PICODATA_PU_OUT_FULL;
1354                                    } else {
1355                                        /* ERR-msg and exception done
1356                                           in acphPutBoundItem */
1357                                        return PICODATA_PU_ERROR;
1358                                    }
1359                                }
1360                            }
1361
1362                            /* copy item unmodified */
1363                            rv = picodata_put_itemparts(&(acph->headx[i].head),
1364                                     &(acph->cbuf[acph->headx[i].cind]),
1365                                     acph->headx[i].head.len,
1366                                     acph->tmpbuf, PICODATA_MAX_ITEMSIZE,
1367                                     &blen);
1368
1369                            rvP = picodata_cbPutItem(this->cbOut, acph->tmpbuf,
1370                                    PICODATA_MAX_ITEMSIZE, &clen);
1371
1372                            *numBytesOutput += clen;
1373
1374                            PICODBG_DEBUG(("put item, status: %d", rvP));
1375
1376                            if (rvP == PICO_OK) {
1377                                acph->headxBottom++;
1378                                acph->headxLen--;
1379                            } else if (rvP == PICO_EXC_BUF_OVERFLOW) {
1380                                /* try again next time, but PHR2/3
1381                                   bound already added if existing,
1382                                   ensure it's not output a 2nd
1383                                   time */
1384                                PICODBG_DEBUG(("feeding overflow"));
1385                                acph->headx[i].boundstrength = 0;
1386                                return PICODATA_PU_OUT_FULL;
1387                            } else {
1388                                /* error, should never happen */
1389                                PICODBG_ERROR(("untreated return value, rvP: %d", rvP));
1390                                return PICODATA_PU_ERROR;
1391                            }
1392
1393                            PICODATA_INFO_ITEM(this->voice->kbArray[PICOKNOW_KBID_DBG],
1394                                               (picoos_uint8 *)"acph: ",
1395                                               acph->tmpbuf, PICODATA_MAX_ITEMSIZE);
1396
1397                            break;
1398                    } /*switch*/
1399                } /*for*/
1400
1401                /* reset headx, cbuf */
1402                acph->headxBottom = 0;
1403                acph->headxLen = 0;
1404                acph->cbufLen = 0;
1405                for (i = 0; i < PICOACPH_MAXNR_HEADX; i++) {
1406                    acph->headx[i].boundstrength = 0;
1407                }
1408
1409                /* reset collect state support variables */
1410                acph->inspaceok = TRUE;
1411                acph->needsmoreitems = TRUE;
1412
1413                acph->procState = SA_STEPSTATE_COLLECT;
1414                return PICODATA_PU_BUSY;
1415                break;
1416            }
1417
1418            default:
1419                break;
1420        } /* switch */
1421
1422    } /* while */
1423
1424    /* should be never reached */
1425    PICODBG_ERROR(("reached end of function"));
1426    picoos_emRaiseException(this->common->em, PICO_ERR_OTHER, NULL, NULL);
1427    return PICODATA_PU_ERROR;
1428}
1429
1430#ifdef __cplusplus
1431}
1432#endif
1433
1434
1435/* end */
1436