1/*
2 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16/**
17 * @file picotrns.c
18 *
19 * fst processing
20 *
21 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
22 * All rights reserved.
23 *
24 * History:
25 * - 2009-04-20 -- initial version
26 *
27 */
28
29#include "picoos.h"
30#include "picodbg.h"
31/* #include "picodata.h" */
32/* #include "picoknow.h" */
33#include "picoktab.h"
34#include "picokfst.h"
35#include "picotrns.h"
36
37#ifdef __cplusplus
38extern "C" {
39#endif
40#if 0
41}
42#endif
43
44
45
46picoos_uint8 picotrns_unplane(picoos_int16 symIn, picoos_uint8 * plane) {
47    if (symIn < 0) {
48        (*plane) = 0;
49        return (picoos_uint8) symIn;
50    } else {
51        (*plane) = symIn >> 8;
52        return (picoos_uint8) (symIn & 0xFF);
53    }
54}
55
56#if defined(PICO_DEBUG)
57
58void PICOTRNS_PRINTSYM1(picoknow_KnowledgeBase kbdbg, picoos_int16 insym, picoos_uint8 phonemic)
59{
60#include "picokdbg.h"
61    picoos_int16 sym;
62    picoos_uint8 plane;
63    picokdbg_Dbg dbg = (NULL == kbdbg) ? NULL :  picokdbg_getDbg(kbdbg);
64    sym = picotrns_unplane(insym, &plane);
65    switch (plane) {
66        case PICOKFST_PLANE_PHONEMES: /* phones */
67            if ((NULL == dbg) || !phonemic) {
68                PICODBG_INFO_MSG((" %c", sym));
69            } else {
70                PICODBG_INFO_MSG((" %s", picokdbg_getPhoneSym(dbg, (picoos_uint8) sym)));
71            }
72            break;
73        case PICOKFST_PLANE_ACCENTS: /* accents */
74            PICODBG_INFO_MSG((" {A%c}", sym));
75            break;
76        case PICOKFST_PLANE_XSAMPA: /* xsampa symbols */
77            PICODBG_INFO_MSG((" {XS:(%i)}", sym));
78            break;
79        case PICOKFST_PLANE_POS: /* part of speech */
80            PICODBG_INFO_MSG((" {P:%d}", sym));
81            break;
82        case PICOKFST_PLANE_PB_STRENGTHS: /* phrases */
83            if (sym == 48) {
84                PICODBG_INFO_MSG((" {WB}", sym));
85            } else if (sym == 115) {
86                PICODBG_INFO_MSG((" {P0}", sym));
87            } else {
88                PICODBG_INFO_MSG((" {P%c}", sym));
89            }
90            break;
91        case PICOKFST_PLANE_INTERN: /* intern */
92            PICODBG_INFO_MSG((" [%c]", sym));
93            break;
94    }
95}
96
97void PICOTRNS_PRINTSYM(picoknow_KnowledgeBase kbdbg, picoos_int16 insym)
98{
99    PICOTRNS_PRINTSYM1(kbdbg,insym,1);
100}
101
102void PICOTRNS_PRINTSYMSEQ1(picoknow_KnowledgeBase kbdbg, const picotrns_possym_t seq[], const picoos_uint16 seqLen,
103                           picoos_uint8 phonemic) {
104    picoos_uint16 i;
105    for (i=0; i<seqLen; i++) {
106        PICOTRNS_PRINTSYM1(kbdbg, seq[i].sym, phonemic);
107    }
108}
109
110void PICOTRNS_PRINTSYMSEQ(picoknow_KnowledgeBase kbdbg, const picotrns_possym_t seq[], const picoos_uint16 seqLen) {
111    PICOTRNS_PRINTSYMSEQ1(kbdbg,seq, seqLen, 1);
112}
113
114void picotrns_printSolution(const picotrns_possym_t outSeq[], const picoos_uint16 outSeqLen)
115{
116    PICODBG_INFO_CTX();
117    PICODBG_INFO_MSG(("solution: "));
118        PICOTRNS_PRINTSYMSEQ(NULL, outSeq, outSeqLen);
119    PICODBG_INFO_MSG(("\n"));
120}
121
122void picotrns_printSolutionAscii(const picotrns_possym_t outSeq[], const picoos_uint16 outSeqLen)
123{
124    PICODBG_INFO_CTX();
125    PICODBG_INFO_MSG(("solution: "));
126        PICOTRNS_PRINTSYMSEQ1(NULL, outSeq, outSeqLen,0);
127    PICODBG_INFO_MSG(("\n"));
128}
129
130#endif
131
132
133
134
135/* * +CT+ ***/
136struct picotrns_transductionState {
137    picoos_uint16 phase;   /* transduction phase:
138                              0 = before start
139                              1 = before regular recursion step
140                              2 = before finish
141                              3 = after finish */
142    picoos_uint32 nrSol;   /* nr of solutions so far */
143    picoos_int16  recPos;  /* recursion position; must be signed! */
144};
145
/* One cell of the backtracking workspace: the search state of a single
   recursion position. Cells are initialized by TransductionStep
   (startFSTState, inPos, altState = 0) and advanced by GetNextAlternative. */
typedef struct picotrns_altDesc {
    picokfst_state_t startFSTState;   /**< starting FST state in current recursion position */
    picoos_int32     inPos;           /**< corresponding position in input string */
    picokfst_state_t altState;        /**< state of alternatives search
                                         (a small enumeration, stored in a picokfst_state_t);
                                         - 0 = before pair search
                                         - 1 = search state is a valid pair search state
                                         - 2 = before inEps search
                                         - 3 = search state is a valid inEps trans search state
                                         - 4 = no more alternatives */
    picoos_int32     searchState;     /**< pair search state or inEps trans search state */
    picokfst_symid_t altOutSym;       /**< current output symbol at this recursion position */
    picoos_int32     altOutRefPos;    /**< output reference position at this recursion position */
} picotrns_altDesc_t;
159
160
161picotrns_AltDesc picotrns_allocate_alt_desc_buf(picoos_MemoryManager mm, picoos_uint32 maxByteSize, picoos_uint16 * numAltDescs)
162{
163    picotrns_AltDesc buf;
164    (*numAltDescs) = (picoos_uint32) (maxByteSize / sizeof(picotrns_altDesc_t));
165    buf =  (picotrns_AltDesc) picoos_allocate(mm, (*numAltDescs) * sizeof(picotrns_altDesc_t));
166    if (NULL == buf) {
167        (*numAltDescs) = 0;
168        return NULL;
169    } else {
170        return buf;
171    }
172}
173
174 void picotrns_deallocate_alt_desc_buf(picoos_MemoryManager mm, picotrns_AltDesc * altDescBuf)
175{
176    picoos_deallocate(mm, (void *) altDescBuf);
177}
178
179/* copy elements from inSeq to outSeq, ignoring elements with epsilon symbol */
180pico_status_t picotrns_eliminate_epsilons(const picotrns_possym_t inSeq[], picoos_uint16 inSeqLen,
181        picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen)
182{
183    picoos_uint16 i, j = 0;
184
185    for (i=0; i < inSeqLen; i++) {
186        /* it is assumed that PICOKFST_SYMID_EPS is a hardwired value and not shifted */
187        if (PICOKFST_SYMID_EPS != inSeq[i].sym) {
188            if (j < maxOutSeqLen) {
189                outSeq[j].pos = inSeq[i].pos;
190                outSeq[j].sym = inSeq[i].sym;
191                j++;
192            }
193        }
194        *outSeqLen = j;
195    }
196    return PICO_OK;
197}
198
199
200static void insertSym(picotrns_possym_t inSeq[], picoos_uint16 pos, picoos_int16 sym) {
201    inSeq[pos].sym = sym;
202    inSeq[pos].pos = PICOTRNS_POS_INSERT;
203}
204
205/* copy elements from inSeq to outSeq, inserting syllable separators in some trivial way.
206 * inSeq is assumed to be at most PICOTRNS_MAX_NUM_POSSYM, outSeq at least of size PICOTRNS_MAX_NUM_POSSYM  */
207pico_status_t picotrns_trivial_syllabify(picoktab_Phones phones,
208        const picotrns_possym_t inSeq[], const picoos_uint16 inSeqLen,
209        picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen)
210{
211    picoos_uint16 i = 0, j = 0, out = 0, numInserted = 0;
212    picoos_uint8 vowelFound = FALSE;
213    picoos_uint16 accentpos = 0;
214    picoos_int16 accent = 0;
215
216    PICODBG_TRACE(("start"));
217
218
219    while (i < inSeqLen) {
220        /* make sure that at least one more sylSep can be inserted */
221        if (inSeqLen+numInserted+1 >= maxOutSeqLen) {
222            return PICO_EXC_BUF_OVERFLOW;
223        }
224       /* let j skip consonant cluster */
225        accent = 0;
226        accentpos = 0;
227        while ((j < inSeqLen) && !picoktab_isSyllCarrier(phones,(picoos_uint8)inSeq[j].sym)) {
228            if ((inSeq[j].sym == picoktab_getPrimstressID(phones))
229                    || (inSeq[j].sym == picoktab_getPrimstressID(phones))) {
230                PICODBG_TRACE(("j skipping stress symbol inSeq[%i].sym = %c", j, inSeq[j].sym));
231                accent = inSeq[j].sym;
232                accentpos = j;
233            } else {
234                PICODBG_TRACE(("j skipping consonant inSeq[%i].sym = %c", j, inSeq[j].sym));
235            }
236            j++;
237        }
238        if (j < inSeqLen) { /* j is at the start of a new vowel */
239            /* copy consonant cluster (moving i) to output, insert syll separator if between vowels */
240            while (i < j-1) {
241                if ((accent > 0) && (i == accentpos)) {
242                    PICODBG_TRACE(("skipping inSeq[%i].sym = %c (stress)", i, inSeq[i].sym));
243                  i++;
244                } else {
245                PICODBG_TRACE(("copying inSeq[%i].sym = %c (consonant) into output buffer", i, inSeq[i].sym));
246                 outSeq[out++] = inSeq[i++];
247                }
248            }
249            if (vowelFound) { /* we're between vowels */
250                PICODBG_TRACE(("inserting syllable separator into output buffer"));
251                insertSym(outSeq,out++,picoktab_getSyllboundID(phones));
252                if (accent > 0) {
253                    insertSym(outSeq,out++,accent);
254                }
255                numInserted++;
256            }
257            if ((accent > 0) && (i == accentpos)) {
258                PICODBG_TRACE(("skipping inSeq[%i].sym = %c (stress)", i, inSeq[i].sym));
259              i++;
260            } else {
261            PICODBG_TRACE(("copying inSeq[%i].sym = %c (consonant) into output buffer", i, inSeq[i].sym));
262             outSeq[out++] = inSeq[i++];
263            }
264            vowelFound = TRUE;
265            /* now copy vowel cluster */
266            while ((i < inSeqLen) && picoktab_isSyllCarrier(phones,(picoos_uint8)inSeq[i].sym)) {
267                PICODBG_TRACE(("copying inSeq[%i].sym = %c (vowel) into output buffer", i, inSeq[i].sym));
268                outSeq[out++] = inSeq[i++];
269            }
270            j = i;
271        } else { /* j is at end of word or end of input */
272            while (i < j) {
273                PICODBG_TRACE(("copying inSeq[%i].sym = %c (consonant or stress) into output buffer", i, inSeq[i].sym));
274                outSeq[out++] = inSeq[i++];
275            }
276        }
277        *outSeqLen = out;
278    }
279    PICODBG_ASSERT((out == inSeqLen + numInserted));
280
281    return PICO_OK;
282}
283
284
285/* ******** +CT+: full transduction procedure **********/
286
287
288/* Gets next acceptable alternative for output symbol '*outSym' at current recursion position
289   starting from previous alternative in 'altDesc'; possibly uses input symbol
290   given by 'inSeq'/'inSeq'; returns whether alterative was found in '*found';
291   if '*found', the other output values ('*outRefPos', '*endFSTstate', '*nextInPos'*)
292   return the characteristics for next recursion step;
293   if '*found' is false, the output values are undefined. */
294
295static void GetNextAlternative (picokfst_FST fst, picotrns_AltDesc altDesc,
296                                const picotrns_possym_t inSeq[], picoos_uint16 inSeqLen,
297                                picokfst_symid_t * outSym, picoos_int32 * outRefPos,
298                                picokfst_state_t * endFSTState, picoos_int32 * nextInPos, picoos_bool * found)
299{
300
301    picoos_bool inSymFound;
302    picoos_bool pairFound;
303    picokfst_class_t pairClass;
304    picoos_bool inEpsTransFound;
305    picokfst_symid_t inSym;
306
307    (*found) = 0;
308    do {
309        switch (altDesc->altState) {
310            case 0:   /* before pair search */
311                if (altDesc->inPos < inSeqLen) {
312                    inSym = inSeq[altDesc->inPos].sym;
313                    if (inSym == PICOKFST_SYMID_EPS) {
314                        /* very special case: input epsilon simply produces eps in output
315                           without fst state change */
316                        (*found) = 1;
317                        (*outSym) = PICOKFST_SYMID_EPS;
318                        (*outRefPos) = inSeq[altDesc->inPos].pos;
319                        (*endFSTState) = altDesc->startFSTState;
320                        (*nextInPos) = altDesc->inPos + 1;
321                        altDesc->altState = 2;
322                    } else {
323                        /* start search for alternatives using input symbol */
324                        picokfst_kfstStartPairSearch(fst,inSeq[altDesc->inPos].sym,& inSymFound,& altDesc->searchState);
325                        if (!inSymFound) {
326                            altDesc->altState = 2;
327                            PICODBG_INFO_CTX();
328                            PICODBG_INFO_MSG((" didnt find symbol "));
329                            PICOTRNS_PRINTSYM(NULL, inSeq[altDesc->inPos].sym);
330                            PICODBG_INFO_MSG(("\n"));
331
332                        } else {
333                            altDesc->altState = 1;
334                        }
335                    }
336                } else {
337                    altDesc->altState = 2;
338                }
339                break;
340            case 1:   /* within pair search */
341                picokfst_kfstGetNextPair(fst,& altDesc->searchState,& pairFound,& (*outSym),& pairClass);
342                if (pairFound) {
343                    picokfst_kfstGetTrans(fst,altDesc->startFSTState,pairClass,& (*endFSTState));
344                    if ((*endFSTState) > 0) {
345                        (*found) = 1;
346                        (*outRefPos) = inSeq[altDesc->inPos].pos;
347                        (*nextInPos) = altDesc->inPos + 1;
348                    }
349                } else {
350                    /* no more pair found */
351                    altDesc->altState = 2;
352                }
353                break;
354            case 2:   /* before inEps trans search */
355                picokfst_kfstStartInEpsTransSearch(fst,altDesc->startFSTState,& inEpsTransFound,& altDesc->searchState);
356                if (inEpsTransFound) {
357                    altDesc->altState = 3;
358                } else {
359                    altDesc->altState = 4;
360                }
361                break;
362            case 3:   /* within inEps trans search */
363                picokfst_kfstGetNextInEpsTrans(fst,& altDesc->searchState,& inEpsTransFound,& (*outSym),& (*endFSTState));
364                if (inEpsTransFound) {
365                    (*found) = 1;
366                    (*outRefPos) =  PICOTRNS_POS_INSERT;
367                    (*nextInPos) = altDesc->inPos;
368                } else {
369                    altDesc->altState = 4;
370                }
371                break;
372            case 4:   /* no more alternatives */
373                break;
374        }
375    } while (! ((*found) || (altDesc->altState == 4)) );  /* i.e., until (*found) || (altState == 4) */
376}
377
378
379
380/* Transfers current alternatives path stored in 'altDesc' with current path length 'pathLen'
381   into 'outSeq'/'outSeqLen'. The number of solutions is incremented. */
382
383static void NoteSolution (picoos_uint32 * nrSol, picotrns_printSolutionFct printSolution,
384                          picotrns_altDesc_t altDesc[], picoos_uint16 pathLen,
385                          picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen)
386{
387    register picotrns_AltDesc ap;
388    picoos_uint32 i;
389
390    (*nrSol)++;
391    (*outSeqLen) = 0;
392    for (i = 0; i < pathLen; i++) {
393        if (i < maxOutSeqLen) {
394            ap = &altDesc[i];
395            outSeq[i].sym = ap->altOutSym;
396            outSeq[i].pos = ap->altOutRefPos;
397            (*outSeqLen)++;
398        }
399    }
400    if (pathLen > maxOutSeqLen) {
401        PICODBG_WARN(("**** output symbol array too small to hold full solution\n"));
402    }
403    if (printSolution != NULL) {
404        printSolution(outSeq,(*outSeqLen));
405    }
406}
407
408
409
410/* *
411    general scheme to get all solutions ("position" refers to abstract backtracking recursion depth,
412    which in the current solution is equal to the output symbol position):
413
414    "set position to first position";
415    "initialize alternatives in first position";
416    REPEAT
417      IF "current state in current position is a solution" THEN
418        "note solution";
419      END;
420      "get first or next acceptable alternative in current position";
421      IF "acceptable alternative found" THEN
422        "note alternative";
423        "go to next position";
424        "initialize alternatives in that position";
425      ELSE
426        "step back to previous position";
427      END;
428    UNTIL "current position is before first position"
429***/
430
431
432/* Initializes transduction state for further use in repeated application
433   of 'TransductionStep'. */
434
435static void StartTransduction (struct picotrns_transductionState * transductionState)
436{
437    (*transductionState).phase = 0;
438}
439
440
441
442/* Performs one step in the transduction of 'inSeqLen' input symbols with corresponding
443   reference positions in 'inSeq'. '*transductionState' must have been
444   initialized by 'StartTransduction'. Repeat calls to this procedure until '*finished' returns true.
445   The output is returned in 'outSeqLen' symbols and reference positions in 'outSeq'.
446   The output reference positions refer to the corresponding input reference positions.
447   Inserted output symbols receive the reference position -1. If several solutions are possible,
448   only the last found solution is returned.
449   'altDesc' is a temporary workspace which should be at least one cell longer than 'outSeq'.
450   'firstSolOnly' determines whether only the first solution should be found or if
451   the search should go on to find all solutions (mainly for testing purposes).
452
453   NOTE: current version written for use in single repetitive steps;
454   could be simplified if full transduction can be done as an atomic operation */
455
456static void TransductionStep (picokfst_FST fst, struct picotrns_transductionState * transductionState,
457                              picotrns_altDesc_t altDesc[], picoos_uint16 maxAltDescLen,
458                              picoos_bool firstSolOnly, picotrns_printSolutionFct printSolution,
459                              const picotrns_possym_t inSeq[], picoos_uint16 inSeqLen,
460                              picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen,
461                              picoos_bool * finished)
462{
463    register picotrns_AltDesc ap;
464    picoos_int32 i;
465    picokfst_state_t endFSTState;
466    picoos_int32 nextInPos;
467    picoos_bool found;
468    picokfst_symid_t outSym;
469    picoos_int32 outRefPos;
470    picoos_int32 tmpRecPos;
471
472    (*finished) = 0;
473    tmpRecPos = (*transductionState).recPos;
474    switch ((*transductionState).phase) {
475        case 0:   /* before initialization */
476            (*transductionState).nrSol = 0;
477
478            /* check for initial solution (empty strings are always accepted) */
479            if (inSeqLen == 0) {
480                NoteSolution(& (*transductionState).nrSol,printSolution,altDesc,0,outSeq,outSeqLen,maxOutSeqLen);
481            }
482
483            /* initialize first recursion position */
484            tmpRecPos = 0;
485            ap = & altDesc[0];
486            ap->startFSTState = 1;
487            ap->inPos = 0;
488            ap->altState = 0;
489            (*transductionState).phase = 1;
490            break;
491
492        case 1:   /* before regular recursion step */
493            if ((tmpRecPos < 0) || (firstSolOnly && ((*transductionState).nrSol > 0))) {
494                /* end reached */
495                (*transductionState).phase = 2;
496            } else {
497                /* not finished; do regular step */
498
499                /* get first or next acceptable alternative in current position */
500                GetNextAlternative(fst,& altDesc[tmpRecPos],inSeq,inSeqLen,& outSym,& outRefPos,& endFSTState,& nextInPos,& found);
501                if (found) {
502                    /* note alternative in current position */
503                    ap = & altDesc[tmpRecPos];
504                    ap->altOutSym = outSym;
505                    ap->altOutRefPos = outRefPos;
506
507                    /* check for solution after found alternative */
508                    if ((nextInPos == inSeqLen) && picokfst_kfstIsAcceptingState(fst,endFSTState)) {
509                        NoteSolution(& (*transductionState).nrSol,printSolution,altDesc,tmpRecPos+1,
510                                     outSeq,outSeqLen,maxOutSeqLen);
511                    }
512
513                    /* go to next position if possible, start search for follower alternative symbols */
514                    if (tmpRecPos < maxAltDescLen-1) {
515                        /* got to next position */
516                        tmpRecPos = tmpRecPos + 1;
517
518                        /* initialize alternatives in new position */
519                        ap = & altDesc[tmpRecPos];
520                        ap->startFSTState = endFSTState;
521                        ap->inPos = nextInPos;
522                        ap->altState = 0;
523
524                    } else {
525                        /* do not go on due to limited path but still treat alternatives in current position */
526                        PICODBG_WARN(("--- transduction path too long; may fail to find solution\n"));
527                    }
528                } else {  /* no more acceptable alternative found in current position */
529                    /* backtrack to previous recursion */
530                    tmpRecPos = tmpRecPos - 1;
531                }
532            }
533            break;
534
535        case 2:   /* before finish */
536            if ((*transductionState).nrSol == 0) {
537                PICODBG_WARN(("--- no transduction solution found, using input as output\n"));
538                i = 0;
539                while ((i < inSeqLen) && (i < maxOutSeqLen)) {
540                    outSeq[i].sym = inSeq[i].sym;
541                    outSeq[i].pos = inSeq[i].pos;
542                    i++;
543                }
544                (*outSeqLen) = i;
545            } else if ((*transductionState).nrSol > 1) {
546                PICODBG_WARN(("--- more than one transducer solutions found\n"));
547            }
548            (*transductionState).phase = 3;
549            break;
550
551        case 3:   /* after finish */
552            (*finished) = 1;
553            break;
554    }
555    (*transductionState).recPos = tmpRecPos;
556}
557
558
559
560/* see description in header */
561pico_status_t picotrns_transduce (picokfst_FST fst, picoos_bool firstSolOnly,
562                                         picotrns_printSolutionFct printSolution,
563                                         const picotrns_possym_t inSeq[], picoos_uint16 inSeqLen,
564                                         picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen,
565                                         picotrns_AltDesc altDescBuf, picoos_uint16 maxAltDescLen,
566                                         picoos_uint32 *nrSteps)
567{
568    struct picotrns_transductionState transductionState;
569    picoos_bool finished;
570
571#if defined(PICO_DEBUG)
572    {
573        picoos_uint16 i;
574
575        PICODBG_INFO_CTX();
576        PICODBG_INFO_MSG(("got input: "));
577        for (i=0; i<inSeqLen; i++) {
578            PICODBG_INFO_MSG((" %d", inSeq[i].sym));
579        }
580        PICODBG_INFO_MSG((" ("));
581        PICOTRNS_PRINTSYMSEQ(NULL,inSeq,inSeqLen);
582        PICODBG_INFO_MSG((")\n"));
583    }
584#endif
585   StartTransduction(&transductionState);
586    finished = 0;
587    *nrSteps = 0;
588    while (!finished) {
589        TransductionStep(fst,&transductionState,altDescBuf,maxAltDescLen,firstSolOnly,printSolution,
590                         inSeq,inSeqLen,outSeq,outSeqLen,maxOutSeqLen,&finished);
591        (*nrSteps)++;
592    }
593
594    return PICO_OK;
595}
596
597
598/**
599 * Data structure for picotrns_SimpleTransducer object.
600 */
601typedef struct picotrns_simple_transducer {
602    picoos_Common common;
603    picotrns_possym_t possymBufA[PICOTRNS_MAX_NUM_POSSYM+1];
604    picotrns_possym_t possymBufB[PICOTRNS_MAX_NUM_POSSYM+1];
605    picotrns_possym_t * possymBuf; /**< the buffer of the pos/sym pairs */
606    picotrns_possym_t * possymBufTmp;
607    picoos_uint16 possymReadPos, possymWritePos; /* next pos to read from phonBufIn, next pos to write to phonBufIn */
608
609    /* buffer for internal calculation of transducer */
610    picotrns_AltDesc altDescBuf;
611    /* the number of AltDesc in the buffer */
612    picoos_uint16 maxAltDescLen;
613} picotrns_simple_transducer_t;
614
615
616pico_status_t  picotrns_stInitialize(picotrns_SimpleTransducer transducer)
617{
618    transducer->possymBuf = transducer->possymBufA;
619    transducer->possymBufTmp = transducer->possymBufB;
620    transducer->possymReadPos = 0;
621    transducer->possymWritePos = 0;
622    return PICO_OK;
623}
624/** creates a SimpleTranducer with a working buffer of given size
625 *
626 * @param mm      MemoryManager handle
627 * @param common  Common handle
628 * @param maxAltDescLen maximal size for working buffer (in bytes)
629 * @return handle to new SimpleTransducer or NULL if error
630 */
631picotrns_SimpleTransducer picotrns_newSimpleTransducer(picoos_MemoryManager mm,
632                                              picoos_Common common,
633                                              picoos_uint16 maxAltDescLen)
634{
635    picotrns_SimpleTransducer this;
636    this = picoos_allocate(mm, sizeof(picotrns_simple_transducer_t));
637    if (this == NULL) {
638        picoos_deallocate(mm, (void *)&this);
639        picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, NULL, NULL);
640        return NULL;
641    }
642
643    /* allocate working buffer */
644    this->altDescBuf = picotrns_allocate_alt_desc_buf(mm, maxAltDescLen, &this->maxAltDescLen);
645    if (this->altDescBuf == NULL) {
646        picoos_deallocate(mm, (void *)&this);
647        picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, NULL, NULL);
648        return NULL;
649    }
650    this->common = common;
651    picotrns_stInitialize(this);
652    return this;
653}
654/** disposes a SimpleTransducer
655 *
656 * @param this
657 * @param mm
658 * @return PICO_OK
659 */
660pico_status_t picotrns_disposeSimpleTransducer(picotrns_SimpleTransducer * this,
661                                        picoos_MemoryManager mm)
662{
663    if (NULL != (*this)) {
664        picotrns_deallocate_alt_desc_buf(mm,&(*this)->altDescBuf);
665        picoos_deallocate(mm, (void *) this);
666        (*this) = NULL;
667    }
668    return PICO_OK;
669}
670
671/** transduces the contents previously inserted via @ref picotrns_newSimpleTransducer and @ref
672 *  picotrns_disposeSimpleTransducer.
673 *
674 * @param this
675 * @param fst
676 * @return
677 */
678pico_status_t picotrns_stTransduce(picotrns_SimpleTransducer this, picokfst_FST fst)
679{
680    picoos_uint16 outSeqLen;
681    picoos_uint32 nrSteps;
682    pico_status_t status;
683
684    status = picotrns_transduce(fst,TRUE,NULL,
685            this->possymBuf, this->possymWritePos,
686            this->possymBufTmp,&outSeqLen, PICOTRNS_MAX_NUM_POSSYM,
687            this->altDescBuf,this->maxAltDescLen,&nrSteps);
688    if (PICO_OK != status) {
689        return status;
690    }
691    return picotrns_eliminate_epsilons(this->possymBufTmp,outSeqLen,this->possymBuf,&this->possymWritePos,PICOTRNS_MAX_NUM_POSSYM);
692}
693
694/**
695 * Add chars from NULLC-terminated string \c inStr, shifted to plane \c plane, to internal input buffer of
696 *  \c transducer.
697 *
698 * @param this is an initialized picotrns_SimpleTransducer
699 * @param inStr NULLC-terminated byte sequence
700 * @param plane
701 * @return PICO_OK, if all bytes fit into buffer, or PICO_EXC_BUF_OVERFLOW otherwise
702 */
703pico_status_t picotrns_stAddWithPlane(picotrns_SimpleTransducer this, picoos_char * inStr, picoos_uint8 plane)
704{
705    while ((*inStr) && (this->possymWritePos < PICOTRNS_MAX_NUM_POSSYM)) {
706        this->possymBuf[this->possymWritePos].pos = PICOTRNS_POS_INSERT;
707        this->possymBuf[this->possymWritePos].sym = (plane << 8) + (*inStr);
708        PICODBG_DEBUG(("inserting pos/sym = %i/'%c' at pos %i",
709                this->possymBuf[this->possymWritePos].pos,
710                this->possymBuf[this->possymWritePos].sym,
711                this->possymWritePos));
712        this->possymWritePos++;
713        inStr++;
714    }
715    if (!(*inStr)) {
716        return PICO_OK;
717    } else {
718        return PICO_EXC_BUF_OVERFLOW;
719    }
720}
721
722pico_status_t picotrns_stGetSymSequence(
723        picotrns_SimpleTransducer this,
724        picoos_uint8 * outputSymIds,
725        picoos_uint32 maxOutputSymIds)
726{
727    picoos_uint8 plane;
728    picoos_uint32 outputCount = 0;
729    while ((this->possymReadPos < this->possymWritePos) && (outputCount < maxOutputSymIds)) {
730        *outputSymIds++ = picotrns_unplane(this->possymBuf[this->possymReadPos++].sym, &plane);
731        outputCount++;
732    }
733    *outputSymIds = NULLC;
734    if (outputCount <= maxOutputSymIds) {
735        return PICO_OK;
736    } else {
737        return PICO_EXC_BUF_OVERFLOW;
738    }
739}
740
741#ifdef __cplusplus
742}
743#endif
744
745/* end picotrns.c */
746