1/*
2 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16/**
17 * @file picotrns.h
18 *
19 * fst processing
20 *
21 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
22 * All rights reserved.
23 *
24 * History:
25 * - 2009-04-20 -- initial version
26 *
27 */
28
29/** @addtogroup picotrns
30 *
31 * Conventions:
32 *
33 * - The input to the transducer is a list of pos/sym pairs, where pos are arbitrary position markers
34 * - All positions are allowed on input (in particular all those coming as an output of a previous transduction)
35 * - A phone sequence to be transduced has to begin with PICOKNOW_PHON_START_ID and end with PICOKNOW_PHON_TERM_ID
36 *   These special symbols are kept in the transduction output (as first and last symbol)
37 * - Symbols inserted by the transduction process always get their position marker pos=PICOTRNS_POS_INSERT
38 * - The order of positions on output must be the same as that on input, i.e. apart from inserted pairs, the
39 *   output position sequence must be a sub-sequence of the input position sequence.
40 * - Inserted symbols are always preceded by a positioned pos/sym pair, e.g.
41 *   if the sequence pos1/sym1, pos2/sym2 should be transduced to x/sym3, y/sym4, z/sym5, then x must be pos1 or pos2
42 *   and not PICOTRNS_POS_INSERT
43 *
44 *   For lingware developers: Insertions are always interpreted "to the right"
45 *     - E.g.: The original sequence is phon1 , command , phon2
46 *          - The input to the transducer is then  pos1/phon1 , pos2/phon2
47 *          - The output is pos1/phon1'  -1/phon_ins pos2/phon2'  [assuming -1 is the special insertion pos]
48 *     - Then the new sequence will be recomposed as phon1' , phon_ins , command , phon2'  [note position of command!]
49 *     - To overwrite this behaviour, rules must be formulated such that the transduction output is
50 *     pos1/phon1'  pos2/phon_ins  -1/phon2'
51 */
52#ifndef PICOTRNS_H_
53#define PICOTRNS_H_
54
55#include "picoos.h"
56#include "picokfst.h"
57#include "picoktab.h"
58
59#ifdef __cplusplus
60extern "C" {
61#endif
62#if 0
63}
64#endif
65
/* size (in pos/sym pairs) in terms of which picotrns_trivial_syllabify states its buffer assumptions */
#define PICOTRNS_MAX_NUM_POSSYM 255

/* Special position values.
 * Each expansion is fully parenthesized so that the cast stays attached to its
 * operand in every context; without the outer parentheses an expression such as
 * 'sizeof PICOTRNS_POS_INSERT' would parse as 'sizeof(picoos_int16) - 1'. */
#define PICOTRNS_POS_INSERT   ((picoos_int16) -1)    /* position returned by transducer to mark symbols inserted by the transducer */
#define PICOTRNS_POS_INVALID  ((picoos_int16) -2)    /* value to mark an invalid (e.g. uninitialized) position */
#define PICOTRNS_POS_IGNORE   ((picoos_int16) -3)    /* value to mark a pos/sym pair to be ignored (e.g. start/term symbols only used by the transducer) */
71
72
/* one element of a transducer input/output sequence: a position marker paired with a symbol */
73typedef struct picotrns_possym {
74    picoos_int16 pos;   /* position marker; may also hold one of the special PICOTRNS_POS_* values */
75    picoos_int16 sym;   /* the symbol belonging to that position */
76} picotrns_possym_t;
77
/* NOTE(review): undocumented in this header. From the signature alone: takes a
 * 16-bit symbol 'symIn', has an output pointer 'plane' and returns an 8-bit value.
 * Presumably it splits a plane-encoded symbol into its plane (written to '*plane')
 * and the symbol id within that plane (returned) -- confirm against picotrns.c. */
78picoos_uint8 picotrns_unplane(picoos_int16 symIn, picoos_uint8 * plane);
79
80
/* Debug output helpers.
 * With PICO_DEBUG defined they are declared as real functions; otherwise the
 * same names are defined as macros, so call sites need no #ifdef of their own. */
81#if defined(PICO_DEBUG)
82
/* debug output of a single symbol 'insym'; 'kbdbg' is a knowledge base
 * (presumably used to look up a readable symbol name -- confirm) */
83void PICOTRNS_PRINTSYM(picoknow_KnowledgeBase kbdbg, picoos_int16 insym);
84
/* debug output of the 'seqLen' pos/sym pairs in 'seq' */
85void PICOTRNS_PRINTSYMSEQ(picoknow_KnowledgeBase kbdbg, const picotrns_possym_t seq[], const picoos_uint16 seqLen);
86
/* same parameter list as picotrns_printSolutionFct, so it can be handed to
 * picotrns_transduce as its 'printSolution' argument */
87void picotrns_printSolution(const picotrns_possym_t outSeq[], const picoos_uint16 outSeqLen);
88
89#else
/* without PICO_DEBUG the two print macros expand to nothing */
90#define PICOTRNS_PRINTSYM(x,y)
91#define PICOTRNS_PRINTSYMSEQ(x,y,z)
/* expands to NULL; picotrns_transduce documents NULL as "do not display solutions" */
92#define picotrns_printSolution NULL
93#endif
94
95
/* handle to the working array that picotrns_transduce receives as 'altDescBuf';
 * struct picotrns_altDesc is not defined in this header, so the type is opaque here */
96typedef struct picotrns_altDesc * picotrns_AltDesc;
97
98
/* NOTE(review): undocumented in this header. Presumably allocates, via memory
 * manager 'mm', a buffer of at most 'maxByteSize' bytes and reports through
 * '*numAltDescs' how many descriptors it can hold -- confirm against picotrns.c. */
99picotrns_AltDesc picotrns_allocate_alt_desc_buf(picoos_MemoryManager mm, picoos_uint32 maxByteSize, picoos_uint16 * numAltDescs);
100
/* counterpart of picotrns_allocate_alt_desc_buf; takes the address of the handle
 * (presumably so the handle can be reset after release -- confirm) */
101void picotrns_deallocate_alt_desc_buf(picoos_MemoryManager mm, picotrns_AltDesc * altDescBuf);
102
103
104/* type of function for printing transduction solutions;
105   only for testing purposes in transduction mode where all solutions
106   are produced.
   'outSeq' is the solution to print and 'outSeqLen' its number of elements.
   This is a function type (not a pointer type); used as a parameter type, as in
   picotrns_transduce, it is adjusted to a pointer to such a function, which is
   why NULL is an acceptable argument there. */
107typedef void picotrns_printSolutionFct(const picotrns_possym_t outSeq[], const picoos_uint16 outSeqLen);
108
109
110
111/** overall transduction; transduces 'inSeq' with 'inSeqLen' elements
112   to '*outSeqLen' elements in 'outSeq';
113 *
114 * @param fst the finite-state transducer used for transduction
115 * @param firstSolOnly determines whether only the first solution (the usual case)
116   or all solutions should be produced (for testing); only the last found
117   solution is returned in 'outSeq';
118 * @param printSolution if not NULL, every found solution is displayed using
119   the given function
120 * @param inSeq the input sequence
121 * @param inSeqLen the input sequence length
122 * @param[out] outSeq the output sequence
123 * @param[out] outSeqLen the output sequence length
124 * @param maxOutSeqLen   must provide the maximum length of 'outSeq'
125 * @param altDescBuf must provide a working array of length 'maxAltDescLen'
126 * @param maxAltDescLen should be chosen at least 'maxOutSeqLen' + 1
127 * @param[out] nrSteps returns the overall internal number of iterative steps done
128 * @return status of the transduction: PICO_OK, if transduction successful
129   @note if 'outSeq' or 'altDescBuf' are too small to hold a solution,
130   an error occurs and the input is simply transferred to the output
131   (up to maximum possible length)
132 */
133extern pico_status_t picotrns_transduce (picokfst_FST fst, picoos_bool firstSolOnly,
134                                         picotrns_printSolutionFct printSolution,
135                                         const picotrns_possym_t inSeq[], picoos_uint16 inSeqLen,
136                                         picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen,
137                                         picotrns_AltDesc altDescBuf, picoos_uint16 maxAltDescLen,
138                                         picoos_uint32 *nrSteps);
139
140
141
142/* transduce 'inSeq' into 'outSeq'; 'inSeq' has to be terminated with the id for symbol '#', and 'outSeq' is terminated in the same way. */
143/*
144pico_status_t picotrns_transduce_sequence(picokfst_FST fst, const picotrns_possym_t inSeq[], picoos_uint16 inSeqLen,
145        picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen);
146*/
147
148/* copy elements from inSeq to outSeq, ignoring elements with epsilon symbol.
 * inSeqLen is the number of elements in inSeq; the resulting length is passed
 * back through outSeqLen.
 * NOTE(review): maxOutSeqLen is presumably the capacity of outSeq, as for
 * picotrns_transduce; behaviour on overflow is not visible here -- confirm. */
149pico_status_t picotrns_eliminate_epsilons(const picotrns_possym_t inSeq[], picoos_uint16 inSeqLen,
150        picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen);
151
152/* copy elements from inSeq to outSeq, inserting syllable separators in some trivial way.
153 * inSeq is assumed to hold at most PICOTRNS_MAX_NUM_POSSYM elements, and outSeq
 * to have room for at least PICOTRNS_MAX_NUM_POSSYM elements.
 * The resulting length is passed back through outSeqLen.
 * NOTE(review): 'phones' is presumably the phone table consulted to decide where
 * separators go, and maxOutSeqLen the capacity of outSeq -- confirm in picotrns.c. */
154pico_status_t picotrns_trivial_syllabify(picoktab_Phones phones,
155        const picotrns_possym_t inSeq[], const picoos_uint16 inSeqLen,
156        picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen);
157
158
159/**  object   : SimpleTransducer
160 *   shortcut : st
161 *
162 */
163typedef struct picotrns_simple_transducer * picotrns_SimpleTransducer;
164
165picotrns_SimpleTransducer picotrns_newSimpleTransducer(picoos_MemoryManager mm,
166                                              picoos_Common common,
167                                              picoos_uint16 maxAltDescLen);
168
169pico_status_t picotrns_disposeSimpleTransducer(picotrns_SimpleTransducer * this,
170        picoos_MemoryManager mm);
171
172pico_status_t  picotrns_stInitialize(picotrns_SimpleTransducer transducer);
173
174pico_status_t picotrns_stAddWithPlane(picotrns_SimpleTransducer this, picoos_char * inStr, picoos_uint8 plane);
175
176pico_status_t picotrns_stTransduce(picotrns_SimpleTransducer this, picokfst_FST fst);
177
178pico_status_t picotrns_stGetSymSequence(
179        picotrns_SimpleTransducer this,
180        picoos_uint8 * outputSymIds,
181        picoos_uint32 maxOutputSymIds);
182
183
184
185
186
187#ifdef __cplusplus
188}
189#endif
190
191#endif /*PICOTRNS_H_*/
192