1/*
2 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16/**
17 * @file picokfst.c
18 *
19 * FST knowledge loading and access
20 *
21 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
22 * All rights reserved.
23 *
24 * History:
25 * - 2009-04-20 -- initial version
26 *
27 */
28#include "picoos.h"
29#include "picodbg.h"
30#include "picoknow.h"
31#include "picokfst.h"
32
33#ifdef __cplusplus
34extern "C" {
35#endif
36#if 0
37}
38#endif
39
40
41#define FileHdrSize 4       /* size of FST file header */
42
43
44
45/* ************************************************************/
46/* function to create specialized kb, */
47/* to be used by picorsrc only */
48/* ************************************************************/
49
50/** object       : FSTKnowledgeBase
51 *  shortcut     : kfst
52 *  derived from : picoknow_KnowledgeBase
53 */
54
55typedef struct kfst_subobj * kfst_SubObj;
56
57typedef struct kfst_subobj{
58    picoos_uint8 * fstStream;         /* the byte stream base address */
59    picoos_int32 hdrLen;              /* length of file header */
60    picoos_int32 transductionMode;    /* transduction mode to be used for FST */
61    picoos_int32 nrClasses;           /* nr of pair/transition classes in FST; class is in [1..nrClasses] */
62    picoos_int32 nrStates;            /* nr of states in FST; state is in [1..nrState] */
63    picoos_int32 termClass;           /* pair class of terminator symbol pair; probably obsolete */
64    picoos_int32 alphaHashTabSize;    /* size of pair alphabet hash table */
65    picoos_int32 alphaHashTabPos;     /* absolute address of the start of the pair alphabet */
66    picoos_int32 transTabEntrySize;   /* size in bytes of each transition table entry */
67    picoos_int32 transTabPos;         /* absolute address of the start of the transition table */
68    picoos_int32 inEpsStateTabPos;    /* absolute address of the start of the input epsilon transition table */
69    picoos_int32 accStateTabPos;      /* absolute address of the table of accepting states */
70} kfst_subobj_t;
71
72
73
74/* ************************************************************/
75/* primitives for reading from byte stream */
76/* ************************************************************/
77
78/* Converts 'nrBytes' bytes starting at position '*pos' in byte stream 'stream' into unsigned number 'num'.
79   '*pos' is modified to the position right after the number */
80static void FixedBytesToUnsignedNum (picoos_uint8 * stream, picoos_uint8 nrBytes, picoos_uint32 * pos, picoos_uint32 * num)
81{
82    picoos_int32 i;
83
84    (*num) = 0;
85    for (i = 0; i < nrBytes; i++) {
86        (*num) = ((*num) << 8) + (picoos_uint32)stream[*pos];
87        (*pos)++;
88    }
89}
90
91
92/* Converts 'nrBytes' bytes starting at position '*pos' in byte stream 'stream' into signed number 'num'.
93   '*pos' is modified to the position right after the number */
94static void FixedBytesToSignedNum (picoos_uint8 * stream, picoos_uint8 nrBytes, picoos_uint32 * pos, picoos_int32 * num)
95{
96    picoos_int32 i;
97    picoos_uint32 val;
98
99    val = 0;
100    for (i = 0; i < nrBytes; i++) {
101        val = (val << 8) + (picoos_uint32)stream[*pos];
102        (*pos)++;
103    }
104    if (val % 2 == 1) {
105        /* negative number */
106        (*num) = -((picoos_int32)((val - 1) / 2)) - 1;
107    } else {
108        /* positive number */
109        (*num) = val / 2;
110    }
111}
112
113
114/* Converts varying-sized sequence of bytes starting at position '*pos' in byte stream 'stream'
115   into (signed) number 'num'. '*pos' is modified to the position right after the number. */
116static void BytesToNum (picoos_uint8 * stream, picoos_uint32 * pos, picoos_int32 * num)
117{
118    picoos_uint32 val;
119    picoos_uint32 b;
120
121    val = 0;
122    b = (picoos_uint32)stream[*pos];
123    (*pos)++;
124    while (b < 128) {
125        val = (val << 7) + b;
126        b = (picoos_uint32)stream[*pos];
127        (*pos)++;
128    }
129    val = (val << 7) + (b - 128);
130    if (val % 2 == 1) {
131        /* negative number */
132        (*num) = -((picoos_int32)((val - 1) / 2)) - 1;
133    } else {
134        /* positive number */
135        (*num) = val / 2;
136    }
137}
138
139
140/* ************************************************************/
141/* setting up FST from byte stream */
142/* ************************************************************/
143
144static pico_status_t kfstInitialize(register picoknow_KnowledgeBase this,
145        picoos_Common common)
146{
147    picoos_uint32 curpos;
148    picoos_int32 offs;
149    kfst_subobj_t * kfst;
150
151    PICODBG_DEBUG(("kfstInitialize -- start\n"));
152
153    if (NULL == this || NULL == this->subObj) {
154        return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, NULL,
155                NULL);
156    }
157    kfst = (kfst_subobj_t *) this->subObj;
158
159    /* +CT+ */
160    kfst->fstStream = this->base;
161    PICODBG_TRACE(("base: %d\n",this->base));
162    kfst->hdrLen = FileHdrSize;
163    curpos = kfst->hdrLen;
164    BytesToNum(kfst->fstStream,& curpos,& kfst->transductionMode);
165    BytesToNum(kfst->fstStream,& curpos,& kfst->nrClasses);
166    BytesToNum(kfst->fstStream,& curpos,& kfst->nrStates);
167    BytesToNum(kfst->fstStream,& curpos,& kfst->termClass);
168    BytesToNum(kfst->fstStream,& curpos,& kfst->alphaHashTabSize);
169    BytesToNum(kfst->fstStream,& curpos,& offs);
170    kfst->alphaHashTabPos = kfst->hdrLen + offs;
171    BytesToNum(kfst->fstStream,& curpos,& kfst->transTabEntrySize);
172    BytesToNum(kfst->fstStream,& curpos,& offs);
173    kfst->transTabPos = kfst->hdrLen + offs;
174    BytesToNum(kfst->fstStream,& curpos,& offs);
175    kfst->inEpsStateTabPos = kfst->hdrLen + offs;
176    BytesToNum(kfst->fstStream,& curpos,& offs);
177    kfst->accStateTabPos = kfst->hdrLen + offs;
178    /* -CT- */
179
180    return PICO_OK;
181}
182
183
184static pico_status_t kfstSubObjDeallocate(register picoknow_KnowledgeBase this,
185        picoos_MemoryManager mm)
186{
187    if (NULL != this) {
188        picoos_deallocate(mm, (void *) &this->subObj);
189    }
190    return PICO_OK;
191}
192
193
194/* calculates a small number of data (e.g. addresses) from kb for fast access.
195 * This data is encapsulated in a picokfst_FST that can later be retrieved
196 * with picokfst_getFST. */
197pico_status_t picokfst_specializeFSTKnowledgeBase(picoknow_KnowledgeBase this,
198                                                  picoos_Common common)
199{
200    pico_status_t status;
201
202    if (NULL == this) {
203        return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, NULL, NULL);
204    }
205    if (0 < this->size) {
206        /* not a dummy kb */
207        this->subDeallocate = kfstSubObjDeallocate;
208
209        this->subObj = picoos_allocate(common->mm, sizeof(kfst_subobj_t));
210
211        if (NULL == this->subObj) {
212            return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, NULL, NULL);
213        }
214        status = kfstInitialize(this, common);
215        if (PICO_OK != status) {
216            picoos_deallocate(common->mm,(void **)&this->subObj);
217        }
218    }
219    return PICO_OK;
220}
221
222
223/* ************************************************************/
224/* FST type and getFST function */
225/* ************************************************************/
226
227
228
229/* return kb FST for usage in PU */
230picokfst_FST picokfst_getFST(picoknow_KnowledgeBase this)
231{
232    if (NULL == this) {
233        return NULL;
234    } else {
235        return (picokfst_FST) this->subObj;
236    }
237}
238
239
240
241/* ************************************************************/
242/* FST access methods */
243/* ************************************************************/
244
245
246/* see description in header file */
247extern picoos_uint8 picokfst_kfstGetTransductionMode(picokfst_FST this)
248{
249    kfst_SubObj fst = (kfst_SubObj) this;
250    if (fst != NULL) {
251        return fst->transductionMode;
252    } else {
253        return 0;
254    }
255}
256
257
258/* see description in header file */
259extern void picokfst_kfstGetFSTSizes (picokfst_FST this, picoos_int32 *nrStates, picoos_int32 *nrClasses)
260{
261    kfst_SubObj fst = (kfst_SubObj) this;
262    if (fst != NULL) {
263        *nrStates = fst->nrStates;
264        *nrClasses = fst->nrClasses;
265    } else {
266        *nrStates = 0;
267        *nrClasses = 0;
268    }
269}
270
271/* see description in header file */
272extern void picokfst_kfstStartPairSearch (picokfst_FST this, picokfst_symid_t inSym,
273                                          picoos_bool * inSymFound, picoos_int32 * searchState)
274{
275    picoos_uint32 pos;
276    picoos_int32 offs;
277    picoos_int32 h;
278    picoos_int32 inSymCellPos;
279    picoos_int32 inSymX;
280    picoos_int32 nextSameHashInSymOffs;
281
282    kfst_SubObj fst = (kfst_SubObj) this;
283    (*searchState) =  -1;
284    (*inSymFound) = 0;
285    h = inSym % fst->alphaHashTabSize;
286    pos = fst->alphaHashTabPos + (h * 4);
287    FixedBytesToSignedNum(fst->fstStream,4,& pos,& offs);
288    if (offs > 0) {
289        inSymCellPos = fst->alphaHashTabPos + offs;
290        pos = inSymCellPos;
291        BytesToNum(fst->fstStream,& pos,& inSymX);
292        BytesToNum(fst->fstStream,& pos,& nextSameHashInSymOffs);
293        while ((inSymX != inSym) && (nextSameHashInSymOffs > 0)) {
294            inSymCellPos = inSymCellPos + nextSameHashInSymOffs;
295            pos = inSymCellPos;
296            BytesToNum(fst->fstStream,& pos,& inSymX);
297            BytesToNum(fst->fstStream,& pos,& nextSameHashInSymOffs);
298        }
299        if (inSymX == inSym) {
300            /* input symbol found; state is set to position after symbol cell */
301            (*searchState) = pos;
302            (*inSymFound) = 1;
303        }
304    }
305}
306
307
308/* see description in header file */
309extern void picokfst_kfstGetNextPair (picokfst_FST this, picoos_int32 * searchState,
310                                      picoos_bool * pairFound,
311                                      picokfst_symid_t * outSym, picokfst_class_t * pairClass)
312{
313    picoos_uint32 pos;
314    picoos_int32 val;
315
316    kfst_SubObj fst = (kfst_SubObj) this;
317    if ((*searchState) < 0) {
318        (*pairFound) = 0;
319        (*outSym) = PICOKFST_SYMID_ILLEG;
320        (*pairClass) =  -1;
321    } else {
322        pos = (*searchState);
323        BytesToNum(fst->fstStream,& pos,& val);
324        *outSym = (picokfst_symid_t)val;
325        if ((*outSym) != PICOKFST_SYMID_ILLEG) {
326            BytesToNum(fst->fstStream,& pos,& val);
327            *pairClass = (picokfst_class_t)val;
328            (*pairFound) = 1;
329            (*searchState) = pos;
330        } else {
331            (*pairFound) = 0;
332            (*outSym) = PICOKFST_SYMID_ILLEG;
333            (*pairClass) =  -1;
334            (*searchState) =  -1;
335        }
336    }
337}
338
339
340
341/* see description in header file */
342extern void picokfst_kfstGetTrans (picokfst_FST this, picokfst_state_t startState, picokfst_class_t transClass,
343                                   picokfst_state_t * endState)
344{
345
346    picoos_uint32 pos;
347    picoos_int32 index;
348    picoos_uint32 endStateX;
349
350    kfst_SubObj fst = (kfst_SubObj) this;
351    if ((startState < 1) || (startState > fst->nrStates) || (transClass < 1) || (transClass > fst->nrClasses)) {
352        (*endState) = 0;
353    } else {
354        index = (startState - 1) * fst->nrClasses + transClass - 1;
355        pos = fst->transTabPos + (index * fst->transTabEntrySize);
356        FixedBytesToUnsignedNum(fst->fstStream,fst->transTabEntrySize,& pos,& endStateX);
357        (*endState) = endStateX;
358    }
359}
360
361
362/* see description in header file */
363extern void picokfst_kfstStartInEpsTransSearch (picokfst_FST this, picokfst_state_t startState,
364                                                picoos_bool * inEpsTransFound, picoos_int32 * searchState)
365{
366
367    picoos_int32 offs;
368    picoos_uint32 pos;
369
370    kfst_SubObj fst = (kfst_SubObj) this;
371    (*searchState) =  -1;
372    (*inEpsTransFound) = 0;
373    if ((startState > 0) && (startState <= fst->nrStates)) {
374        pos = fst->inEpsStateTabPos + (startState - 1) * 4;
375        FixedBytesToSignedNum(fst->fstStream,4,& pos,& offs);
376        if (offs > 0) {
377            (*searchState) = fst->inEpsStateTabPos + offs;
378            (*inEpsTransFound) = 1;
379        }
380    }
381}
382
383
384
385/* see description in header file */
386extern void picokfst_kfstGetNextInEpsTrans (picokfst_FST this, picoos_int32 * searchState,
387                                            picoos_bool * inEpsTransFound,
388                                            picokfst_symid_t * outSym, picokfst_state_t * endState)
389{
390    picoos_uint32 pos;
391    picoos_int32 val;
392
393    kfst_SubObj fst = (kfst_SubObj) this;
394    if ((*searchState) < 0) {
395        (*inEpsTransFound) = 0;
396        (*outSym) = PICOKFST_SYMID_ILLEG;
397        (*endState) = 0;
398    } else {
399        pos = (*searchState);
400        BytesToNum(fst->fstStream,& pos,& val);
401        *outSym = (picokfst_symid_t)val;
402        if ((*outSym) != PICOKFST_SYMID_ILLEG) {
403            BytesToNum(fst->fstStream,& pos,& val);
404            *endState = (picokfst_state_t)val;
405            (*inEpsTransFound) = 1;
406            (*searchState) = pos;
407        } else {
408            (*inEpsTransFound) = 0;
409            (*outSym) = PICOKFST_SYMID_ILLEG;
410            (*endState) = 0;
411            (*searchState) =  -1;
412        }
413    }
414}
415
416
417/* see description in header file */
418extern picoos_bool picokfst_kfstIsAcceptingState (picokfst_FST this, picokfst_state_t state)
419{
420
421    picoos_uint32 pos;
422    picoos_uint32 val;
423
424    kfst_SubObj fst = (kfst_SubObj) this;
425    if ((state > 0) && (state <= fst->nrStates)) {
426        pos = fst->accStateTabPos + (state - 1);
427        FixedBytesToUnsignedNum(fst->fstStream,1,& pos,& val);
428        return (val == 1);
429    } else {
430        return 0;
431    }
432}
433
434#ifdef __cplusplus
435}
436#endif
437
438/* End picofst.c */
439