1/*
2 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16/**
17 * @file picoklex.h
18 *
19 * knowledge base: lexicon
20 *
21 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
22 * All rights reserved.
23 *
24 * History:
25 * - 2009-04-20 -- initial version
26 *
27 */
28
29#ifndef PICOKLEX_H_
30#define PICOKLEX_H_
31
32#include "picoos.h"
33#include "picoknow.h"
34
35#ifdef __cplusplus
36extern "C" {
37#endif
38#if 0
39}
40#endif
41
42
43/* ************************************************************/
44/* function to create specialized kb, */
45/* to be used by picorsrc only */
46/* ************************************************************/
47
/**
 * Create the specialized (lexicon) knowledge base from the generic
 * knowledge base 'this'. To be used by picorsrc only.
 *
 * @param this    knowledge base to be specialized
 * @param common  picoos_Common handle supplied by the caller
 *                (NOTE(review): how it is used is not visible in this
 *                header -- confirm against the implementation)
 * @return pico_status_t status code; the concrete values are declared
 *         elsewhere and are not visible in this header
 */
48pico_status_t picoklex_specializeLexKnowledgeBase(picoknow_KnowledgeBase this,
49                                                  picoos_Common common);
50
51
52/* ************************************************************/
53/* lexicon type and getLex function */
54/* ************************************************************/
55
56/* lexicon type: a pointer to struct picoklex_lex. The struct is not
   defined in this header, so code including it can only pass the handle
   around and hand it to the picoklex_* functions. */
57typedef struct picoklex_lex * picoklex_Lex;
58
59/* return the lexicon (picoklex_Lex) of knowledge base 'this' for usage
   in a PU (presumably "processing unit" -- confirm).
   NOTE(review): the result for a NULL or non-lexicon knowledge base is
   not visible in this header -- check the implementation before relying
   on it. */
60picoklex_Lex picoklex_getLex(picoknow_KnowledgeBase this);
61
62
63/* ************************************************************/
64/* lexicon lookup result type */
65/* ************************************************************/
66
/* max nr of results */
#define PICOKLEX_MAX_NRRES   4

/* nr of bytes used for pos and index, needs to fit in uint32, ie. max 4 */
#define PICOKLEX_POSIND_SIZE 4
/* nr of bytes used for index, needs to fit in uint32, ie. max 4 */
#define PICOKLEX_IND_SIZE    3
/* max len (in bytes) of posind; derived from the two constants it depends
   on so that the three values cannot drift apart */
#define PICOKLEX_POSIND_MAXLEN (PICOKLEX_MAX_NRRES * PICOKLEX_POSIND_SIZE)

/* enforce the documented "needs to fit in uint32, ie. max 4" limits at
   compile time instead of relying on the comments alone */
#if (PICOKLEX_POSIND_SIZE > 4) || (PICOKLEX_IND_SIZE > 4)
#error "PICOKLEX_POSIND_SIZE and PICOKLEX_IND_SIZE must not exceed 4 bytes"
#endif
76
77
78/* the lexicon lookup result(s) are stored in field posind, which
   contains a sequence of
     POS1-byte, IND1-bytes, POS2-byte, IND2-bytes, etc.

   the IND-bytes are the byte position(s) in the lexblocks part of the
   lexicon byte stream, starting at picoklex_lex_t.lexblocks.

   for lexentries without phones only the POS (there can be only one)
   is stored in posind, nrres equals one, and phonfound is FALSE.

   posind has room for PICOKLEX_POSIND_MAXLEN bytes; posindlen tells how
   many of them are in use.
*/
88
89typedef struct {
90    picoos_uint8 nrres;      /* number of results, 0 if no entry found */
91    picoos_uint8 posindlen;  /* number of posind bytes in use */
92    picoos_uint8 phonfound;  /* phones found flag, TRUE if found */
93    picoos_uint8 posind[PICOKLEX_POSIND_MAXLEN]; /* sequence of POS/IND
                                                    bytes, one group per
                                                    result */
95} picoklex_lexl_result_t;
96
97
98/* ************************************************************/
99/* lexicon lookup functions */
100/* ************************************************************/
101
102/**
 * Look up lexicon entries by graph.
 *
 * The result(s) are placed in lexres, ie. the phones are not returned
 * directly (because they are used later and space can be saved by using
 * indices first). lexres contains an index (or several) to the entry for
 * fast lookup later, once the phones are needed. PICOKLEX_IND_SIZE bytes
 * are used for the index; these ind bytes are saved in the WORDINDEX
 * items.
 *
 * @param this      lexicon to search
 * @param graph     graph bytes to look up
 * @param graphlen  length of graph (presumably in bytes -- confirm)
 * @param lexres    receives the lookup result(s)
 * @return TRUE if at least one entry is found, FALSE otherwise
 */
109picoos_uint8 picoklex_lexLookup(const picoklex_Lex this,
110                                const picoos_uint8 *graph,
111                                const picoos_uint16 graphlen,
112                                picoklex_lexl_result_t *lexres);
113
114/**
 * Look up a lexicon entry by index.
 *
 * @param this     lexicon to search
 * @param ind      sequence of index bytes; this is the content of a
 *                 WORDINDEX item
 * @param indlen   length of ind, must be equal to PICOKLEX_IND_SIZE
 * @param pos      output (presumably receives the POS of the entry --
 *                 confirm against the implementation)
 * @param phon     output (presumably receives a pointer to the phone
 *                 bytes of the entry; NOTE(review): who owns that memory
 *                 is not visible in this header -- confirm)
 * @param phonlen  output (presumably receives the number of phone bytes
 *                 -- confirm)
 * @return TRUE if okay, FALSE otherwise
 */
117picoos_uint8 picoklex_lexIndLookup(const picoklex_Lex this,
118                                   const picoos_uint8 *ind,
119                                   const picoos_uint8 indlen,
120                                   picoos_uint8 *pos,
121                                   picoos_uint8 **phon,
122                                   picoos_uint8 *phonlen);
123
124#ifdef __cplusplus
125}
126#endif
127
128
129#endif /*PICOKLEX_H_*/
130