1b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/*
2b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *
4b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * Licensed under the Apache License, Version 2.0 (the "License");
5b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * you may not use this file except in compliance with the License.
6b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * You may obtain a copy of the License at
7b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *
8b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *     http://www.apache.org/licenses/LICENSE-2.0
9b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *
10b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * Unless required by applicable law or agreed to in writing, software
11b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * distributed under the License is distributed on an "AS IS" BASIS,
12b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * See the License for the specific language governing permissions and
14b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * limitations under the License.
15b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen */
16b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/**
17b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * @file picoklex.c
18b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *
19b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
20b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * All rights reserved.
21b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *
22b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * History:
23b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * - 2009-04-20 -- initial version
24b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *
25b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen */
26b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#include "picoos.h"
27b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#include "picodbg.h"
28b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#include "picodata.h"
29b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#include "picoknow.h"
30b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#include "picoklex.h"
31b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
32b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#ifdef __cplusplus
33b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenextern "C" {
34b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif
35b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#if 0
36b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
37b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif
38b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
39b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ************************************************************/
40b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* lexicon */
41b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ************************************************************/
42b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
43b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/**
44b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * @addtogroup picolex
45b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *
46b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  overview:
47b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - lex consists of optional searchindex and a non-empty list of lexblocks
48b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - lexblocks are fixed size, at the start of a block there is also the
49b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    start of an entry
  - using the searchindex an unambiguous lexblock can be determined which
51b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    contains the entry (or there is no entry)
52b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - one lex entry has POS GRAPH PHON, all mandatory, but
53b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    - PHON can be empty string -> no pronunciation in the resulting TTS output
54b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    - PHON can be :G2P -> use G2P later to add pronunciation
55b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - (POS,GRAPH) is a uniq key (only one entry allowed)
56b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  - (GRAPH) is almost a uniq key (2-4 entries with the same GRAPH, and
57b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    differing POS and differing PHON possible)
58b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    - for one graph we can have two or three solutions from the lex
       which all need to be passed on to the next PU
60b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    - in this case GRAPH, POS, and PHON all must be available in lex
61b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
62b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  sizing:
63b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    - 3 bytes entry index -> 16MB addressable
64b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    - 2 bytes searchindex nr -> 64K blocks possible
65b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    - 5 bytes per searchindex entry
66b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen      - 3 bytes for graph-prefix
67b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen      - 2 bytes blockadr in searchindex -> 64K blocks possible
68b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    - lexblock size 512B:
69b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen      - 32M possible
70b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen      - with ~20 bytes per entry
71b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        -> max. average of ~26 entries to be searched per lookup
72b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    - overhead of ~10 bytes per block to sync with
73b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen      block boundaries
74b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    - examples:
75b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen      - 500KB lex -> 1000 blocks,
76b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        1000 entries in searchindex, ~25.6K lex-entries,
77b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        - ~5KB searchindex
78b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen           ~10KB overhead for block sync
79b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen      - 100KB lex -> 200 blocks,
80b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        200 entries in searchindex, ~5.1K lex-entries,
81b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        - ~1KB searchindex
82b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen           ~2KB overhead for block sync
83b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
84b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen  pil-file: lexicon knowledge base in binary form
85b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
86b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    lex-kb = content
87b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
88b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    content = searchindex {lexblock}1:NRBLOCKS2
89b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
90b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    lexblock = {lexentry}1:        (lexblock size is fixed 512Bytes)
91b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
92b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    searchindex = NRBLOCKS2 {GRAPH1 GRAPH1 GRAPH1 LEXBLOCKIND2}=NRBLOCKS2
93b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
94b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    lexentry = LENGRAPH1 {GRAPH1}=LENGRAPH1-1
95b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen               LENPOSPHON1 POS1 {PHON1}=LENPOSPHON1-2
96b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
97b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    - special cases:
98b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen      - PHON is empty string (no pronunciation in the resulting TTS output):
99b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        lexentry = LENGRAPH1 {GRAPH1}=LENGRAPH1-1  2 POS1
100b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen      - PHON can be :G2P -> use G2P later to add pronunciation:
101b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        lexentry = LENGRAPH1 {GRAPH1}=LENGRAPH1-1  3 POS1 <reserved-phon-val=5>
102b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    - multi-byte values always little endian
103b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen*/
104b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
105b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
106b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ************************************************************/
107b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* lexicon data defines */
108b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* may not be changed with current implementation */
109b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ************************************************************/
110b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
/* nr bytes of nrblocks info */
#define PICOKLEX_LEX_NRBLOCKS_SIZE 2

/* search index entry: - nr graph bytes (grapheme prefix)
                       - nr bytes of block index
                       - nr bytes per entry, NRGRAPHS + INDSIZE */
#define PICOKLEX_LEX_SIE_NRGRAPHS  3
#define PICOKLEX_LEX_SIE_INDSIZE   2
#define PICOKLEX_LEX_SIE_SIZE \
    (PICOKLEX_LEX_SIE_NRGRAPHS + PICOKLEX_LEX_SIE_INDSIZE)

/* nr of bytes per lexblock */
#define PICOKLEX_LEXBLOCK_SIZE   512


/* reserved values in klex to indicate :G2P needed for a lexentry */
#define PICOKLEX_NEEDS_G2P   5
127b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
128b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
129b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ************************************************************/
130b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* lexicon type and loading */
131b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ************************************************************/
132b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
133b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/** object       : LexKnowledgeBase
134b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *  shortcut     : klex
135b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *  derived from : picoknow_KnowledgeBase
136b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen */
137b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
138b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chentypedef struct klex_subobj *klex_SubObj;
139b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
140b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chentypedef struct klex_subobj
141b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen{
142b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 nrblocks; /* nr lexblocks = nr eles in searchind */
143b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint8 *searchind;
144b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint8 *lexblocks;
145b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen} klex_subobj_t;
146b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
147b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
148b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstatic pico_status_t klexInitialize(register picoknow_KnowledgeBase this,
149b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                    picoos_Common common)
150b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen{
151b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint32 curpos = 0;
152b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    klex_subobj_t *klex;
153b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
154b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    PICODBG_DEBUG(("start"));
155b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
156b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* check whether (this->size != 0) done before calling this function */
157b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
158b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if (NULL == this || NULL == this->subObj) {
159b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
160b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                       NULL, NULL);
161b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
162b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    klex = (klex_subobj_t *) this->subObj;
163b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
164b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if (PICO_OK == picoos_read_mem_pi_uint16(this->base, &curpos,
165b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                             &(klex->nrblocks))) {
166b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        if (klex->nrblocks > 0) {
167b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            PICODBG_DEBUG(("nr blocks: %i, curpos: %i", klex->nrblocks,curpos));
168b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            klex->searchind = this->base + curpos;
169b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        } else {
170b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            klex->searchind = NULL;
171b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
172b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        klex->lexblocks = this->base + PICOKLEX_LEX_NRBLOCKS_SIZE +
173b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                             (klex->nrblocks * (PICOKLEX_LEX_SIE_SIZE));
174b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        return PICO_OK;
175b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    } else {
176b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT,
177b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                       NULL, NULL);
178b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
179b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
180b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
181b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
182b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstatic pico_status_t klexSubObjDeallocate(register picoknow_KnowledgeBase this,
183b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                          picoos_MemoryManager mm)
184b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen{
185b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if (NULL != this) {
186b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        picoos_deallocate(mm, (void *) &this->subObj);
187b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
188b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    return PICO_OK;
189b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
190b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
191b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
192b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* we don't offer a specialized constructor for a LexKnowledgeBase but
 * instead a "specializer" of an already existing generic
194b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * picoknow_KnowledgeBase */
195b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
196b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenpico_status_t picoklex_specializeLexKnowledgeBase(picoknow_KnowledgeBase this,
197b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                                  picoos_Common common)
198b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen{
199b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if (NULL == this) {
200b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
201b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                       NULL, NULL);
202b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
203b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if (this->size > 0) {
204b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        this->subDeallocate = klexSubObjDeallocate;
205b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        this->subObj = picoos_allocate(common->mm, sizeof(klex_subobj_t));
206b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        if (NULL == this->subObj) {
207b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM,
208b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                           NULL, NULL);
209b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
210b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        return klexInitialize(this, common);
211b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    } else {
212b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* some dummy klex */
213b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        return PICO_OK;
214b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
215b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
216b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
217b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* for now we don't need to do anything special for the main lex */
218b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/*
219b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenpico_status_t picoklex_specializeMainLexKnowledgeBase(
220b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        picoknow_KnowledgeBase this,
221b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        picoos_Common common)
222b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen{
223b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    return picoklex_specializeLexKnowledgeBase(this,common);
224b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
225b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen*/
226b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
227b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
228b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ************************************************************/
229b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* lexicon getLex */
230b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ************************************************************/
231b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
232b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenpicoklex_Lex picoklex_getLex(picoknow_KnowledgeBase this)
233b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen{
234b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if (NULL == this) {
235b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        return NULL;
236b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    } else {
237b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        return (picoklex_Lex) this->subObj;
238b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
239b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
240b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
241b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
242b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ************************************************************/
243b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* functions on searchindex */
244b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ************************************************************/
245b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
246b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
247b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstatic picoos_uint32 klex_getSearchIndexVal(const klex_SubObj this,
248b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                            picoos_uint16 index)
249b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen{
250b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint32 pos, val;
251b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    pos = index * PICOKLEX_LEX_SIE_SIZE;
252b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    val = this->searchind[pos];
253b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    val = (val << 8) + this->searchind[pos + 1];
254b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    val = (val << 8) + this->searchind[pos + 2];
255b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    return val;
256b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
257b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
258b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
259b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* Determine first lexblock containing entries for specified
260b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen   grapheme. */
261b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
262b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstatic picoos_uint16 klex_getLexblockNr(const klex_SubObj this,
263b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                        const picoos_uint8 *graphsi) {
264b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* graphsi is of len PICOKLEX_LEX_SI_NGRAPHS */
265b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_int32 low, mid, high;
266b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint32 searchval, indval;
267b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
268b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* PICOKLEX_LEX_SIE_NRGRAPHS */
269b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
270b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* convert graph-prefix to number with 'lexicographic' ordering */
271b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    searchval = graphsi[0];
272b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    searchval = (searchval << 8) + graphsi[1];
273b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    searchval = (searchval << 8) + graphsi[2];
274b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
275b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    low = 0;
276b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    high = this->nrblocks;
277b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
278b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* do binary search */
279b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    while (low < high) {
280b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        mid = (low + high) / 2;
281b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        indval = klex_getSearchIndexVal(this, mid);
282b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        if (indval < searchval) {
283b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            low = mid + 1;
284b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        } else {
285b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            high = mid;
286b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
287b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
288b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    PICODBG_ASSERT(high == low);
289b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* low points to the first entry greater than or equal to searchval */
290b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
291b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if (low < this->nrblocks) {
292b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        indval = klex_getSearchIndexVal(this, low);
293b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        if (indval > searchval) {
294b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            low--;
295b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            /* if there are identical elements in the search index we have
296b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen               to move to the first one */
297b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            if (low > 0) {
298b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                indval = klex_getSearchIndexVal(this, low);
299b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                while (indval == klex_getSearchIndexVal(this, low-1)) {
300b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    low--;
301b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                }
302b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            }
303b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
304b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    } else {
305b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        low = this->nrblocks - 1;
306b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
307b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
308b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#if defined(PICO_DEBUG)
309b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    {
310b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        picoos_uint32 pos = low * PICOKLEX_LEX_SIE_SIZE;
311b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        PICODBG_DEBUG(("binary search result is %c%c%c (%d)",
312b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       this->searchind[pos], this->searchind[pos + 1],
313b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       this->searchind[pos + 2], low));
314b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
315b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif
316b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
317b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    return (picoos_uint16) low;
318b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
319b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
320b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
321b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* Determine number of adjacent lexblocks containing entries for
322b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen   the same grapheme search prefix (identified by search index). */
323b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
324b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstatic picoos_uint16 klex_getLexblockRange(const klex_SubObj this,
325b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                           picoos_uint16 index)
326b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen{
327b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 count;
328b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint32 sval1, sval2;
329b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
330b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sval1 = klex_getSearchIndexVal(this, index);
331b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
332b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#if defined(PICO_DEBUG)
333b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* 'index' must point to first lexblock of its kind */
334b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if (index > 0) {
335b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        sval2 = klex_getSearchIndexVal(this, index - 1);
336b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        PICODBG_ASSERT(sval1 != sval2);
337b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
338b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif
339b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
340b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    index++;
341b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    sval2 = klex_getSearchIndexVal(this, index);
342b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
343b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    count = 1;
344b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    while (sval1 == sval2) {
345b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        count++;
346b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        index++;
347b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        sval2 = klex_getSearchIndexVal(this, index);
348b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
349b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
350b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    return count;
351b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
352b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
353b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
354b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ************************************************************/
355b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* functions on single lexblock */
356b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ************************************************************/
357b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
358b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstatic picoos_int8 klex_lexMatch(picoos_uint8 *lexentry,
359b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                 const picoos_uint8 *graph,
360b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                 const picoos_uint16 graphlen) {
361b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint8 i;
362b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint8 lexlen;
363b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint8 *lexgraph;
364b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
365b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    lexlen = lexentry[0] - 1;
366b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    lexgraph = &(lexentry[1]);
367b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    for (i=0; (i<graphlen) && (i<lexlen); i++) {
368b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        PICODBG_TRACE(("%d|%d  graph|lex: %c|%c", graphlen, lexlen,
369b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       graph[i], lexgraph[i]));
370b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        if (lexgraph[i] < graph[i]) {
371b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            return -1;
372b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        } else if (lexgraph[i] > graph[i]) {
373b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            return 1;
374b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
375b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
376b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if (graphlen == lexlen) {
377b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        return 0;
378b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    } else if (lexlen < graphlen) {
379b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        return -1;
380b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    } else {
381b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        return 1;
382b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
383b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
384b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
385b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
386b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstatic void klex_setLexResult(const picoos_uint8 *lexentry,
387b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                              const picoos_uint32 lexpos,
388b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                              picoklex_lexl_result_t *lexres) {
389b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint8 i;
390b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
391b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* check if :G2P */
392e9f72c8954f29f10cb4feb16d328a1b5c1fd7169Jean-Michel Trivi    if ((2 < (lexentry[lexentry[0]])) && ((lexentry[lexentry[0] + 2]) == PICOKLEX_NEEDS_G2P)) {
393b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* set pos */
394b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        lexres->posind[0] = lexentry[lexentry[0] + 1];
395b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* set rest */
396b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        lexres->phonfound = FALSE;
397b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        lexres->posindlen = 1;
398b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        lexres->nrres = 1;
399b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        PICODBG_DEBUG(("result %d :G2P", lexres->nrres));
400b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    } else {
401b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        i = lexres->nrres * (PICOKLEX_POSIND_SIZE);
402b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        lexres->posindlen += PICOKLEX_POSIND_SIZE;
403b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        lexres->phonfound = TRUE;
404b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* set pos */
405b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        lexres->posind[i++] = lexentry[lexentry[0] + 1];
406b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* set ind, PICOKLEX_IND_SIZE */
407b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        lexres->posind[i++] = 0x000000ff & (lexpos);
408b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        lexres->posind[i++] = 0x000000ff & (lexpos >>  8);
409b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        lexres->posind[i]   = 0x000000ff & (lexpos >> 16);
410b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        lexres->nrres++;
411b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        PICODBG_DEBUG(("result %d", lexres->nrres));
412b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
413b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
414b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
415b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
416b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstatic void klex_lexblockLookup(klex_SubObj this,
417b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                const picoos_uint32 lexposStart,
418b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                const picoos_uint32 lexposEnd,
419b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                const picoos_uint8 *graph,
420b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                const picoos_uint16 graphlen,
421b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                picoklex_lexl_result_t *lexres) {
422b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint32 lexpos;
423b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_int8 rv;
424b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
425b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    lexres->nrres = 0;
426b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
427b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    lexpos = lexposStart;
428b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    rv = -1;
429b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    while ((rv < 0) && (lexpos < lexposEnd)) {
430b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
431b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        rv = klex_lexMatch(&(this->lexblocks[lexpos]), graph, graphlen);
432b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
433b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        if (rv == 0) { /* found */
434b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            klex_setLexResult(&(this->lexblocks[lexpos]), lexpos, lexres);
435b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            if (lexres->phonfound) {
436b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                /* look for more results, up to MAX_NRRES, don't even
437b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                   check if more results would be available */
438b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                while ((lexres->nrres < PICOKLEX_MAX_NRRES) &&
439b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       (lexpos < lexposEnd)) {
440b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    lexpos += this->lexblocks[lexpos];
441b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    lexpos += this->lexblocks[lexpos];
442b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    /* if there are no more entries in this block, advance
443b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                       to next block by skipping all zeros */
444b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    while ((this->lexblocks[lexpos] == 0) &&
445b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                           (lexpos < lexposEnd)) {
446b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        lexpos++;
447b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    }
448b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    if (lexpos < lexposEnd) {
449b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        if (klex_lexMatch(&(this->lexblocks[lexpos]), graph,
450b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                          graphlen) == 0) {
451b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            klex_setLexResult(&(this->lexblocks[lexpos]),
452b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                              lexpos, lexres);
453b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        } else {
454b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            /* no more results, quit loop */
455b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                            lexpos = lexposEnd;
456b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                        }
457b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                    }
458b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                }
459b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            } else {
460b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                /* :G2P mark */
461b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            }
462b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        } else if (rv < 0) {
463b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            /* not found, goto next entry */
464b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            lexpos += this->lexblocks[lexpos];
465b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            lexpos += this->lexblocks[lexpos];
466b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            /* if there are no more entries in this block, advance
467b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen               to next block by skipping all zeros */
468b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            while ((this->lexblocks[lexpos] == 0) && (lexpos < lexposEnd)) {
469b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                lexpos++;
470b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            }
471b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        } else {
472b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            /* rv > 0, not found, won't show up later in block */
473b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
474b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
475b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
476b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
477b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
478b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ************************************************************/
479b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* lexicon lookup functions */
480b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ************************************************************/
481b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
482b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenpicoos_uint8 picoklex_lexLookup(const picoklex_Lex this,
483b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                const picoos_uint8 *graph,
484b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                const picoos_uint16 graphlen,
485b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                picoklex_lexl_result_t *lexres) {
486b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint16 lbnr, lbc;
487b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint32 lexposStart, lexposEnd;
488b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint8 i;
489b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint8 tgraph[PICOKLEX_LEX_SIE_NRGRAPHS];
490b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    klex_SubObj klex = (klex_SubObj) this;
491b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
492b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if (NULL == klex) {
493b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        PICODBG_ERROR(("no lexicon loaded"));
494b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* no exception here needed, already checked at initialization */
495b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        return FALSE;
496b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
497b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
498b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    lexres->nrres = 0;
499b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    lexres->posindlen = 0;
500b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    lexres->phonfound = FALSE;
501b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
502b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    for (i = 0; i<PICOKLEX_LEX_SIE_NRGRAPHS; i++) {
503b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        if (i < graphlen) {
504b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            tgraph[i] = graph[i];
505b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        } else {
506b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen            tgraph[i] = '\0';
507b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        }
508b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
509b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    PICODBG_DEBUG(("tgraph: %c%c%c", tgraph[0],tgraph[1],tgraph[2]));
510b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
511b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if ((klex->nrblocks) == 0) {
512b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        /* no searchindex, no lexblock */
513b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        PICODBG_WARN(("no searchindex, no lexblock"));
514b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        return FALSE;
515b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    } else {
516b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        lbnr = klex_getLexblockNr(klex, tgraph);
517b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        PICODBG_ASSERT(lbnr < klex->nrblocks);
518b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        lbc = klex_getLexblockRange(klex, lbnr);
519b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        PICODBG_ASSERT((lbc >= 1) && (lbc <= klex->nrblocks));
520b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
521b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    PICODBG_DEBUG(("lexblock nr: %d (#%d)", lbnr, lbc));
522b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
523b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    lexposStart = lbnr * PICOKLEX_LEXBLOCK_SIZE;
524b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    lexposEnd = lexposStart + lbc * PICOKLEX_LEXBLOCK_SIZE;
525b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
526b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    PICODBG_DEBUG(("lookup start, lexpos range %d..%d", lexposStart,lexposEnd));
527b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    klex_lexblockLookup(klex, lexposStart, lexposEnd, graph, graphlen, lexres);
528b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    PICODBG_DEBUG(("lookup done, %d found", lexres->nrres));
529b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
530b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    return (lexres->nrres > 0);
531b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
532b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
533b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
534b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenpicoos_uint8 picoklex_lexIndLookup(const picoklex_Lex this,
535b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                   const picoos_uint8 *ind,
536b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                   const picoos_uint8 indlen,
537b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                   picoos_uint8 *pos,
538b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                   picoos_uint8 **phon,
539b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen                                   picoos_uint8 *phonlen) {
540b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    picoos_uint32 pentry;
541b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    klex_SubObj klex = (klex_SubObj) this;
542b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
543b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* check indlen */
544b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if (indlen != PICOKLEX_IND_SIZE) {
545b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        return FALSE;
546b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
547b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
548b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* PICOKLEX_IND_SIZE */
549b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    pentry = 0x000000ff & (ind[0]);
550b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    pentry |= ((picoos_uint32)(ind[1]) <<  8);
551b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    pentry |= ((picoos_uint32)(ind[2]) << 16);
552b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
553b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    /* check ind if it is within lexblocks byte stream, if not, return FALSE */
554b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    if (pentry >= ((picoos_uint32)klex->nrblocks * PICOKLEX_LEXBLOCK_SIZE)) {
555b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen        return FALSE;
556b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    }
557b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
558b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    pentry += (klex->lexblocks[pentry]);
559b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    *phonlen = (klex->lexblocks[pentry++]) - 2;
560b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    *pos = klex->lexblocks[pentry++];
561b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    *phon = &(klex->lexblocks[pentry]);
562b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
563b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    PICODBG_DEBUG(("pentry: %d, phonlen: %d", pentry, *phonlen));
564b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen    return TRUE;
565b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
566b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
567b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#ifdef __cplusplus
568b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen}
569b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif
570b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
571b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen
572b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* end */
573