1b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* 2b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland 3b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * 4b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * Licensed under the Apache License, Version 2.0 (the "License"); 5b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * you may not use this file except in compliance with the License. 6b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * You may obtain a copy of the License at 7b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * 8b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * http://www.apache.org/licenses/LICENSE-2.0 9b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * 10b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * Unless required by applicable law or agreed to in writing, software 11b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * distributed under the License is distributed on an "AS IS" BASIS, 12b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * See the License for the specific language governing permissions and 14b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * limitations under the License. 15b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen */ 16b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/** 17b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * @file picoklex.c 18b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * 19b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland 20b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * All rights reserved. 21b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * 22b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * History: 23b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * - 2009-04-20 -- initial version 24b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * 25b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen */ 26b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#include "picoos.h" 27b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#include "picodbg.h" 28b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#include "picodata.h" 29b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#include "picoknow.h" 30b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#include "picoklex.h" 31b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 32b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#ifdef __cplusplus 33b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenextern "C" { 34b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif 35b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#if 0 36b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen} 37b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif 38b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 39b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ************************************************************/ 40b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* lexicon */ 41b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ************************************************************/ 42b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 43b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/** 44b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * @addtogroup picolex 45b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * 46b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen overview: 47b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - lex consists of optional searchindex and a non-empty list of lexblocks 48b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - lexblocks are fixed size, at the start of a block there is also the 49b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen start of an entry 50b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - using the searchindex a unambiguous lexblock can be determined which 51b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen contains the entry (or there is no entry) 52b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - one lex entry has POS GRAPH PHON, all mandatory, but 53b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - PHON can be empty string -> no pronunciation in the resulting TTS output 54b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - PHON can be :G2P -> use G2P later to add pronunciation 55b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - (POS,GRAPH) is a uniq key (only one entry allowed) 56b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - (GRAPH) is almost a uniq key (2-4 entries with the same GRAPH, and 57b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen differing POS and differing PHON possible) 58b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - for one graph we can have two or three solutions from the lex 59b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen which all need to be passed on the the next PU 60b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - in this case GRAPH, POS, and PHON all must be available in lex 61b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 62b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen sizing: 63b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - 3 bytes entry index -> 16MB addressable 64b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - 2 bytes searchindex nr -> 64K blocks possible 65b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - 5 bytes per searchindex entry 66b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - 3 bytes for graph-prefix 67b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - 2 bytes blockadr in searchindex -> 64K blocks possible 68b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - lexblock size 512B: 69b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - 32M possible 70b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - with ~20 bytes per entry 71b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen -> max. average of ~26 entries to be searched per lookup 72b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - overhead of ~10 bytes per block to sync with 73b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen block boundaries 74b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - examples: 75b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - 500KB lex -> 1000 blocks, 76b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 1000 entries in searchindex, ~25.6K lex-entries, 77b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - ~5KB searchindex 78b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen ~10KB overhead for block sync 79b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - 100KB lex -> 200 blocks, 80b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 200 entries in searchindex, ~5.1K lex-entries, 81b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - ~1KB searchindex 82b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen ~2KB overhead for block sync 83b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 84b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen pil-file: lexicon knowledge base in binary form 85b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 86b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lex-kb = content 87b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 88b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen content = searchindex {lexblock}1:NRBLOCKS2 89b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 90b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lexblock = {lexentry}1: (lexblock size is fixed 512Bytes) 91b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 92b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen searchindex = NRBLOCKS2 {GRAPH1 GRAPH1 GRAPH1 LEXBLOCKIND2}=NRBLOCKS2 93b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 94b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lexentry = LENGRAPH1 {GRAPH1}=LENGRAPH1-1 95b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen LENPOSPHON1 POS1 {PHON1}=LENPOSPHON1-2 96b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 97b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - special cases: 98b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - PHON is empty string (no pronunciation in the resulting TTS output): 99b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lexentry = LENGRAPH1 {GRAPH1}=LENGRAPH1-1 2 POS1 100b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - PHON can be :G2P -> use G2P later to add pronunciation: 101b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lexentry = LENGRAPH1 {GRAPH1}=LENGRAPH1-1 3 POS1 <reserved-phon-val=5> 102b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - multi-byte values always little endian 103b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen*/ 104b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 105b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 106b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ************************************************************/ 107b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* lexicon data defines */ 108b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* may not be changed with current implementation */ 109b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ************************************************************/ 110b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 111b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* nr bytes of nrblocks info */ 112b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#define PICOKLEX_LEX_NRBLOCKS_SIZE 2 113b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 114b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* search index entry: - nr graphs 115b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - nr bytes of block index 116b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen - nr bytes per entry, NRGRAPHS*INDSIZE */ 117b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#define PICOKLEX_LEX_SIE_NRGRAPHS 3 118b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#define PICOKLEX_LEX_SIE_INDSIZE 2 119b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#define PICOKLEX_LEX_SIE_SIZE 5 120b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 121b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* nr of bytes per lexblock */ 122b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#define PICOKLEX_LEXBLOCK_SIZE 512 123b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 124b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 125b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* reserved values in klex to indicate :G2P needed for a lexentry */ 126b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#define PICOKLEX_NEEDS_G2P 5 127b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 128b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 129b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ************************************************************/ 130b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* lexicon type and loading */ 131b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ************************************************************/ 132b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 133b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/** object : LexKnowledgeBase 134b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * shortcut : klex 135b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * derived from : picoknow_KnowledgeBase 136b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen */ 137b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 138b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chentypedef struct klex_subobj *klex_SubObj; 139b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 140b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chentypedef struct klex_subobj 141b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen{ 142b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_uint16 nrblocks; /* nr lexblocks = nr eles in searchind */ 143b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_uint8 *searchind; 144b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_uint8 *lexblocks; 145b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen} klex_subobj_t; 146b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 147b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 148b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstatic pico_status_t klexInitialize(register picoknow_KnowledgeBase this, 149b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_Common common) 150b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen{ 151b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_uint32 curpos = 0; 152b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen klex_subobj_t *klex; 153b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 154b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen PICODBG_DEBUG(("start")); 155b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 156b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen /* check whether (this->size != 0) done before calling this function */ 157b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 158b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen if (NULL == this || NULL == this->subObj) { 159b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, 160b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen NULL, NULL); 161b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 162b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen klex = (klex_subobj_t *) this->subObj; 163b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 164b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen if (PICO_OK == picoos_read_mem_pi_uint16(this->base, &curpos, 165b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen &(klex->nrblocks))) { 166b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen if (klex->nrblocks > 0) { 167b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen PICODBG_DEBUG(("nr blocks: %i, curpos: %i", klex->nrblocks,curpos)); 168b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen klex->searchind = this->base + curpos; 169b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } else { 170b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen klex->searchind = NULL; 171b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 172b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen klex->lexblocks = this->base + PICOKLEX_LEX_NRBLOCKS_SIZE + 173b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen (klex->nrblocks * (PICOKLEX_LEX_SIE_SIZE)); 174b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen return PICO_OK; 175b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } else { 176b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT, 177b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen NULL, NULL); 178b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 179b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen} 180b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 181b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 182b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstatic pico_status_t klexSubObjDeallocate(register picoknow_KnowledgeBase this, 183b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_MemoryManager mm) 184b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen{ 185b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen if (NULL != this) { 186b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_deallocate(mm, (void *) &this->subObj); 187b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 188b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen return PICO_OK; 189b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen} 190b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 191b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 192b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* we don't offer a specialized constructor for a LexKnowledgeBase but 193b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * instead a "specializer" of an allready existing generic 194b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen * picoknow_KnowledgeBase */ 195b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 196b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenpico_status_t picoklex_specializeLexKnowledgeBase(picoknow_KnowledgeBase this, 197b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_Common common) 198b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen{ 199b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen if (NULL == this) { 200b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, 201b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen NULL, NULL); 202b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 203b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen if (this->size > 0) { 204b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen this->subDeallocate = klexSubObjDeallocate; 205b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen this->subObj = picoos_allocate(common->mm, sizeof(klex_subobj_t)); 206b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen if (NULL == this->subObj) { 207b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, 208b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen NULL, NULL); 209b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 210b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen return klexInitialize(this, common); 211b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } else { 212b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen /* some dummy klex */ 213b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen return PICO_OK; 214b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 215b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen} 216b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 217b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* for now we don't need to do anything special for the main lex */ 218b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* 219b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenpico_status_t picoklex_specializeMainLexKnowledgeBase( 220b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoknow_KnowledgeBase this, 221b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_Common common) 222b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen{ 223b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen return picoklex_specializeLexKnowledgeBase(this,common); 224b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen} 225b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen*/ 226b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 227b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 228b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ************************************************************/ 229b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* lexicon getLex */ 230b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ************************************************************/ 231b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 232b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenpicoklex_Lex picoklex_getLex(picoknow_KnowledgeBase this) 233b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen{ 234b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen if (NULL == this) { 235b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen return NULL; 236b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } else { 237b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen return (picoklex_Lex) this->subObj; 238b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 239b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen} 240b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 241b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 242b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ************************************************************/ 243b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* functions on searchindex */ 244b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ************************************************************/ 245b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 246b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 247b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstatic picoos_uint32 klex_getSearchIndexVal(const klex_SubObj this, 248b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_uint16 index) 249b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen{ 250b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_uint32 pos, val; 251b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen pos = index * PICOKLEX_LEX_SIE_SIZE; 252b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen val = this->searchind[pos]; 253b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen val = (val << 8) + this->searchind[pos + 1]; 254b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen val = (val << 8) + this->searchind[pos + 2]; 255b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen return val; 256b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen} 257b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 258b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 259b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* Determine first lexblock containing entries for specified 260b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen grapheme. */ 261b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 262b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstatic picoos_uint16 klex_getLexblockNr(const klex_SubObj this, 263b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen const picoos_uint8 *graphsi) { 264b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen /* graphsi is of len PICOKLEX_LEX_SI_NGRAPHS */ 265b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_int32 low, mid, high; 266b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_uint32 searchval, indval; 267b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 268b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen /* PICOKLEX_LEX_SIE_NRGRAPHS */ 269b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 270b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen /* convert graph-prefix to number with 'lexicographic' ordering */ 271b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen searchval = graphsi[0]; 272b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen searchval = (searchval << 8) + graphsi[1]; 273b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen searchval = (searchval << 8) + graphsi[2]; 274b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 275b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen low = 0; 276b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen high = this->nrblocks; 277b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 278b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen /* do binary search */ 279b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen while (low < high) { 280b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen mid = (low + high) / 2; 281b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen indval = klex_getSearchIndexVal(this, mid); 282b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen if (indval < searchval) { 283b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen low = mid + 1; 284b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } else { 285b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen high = mid; 286b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 287b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 288b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen PICODBG_ASSERT(high == low); 289b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen /* low points to the first entry greater than or equal to searchval */ 290b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 291b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen if (low < this->nrblocks) { 292b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen indval = klex_getSearchIndexVal(this, low); 293b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen if (indval > searchval) { 294b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen low--; 295b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen /* if there are identical elements in the search index we have 296b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen to move to the first one */ 297b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen if (low > 0) { 298b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen indval = klex_getSearchIndexVal(this, low); 299b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen while (indval == klex_getSearchIndexVal(this, low-1)) { 300b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen low--; 301b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 302b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 303b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 304b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } else { 305b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen low = this->nrblocks - 1; 306b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 307b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 308b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#if defined(PICO_DEBUG) 309b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen { 310b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_uint32 pos = low * PICOKLEX_LEX_SIE_SIZE; 311b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen PICODBG_DEBUG(("binary search result is %c%c%c (%d)", 312b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen this->searchind[pos], this->searchind[pos + 1], 313b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen this->searchind[pos + 2], low)); 314b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 315b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif 316b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 317b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen return (picoos_uint16) low; 318b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen} 319b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 320b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 321b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* Determine number of adjacent lexblocks containing entries for 322b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen the same grapheme search prefix (identified by search index). */ 323b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 324b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstatic picoos_uint16 klex_getLexblockRange(const klex_SubObj this, 325b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_uint16 index) 326b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen{ 327b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_uint16 count; 328b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_uint32 sval1, sval2; 329b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 330b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen sval1 = klex_getSearchIndexVal(this, index); 331b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 332b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#if defined(PICO_DEBUG) 333b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen /* 'index' must point to first lexblock of its kind */ 334b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen if (index > 0) { 335b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen sval2 = klex_getSearchIndexVal(this, index - 1); 336b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen PICODBG_ASSERT(sval1 != sval2); 337b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 338b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif 339b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 340b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen index++; 341b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen sval2 = klex_getSearchIndexVal(this, index); 342b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 343b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen count = 1; 344b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen while (sval1 == sval2) { 345b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen count++; 346b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen index++; 347b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen sval2 = klex_getSearchIndexVal(this, index); 348b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 349b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 350b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen return count; 351b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen} 352b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 353b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 354b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ************************************************************/ 355b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* functions on single lexblock */ 356b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ************************************************************/ 357b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 358b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstatic picoos_int8 klex_lexMatch(picoos_uint8 *lexentry, 359b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen const picoos_uint8 *graph, 360b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen const picoos_uint16 graphlen) { 361b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_uint8 i; 362b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_uint8 lexlen; 363b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_uint8 *lexgraph; 364b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 365b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lexlen = lexentry[0] - 1; 366b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lexgraph = &(lexentry[1]); 367b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen for (i=0; (i<graphlen) && (i<lexlen); i++) { 368b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen PICODBG_TRACE(("%d|%d graph|lex: %c|%c", graphlen, lexlen, 369b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen graph[i], lexgraph[i])); 370b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen if (lexgraph[i] < graph[i]) { 371b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen return -1; 372b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } else if (lexgraph[i] > graph[i]) { 373b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen return 1; 374b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 375b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 376b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen if (graphlen == lexlen) { 377b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen return 0; 378b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } else if (lexlen < graphlen) { 379b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen return -1; 380b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } else { 381b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen return 1; 382b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 383b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen} 384b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 385b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 386b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstatic void klex_setLexResult(const picoos_uint8 *lexentry, 387b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen const picoos_uint32 lexpos, 388b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoklex_lexl_result_t *lexres) { 389b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_uint8 i; 390b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 391b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen /* check if :G2P */ 392e9f72c8954f29f10cb4feb16d328a1b5c1fd7169Jean-Michel Trivi if ((2 < (lexentry[lexentry[0]])) && ((lexentry[lexentry[0] + 2]) == PICOKLEX_NEEDS_G2P)) { 393b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen /* set pos */ 394b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lexres->posind[0] = lexentry[lexentry[0] + 1]; 395b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen /* set rest */ 396b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lexres->phonfound = FALSE; 397b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lexres->posindlen = 1; 398b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lexres->nrres = 1; 399b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen PICODBG_DEBUG(("result %d :G2P", lexres->nrres)); 400b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } else { 401b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen i = lexres->nrres * (PICOKLEX_POSIND_SIZE); 402b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lexres->posindlen += PICOKLEX_POSIND_SIZE; 403b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lexres->phonfound = TRUE; 404b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen /* set pos */ 405b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lexres->posind[i++] = lexentry[lexentry[0] + 1]; 406b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen /* set ind, PICOKLEX_IND_SIZE */ 407b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lexres->posind[i++] = 0x000000ff & (lexpos); 408b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lexres->posind[i++] = 0x000000ff & (lexpos >> 8); 409b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lexres->posind[i] = 0x000000ff & (lexpos >> 16); 410b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lexres->nrres++; 411b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen PICODBG_DEBUG(("result %d", lexres->nrres)); 412b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 413b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen} 414b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 415b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 416b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenstatic void klex_lexblockLookup(klex_SubObj this, 417b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen const picoos_uint32 lexposStart, 418b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen const picoos_uint32 lexposEnd, 419b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen const picoos_uint8 *graph, 420b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen const picoos_uint16 graphlen, 421b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoklex_lexl_result_t *lexres) { 422b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_uint32 lexpos; 423b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_int8 rv; 424b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 425b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lexres->nrres = 0; 426b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 427b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lexpos = lexposStart; 428b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen rv = -1; 429b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen while ((rv < 0) && (lexpos < lexposEnd)) { 430b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 431b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen rv = klex_lexMatch(&(this->lexblocks[lexpos]), graph, graphlen); 432b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 433b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen if (rv == 0) { /* found */ 434b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen klex_setLexResult(&(this->lexblocks[lexpos]), lexpos, lexres); 435b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen if (lexres->phonfound) { 436b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen /* look for more results, up to MAX_NRRES, don't even 437b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen check if more results would be available */ 438b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen while ((lexres->nrres < PICOKLEX_MAX_NRRES) && 439b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen (lexpos < lexposEnd)) { 440b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lexpos += this->lexblocks[lexpos]; 441b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lexpos += this->lexblocks[lexpos]; 442b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen /* if there are no more entries in this block, advance 443b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen to next block by skipping all zeros */ 444b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen while ((this->lexblocks[lexpos] == 0) && 445b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen (lexpos < lexposEnd)) { 446b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lexpos++; 447b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 448b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen if (lexpos < lexposEnd) { 449b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen if (klex_lexMatch(&(this->lexblocks[lexpos]), graph, 450b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen graphlen) == 0) { 451b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen klex_setLexResult(&(this->lexblocks[lexpos]), 452b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lexpos, lexres); 453b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } else { 454b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen /* no more results, quit loop */ 455b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lexpos = lexposEnd; 456b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 457b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 458b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 459b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } else { 460b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen /* :G2P mark */ 461b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 462b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } else if (rv < 0) { 463b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen /* not found, goto next entry */ 464b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lexpos += this->lexblocks[lexpos]; 465b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lexpos += this->lexblocks[lexpos]; 466b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen /* if there are no more entries in this block, advance 467b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen to next block by skipping all zeros */ 468b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen while ((this->lexblocks[lexpos] == 0) && (lexpos < lexposEnd)) { 469b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lexpos++; 470b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 471b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } else { 472b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen /* rv > 0, not found, won't show up later in block */ 473b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 474b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 475b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen} 476b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 477b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 478b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ************************************************************/ 479b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* lexicon lookup functions */ 480b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* ************************************************************/ 481b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 482b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenpicoos_uint8 picoklex_lexLookup(const picoklex_Lex this, 483b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen const picoos_uint8 *graph, 484b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen const picoos_uint16 graphlen, 485b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoklex_lexl_result_t *lexres) { 486b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_uint16 lbnr, lbc; 487b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_uint32 lexposStart, lexposEnd; 488b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_uint8 i; 489b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_uint8 tgraph[PICOKLEX_LEX_SIE_NRGRAPHS]; 490b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen klex_SubObj klex = (klex_SubObj) this; 491b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 492b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen if (NULL == klex) { 493b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen PICODBG_ERROR(("no lexicon loaded")); 494b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen /* no exception here needed, already checked at initialization */ 495b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen return FALSE; 496b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 497b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 498b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lexres->nrres = 0; 499b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lexres->posindlen = 0; 500b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lexres->phonfound = FALSE; 501b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 502b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen for (i = 0; i<PICOKLEX_LEX_SIE_NRGRAPHS; i++) { 503b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen if (i < graphlen) { 504b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen tgraph[i] = graph[i]; 505b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } else { 506b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen tgraph[i] = '\0'; 507b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 508b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 509b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen PICODBG_DEBUG(("tgraph: %c%c%c", tgraph[0],tgraph[1],tgraph[2])); 510b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 511b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen if ((klex->nrblocks) == 0) { 512b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen /* no searchindex, no lexblock */ 513b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen PICODBG_WARN(("no searchindex, no lexblock")); 514b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen return FALSE; 515b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } else { 516b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lbnr = klex_getLexblockNr(klex, tgraph); 517b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen PICODBG_ASSERT(lbnr < klex->nrblocks); 518b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lbc = klex_getLexblockRange(klex, lbnr); 519b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen PICODBG_ASSERT((lbc >= 1) && (lbc <= klex->nrblocks)); 520b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 521b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen PICODBG_DEBUG(("lexblock nr: %d (#%d)", lbnr, lbc)); 522b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 523b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lexposStart = lbnr * PICOKLEX_LEXBLOCK_SIZE; 524b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen lexposEnd = lexposStart + lbc * PICOKLEX_LEXBLOCK_SIZE; 525b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 526b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen PICODBG_DEBUG(("lookup start, lexpos range %d..%d", lexposStart,lexposEnd)); 527b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen klex_lexblockLookup(klex, lexposStart, lexposEnd, graph, graphlen, lexres); 528b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen PICODBG_DEBUG(("lookup done, %d found", lexres->nrres)); 529b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 530b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen return (lexres->nrres > 0); 531b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen} 532b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 533b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 534b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chenpicoos_uint8 picoklex_lexIndLookup(const picoklex_Lex this, 535b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen const picoos_uint8 *ind, 536b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen const picoos_uint8 indlen, 537b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_uint8 *pos, 538b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_uint8 **phon, 539b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_uint8 *phonlen) { 540b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen picoos_uint32 pentry; 541b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen klex_SubObj klex = (klex_SubObj) this; 542b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 543b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen /* check indlen */ 544b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen if (indlen != PICOKLEX_IND_SIZE) { 545b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen return FALSE; 546b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 547b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 548b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen /* PICOKLEX_IND_SIZE */ 549b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen pentry = 0x000000ff & (ind[0]); 550b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen pentry |= ((picoos_uint32)(ind[1]) << 8); 551b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen pentry |= ((picoos_uint32)(ind[2]) << 16); 552b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 553b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen /* check ind if it is within lexblocks byte stream, if not, return FALSE */ 554b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen if (pentry >= ((picoos_uint32)klex->nrblocks * PICOKLEX_LEXBLOCK_SIZE)) { 555b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen return FALSE; 556b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen } 557b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 558b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen pentry += (klex->lexblocks[pentry]); 559b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *phonlen = (klex->lexblocks[pentry++]) - 2; 560b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *pos = klex->lexblocks[pentry++]; 561b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen *phon = &(klex->lexblocks[pentry]); 562b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 563b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen PICODBG_DEBUG(("pentry: %d, phonlen: %d", pentry, *phonlen)); 564b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen return TRUE; 565b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen} 566b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 567b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#ifdef __cplusplus 568b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen} 569b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen#endif 570b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 571b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen 572b190149a69b110e6719ce0a41877a683f8db7ae7Charles Chen/* end */ 573