/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef PINYINIME_INCLUDE_DICTDEF_H__
#define PINYINIME_INCLUDE_DICTDEF_H__

#include <stdlib.h>
#include "./utf16char.h"

namespace ime_pinyin {

// Enable the following line when building the binary dictionary model.
// #define ___BUILD_MODEL___

// Short integer aliases used by the dictionary code. The bit widths suggested
// by the names are not enforced here; they follow from the underlying
// built-in types on the target platform.
typedef unsigned char      uint8;
typedef unsigned short     uint16;
typedef unsigned int       uint32;

typedef signed char        int8;
typedef short              int16;
typedef int                int32;
typedef long long          int64;
typedef unsigned long long uint64;

// Compile-time debug switches, all disabled.
// NOTE(review): presumably each flag gates a different level of debug
// printing -- confirm against the code that reads them.
const bool kPrintDebug0 = false;
const bool kPrintDebug1 = false;
const bool kPrintDebug2 = false;

// The max length of a lemma.
const size_t kMaxLemmaSize = 8;

// The max length of a Pinyin (spelling).
const size_t kMaxPinyinSize = 6;

// The number of half spelling ids. For Chinese Pinyin, there are 30 half ids.
// See SpellingTrie.h for details.
// NOTE(review): the value below is 29 while the comment says 30; presumably
// the difference is the unused id 0 (see kMaxSpellingNum) -- confirm.
const size_t kHalfSpellingIdNum = 29;

// The maximum number of full spellings. For Chinese Pinyin, there are only
// about 410 spellings.
// If this value is changed to something bigger (needing more bits), please
// also update other structures like SpellingNode, to make sure that a
// spelling id can still be stored.
// The -1 is because 0 is never used.
const size_t kMaxSpellingNum = 512 - kHalfSpellingIdNum - 1;

// NOTE(review): presumably the upper bound on the number of steps of one
// search -- confirm against the users of this constant.
const size_t kMaxSearchSteps = 40;

// One character predicts its following characters.
const size_t kMaxPredictSize = (kMaxLemmaSize - 1);

// LemmaIdType must always be size_t.
typedef size_t LemmaIdType;
const size_t kLemmaIdSize = 3;  // Actually, an id occupies 3 bytes in storage.
// 0xffffff is the largest value that fits in the 3 storage bytes.
// NOTE(review): presumably reserved as the id of the phrase being composed.
const size_t kLemmaIdComposing = 0xffffff;

typedef uint16 LmaScoreType;
typedef uint16 KeyScoreType;

// Number of items with highest score are kept for prediction purpose.
const size_t kTopScoreLemmaNum = 10;

// NOTE(review): presumably the max number of predictions produced from a
// history of more than 3, exactly 3, and exactly 2 characters -- confirm.
const size_t kMaxPredictNumByGt3 = 1;
const size_t kMaxPredictNumBy3 = 2;
const size_t kMaxPredictNumBy2 = 2;

// The last lemma id (included) for the system dictionary. The system
// dictionary's ids always start from 1.
817898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Projectconst LemmaIdType kSysDictIdEnd = 500000; 827898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 837898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project// The first lemma id for the user dictionary. 847898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Projectconst LemmaIdType kUserDictIdStart = 500001; 857898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 867898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project// The last lemma id (included) for the user dictionary. 877898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Projectconst LemmaIdType kUserDictIdEnd = 600000; 887898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 897898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Projecttypedef struct { 907898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project uint16 half_splid:5; 917898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project uint16 full_splid:11; 927898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project} SpellingId, *PSpellingId; 937898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 947898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 957898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project/** 967898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project * We use different node types for different layers 977898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project * Statistical data of the building result for a testing dictionary: 987898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project * root, level 0, level 1, level 2, level 3 997898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project * max son num of one node: 406 280 41 2 - 1007898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project * max homo num 
of one node: 0 90 23 2 2 1017898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project * total node num of a layer: 1 406 31766 13516 993 1027898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project * total homo num of a layer: 9 5674 44609 12667 995 1037898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project * 1047898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project * The node number for root and level 0 won't be larger than 500 1057898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project * According to the information above, two kinds of nodes can be used; one for 1067898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project * root and level 0, the other for these layers deeper than 0. 1077898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project * 1087898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project * LE = less and equal, 1097898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project * A node occupies 16 bytes. 
so, totallly less than 16 * 500 = 8K 1107898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project */ 1117898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Projectstruct LmaNodeLE0 { 1127898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project size_t son_1st_off; 1137898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project size_t homo_idx_buf_off; 1147898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project uint16 spl_idx; 1157898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project uint16 num_of_son; 1167898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project uint16 num_of_homo; 1177898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project}; 1187898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 1197898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project/** 1207898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project * GE = great and equal 1217898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project * A node occupies 8 bytes. 
1227898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project */ 1237898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Projectstruct LmaNodeGE1 { 1247898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project uint16 son_1st_off_l; // Low bits of the son_1st_off 1257898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project uint16 homo_idx_buf_off_l; // Low bits of the homo_idx_buf_off_1 1267898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project uint16 spl_idx; 1277898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project unsigned char num_of_son; // number of son nodes 1287898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project unsigned char num_of_homo; // number of homo words 1297898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project unsigned char son_1st_off_h; // high bits of the son_1st_off 1307898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project unsigned char homo_idx_buf_off_h; // high bits of the homo_idx_buf_off 1317898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project}; 1327898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 1337898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project#ifdef ___BUILD_MODEL___ 1347898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Projectstruct SingleCharItem { 1357898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project float freq; 1367898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project char16 hz; 1377898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project SpellingId splid; 1387898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project}; 1397898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 1407898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Projectstruct LemmaEntry { 1417898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project LemmaIdType 
idx_by_py; 1427898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project LemmaIdType idx_by_hz; 1437898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project char16 hanzi_str[kMaxLemmaSize + 1]; 1447898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 1457898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project // The SingleCharItem id for each Hanzi. 1467898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project uint16 hanzi_scis_ids[kMaxLemmaSize]; 1477898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 1487898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project uint16 spl_idx_arr[kMaxLemmaSize + 1]; 1497898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project char pinyin_str[kMaxLemmaSize][kMaxPinyinSize + 1]; 1507898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project unsigned char hz_str_len; 1517898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project float freq; 1527898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project}; 1537898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project#endif // ___BUILD_MODEL___ 1547898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 1557898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project} // namespace ime_pinyin 1567898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 1577898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project#endif // PINYINIME_INCLUDE_DICTDEF_H__ 158