1/* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#ifndef PINYINIME_ANDPY_INCLUDE_SEARCHCOMMON_H__ 18#define PINYINIME_ANDPY_INCLUDE_SEARCHCOMMON_H__ 19 20#include <stdlib.h> 21#include "./spellingtrie.h" 22 23namespace ime_pinyin { 24 25// Type used to identify the size of a pool, such as id pool, etc. 26typedef uint16 PoolPosType; 27 28// Type used to identify a parsing mile stone in an atom dictionary. 29typedef uint16 MileStoneHandle; 30 31// Type used to express a lemma and its probability score. 32typedef struct { 33 size_t id:(kLemmaIdSize * 8); 34 size_t lma_len:4; 35 uint16 psb; // The score, the lower psb, the higher possibility. 36 // For single character items, we may also need Hanzi. 37 // For multiple characer items, ignore it. 38 char16 hanzi; 39} LmaPsbItem, *PLmaPsbItem; 40 41// LmaPsbItem extended with string. 42typedef struct { 43 LmaPsbItem lpi; 44 char16 str[kMaxLemmaSize + 1]; 45} LmaPsbStrItem, *PLmaPsbStrItem; 46 47 48typedef struct { 49 float psb; 50 char16 pre_hzs[kMaxPredictSize]; 51 uint16 his_len; // The length of the history used to do the prediction. 52} NPredictItem, *PNPredictItem; 53 54// Parameter structure used to extend in a dictionary. All dictionaries 55// receives the same DictExtPara and a dictionary specific MileStoneHandle for 56// extending. 57// 58// When the user inputs a new character, AtomDictBase::extend_dict() will be 59// called at least once for each dictionary. 60// 61// For example, when the user inputs "wm", extend_dict() will be called twice, 62// and the DictExtPara parameter are as follows respectively: 63// 1. splids = {w, m}; splids_extended = 1; ext_len = 1; step_no = 1; 64// splid_end_split = false; id_start = wa(the first id start with 'w'); 65// id_num = number of ids starting with 'w'. 66// 2. splids = {m}; splids_extended = 0; ext_len = 1; step_no = 1; 67// splid_end_split = false; id_start = wa; id_num = number of ids starting with 68// 'w'. 69// 70// For string "women", one of the cases of the DictExtPara parameter is: 71// splids = {wo, men}, splids_extended = 1, ext_len = 3 (length of "men"), 72// step_no = 4; splid_end_split = false; id_start = men, id_num = 1. 73// 74typedef struct { 75 // Spelling ids for extending, there are splids_extended + 1 ids in the 76 // buffer. 77 // For a normal lemma, there can only be kMaxLemmaSize spelling ids in max, 78 // but for a composing phrase, there can kMaxSearchSteps spelling ids. 79 uint16 splids[kMaxSearchSteps]; 80 81 // Number of ids that have been used before. splids[splids_extended] is the 82 // newly added id for the current extension. 83 uint16 splids_extended; 84 85 // The step span of the extension. It is also the size of the string for 86 // the newly added spelling id. 87 uint16 ext_len; 88 89 // The step number for the current extension. It is also the ending position 90 // in the input Pinyin string for the substring of spelling ids in splids[]. 91 // For example, when the user inputs "women", step_no = 4. 92 // This parameter may useful to manage the MileStoneHandle list for each 93 // step. When the user deletes a character from the string, MileStoneHandle 94 // objects for the the steps after that character should be reset; when the 95 // user begins a new string, all MileStoneHandle objects should be reset. 96 uint16 step_no; 97 98 // Indicate whether the newly added spelling ends with a splitting character 99 bool splid_end_split; 100 101 // If the newly added id is a half id, id_start is the first id of the 102 // corresponding full ids; if the newly added id is a full id, id_start is 103 // that id. 104 uint16 id_start; 105 106 // If the newly added id is a half id, id_num is the number of corresponding 107 // ids; if it is a full id, id_num == 1. 108 uint16 id_num; 109}DictExtPara, *PDictExtPara; 110 111bool is_system_lemma(LemmaIdType lma_id); 112bool is_user_lemma(LemmaIdType lma_id); 113bool is_composing_lemma(LemmaIdType lma_id); 114 115int cmp_lpi_with_psb(const void *p1, const void *p2); 116int cmp_lpi_with_unified_psb(const void *p1, const void *p2); 117int cmp_lpi_with_id(const void *p1, const void *p2); 118int cmp_lpi_with_hanzi(const void *p1, const void *p2); 119 120int cmp_lpsi_with_str(const void *p1, const void *p2); 121 122int cmp_hanzis_1(const void *p1, const void *p2); 123int cmp_hanzis_2(const void *p1, const void *p2); 124int cmp_hanzis_3(const void *p1, const void *p2); 125int cmp_hanzis_4(const void *p1, const void *p2); 126int cmp_hanzis_5(const void *p1, const void *p2); 127int cmp_hanzis_6(const void *p1, const void *p2); 128int cmp_hanzis_7(const void *p1, const void *p2); 129int cmp_hanzis_8(const void *p1, const void *p2); 130 131int cmp_npre_by_score(const void *p1, const void *p2); 132int cmp_npre_by_hislen_score(const void *p1, const void *p2); 133int cmp_npre_by_hanzi_score(const void *p1, const void *p2); 134 135 136size_t remove_duplicate_npre(NPredictItem *npre_items, size_t npre_num); 137 138size_t align_to_size_t(size_t size); 139 140} // namespace 141 142#endif // PINYINIME_ANDPY_INCLUDE_SEARCHCOMMON_H__ 143