17898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project/* 27898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project * Copyright (C) 2009 The Android Open Source Project 37898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project * 47898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project * Licensed under the Apache License, Version 2.0 (the "License"); 57898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project * you may not use this file except in compliance with the License. 67898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project * You may obtain a copy of the License at 77898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project * 87898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project * http://www.apache.org/licenses/LICENSE-2.0 97898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project * 107898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project * Unless required by applicable law or agreed to in writing, software 117898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project * distributed under the License is distributed on an "AS IS" BASIS, 127898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 137898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project * See the License for the specific language governing permissions and 147898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project * limitations under the License. 157898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project */ 167898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 177898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project#ifndef PINYINIME_INCLUDE_NGRAM_H__ 187898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project#define PINYINIME_INCLUDE_NGRAM_H__ 197898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 207898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project#include <stdio.h> 217898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project#include <stdlib.h> 227898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project#include "./dictdef.h" 237898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 247898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Projectnamespace ime_pinyin { 257898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 267898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Projecttypedef unsigned char CODEBOOK_TYPE; 277898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 287898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Projectstatic const size_t kCodeBookSize = 256; 297898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 307898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Projectclass NGram { 317898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project public: 327898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project // The maximum score of a lemma item. 337898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project static const LmaScoreType kMaxScore = 0x3fff; 347898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 357898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project // In order to reduce the storage size, the original log value is amplified by 367898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project // kScoreAmplifier, and we use LmaScoreType to store. 377898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project // After this process, an item with a lower score has a higher frequency. 387898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project static const int kLogValueAmplifier = -800; 397898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 407898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project // System words' total frequency. It is not the real total frequency, instead, 417898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project // It is only used to adjust system lemmas' scores when the user dictionary's 427898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project // total frequency changes. 437898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project // In this version, frequencies of system lemmas are fixed. We are considering 447898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project // to make them changable in next version. 457898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project static const size_t kSysDictTotalFreq = 100000000; 467898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 477898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project private: 487898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 497898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project static NGram* instance_; 507898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 517898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project bool initialized_; 527898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project size_t idx_num_; 537898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 547898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project size_t total_freq_none_sys_; 557898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 567898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project // Score compensation for system dictionary lemmas. 577898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project // Because after user adds some user lemmas, the total frequency changes, and 587898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project // we use this value to normalize the score. 597898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project float sys_score_compensation_; 607898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 617898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project#ifdef ___BUILD_MODEL___ 627898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project double *freq_codes_df_; 637898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project#endif 647898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project LmaScoreType *freq_codes_; 657898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project CODEBOOK_TYPE *lma_freq_idx_; 667898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 677898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project public: 687898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project NGram(); 697898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project ~NGram(); 707898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 717898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project static NGram& get_instance(); 727898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 737898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project bool save_ngram(FILE *fp); 747898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project bool load_ngram(FILE *fp); 757898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 767898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project // Set the total frequency of all none system dictionaries. 777898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project void set_total_freq_none_sys(size_t freq_none_sys); 787898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 797898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project float get_uni_psb(LemmaIdType lma_id); 807898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 817898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project // Convert a probability to score. Actually, the score will be limited to 827898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project // kMaxScore, but at runtime, we also need float expression to get accurate 837898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project // value of the score. 847898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project // After the conversion, a lower score indicates a higher probability of the 857898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project // item. 867898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project static float convert_psb_to_score(double psb); 877898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 887898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project#ifdef ___BUILD_MODEL___ 897898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project // For constructing the unigram mode model. 907898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project bool build_unigram(LemmaEntry *lemma_arr, size_t num, 917898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project LemmaIdType next_idx_unused); 927898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project#endif 937898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project}; 947898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project} 957898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project 967898d76cc005bbe1c5893a9f57439561e0771ccThe Android Open Source Project#endif // PINYINIME_INCLUDE_NGRAM_H__ 97