1/* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#ifndef LATINIME_BIGRAM_DICTIONARY_H 18#define LATINIME_BIGRAM_DICTIONARY_H 19 20#include <map> 21#include <stdint.h> 22 23#include "defines.h" 24 25namespace latinime { 26 27class BigramDictionary { 28 public: 29 BigramDictionary(const unsigned char *dict, int maxWordLength, int maxPredictions); 30 int getBigrams(const int32_t *word, int length, int *inputCodes, int codesSize, 31 unsigned short *outWords, int *frequencies, int *outputTypes) const; 32 void fillBigramAddressToFrequencyMapAndFilter(const int32_t *prevWord, const int prevWordLength, 33 std::map<int, int> *map, uint8_t *filter) const; 34 bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2) const; 35 ~BigramDictionary(); 36 private: 37 DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictionary); 38 bool addWordBigram(unsigned short *word, int length, int frequency, 39 int *bigramFreq, unsigned short *bigramChars, int *outputTypes) const; 40 int getBigramAddress(int *pos, bool advance); 41 int getBigramFreq(int *pos); 42 void searchForTerminalNode(int addressLookingFor, int frequency); 43 bool getFirstBitOfByte(int *pos) { return (DICT[*pos] & 0x80) > 0; } 44 bool getSecondBitOfByte(int *pos) { return (DICT[*pos] & 0x40) > 0; } 45 bool checkFirstCharacter(unsigned short *word, int *inputCodes) const; 46 int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength, 47 const bool forceLowerCaseSearch) const; 48 49 const unsigned char *DICT; 50 const int MAX_WORD_LENGTH; 51 const int MAX_PREDICTIONS; 52 // TODO: Re-implement proximity correction for bigram correction 53 static const int MAX_ALTERNATIVES = 1; 54}; 55} // namespace latinime 56#endif // LATINIME_BIGRAM_DICTIONARY_H 57