/*
**
** Copyright 2010, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
1730088259480130e5bac5c2028e2c7c3e6d4c51a2satok
1818c28f431eadc1b451ca25d14fd683db4b234838satok#include <string.h>
1918c28f431eadc1b451ca25d14fd683db4b234838satok
20e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok#define LOG_TAG "LatinIME: bigram_dictionary.cpp"
21e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok
2230088259480130e5bac5c2028e2c7c3e6d4c51a2satok#include "bigram_dictionary.h"
23588e2f296451a8eb074af9140d018b828105237fJean Chalard#include "binary_format.h"
2449ba135fdedb3c6b33ec915e91ecad682b7655b8Jean Chalard#include "bloom_filter.h"
253b088a2f365a9ce06f58243c83cb961ea2920b7eKen Wakasa#include "defines.h"
2649ba135fdedb3c6b33ec915e91ecad682b7655b8Jean Chalard#include "dictionary.h"
2730088259480130e5bac5c2028e2c7c3e6d4c51a2satok
2830088259480130e5bac5c2028e2c7c3e6d4c51a2satoknamespace latinime {
2930088259480130e5bac5c2028e2c7c3e6d4c51a2satok
3018c28f431eadc1b451ca25d14fd683db4b234838satokBigramDictionary::BigramDictionary(const unsigned char *dict, int maxWordLength,
3118c28f431eadc1b451ca25d14fd683db4b234838satok        Dictionary *parentDictionary)
325b0761e6a94227d6ef788f589fb6edcd44ed791fJean Chalard    : DICT(dict), MAX_WORD_LENGTH(maxWordLength), mParentDictionary(parentDictionary) {
33de3070a71b39742c3ac7b613f45af88cc95c1205Ken Wakasa    if (DEBUG_DICT) {
349fb6f47a6a11f62d134d4d6259181ac987fc1ad3satok        AKLOGI("BigramDictionary - constructor");
35de3070a71b39742c3ac7b613f45af88cc95c1205Ken Wakasa    }
3618c28f431eadc1b451ca25d14fd683db4b234838satok}
3718c28f431eadc1b451ca25d14fd683db4b234838satok
3818c28f431eadc1b451ca25d14fd683db4b234838satokBigramDictionary::~BigramDictionary() {
3918c28f431eadc1b451ca25d14fd683db4b234838satok}
4018c28f431eadc1b451ca25d14fd683db4b234838satok
4118c28f431eadc1b451ca25d14fd683db4b234838satokbool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequency) {
4218c28f431eadc1b451ca25d14fd683db4b234838satok    word[length] = 0;
4318c28f431eadc1b451ca25d14fd683db4b234838satok    if (DEBUG_DICT) {
44ce9efbff53ba04bd719c3c15d8a5a501ff12714fDoug Kwan#ifdef FLAG_DBG
4518c28f431eadc1b451ca25d14fd683db4b234838satok        char s[length + 1];
4618c28f431eadc1b451ca25d14fd683db4b234838satok        for (int i = 0; i <= length; i++) s[i] = word[i];
479fb6f47a6a11f62d134d4d6259181ac987fc1ad3satok        AKLOGI("Bigram: Found word = %s, freq = %d :", s, frequency);
48787945bf1ef2e5449b5df16dfe15beeb0fd7cb71satok#endif
4918c28f431eadc1b451ca25d14fd683db4b234838satok    }
5018c28f431eadc1b451ca25d14fd683db4b234838satok
5118c28f431eadc1b451ca25d14fd683db4b234838satok    // Find the right insertion point
5218c28f431eadc1b451ca25d14fd683db4b234838satok    int insertAt = 0;
5318c28f431eadc1b451ca25d14fd683db4b234838satok    while (insertAt < mMaxBigrams) {
5418c28f431eadc1b451ca25d14fd683db4b234838satok        if (frequency > mBigramFreq[insertAt] || (mBigramFreq[insertAt] == frequency
5518c28f431eadc1b451ca25d14fd683db4b234838satok                && length < Dictionary::wideStrLen(mBigramChars + insertAt * MAX_WORD_LENGTH))) {
5618c28f431eadc1b451ca25d14fd683db4b234838satok            break;
5718c28f431eadc1b451ca25d14fd683db4b234838satok        }
5818c28f431eadc1b451ca25d14fd683db4b234838satok        insertAt++;
5918c28f431eadc1b451ca25d14fd683db4b234838satok    }
60de3070a71b39742c3ac7b613f45af88cc95c1205Ken Wakasa    if (DEBUG_DICT) {
619fb6f47a6a11f62d134d4d6259181ac987fc1ad3satok        AKLOGI("Bigram: InsertAt -> %d maxBigrams: %d", insertAt, mMaxBigrams);
62de3070a71b39742c3ac7b613f45af88cc95c1205Ken Wakasa    }
6318c28f431eadc1b451ca25d14fd683db4b234838satok    if (insertAt < mMaxBigrams) {
6418c28f431eadc1b451ca25d14fd683db4b234838satok        memmove((char*) mBigramFreq + (insertAt + 1) * sizeof(mBigramFreq[0]),
6518c28f431eadc1b451ca25d14fd683db4b234838satok               (char*) mBigramFreq + insertAt * sizeof(mBigramFreq[0]),
6618c28f431eadc1b451ca25d14fd683db4b234838satok               (mMaxBigrams - insertAt - 1) * sizeof(mBigramFreq[0]));
6718c28f431eadc1b451ca25d14fd683db4b234838satok        mBigramFreq[insertAt] = frequency;
6818c28f431eadc1b451ca25d14fd683db4b234838satok        memmove((char*) mBigramChars + (insertAt + 1) * MAX_WORD_LENGTH * sizeof(short),
6918c28f431eadc1b451ca25d14fd683db4b234838satok               (char*) mBigramChars + (insertAt    ) * MAX_WORD_LENGTH * sizeof(short),
7018c28f431eadc1b451ca25d14fd683db4b234838satok               (mMaxBigrams - insertAt - 1) * sizeof(short) * MAX_WORD_LENGTH);
7118c28f431eadc1b451ca25d14fd683db4b234838satok        unsigned short *dest = mBigramChars + (insertAt    ) * MAX_WORD_LENGTH;
7218c28f431eadc1b451ca25d14fd683db4b234838satok        while (length--) {
7318c28f431eadc1b451ca25d14fd683db4b234838satok            *dest++ = *word++;
7418c28f431eadc1b451ca25d14fd683db4b234838satok        }
7518c28f431eadc1b451ca25d14fd683db4b234838satok        *dest = 0; // NULL terminate
76de3070a71b39742c3ac7b613f45af88cc95c1205Ken Wakasa        if (DEBUG_DICT) {
779fb6f47a6a11f62d134d4d6259181ac987fc1ad3satok            AKLOGI("Bigram: Added word at %d", insertAt);
78de3070a71b39742c3ac7b613f45af88cc95c1205Ken Wakasa        }
7918c28f431eadc1b451ca25d14fd683db4b234838satok        return true;
8018c28f431eadc1b451ca25d14fd683db4b234838satok    }
8118c28f431eadc1b451ca25d14fd683db4b234838satok    return false;
8218c28f431eadc1b451ca25d14fd683db4b234838satok}
8318c28f431eadc1b451ca25d14fd683db4b234838satok
84588e2f296451a8eb074af9140d018b828105237fJean Chalard/* Parameters :
85588e2f296451a8eb074af9140d018b828105237fJean Chalard * prevWord: the word before, the one for which we need to look up bigrams.
86588e2f296451a8eb074af9140d018b828105237fJean Chalard * prevWordLength: its length.
87588e2f296451a8eb074af9140d018b828105237fJean Chalard * codes: what user typed, in the same format as for UnigramDictionary::getSuggestions.
88588e2f296451a8eb074af9140d018b828105237fJean Chalard * codesSize: the size of the codes array.
89588e2f296451a8eb074af9140d018b828105237fJean Chalard * bigramChars: an array for output, at the same format as outwords for getSuggestions.
90588e2f296451a8eb074af9140d018b828105237fJean Chalard * bigramFreq: an array to output frequencies.
91588e2f296451a8eb074af9140d018b828105237fJean Chalard * maxWordLength: the maximum size of a word.
92588e2f296451a8eb074af9140d018b828105237fJean Chalard * maxBigrams: the maximum number of bigrams fitting in the bigramChars array.
93588e2f296451a8eb074af9140d018b828105237fJean Chalard * This method returns the number of bigrams this word has, for backward compatibility.
94588e2f296451a8eb074af9140d018b828105237fJean Chalard * Note: this is not the number of bigrams output in the array, which is the number of
95588e2f296451a8eb074af9140d018b828105237fJean Chalard * bigrams this word has WHOSE first letter also matches the letter the user typed.
96588e2f296451a8eb074af9140d018b828105237fJean Chalard * TODO: this may not be a sensible thing to do. It makes sense when the bigrams are
97588e2f296451a8eb074af9140d018b828105237fJean Chalard * used to match the first letter of the second word, but once the user has typed more
98588e2f296451a8eb074af9140d018b828105237fJean Chalard * and the bigrams are used to boost unigram result scores, it makes little sense to
99588e2f296451a8eb074af9140d018b828105237fJean Chalard * reduce their scope to the ones that match the first letter.
100588e2f296451a8eb074af9140d018b828105237fJean Chalard */
101522a04ea5b249d0af556647d2abcad57e5b99b4fJean Chalardint BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, int *codes,
10218c28f431eadc1b451ca25d14fd683db4b234838satok        int codesSize, unsigned short *bigramChars, int *bigramFreq, int maxWordLength,
1036ba8de2a608dfe4865b0b59a753f2d2abbedeeffsatok        int maxBigrams) {
104588e2f296451a8eb074af9140d018b828105237fJean Chalard    // TODO: remove unused arguments, and refrain from storing stuff in members of this class
105588e2f296451a8eb074af9140d018b828105237fJean Chalard    // TODO: have "in" arguments before "out" ones, and make out args explicit in the name
10618c28f431eadc1b451ca25d14fd683db4b234838satok    mBigramFreq = bigramFreq;
10718c28f431eadc1b451ca25d14fd683db4b234838satok    mBigramChars = bigramChars;
10818c28f431eadc1b451ca25d14fd683db4b234838satok    mInputCodes = codes;
10918c28f431eadc1b451ca25d14fd683db4b234838satok    mMaxBigrams = maxBigrams;
11018c28f431eadc1b451ca25d14fd683db4b234838satok
111588e2f296451a8eb074af9140d018b828105237fJean Chalard    const uint8_t* const root = DICT;
112351864b38a2a19a3b591efe3ed58a5998bb4c79dJean Chalard    int pos = getBigramListPositionForWord(prevWord, prevWordLength);
113351864b38a2a19a3b591efe3ed58a5998bb4c79dJean Chalard    // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
114ee396df162b31cff9763dd10a7da2b47aef10c01Jean Chalard    if (0 == pos) return 0;
115588e2f296451a8eb074af9140d018b828105237fJean Chalard    int bigramFlags;
116588e2f296451a8eb074af9140d018b828105237fJean Chalard    int bigramCount = 0;
117588e2f296451a8eb074af9140d018b828105237fJean Chalard    do {
118588e2f296451a8eb074af9140d018b828105237fJean Chalard        bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
119588e2f296451a8eb074af9140d018b828105237fJean Chalard        uint16_t bigramBuffer[MAX_WORD_LENGTH];
12062cd919dca9ef85cdd045b539b3ef85e921c6e4cJean Chalard        int unigramFreq = 0;
121588e2f296451a8eb074af9140d018b828105237fJean Chalard        const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
122588e2f296451a8eb074af9140d018b828105237fJean Chalard                &pos);
123588e2f296451a8eb074af9140d018b828105237fJean Chalard        const int length = BinaryFormat::getWordAtAddress(root, bigramPos, MAX_WORD_LENGTH,
124e308459531a4dd64ee80aa76e351725180ad856eJean Chalard                bigramBuffer, &unigramFreq);
125588e2f296451a8eb074af9140d018b828105237fJean Chalard
126ad290d6505247171e1e8437446c6f5d148a01778Jean Chalard        // codesSize == 0 means we are trying to find bigram predictions.
127ad290d6505247171e1e8437446c6f5d148a01778Jean Chalard        if (codesSize < 1 || checkFirstCharacter(bigramBuffer)) {
128e308459531a4dd64ee80aa76e351725180ad856eJean Chalard            const int bigramFreq = UnigramDictionary::MASK_ATTRIBUTE_FREQUENCY & bigramFlags;
12946fe49fc05df02228222b8a8e49d4cc8e1f0ea3cJean Chalard            // Due to space constraints, the frequency for bigrams is approximate - the lower the
13046fe49fc05df02228222b8a8e49d4cc8e1f0ea3cJean Chalard            // unigram frequency, the worse the precision. The theoritical maximum error in
13146fe49fc05df02228222b8a8e49d4cc8e1f0ea3cJean Chalard            // resulting frequency is 8 - although in the practice it's never bigger than 3 or 4
13246fe49fc05df02228222b8a8e49d4cc8e1f0ea3cJean Chalard            // in very bad cases. This means that sometimes, we'll see some bigrams interverted
13346fe49fc05df02228222b8a8e49d4cc8e1f0ea3cJean Chalard            // here, but it can't get too bad.
134e308459531a4dd64ee80aa76e351725180ad856eJean Chalard            const int frequency =
135e308459531a4dd64ee80aa76e351725180ad856eJean Chalard                    BinaryFormat::computeFrequencyForBigram(unigramFreq, bigramFreq);
1369715cc4ed58defe71906de6fae3c0bbfac05a80cJean Chalard            if (addWordBigram(bigramBuffer, length, frequency)) {
1379715cc4ed58defe71906de6fae3c0bbfac05a80cJean Chalard                ++bigramCount;
1389715cc4ed58defe71906de6fae3c0bbfac05a80cJean Chalard            }
13918c28f431eadc1b451ca25d14fd683db4b234838satok        }
1404d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyang    } while (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags);
141588e2f296451a8eb074af9140d018b828105237fJean Chalard    return bigramCount;
14230088259480130e5bac5c2028e2c7c3e6d4c51a2satok}
14318c28f431eadc1b451ca25d14fd683db4b234838satok
144ee396df162b31cff9763dd10a7da2b47aef10c01Jean Chalard// Returns a pointer to the start of the bigram list.
145ee396df162b31cff9763dd10a7da2b47aef10c01Jean Chalard// If the word is not found or has no bigrams, this function returns 0.
146351864b38a2a19a3b591efe3ed58a5998bb4c79dJean Chalardint BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord,
147351864b38a2a19a3b591efe3ed58a5998bb4c79dJean Chalard        const int prevWordLength) {
1481ff8dc47be1734555af1c0c011ea6cf72b395a43Jean Chalard    if (0 >= prevWordLength) return 0;
149351864b38a2a19a3b591efe3ed58a5998bb4c79dJean Chalard    const uint8_t* const root = DICT;
1509c2a96aa6cb6d8c1f7a559dbd7051302cfc6150bJean Chalard    int pos = BinaryFormat::getTerminalPosition(root, prevWord, prevWordLength);
1519c2a96aa6cb6d8c1f7a559dbd7051302cfc6150bJean Chalard
1529c2a96aa6cb6d8c1f7a559dbd7051302cfc6150bJean Chalard    if (NOT_VALID_WORD == pos) return 0;
1539c2a96aa6cb6d8c1f7a559dbd7051302cfc6150bJean Chalard    const int flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
1549c2a96aa6cb6d8c1f7a559dbd7051302cfc6150bJean Chalard    if (0 == (flags & UnigramDictionary::FLAG_HAS_BIGRAMS)) return 0;
1559c2a96aa6cb6d8c1f7a559dbd7051302cfc6150bJean Chalard    if (0 == (flags & UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS)) {
1569c2a96aa6cb6d8c1f7a559dbd7051302cfc6150bJean Chalard        BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
1579c2a96aa6cb6d8c1f7a559dbd7051302cfc6150bJean Chalard    } else {
1589c2a96aa6cb6d8c1f7a559dbd7051302cfc6150bJean Chalard        pos = BinaryFormat::skipOtherCharacters(root, pos);
1599c2a96aa6cb6d8c1f7a559dbd7051302cfc6150bJean Chalard    }
1609c2a96aa6cb6d8c1f7a559dbd7051302cfc6150bJean Chalard    pos = BinaryFormat::skipFrequency(flags, pos);
161402b0570505c7ea1389e1c153e5db0300568ce26Jean Chalard    pos = BinaryFormat::skipChildrenPosition(flags, pos);
1629c2a96aa6cb6d8c1f7a559dbd7051302cfc6150bJean Chalard    pos = BinaryFormat::skipShortcuts(root, flags, pos);
1639c2a96aa6cb6d8c1f7a559dbd7051302cfc6150bJean Chalard    return pos;
1649c2a96aa6cb6d8c1f7a559dbd7051302cfc6150bJean Chalard}
1659c2a96aa6cb6d8c1f7a559dbd7051302cfc6150bJean Chalard
166f1634c872c57a5e8d0a861cda299fdbd98740e79Jean Chalardvoid BigramDictionary::fillBigramAddressToFrequencyMapAndFilter(const int32_t *prevWord,
167f1634c872c57a5e8d0a861cda299fdbd98740e79Jean Chalard        const int prevWordLength, std::map<int, int> *map, uint8_t *filter) {
168f1634c872c57a5e8d0a861cda299fdbd98740e79Jean Chalard    memset(filter, 0, BIGRAM_FILTER_BYTE_SIZE);
1691ff8dc47be1734555af1c0c011ea6cf72b395a43Jean Chalard    const uint8_t* const root = DICT;
1701ff8dc47be1734555af1c0c011ea6cf72b395a43Jean Chalard    int pos = getBigramListPositionForWord(prevWord, prevWordLength);
1711ff8dc47be1734555af1c0c011ea6cf72b395a43Jean Chalard    if (0 == pos) return;
1721ff8dc47be1734555af1c0c011ea6cf72b395a43Jean Chalard
1731ff8dc47be1734555af1c0c011ea6cf72b395a43Jean Chalard    int bigramFlags;
1741ff8dc47be1734555af1c0c011ea6cf72b395a43Jean Chalard    do {
1751ff8dc47be1734555af1c0c011ea6cf72b395a43Jean Chalard        bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
1761ff8dc47be1734555af1c0c011ea6cf72b395a43Jean Chalard        const int frequency = UnigramDictionary::MASK_ATTRIBUTE_FREQUENCY & bigramFlags;
1771ff8dc47be1734555af1c0c011ea6cf72b395a43Jean Chalard        const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
1781ff8dc47be1734555af1c0c011ea6cf72b395a43Jean Chalard                &pos);
1791ff8dc47be1734555af1c0c011ea6cf72b395a43Jean Chalard        (*map)[bigramPos] = frequency;
180f1634c872c57a5e8d0a861cda299fdbd98740e79Jean Chalard        setInFilter(filter, bigramPos);
1811ff8dc47be1734555af1c0c011ea6cf72b395a43Jean Chalard    } while (0 != (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags));
1821ff8dc47be1734555af1c0c011ea6cf72b395a43Jean Chalard}
1831ff8dc47be1734555af1c0c011ea6cf72b395a43Jean Chalard
18418c28f431eadc1b451ca25d14fd683db4b234838satokbool BigramDictionary::checkFirstCharacter(unsigned short *word) {
18518c28f431eadc1b451ca25d14fd683db4b234838satok    // Checks whether this word starts with same character or neighboring characters of
18618c28f431eadc1b451ca25d14fd683db4b234838satok    // what user typed.
18718c28f431eadc1b451ca25d14fd683db4b234838satok
18818c28f431eadc1b451ca25d14fd683db4b234838satok    int *inputCodes = mInputCodes;
18918c28f431eadc1b451ca25d14fd683db4b234838satok    int maxAlt = MAX_ALTERNATIVES;
190aeda8a7798fa1a028998cf4c81397de8a06610a9Tom Ouyang    const unsigned short firstBaseChar = toBaseLowerCase(*word);
19118c28f431eadc1b451ca25d14fd683db4b234838satok    while (maxAlt > 0) {
192aeda8a7798fa1a028998cf4c81397de8a06610a9Tom Ouyang        if (toBaseLowerCase(*inputCodes) == firstBaseChar) {
19318c28f431eadc1b451ca25d14fd683db4b234838satok            return true;
19418c28f431eadc1b451ca25d14fd683db4b234838satok        }
19518c28f431eadc1b451ca25d14fd683db4b234838satok        inputCodes++;
19618c28f431eadc1b451ca25d14fd683db4b234838satok        maxAlt--;
19718c28f431eadc1b451ca25d14fd683db4b234838satok    }
19818c28f431eadc1b451ca25d14fd683db4b234838satok    return false;
19918c28f431eadc1b451ca25d14fd683db4b234838satok}
20018c28f431eadc1b451ca25d14fd683db4b234838satok
2014d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyangbool BigramDictionary::isValidBigram(const int32_t *word1, int length1, const int32_t *word2,
2024d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyang        int length2) {
2034d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyang    const uint8_t* const root = DICT;
2044d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyang    int pos = getBigramListPositionForWord(word1, length1);
2054d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyang    // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
2064d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyang    if (0 == pos) return false;
2074d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyang    int nextWordPos = BinaryFormat::getTerminalPosition(root, word2, length2);
2084d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyang    if (NOT_VALID_WORD == nextWordPos) return false;
2094d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyang    int bigramFlags;
2104d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyang    do {
2114d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyang        bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
2124d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyang        const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
2134d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyang                &pos);
2144d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyang        if (bigramPos == nextWordPos) {
2154d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyang            return true;
2164d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyang        }
2174d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyang    } while (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags);
2184d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyang    return false;
2194d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyang}
2204d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyang
// TODO: Move functions related to bigrams here
22230088259480130e5bac5c2028e2c7c3e6d4c51a2satok} // namespace latinime
223