// digraph_utils.cpp revision bd0d1afdb28a28e2ddac1409208c59ba64350399
1/* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "suggest/core/dictionary/digraph_utils.h" 18 19#include <cstdlib> 20 21#include "defines.h" 22#include "suggest/core/dictionary/binary_dictionary_header.h" 23#include "utils/char_utils.h" 24 25namespace latinime { 26 27const DigraphUtils::digraph_t DigraphUtils::GERMAN_UMLAUT_DIGRAPHS[] = 28 { { 'a', 'e', 0x00E4 }, // U+00E4 : LATIN SMALL LETTER A WITH DIAERESIS 29 { 'o', 'e', 0x00F6 }, // U+00F6 : LATIN SMALL LETTER O WITH DIAERESIS 30 { 'u', 'e', 0x00FC } }; // U+00FC : LATIN SMALL LETTER U WITH DIAERESIS 31const DigraphUtils::digraph_t DigraphUtils::FRENCH_LIGATURES_DIGRAPHS[] = 32 { { 'a', 'e', 0x00E6 }, // U+00E6 : LATIN SMALL LETTER AE 33 { 'o', 'e', 0x0153 } }; // U+0153 : LATIN SMALL LIGATURE OE 34const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] = 35 { DIGRAPH_TYPE_GERMAN_UMLAUT, DIGRAPH_TYPE_FRENCH_LIGATURES }; 36 37/* static */ bool DigraphUtils::hasDigraphForCodePoint( 38 const BinaryDictionaryHeader *const header, const int compositeGlyphCodePoint) { 39 const DigraphUtils::DigraphType digraphType = getDigraphTypeForDictionary(header); 40 if (DigraphUtils::getDigraphForDigraphTypeAndCodePoint(digraphType, compositeGlyphCodePoint)) { 41 return true; 42 } 43 return false; 44} 45 46// Returns the digraph type associated with the given dictionary. 
47/* static */ DigraphUtils::DigraphType DigraphUtils::getDigraphTypeForDictionary( 48 const BinaryDictionaryHeader *const header) { 49 if (header->requiresGermanUmlautProcessing()) { 50 return DIGRAPH_TYPE_GERMAN_UMLAUT; 51 } 52 if (header->requiresFrenchLigatureProcessing()) { 53 return DIGRAPH_TYPE_FRENCH_LIGATURES; 54 } 55 return DIGRAPH_TYPE_NONE; 56} 57 58// Returns the digraph codepoint for the given composite glyph codepoint and digraph codepoint index 59// (which specifies the first or second codepoint in the digraph). 60/* static */ int DigraphUtils::getDigraphCodePointForIndex(const int compositeGlyphCodePoint, 61 const DigraphCodePointIndex digraphCodePointIndex) { 62 if (digraphCodePointIndex == NOT_A_DIGRAPH_INDEX) { 63 return NOT_A_CODE_POINT; 64 } 65 const DigraphUtils::digraph_t *const digraph = 66 DigraphUtils::getDigraphForCodePoint(compositeGlyphCodePoint); 67 if (!digraph) { 68 return NOT_A_CODE_POINT; 69 } 70 if (digraphCodePointIndex == FIRST_DIGRAPH_CODEPOINT) { 71 return digraph->first; 72 } else if (digraphCodePointIndex == SECOND_DIGRAPH_CODEPOINT) { 73 return digraph->second; 74 } 75 ASSERT(false); 76 return NOT_A_CODE_POINT; 77} 78 79// Retrieves the set of all digraphs associated with the given digraph type. 80// Returns the size of the digraph array, or 0 if none exist. 81/* static */ int DigraphUtils::getAllDigraphsForDigraphTypeAndReturnSize( 82 const DigraphUtils::DigraphType digraphType, 83 const DigraphUtils::digraph_t **const digraphs) { 84 if (digraphType == DigraphUtils::DIGRAPH_TYPE_GERMAN_UMLAUT) { 85 *digraphs = GERMAN_UMLAUT_DIGRAPHS; 86 return NELEMS(GERMAN_UMLAUT_DIGRAPHS); 87 } 88 if (digraphType == DIGRAPH_TYPE_FRENCH_LIGATURES) { 89 *digraphs = FRENCH_LIGATURES_DIGRAPHS; 90 return NELEMS(FRENCH_LIGATURES_DIGRAPHS); 91 } 92 return 0; 93} 94 95/** 96 * Returns the digraph for the input composite glyph codepoint, or 0 if none exists. 
97 * compositeGlyphCodePoint: the method returns the digraph corresponding to this codepoint. 98 */ 99/* static */ const DigraphUtils::digraph_t *DigraphUtils::getDigraphForCodePoint( 100 const int compositeGlyphCodePoint) { 101 for (size_t i = 0; i < NELEMS(USED_DIGRAPH_TYPES); i++) { 102 const DigraphUtils::digraph_t *const digraph = getDigraphForDigraphTypeAndCodePoint( 103 USED_DIGRAPH_TYPES[i], compositeGlyphCodePoint); 104 if (digraph) { 105 return digraph; 106 } 107 } 108 return 0; 109} 110 111/** 112 * Returns the digraph for the input composite glyph codepoint, or 0 if none exists. 113 * digraphType: the type of digraphs supported. 114 * compositeGlyphCodePoint: the method returns the digraph corresponding to this codepoint. 115 */ 116/* static */ const DigraphUtils::digraph_t *DigraphUtils::getDigraphForDigraphTypeAndCodePoint( 117 const DigraphUtils::DigraphType digraphType, const int compositeGlyphCodePoint) { 118 const DigraphUtils::digraph_t *digraphs = 0; 119 const int compositeGlyphLowerCodePoint = CharUtils::toLowerCase(compositeGlyphCodePoint); 120 const int digraphsSize = 121 DigraphUtils::getAllDigraphsForDigraphTypeAndReturnSize(digraphType, &digraphs); 122 for (int i = 0; i < digraphsSize; i++) { 123 if (digraphs[i].compositeGlyph == compositeGlyphLowerCodePoint) { 124 return &digraphs[i]; 125 } 126 } 127 return 0; 128} 129 130} // namespace latinime 131