digraph_utils.cpp revision 76e579c7caf2ef04f440be21c27377fe0b4150ff
1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "suggest/core/dictionary/digraph_utils.h"
18
19#include <cstdlib>
20
21#include "defines.h"
22#include "suggest/core/policy/dictionary_header_structure_policy.h"
23#include "utils/char_utils.h"
24
25namespace latinime {
26
27const DigraphUtils::digraph_t DigraphUtils::GERMAN_UMLAUT_DIGRAPHS[] =
28        { { 'a', 'e', 0x00E4 }, // U+00E4 : LATIN SMALL LETTER A WITH DIAERESIS
29        { 'o', 'e', 0x00F6 },   // U+00F6 : LATIN SMALL LETTER O WITH DIAERESIS
30        { 'u', 'e', 0x00FC } }; // U+00FC : LATIN SMALL LETTER U WITH DIAERESIS
31const DigraphUtils::digraph_t DigraphUtils::FRENCH_LIGATURES_DIGRAPHS[] =
32        { { 'a', 'e', 0x00E6 }, // U+00E6 : LATIN SMALL LETTER AE
33        { 'o', 'e', 0x0153 } }; // U+0153 : LATIN SMALL LIGATURE OE
34const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] =
35        { DIGRAPH_TYPE_GERMAN_UMLAUT, DIGRAPH_TYPE_FRENCH_LIGATURES };
36
37/* static */ bool DigraphUtils::hasDigraphForCodePoint(
38        const DictionaryHeaderStructurePolicy *const headerPolicy,
39        const int compositeGlyphCodePoint) {
40    const DigraphUtils::DigraphType digraphType = getDigraphTypeForDictionary(headerPolicy);
41    if (DigraphUtils::getDigraphForDigraphTypeAndCodePoint(digraphType, compositeGlyphCodePoint)) {
42        return true;
43    }
44    return false;
45}
46
47// Returns the digraph type associated with the given dictionary.
48/* static */ DigraphUtils::DigraphType DigraphUtils::getDigraphTypeForDictionary(
49        const DictionaryHeaderStructurePolicy *const headerPolicy) {
50    if (headerPolicy->requiresGermanUmlautProcessing()) {
51        return DIGRAPH_TYPE_GERMAN_UMLAUT;
52    }
53    if (headerPolicy->requiresFrenchLigatureProcessing()) {
54        return DIGRAPH_TYPE_FRENCH_LIGATURES;
55    }
56    return DIGRAPH_TYPE_NONE;
57}
58
59// Returns the digraph codepoint for the given composite glyph codepoint and digraph codepoint index
60// (which specifies the first or second codepoint in the digraph).
61/* static */ int DigraphUtils::getDigraphCodePointForIndex(const int compositeGlyphCodePoint,
62        const DigraphCodePointIndex digraphCodePointIndex) {
63    if (digraphCodePointIndex == NOT_A_DIGRAPH_INDEX) {
64        return NOT_A_CODE_POINT;
65    }
66    const DigraphUtils::digraph_t *const digraph =
67            DigraphUtils::getDigraphForCodePoint(compositeGlyphCodePoint);
68    if (!digraph) {
69        return NOT_A_CODE_POINT;
70    }
71    if (digraphCodePointIndex == FIRST_DIGRAPH_CODEPOINT) {
72        return digraph->first;
73    } else if (digraphCodePointIndex == SECOND_DIGRAPH_CODEPOINT) {
74        return digraph->second;
75    }
76    ASSERT(false);
77    return NOT_A_CODE_POINT;
78}
79
80// Retrieves the set of all digraphs associated with the given digraph type.
81// Returns the size of the digraph array, or 0 if none exist.
82/* static */ int DigraphUtils::getAllDigraphsForDigraphTypeAndReturnSize(
83        const DigraphUtils::DigraphType digraphType,
84        const DigraphUtils::digraph_t **const digraphs) {
85    if (digraphType == DigraphUtils::DIGRAPH_TYPE_GERMAN_UMLAUT) {
86        *digraphs = GERMAN_UMLAUT_DIGRAPHS;
87        return NELEMS(GERMAN_UMLAUT_DIGRAPHS);
88    }
89    if (digraphType == DIGRAPH_TYPE_FRENCH_LIGATURES) {
90        *digraphs = FRENCH_LIGATURES_DIGRAPHS;
91        return NELEMS(FRENCH_LIGATURES_DIGRAPHS);
92    }
93    return 0;
94}
95
96/**
97 * Returns the digraph for the input composite glyph codepoint, or 0 if none exists.
98 * compositeGlyphCodePoint: the method returns the digraph corresponding to this codepoint.
99 */
100/* static */ const DigraphUtils::digraph_t *DigraphUtils::getDigraphForCodePoint(
101        const int compositeGlyphCodePoint) {
102    for (size_t i = 0; i < NELEMS(USED_DIGRAPH_TYPES); i++) {
103        const DigraphUtils::digraph_t *const digraph = getDigraphForDigraphTypeAndCodePoint(
104                USED_DIGRAPH_TYPES[i], compositeGlyphCodePoint);
105        if (digraph) {
106            return digraph;
107        }
108    }
109    return 0;
110}
111
112/**
113 * Returns the digraph for the input composite glyph codepoint, or 0 if none exists.
114 * digraphType: the type of digraphs supported.
115 * compositeGlyphCodePoint: the method returns the digraph corresponding to this codepoint.
116 */
117/* static */ const DigraphUtils::digraph_t *DigraphUtils::getDigraphForDigraphTypeAndCodePoint(
118        const DigraphUtils::DigraphType digraphType, const int compositeGlyphCodePoint) {
119    const DigraphUtils::digraph_t *digraphs = 0;
120    const int compositeGlyphLowerCodePoint = CharUtils::toLowerCase(compositeGlyphCodePoint);
121    const int digraphsSize =
122            DigraphUtils::getAllDigraphsForDigraphTypeAndReturnSize(digraphType, &digraphs);
123    for (int i = 0; i < digraphsSize; i++) {
124        if (digraphs[i].compositeGlyph == compositeGlyphLowerCodePoint) {
125            return &digraphs[i];
126        }
127    }
128    return 0;
129}
130
131} // namespace latinime
132