16d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka/*
26d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka * Copyright (C) 2015 The Android Open Source Project
36d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka *
46d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka * Licensed under the Apache License, Version 2.0 (the "License");
56d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka * you may not use this file except in compliance with the License.
66d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka * You may obtain a copy of the License at
76d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka *
86d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka *      http://www.apache.org/licenses/LICENSE-2.0
96d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka *
106d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka * Unless required by applicable law or agreed to in writing, software
116d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka * distributed under the License is distributed on an "AS IS" BASIS,
126d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
136d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka * See the License for the specific language governing permissions and
146d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka * limitations under the License.
156d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka */
166d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka
176d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka#define LOG_TAG "Minikin"
186d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka
196d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka#include "FontLanguageListCache.h"
206d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka
21198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#include <unicode/uloc.h>
22198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#include <unordered_set>
236d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka
2439ab40115fae6d0c948e435233b3dd997ee7d8e5Mark Salyzyn#include <log/log.h>
25555d84c6f98eafcbe677cdcb8e9605760acd8ce5Mark Salyzyn
26198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#include "FontLanguage.h"
27555d84c6f98eafcbe677cdcb8e9605760acd8ce5Mark Salyzyn#include "MinikinInternal.h"
286d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka
2914e2d136aaef271ba131f917cf5f27baa31ae5adSeigo Nonakanamespace minikin {
306d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka
// Out-of-line definition of the static constant member; there is no initializer here, so its
// value presumably comes from the in-class declaration (not visible in this file — confirm).
// kEmptyListId is passed to std::make_pair() in getInstance() below.
31fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonakaconst uint32_t FontLanguageListCache::kEmptyListId;
32fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka
33198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka// Returns the text length of output.
34198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonakastatic size_t toLanguageTag(char* output, size_t outSize, const std::string& locale) {
35198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    output[0] = '\0';
36198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    if (locale.empty()) {
37198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        return 0;
38198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    }
39198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
40198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    size_t outLength = 0;
41198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    UErrorCode uErr = U_ZERO_ERROR;
42198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    outLength = uloc_canonicalize(locale.c_str(), output, outSize, &uErr);
43198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    if (U_FAILURE(uErr)) {
44198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        // unable to build a proper language identifier
45198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        ALOGD("uloc_canonicalize(\"%s\") failed: %s", locale.c_str(), u_errorName(uErr));
46198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        output[0] = '\0';
47198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        return 0;
48198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    }
49198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
50198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    // Preserve "und" and "und-****" since uloc_addLikelySubtags changes "und" to "en-Latn-US".
51198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    if (strncmp(output, "und", 3) == 0 &&
52198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        (outLength == 3 || (outLength == 8 && output[3]  == '_'))) {
53198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        return outLength;
54198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    }
55198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
56198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    char likelyChars[ULOC_FULLNAME_CAPACITY];
57198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    uErr = U_ZERO_ERROR;
58198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    uloc_addLikelySubtags(output, likelyChars, ULOC_FULLNAME_CAPACITY, &uErr);
59198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    if (U_FAILURE(uErr)) {
60198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        // unable to build a proper language identifier
61198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        ALOGD("uloc_addLikelySubtags(\"%s\") failed: %s", output, u_errorName(uErr));
62198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        output[0] = '\0';
63198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        return 0;
64198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    }
65198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
66198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    uErr = U_ZERO_ERROR;
67198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    outLength = uloc_toLanguageTag(likelyChars, output, outSize, FALSE, &uErr);
68198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    if (U_FAILURE(uErr)) {
69198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        // unable to build a proper language identifier
70198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        ALOGD("uloc_toLanguageTag(\"%s\") failed: %s", likelyChars, u_errorName(uErr));
71198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        output[0] = '\0';
72198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        return 0;
73198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    }
74198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#ifdef VERBOSE_DEBUG
75198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    ALOGD("ICU normalized '%s' to '%s'", locale.c_str(), output);
76198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#endif
77198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    return outLength;
78198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka}
79198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
80f3afe92def0fff022889fd036d68451223aac146Seigo Nonakastatic std::vector<FontLanguage> parseLanguageList(const std::string& input) {
81f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    std::vector<FontLanguage> result;
82198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    size_t currentIdx = 0;
83198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    size_t commaLoc = 0;
84198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    char langTag[ULOC_FULLNAME_CAPACITY];
85198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    std::unordered_set<uint64_t> seen;
86198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    std::string locale(input.size(), 0);
87198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
88198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    while ((commaLoc = input.find_first_of(',', currentIdx)) != std::string::npos) {
89198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        locale.assign(input, currentIdx, commaLoc - currentIdx);
90198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        currentIdx = commaLoc + 1;
91198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        size_t length = toLanguageTag(langTag, ULOC_FULLNAME_CAPACITY, locale);
92198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        FontLanguage lang(langTag, length);
93198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        uint64_t identifier = lang.getIdentifier();
94198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        if (!lang.isUnsupported() && seen.count(identifier) == 0) {
95198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            result.push_back(lang);
966f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka            if (result.size() == FONT_LANGUAGES_LIMIT) {
976f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka              break;
986f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka            }
99198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            seen.insert(identifier);
100198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        }
101198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    }
1026f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka    if (result.size() < FONT_LANGUAGES_LIMIT) {
1036f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka      locale.assign(input, currentIdx, input.size() - currentIdx);
1046f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka      size_t length = toLanguageTag(langTag, ULOC_FULLNAME_CAPACITY, locale);
1056f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka      FontLanguage lang(langTag, length);
1066f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka      uint64_t identifier = lang.getIdentifier();
1076f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka      if (!lang.isUnsupported() && seen.count(identifier) == 0) {
1086f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka          result.push_back(lang);
1096f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka      }
110198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    }
111198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    return result;
112198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka}
113198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
114fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka// static
115fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonakauint32_t FontLanguageListCache::getId(const std::string& languages) {
116fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka    FontLanguageListCache* inst = FontLanguageListCache::getInstance();
117fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka    std::unordered_map<std::string, uint32_t>::const_iterator it =
118fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka            inst->mLanguageListLookupTable.find(languages);
119fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka    if (it != inst->mLanguageListLookupTable.end()) {
120fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka        return it->second;
1216d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka    }
1226d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka
123fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka    // Given language list is not in cache. Insert it and return newly assigned ID.
124fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka    const uint32_t nextId = inst->mLanguageLists.size();
125fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka    FontLanguages fontLanguages(parseLanguageList(languages));
126fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka    if (fontLanguages.empty()) {
127fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka        return kEmptyListId;
128198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    }
129fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka    inst->mLanguageLists.push_back(std::move(fontLanguages));
130fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka    inst->mLanguageListLookupTable.insert(std::make_pair(languages, nextId));
131fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka    return nextId;
1326d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka}
1336d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka
1346d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka// static
135fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonakaconst FontLanguages& FontLanguageListCache::getById(uint32_t id) {
136fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka    FontLanguageListCache* inst = FontLanguageListCache::getInstance();
137fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka    LOG_ALWAYS_FATAL_IF(id >= inst->mLanguageLists.size(), "Lookup by unknown language list ID.");
138fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka    return inst->mLanguageLists[id];
1396d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka}
1406d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka
1416d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka// static
142fd4124c53399581dd94eac5a9749bc07b474a294Seigo NonakaFontLanguageListCache* FontLanguageListCache::getInstance() {
143fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka    assertMinikinLocked();
144fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka    static FontLanguageListCache* instance = nullptr;
145fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka    if (instance == nullptr) {
146fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka        instance = new FontLanguageListCache();
147fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka
148fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka        // Insert an empty language list for mapping default language list to kEmptyListId.
149fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka        // The default language list has only one FontLanguage and it is the unsupported language.
150fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka        instance->mLanguageLists.push_back(FontLanguages());
151fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka        instance->mLanguageListLookupTable.insert(std::make_pair("", kEmptyListId));
152fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka    }
153fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka    return instance;
1546d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka}
1556d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka
15614e2d136aaef271ba131f917cf5f27baa31ae5adSeigo Nonaka}  // namespace minikin
157