1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <unicode/utf.h>
18#include <unicode/utf8.h>
19#include <cstdlib>
20#include <cutils/log.h>
21#include <vector>
22#include <string>
23
24namespace minikin {
25
26// src is of the form "U+1F431 | 'h' 'i'". Position of "|" gets saved to offset if non-null.
27// Size is returned in an out parameter because gtest needs a void return for ASSERT to work.
28void ParseUnicode(uint16_t* buf, size_t buf_size, const char* src, size_t* result_size,
29        size_t* offset) {
30    size_t input_ix = 0;
31    size_t output_ix = 0;
32    bool seen_offset = false;
33
34    while (src[input_ix] != 0) {
35        switch (src[input_ix]) {
36        case '\'':
37            // single ASCII char
38            LOG_ALWAYS_FATAL_IF(static_cast<uint8_t>(src[input_ix]) >= 0x80);
39            input_ix++;
40            LOG_ALWAYS_FATAL_IF(src[input_ix] == 0);
41            LOG_ALWAYS_FATAL_IF(output_ix >= buf_size);
42            buf[output_ix++] = (uint16_t)src[input_ix++];
43            LOG_ALWAYS_FATAL_IF(src[input_ix] != '\'');
44            input_ix++;
45            break;
46        case 'u':
47        case 'U': {
48            // Unicode codepoint in hex syntax
49            input_ix++;
50            LOG_ALWAYS_FATAL_IF(src[input_ix] != '+');
51            input_ix++;
52            char* endptr = (char*)src + input_ix;
53            unsigned long int codepoint = strtoul(src + input_ix, &endptr, 16);
54            size_t num_hex_digits = endptr - (src + input_ix);
55
56            // also triggers on invalid number syntax, digits = 0
57            LOG_ALWAYS_FATAL_IF(num_hex_digits < 4u);
58            LOG_ALWAYS_FATAL_IF(num_hex_digits > 6u);
59            LOG_ALWAYS_FATAL_IF(codepoint > 0x10FFFFu);
60            input_ix += num_hex_digits;
61            if (U16_LENGTH(codepoint) == 1) {
62                LOG_ALWAYS_FATAL_IF(output_ix + 1 > buf_size);
63                buf[output_ix++] = codepoint;
64            } else {
65                // UTF-16 encoding
66                LOG_ALWAYS_FATAL_IF(output_ix + 2 > buf_size);
67                buf[output_ix++] = U16_LEAD(codepoint);
68                buf[output_ix++] = U16_TRAIL(codepoint);
69            }
70            break;
71        }
72        case ' ':
73            input_ix++;
74            break;
75        case '|':
76            LOG_ALWAYS_FATAL_IF(seen_offset);
77            LOG_ALWAYS_FATAL_IF(offset == nullptr);
78            *offset = output_ix;
79            seen_offset = true;
80            input_ix++;
81            break;
82        default:
83            LOG_ALWAYS_FATAL("Unexpected Character");
84        }
85    }
86    LOG_ALWAYS_FATAL_IF(result_size == nullptr);
87    *result_size = output_ix;
88    LOG_ALWAYS_FATAL_IF(!seen_offset && offset != nullptr);
89}
90
91std::vector<uint16_t> parseUnicodeStringWithOffset(const std::string& in, size_t* offset) {
92    std::unique_ptr<uint16_t[]> buffer(new uint16_t[in.size()]);
93    size_t result_size = 0;
94    ParseUnicode(buffer.get(), in.size(), in.c_str(), &result_size, offset);
95    return std::vector<uint16_t>(buffer.get(), buffer.get() + result_size);
96}
97
98std::vector<uint16_t> parseUnicodeString(const std::string& in) {
99    return parseUnicodeStringWithOffset(in, nullptr);
100}
101
102std::vector<uint16_t> utf8ToUtf16(const std::string& text) {
103    std::vector<uint16_t> result;
104    int32_t i = 0;
105    const int32_t textLength = static_cast<int32_t>(text.size());
106    uint32_t c = 0;
107    while (i < textLength) {
108        U8_NEXT(text.c_str(), i, textLength, c);
109        if (U16_LENGTH(c) == 1) {
110            result.push_back(c);
111        } else {
112            result.push_back(U16_LEAD(c));
113            result.push_back(U16_TRAIL(c));
114        }
115    }
116    return result;
117}
118
119}  // namespace minikin
120