1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <gtest/gtest.h>
18#include <unicode/utf.h>
19#include <cstdlib>
20
21// src is of the form "U+1F431 | 'h' 'i'". Position of "|" gets saved to offset if non-null.
22// Size is returned in an out parameter because gtest needs a void return for ASSERT to work.
23void ParseUnicode(uint16_t* buf, size_t buf_size, const char* src, size_t* result_size,
24        size_t* offset) {
25    size_t input_ix = 0;
26    size_t output_ix = 0;
27    bool seen_offset = false;
28
29    while (src[input_ix] != 0) {
30        switch (src[input_ix]) {
31        case '\'':
32            // single ASCII char
33            ASSERT_LT(src[input_ix], 0x80);
34            input_ix++;
35            ASSERT_NE(src[input_ix], 0);
36            ASSERT_LT(output_ix, buf_size);
37            buf[output_ix++] = (uint16_t)src[input_ix++];
38            ASSERT_EQ(src[input_ix], '\'');
39            input_ix++;
40            break;
41        case 'u':
42        case 'U': {
43            // Unicode codepoint in hex syntax
44            input_ix++;
45            ASSERT_EQ(src[input_ix], '+');
46            input_ix++;
47            char* endptr = (char*)src + input_ix;
48            unsigned long int codepoint = strtoul(src + input_ix, &endptr, 16);
49            size_t num_hex_digits = endptr - (src + input_ix);
50            ASSERT_GE(num_hex_digits, 4u);  // also triggers on invalid number syntax, digits = 0
51            ASSERT_LE(num_hex_digits, 6u);
52            ASSERT_LE(codepoint, 0x10FFFFu);
53            input_ix += num_hex_digits;
54            if (U16_LENGTH(codepoint) == 1) {
55                ASSERT_LE(output_ix + 1, buf_size);
56                buf[output_ix++] = codepoint;
57            } else {
58                // UTF-16 encoding
59                ASSERT_LE(output_ix + 2, buf_size);
60                buf[output_ix++] = U16_LEAD(codepoint);
61                buf[output_ix++] = U16_TRAIL(codepoint);
62            }
63            break;
64        }
65        case ' ':
66            input_ix++;
67            break;
68        case '|':
69            ASSERT_FALSE(seen_offset);
70            ASSERT_NE(offset, nullptr);
71            *offset = output_ix;
72            seen_offset = true;
73            input_ix++;
74            break;
75        default:
76            FAIL();  // unexpected character
77        }
78    }
79    ASSERT_NE(result_size, nullptr);
80    *result_size = output_ix;
81    ASSERT_TRUE(seen_offset || offset == nullptr);
82}
83
84TEST(UnicodeUtils, parse) {
85    const size_t BUF_SIZE = 256;
86    uint16_t buf[BUF_SIZE];
87    size_t offset;
88    size_t size;
89    ParseUnicode(buf, BUF_SIZE, "U+000D U+1F431 | 'a'", &size, &offset);
90    EXPECT_EQ(size, 4u);
91    EXPECT_EQ(offset, 3u);
92    EXPECT_EQ(buf[0], 0x000D);
93    EXPECT_EQ(buf[1], 0xD83D);
94    EXPECT_EQ(buf[2], 0xDC31);
95    EXPECT_EQ(buf[3], 'a');
96}
97