1/*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "text/Unicode.h"
18
19#include <algorithm>
20#include <array>
21
22#include "text/Utf8Iterator.h"
23
24using ::android::StringPiece;
25
26namespace aapt {
27namespace text {
28
29namespace {
30
// One row of the generated lookup table: a run of code points plus the property bits that
// apply to every code point in that run.
struct CharacterProperties {
  // Bit flags that may be OR-ed together in `properties`.
  enum : uint32_t {
    kXidStart = 0x1,     // Bit 0: tested by IsXidStart().
    kXidContinue = 0x2,  // Bit 1: tested by IsXidContinue().
  };

  // Lowest code point covered by this row (FindCharacterProperties treats it as inclusive).
  char32_t first_char;

  // Highest code point covered by this row (FindCharacterProperties treats it as inclusive).
  char32_t last_char;

  // Combination of the flags above.
  uint32_t properties;
};
41
// Include the generated data table.
43#include "text/Unicode_data.cpp"
44
45bool CompareCharacterProperties(const CharacterProperties& a, char32_t codepoint) {
46  return a.last_char < codepoint;
47}
48
49uint32_t FindCharacterProperties(char32_t codepoint) {
50  const auto iter_end = sCharacterProperties.end();
51  const auto iter = std::lower_bound(sCharacterProperties.begin(), iter_end, codepoint,
52                                     CompareCharacterProperties);
53  if (iter != iter_end && codepoint >= iter->first_char) {
54    return iter->properties;
55  }
56  return 0u;
57}
58
59}  // namespace
60
61bool IsXidStart(char32_t codepoint) {
62  return FindCharacterProperties(codepoint) & CharacterProperties::kXidStart;
63}
64
65bool IsXidContinue(char32_t codepoint) {
66  return FindCharacterProperties(codepoint) & CharacterProperties::kXidContinue;
67}
68
// Returns true if `codepoint` is one of the hardcoded White_Space code points below.
//
// The set is spelled out by hand because it is small and the external/icu project doesn't list
// these characters as data files to parse.
// Sourced from http://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
bool IsWhitespace(char32_t codepoint) {
  // The two contiguous runs: U+0009..U+000D and U+2000..U+200A.
  if (codepoint >= 0x0009 && codepoint <= 0x000d) {
    return true;
  }
  if (codepoint >= 0x2000 && codepoint <= 0x200a) {
    return true;
  }

  // The remaining isolated code points.
  switch (codepoint) {
    case 0x0020:
    case 0x0085:
    case 0x00a0:
    case 0x1680:
    case 0x2028:
    case 0x2029:
    case 0x202f:
    case 0x205f:
    case 0x3000:
      return true;
    default:
      return false;
  }
}
79
80bool IsJavaIdentifier(const StringPiece& str) {
81  Utf8Iterator iter(str);
82
83  // Check the first character.
84  if (!iter.HasNext()) {
85    return false;
86  }
87
88  const char32_t first_codepoint = iter.Next();
89  if (!IsXidStart(first_codepoint) && first_codepoint != U'_' && first_codepoint != U'$') {
90    return false;
91  }
92
93  while (iter.HasNext()) {
94    const char32_t codepoint = iter.Next();
95    if (!IsXidContinue(codepoint) && codepoint != U'$') {
96      return false;
97    }
98  }
99  return true;
100}
101
102bool IsValidResourceEntryName(const StringPiece& str) {
103  Utf8Iterator iter(str);
104
105  // Check the first character.
106  if (!iter.HasNext()) {
107    return false;
108  }
109
110  // Resources are allowed to start with '_'
111  const char32_t first_codepoint = iter.Next();
112  if (!IsXidStart(first_codepoint) && first_codepoint != U'_') {
113    return false;
114  }
115
116  while (iter.HasNext()) {
117    const char32_t codepoint = iter.Next();
118    if (!IsXidContinue(codepoint) && codepoint != U'.' && codepoint != U'-') {
119      return false;
120    }
121  }
122  return true;
123}
124
125}  // namespace text
126}  // namespace aapt
127