1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "BigBuffer.h"
18#include "Maybe.h"
19#include "StringPiece.h"
20#include "Util.h"
21
22#include <algorithm>
23#include <ostream>
24#include <string>
25#include <utils/Unicode.h>
26#include <vector>
27
28namespace aapt {
29namespace util {
30
// Namespace URI that extractPackageFromNamespace() maps to an empty package name.
constexpr const char16_t* kSchemaAuto{u"http://schemas.android.com/apk/res-auto"};

// Namespace URI prefix; extractPackageFromNamespace() returns whatever follows it as the package.
constexpr const char16_t* kSchemaPrefix{u"http://schemas.android.com/apk/res/"};
33
34static std::vector<std::string> splitAndTransform(const StringPiece& str, char sep,
35        const std::function<char(char)>& f) {
36    std::vector<std::string> parts;
37    const StringPiece::const_iterator end = std::end(str);
38    StringPiece::const_iterator start = std::begin(str);
39    StringPiece::const_iterator current;
40    do {
41        current = std::find(start, end, sep);
42        parts.emplace_back(str.substr(start, current).toString());
43        if (f) {
44            std::string& part = parts.back();
45            std::transform(part.begin(), part.end(), part.begin(), f);
46        }
47        start = current + 1;
48    } while (current != end);
49    return parts;
50}
51
52std::vector<std::string> split(const StringPiece& str, char sep) {
53    return splitAndTransform(str, sep, nullptr);
54}
55
56std::vector<std::string> splitAndLowercase(const StringPiece& str, char sep) {
57    return splitAndTransform(str, sep, ::tolower);
58}
59
60StringPiece16 trimWhitespace(const StringPiece16& str) {
61    if (str.size() == 0 || str.data() == nullptr) {
62        return str;
63    }
64
65    const char16_t* start = str.data();
66    const char16_t* end = str.data() + str.length();
67
68    while (start != end && util::isspace16(*start)) {
69        start++;
70    }
71
72    while (end != start && util::isspace16(*(end - 1))) {
73        end--;
74    }
75
76    return StringPiece16(start, end - start);
77}
78
79StringPiece16::const_iterator findNonAlphaNumericAndNotInSet(const StringPiece16& str,
80        const StringPiece16& allowedChars) {
81    const auto endIter = str.end();
82    for (auto iter = str.begin(); iter != endIter; ++iter) {
83        char16_t c = *iter;
84        if ((c >= u'a' && c <= u'z') ||
85                (c >= u'A' && c <= u'Z') ||
86                (c >= u'0' && c <= u'9')) {
87            continue;
88        }
89
90        bool match = false;
91        for (char16_t i : allowedChars) {
92            if (c == i) {
93                match = true;
94                break;
95            }
96        }
97
98        if (!match) {
99            return iter;
100        }
101    }
102    return endIter;
103}
104
105bool isJavaClassName(const StringPiece16& str) {
106    size_t pieces = 0;
107    for (const StringPiece16& piece : tokenize(str, u'.')) {
108        pieces++;
109        if (piece.empty()) {
110            return false;
111        }
112
113        // Can't have starting or trailing $ character.
114        if (piece.data()[0] == u'$' || piece.data()[piece.size() - 1] == u'$') {
115            return false;
116        }
117
118        if (findNonAlphaNumericAndNotInSet(piece, u"$_") != piece.end()) {
119            return false;
120        }
121    }
122    return pieces >= 2;
123}
124
125Maybe<std::u16string> getFullyQualifiedClassName(const StringPiece16& package,
126                                                 const StringPiece16& className) {
127    if (className.empty()) {
128        return {};
129    }
130
131    if (util::isJavaClassName(className)) {
132        return className.toString();
133    }
134
135    if (package.empty()) {
136        return {};
137    }
138
139    std::u16string result(package.data(), package.size());
140    if (className.data()[0] != u'.') {
141        result += u'.';
142    }
143    result.append(className.data(), className.size());
144    if (!isJavaClassName(result)) {
145        return {};
146    }
147    return result;
148}
149
150static Maybe<char16_t> parseUnicodeCodepoint(const char16_t** start, const char16_t* end) {
151    char16_t code = 0;
152    for (size_t i = 0; i < 4 && *start != end; i++, (*start)++) {
153        char16_t c = **start;
154        int a;
155        if (c >= '0' && c <= '9') {
156            a = c - '0';
157        } else if (c >= 'a' && c <= 'f') {
158            a = c - 'a' + 10;
159        } else if (c >= 'A' && c <= 'F') {
160            a = c - 'A' + 10;
161        } else {
162            return make_nothing<char16_t>();
163        }
164        code = (code << 4) | a;
165    }
166    return make_value(code);
167}
168
169StringBuilder& StringBuilder::append(const StringPiece16& str) {
170    if (!mError.empty()) {
171        return *this;
172    }
173
174    const char16_t* const end = str.end();
175    const char16_t* start = str.begin();
176    const char16_t* current = start;
177    while (current != end) {
178        if (*current == u'"') {
179            if (!mQuote && mTrailingSpace) {
180                // We found an opening quote, and we have
181                // trailing space, so we should append that
182                // space now.
183                if (mTrailingSpace) {
184                    // We had trailing whitespace, so
185                    // replace with a single space.
186                    if (!mStr.empty()) {
187                        mStr += u' ';
188                    }
189                    mTrailingSpace = false;
190                }
191            }
192            mQuote = !mQuote;
193            mStr.append(start, current - start);
194            start = current + 1;
195        } else if (*current == u'\'' && !mQuote) {
196            // This should be escaped.
197            mError = "unescaped apostrophe";
198            return *this;
199        } else if (*current == u'\\') {
200            // This is an escape sequence, convert to the real value.
201            if (!mQuote && mTrailingSpace) {
202                // We had trailing whitespace, so
203                // replace with a single space.
204                if (!mStr.empty()) {
205                    mStr += u' ';
206                }
207                mTrailingSpace = false;
208            }
209            mStr.append(start, current - start);
210            start = current + 1;
211
212            current++;
213            if (current != end) {
214                switch (*current) {
215                    case u't':
216                        mStr += u'\t';
217                        break;
218                    case u'n':
219                        mStr += u'\n';
220                        break;
221                    case u'#':
222                        mStr += u'#';
223                        break;
224                    case u'@':
225                        mStr += u'@';
226                        break;
227                    case u'?':
228                        mStr += u'?';
229                        break;
230                    case u'"':
231                        mStr += u'"';
232                        break;
233                    case u'\'':
234                        mStr += u'\'';
235                        break;
236                    case u'\\':
237                        mStr += u'\\';
238                        break;
239                    case u'u': {
240                        current++;
241                        Maybe<char16_t> c = parseUnicodeCodepoint(&current, end);
242                        if (!c) {
243                            mError = "invalid unicode escape sequence";
244                            return *this;
245                        }
246                        mStr += c.value();
247                        current -= 1;
248                        break;
249                    }
250
251                    default:
252                        // Ignore.
253                        break;
254                }
255                start = current + 1;
256            }
257        } else if (!mQuote) {
258            // This is not quoted text, so look for whitespace.
259            if (isspace16(*current)) {
260                // We found whitespace, see if we have seen some
261                // before.
262                if (!mTrailingSpace) {
263                    // We didn't see a previous adjacent space,
264                    // so mark that we did.
265                    mTrailingSpace = true;
266                    mStr.append(start, current - start);
267                }
268
269                // Keep skipping whitespace.
270                start = current + 1;
271            } else if (mTrailingSpace) {
272                // We saw trailing space before, so replace all
273                // that trailing space with one space.
274                if (!mStr.empty()) {
275                    mStr += u' ';
276                }
277                mTrailingSpace = false;
278            }
279        }
280        current++;
281    }
282    mStr.append(start, end - start);
283    return *this;
284}
285
286std::u16string utf8ToUtf16(const StringPiece& utf8) {
287    ssize_t utf16Length = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(utf8.data()),
288            utf8.length());
289    if (utf16Length <= 0) {
290        return {};
291    }
292
293    std::u16string utf16;
294    utf16.resize(utf16Length);
295    utf8_to_utf16(reinterpret_cast<const uint8_t*>(utf8.data()), utf8.length(), &*utf16.begin());
296    return utf16;
297}
298
299std::string utf16ToUtf8(const StringPiece16& utf16) {
300    ssize_t utf8Length = utf16_to_utf8_length(utf16.data(), utf16.length());
301    if (utf8Length <= 0) {
302        return {};
303    }
304
305    std::string utf8;
306    utf8.resize(utf8Length);
307    utf16_to_utf8(utf16.data(), utf16.length(), &*utf8.begin());
308    return utf8;
309}
310
311bool writeAll(std::ostream& out, const BigBuffer& buffer) {
312    for (const auto& b : buffer) {
313        if (!out.write(reinterpret_cast<const char*>(b.buffer.get()), b.size)) {
314            return false;
315        }
316    }
317    return true;
318}
319
320std::unique_ptr<uint8_t[]> copy(const BigBuffer& buffer) {
321    std::unique_ptr<uint8_t[]> data = std::unique_ptr<uint8_t[]>(new uint8_t[buffer.size()]);
322    uint8_t* p = data.get();
323    for (const auto& block : buffer) {
324        memcpy(p, block.buffer.get(), block.size);
325        p += block.size;
326    }
327    return data;
328}
329
330Maybe<std::u16string> extractPackageFromNamespace(const std::u16string& namespaceUri) {
331    if (stringStartsWith<char16_t>(namespaceUri, kSchemaPrefix)) {
332        StringPiece16 schemaPrefix = kSchemaPrefix;
333        StringPiece16 package = namespaceUri;
334        return package.substr(schemaPrefix.size(), package.size() - schemaPrefix.size())
335                .toString();
336    } else if (namespaceUri == kSchemaAuto) {
337        return std::u16string();
338    }
339    return {};
340}
341
342} // namespace util
343} // namespace aapt
344