Util.cpp revision 3b4cd94034ff3e5567a2ba6da35d640ff61db4b9
1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "util/BigBuffer.h"
18#include "util/Maybe.h"
19#include "util/StringPiece.h"
20#include "util/Util.h"
21
22#include <algorithm>
23#include <ostream>
24#include <string>
25#include <utils/Unicode.h>
26#include <vector>
27
28namespace aapt {
29namespace util {
30
// Namespace URI that extractPackageFromNamespace() maps to an empty package name.
constexpr const char16_t* kSchemaAuto =
        u"http://schemas.android.com/apk/res-auto";

// Namespace URI prefix; extractPackageFromNamespace() returns the text that follows it.
constexpr const char16_t* kSchemaPrefix =
        u"http://schemas.android.com/apk/res/";
33
34static std::vector<std::string> splitAndTransform(const StringPiece& str, char sep,
35        const std::function<char(char)>& f) {
36    std::vector<std::string> parts;
37    const StringPiece::const_iterator end = std::end(str);
38    StringPiece::const_iterator start = std::begin(str);
39    StringPiece::const_iterator current;
40    do {
41        current = std::find(start, end, sep);
42        parts.emplace_back(str.substr(start, current).toString());
43        if (f) {
44            std::string& part = parts.back();
45            std::transform(part.begin(), part.end(), part.begin(), f);
46        }
47        start = current + 1;
48    } while (current != end);
49    return parts;
50}
51
52std::vector<std::string> split(const StringPiece& str, char sep) {
53    return splitAndTransform(str, sep, nullptr);
54}
55
56std::vector<std::string> splitAndLowercase(const StringPiece& str, char sep) {
57    return splitAndTransform(str, sep, ::tolower);
58}
59
60StringPiece16 trimWhitespace(const StringPiece16& str) {
61    if (str.size() == 0 || str.data() == nullptr) {
62        return str;
63    }
64
65    const char16_t* start = str.data();
66    const char16_t* end = str.data() + str.length();
67
68    while (start != end && util::isspace16(*start)) {
69        start++;
70    }
71
72    while (end != start && util::isspace16(*(end - 1))) {
73        end--;
74    }
75
76    return StringPiece16(start, end - start);
77}
78
79StringPiece trimWhitespace(const StringPiece& str) {
80    if (str.size() == 0 || str.data() == nullptr) {
81        return str;
82    }
83
84    const char* start = str.data();
85    const char* end = str.data() + str.length();
86
87    while (start != end && isspace(*start)) {
88        start++;
89    }
90
91    while (end != start && isspace(*(end - 1))) {
92        end--;
93    }
94
95    return StringPiece(start, end - start);
96}
97
98StringPiece16::const_iterator findNonAlphaNumericAndNotInSet(const StringPiece16& str,
99        const StringPiece16& allowedChars) {
100    const auto endIter = str.end();
101    for (auto iter = str.begin(); iter != endIter; ++iter) {
102        char16_t c = *iter;
103        if ((c >= u'a' && c <= u'z') ||
104                (c >= u'A' && c <= u'Z') ||
105                (c >= u'0' && c <= u'9')) {
106            continue;
107        }
108
109        bool match = false;
110        for (char16_t i : allowedChars) {
111            if (c == i) {
112                match = true;
113                break;
114            }
115        }
116
117        if (!match) {
118            return iter;
119        }
120    }
121    return endIter;
122}
123
124bool isJavaClassName(const StringPiece16& str) {
125    size_t pieces = 0;
126    for (const StringPiece16& piece : tokenize(str, u'.')) {
127        pieces++;
128        if (piece.empty()) {
129            return false;
130        }
131
132        // Can't have starting or trailing $ character.
133        if (piece.data()[0] == u'$' || piece.data()[piece.size() - 1] == u'$') {
134            return false;
135        }
136
137        if (findNonAlphaNumericAndNotInSet(piece, u"$_") != piece.end()) {
138            return false;
139        }
140    }
141    return pieces >= 2;
142}
143
144bool isJavaPackageName(const StringPiece16& str) {
145    if (str.empty()) {
146        return false;
147    }
148
149    size_t pieces = 0;
150    for (const StringPiece16& piece : tokenize(str, u'.')) {
151        pieces++;
152        if (piece.empty()) {
153            return false;
154        }
155
156        if (piece.data()[0] == u'_' || piece.data()[piece.size() - 1] == u'_') {
157            return false;
158        }
159
160        if (findNonAlphaNumericAndNotInSet(piece, u"_") != piece.end()) {
161            return false;
162        }
163    }
164    return pieces >= 1;
165}
166
167Maybe<std::u16string> getFullyQualifiedClassName(const StringPiece16& package,
168                                                 const StringPiece16& className) {
169    if (className.empty()) {
170        return {};
171    }
172
173    if (util::isJavaClassName(className)) {
174        return className.toString();
175    }
176
177    if (package.empty()) {
178        return {};
179    }
180
181    std::u16string result(package.data(), package.size());
182    if (className.data()[0] != u'.') {
183        result += u'.';
184    }
185    result.append(className.data(), className.size());
186    if (!isJavaClassName(result)) {
187        return {};
188    }
189    return result;
190}
191
192static Maybe<char16_t> parseUnicodeCodepoint(const char16_t** start, const char16_t* end) {
193    char16_t code = 0;
194    for (size_t i = 0; i < 4 && *start != end; i++, (*start)++) {
195        char16_t c = **start;
196        int a;
197        if (c >= '0' && c <= '9') {
198            a = c - '0';
199        } else if (c >= 'a' && c <= 'f') {
200            a = c - 'a' + 10;
201        } else if (c >= 'A' && c <= 'F') {
202            a = c - 'A' + 10;
203        } else {
204            return make_nothing<char16_t>();
205        }
206        code = (code << 4) | a;
207    }
208    return make_value(code);
209}
210
211StringBuilder& StringBuilder::append(const StringPiece16& str) {
212    if (!mError.empty()) {
213        return *this;
214    }
215
216    const char16_t* const end = str.end();
217    const char16_t* start = str.begin();
218    const char16_t* current = start;
219    while (current != end) {
220        if (mLastCharWasEscape) {
221            switch (*current) {
222                case u't':
223                    mStr += u'\t';
224                    break;
225                case u'n':
226                    mStr += u'\n';
227                    break;
228                case u'#':
229                    mStr += u'#';
230                    break;
231                case u'@':
232                    mStr += u'@';
233                    break;
234                case u'?':
235                    mStr += u'?';
236                    break;
237                case u'"':
238                    mStr += u'"';
239                    break;
240                case u'\'':
241                    mStr += u'\'';
242                    break;
243                case u'\\':
244                    mStr += u'\\';
245                    break;
246                case u'u': {
247                    current++;
248                    Maybe<char16_t> c = parseUnicodeCodepoint(&current, end);
249                    if (!c) {
250                        mError = "invalid unicode escape sequence";
251                        return *this;
252                    }
253                    mStr += c.value();
254                    current -= 1;
255                    break;
256                }
257
258                default:
259                    // Ignore.
260                    break;
261            }
262            mLastCharWasEscape = false;
263            start = current + 1;
264        } else if (*current == u'"') {
265            if (!mQuote && mTrailingSpace) {
266                // We found an opening quote, and we have
267                // trailing space, so we should append that
268                // space now.
269                if (mTrailingSpace) {
270                    // We had trailing whitespace, so
271                    // replace with a single space.
272                    if (!mStr.empty()) {
273                        mStr += u' ';
274                    }
275                    mTrailingSpace = false;
276                }
277            }
278            mQuote = !mQuote;
279            mStr.append(start, current - start);
280            start = current + 1;
281        } else if (*current == u'\'' && !mQuote) {
282            // This should be escaped.
283            mError = "unescaped apostrophe";
284            return *this;
285        } else if (*current == u'\\') {
286            // This is an escape sequence, convert to the real value.
287            if (!mQuote && mTrailingSpace) {
288                // We had trailing whitespace, so
289                // replace with a single space.
290                if (!mStr.empty()) {
291                    mStr += u' ';
292                }
293                mTrailingSpace = false;
294            }
295            mStr.append(start, current - start);
296            start = current + 1;
297            mLastCharWasEscape = true;
298        } else if (!mQuote) {
299            // This is not quoted text, so look for whitespace.
300            if (isspace16(*current)) {
301                // We found whitespace, see if we have seen some
302                // before.
303                if (!mTrailingSpace) {
304                    // We didn't see a previous adjacent space,
305                    // so mark that we did.
306                    mTrailingSpace = true;
307                    mStr.append(start, current - start);
308                }
309
310                // Keep skipping whitespace.
311                start = current + 1;
312            } else if (mTrailingSpace) {
313                // We saw trailing space before, so replace all
314                // that trailing space with one space.
315                if (!mStr.empty()) {
316                    mStr += u' ';
317                }
318                mTrailingSpace = false;
319            }
320        }
321        current++;
322    }
323    mStr.append(start, end - start);
324    return *this;
325}
326
327std::u16string utf8ToUtf16(const StringPiece& utf8) {
328    ssize_t utf16Length = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(utf8.data()),
329            utf8.length());
330    if (utf16Length <= 0) {
331        return {};
332    }
333
334    std::u16string utf16;
335    utf16.resize(utf16Length);
336    utf8_to_utf16(reinterpret_cast<const uint8_t*>(utf8.data()), utf8.length(), &*utf16.begin());
337    return utf16;
338}
339
340std::string utf16ToUtf8(const StringPiece16& utf16) {
341    ssize_t utf8Length = utf16_to_utf8_length(utf16.data(), utf16.length());
342    if (utf8Length <= 0) {
343        return {};
344    }
345
346    std::string utf8;
347    utf8.resize(utf8Length);
348    utf16_to_utf8(utf16.data(), utf16.length(), &*utf8.begin());
349    return utf8;
350}
351
352bool writeAll(std::ostream& out, const BigBuffer& buffer) {
353    for (const auto& b : buffer) {
354        if (!out.write(reinterpret_cast<const char*>(b.buffer.get()), b.size)) {
355            return false;
356        }
357    }
358    return true;
359}
360
361std::unique_ptr<uint8_t[]> copy(const BigBuffer& buffer) {
362    std::unique_ptr<uint8_t[]> data = std::unique_ptr<uint8_t[]>(new uint8_t[buffer.size()]);
363    uint8_t* p = data.get();
364    for (const auto& block : buffer) {
365        memcpy(p, block.buffer.get(), block.size);
366        p += block.size;
367    }
368    return data;
369}
370
371Maybe<std::u16string> extractPackageFromNamespace(const std::u16string& namespaceUri) {
372    if (stringStartsWith<char16_t>(namespaceUri, kSchemaPrefix)) {
373        StringPiece16 schemaPrefix = kSchemaPrefix;
374        StringPiece16 package = namespaceUri;
375        return package.substr(schemaPrefix.size(), package.size() - schemaPrefix.size())
376                .toString();
377    } else if (namespaceUri == kSchemaAuto) {
378        return std::u16string();
379    }
380    return {};
381}
382
383bool extractResFilePathParts(const StringPiece16& path, StringPiece16* outPrefix,
384                             StringPiece16* outEntry, StringPiece16* outSuffix) {
385    if (!stringStartsWith<char16_t>(path, u"res/")) {
386        return false;
387    }
388
389    StringPiece16::const_iterator lastOccurence = path.end();
390    for (auto iter = path.begin() + StringPiece16(u"res/").size(); iter != path.end(); ++iter) {
391        if (*iter == u'/') {
392            lastOccurence = iter;
393        }
394    }
395
396    if (lastOccurence == path.end()) {
397        return false;
398    }
399
400    auto iter = std::find(lastOccurence, path.end(), u'.');
401    *outSuffix = StringPiece16(iter, path.end() - iter);
402    *outEntry = StringPiece16(lastOccurence + 1, iter - lastOccurence - 1);
403    *outPrefix = StringPiece16(path.begin(), lastOccurence - path.begin() + 1);
404    return true;
405}
406
407} // namespace util
408} // namespace aapt
409