Util.cpp revision 3b4cd94034ff3e5567a2ba6da35d640ff61db4b9
1/* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "util/BigBuffer.h" 18#include "util/Maybe.h" 19#include "util/StringPiece.h" 20#include "util/Util.h" 21 22#include <algorithm> 23#include <ostream> 24#include <string> 25#include <utils/Unicode.h> 26#include <vector> 27 28namespace aapt { 29namespace util { 30 31constexpr const char16_t* kSchemaAuto = u"http://schemas.android.com/apk/res-auto"; 32constexpr const char16_t* kSchemaPrefix = u"http://schemas.android.com/apk/res/"; 33 34static std::vector<std::string> splitAndTransform(const StringPiece& str, char sep, 35 const std::function<char(char)>& f) { 36 std::vector<std::string> parts; 37 const StringPiece::const_iterator end = std::end(str); 38 StringPiece::const_iterator start = std::begin(str); 39 StringPiece::const_iterator current; 40 do { 41 current = std::find(start, end, sep); 42 parts.emplace_back(str.substr(start, current).toString()); 43 if (f) { 44 std::string& part = parts.back(); 45 std::transform(part.begin(), part.end(), part.begin(), f); 46 } 47 start = current + 1; 48 } while (current != end); 49 return parts; 50} 51 52std::vector<std::string> split(const StringPiece& str, char sep) { 53 return splitAndTransform(str, sep, nullptr); 54} 55 56std::vector<std::string> splitAndLowercase(const StringPiece& str, char sep) { 57 return splitAndTransform(str, sep, ::tolower); 58} 59 60StringPiece16 trimWhitespace(const StringPiece16& str) { 61 if (str.size() == 0 || str.data() == nullptr) { 62 return str; 63 } 64 65 const char16_t* start = str.data(); 66 const char16_t* end = str.data() + str.length(); 67 68 while (start != end && util::isspace16(*start)) { 69 start++; 70 } 71 72 while (end != start && util::isspace16(*(end - 1))) { 73 end--; 74 } 75 76 return StringPiece16(start, end - start); 77} 78 79StringPiece trimWhitespace(const StringPiece& str) { 80 if (str.size() == 0 || str.data() == nullptr) { 81 return str; 82 } 83 84 const char* start = str.data(); 85 const char* end = str.data() + str.length(); 86 87 while (start != end && isspace(*start)) { 88 start++; 89 } 90 91 while (end != start && isspace(*(end - 1))) { 92 end--; 93 } 94 95 return StringPiece(start, end - start); 96} 97 98StringPiece16::const_iterator findNonAlphaNumericAndNotInSet(const StringPiece16& str, 99 const StringPiece16& allowedChars) { 100 const auto endIter = str.end(); 101 for (auto iter = str.begin(); iter != endIter; ++iter) { 102 char16_t c = *iter; 103 if ((c >= u'a' && c <= u'z') || 104 (c >= u'A' && c <= u'Z') || 105 (c >= u'0' && c <= u'9')) { 106 continue; 107 } 108 109 bool match = false; 110 for (char16_t i : allowedChars) { 111 if (c == i) { 112 match = true; 113 break; 114 } 115 } 116 117 if (!match) { 118 return iter; 119 } 120 } 121 return endIter; 122} 123 124bool isJavaClassName(const StringPiece16& str) { 125 size_t pieces = 0; 126 for (const StringPiece16& piece : tokenize(str, u'.')) { 127 pieces++; 128 if (piece.empty()) { 129 return false; 130 } 131 132 // Can't have starting or trailing $ character. 133 if (piece.data()[0] == u'$' || piece.data()[piece.size() - 1] == u'$') { 134 return false; 135 } 136 137 if (findNonAlphaNumericAndNotInSet(piece, u"$_") != piece.end()) { 138 return false; 139 } 140 } 141 return pieces >= 2; 142} 143 144bool isJavaPackageName(const StringPiece16& str) { 145 if (str.empty()) { 146 return false; 147 } 148 149 size_t pieces = 0; 150 for (const StringPiece16& piece : tokenize(str, u'.')) { 151 pieces++; 152 if (piece.empty()) { 153 return false; 154 } 155 156 if (piece.data()[0] == u'_' || piece.data()[piece.size() - 1] == u'_') { 157 return false; 158 } 159 160 if (findNonAlphaNumericAndNotInSet(piece, u"_") != piece.end()) { 161 return false; 162 } 163 } 164 return pieces >= 1; 165} 166 167Maybe<std::u16string> getFullyQualifiedClassName(const StringPiece16& package, 168 const StringPiece16& className) { 169 if (className.empty()) { 170 return {}; 171 } 172 173 if (util::isJavaClassName(className)) { 174 return className.toString(); 175 } 176 177 if (package.empty()) { 178 return {}; 179 } 180 181 std::u16string result(package.data(), package.size()); 182 if (className.data()[0] != u'.') { 183 result += u'.'; 184 } 185 result.append(className.data(), className.size()); 186 if (!isJavaClassName(result)) { 187 return {}; 188 } 189 return result; 190} 191 192static Maybe<char16_t> parseUnicodeCodepoint(const char16_t** start, const char16_t* end) { 193 char16_t code = 0; 194 for (size_t i = 0; i < 4 && *start != end; i++, (*start)++) { 195 char16_t c = **start; 196 int a; 197 if (c >= '0' && c <= '9') { 198 a = c - '0'; 199 } else if (c >= 'a' && c <= 'f') { 200 a = c - 'a' + 10; 201 } else if (c >= 'A' && c <= 'F') { 202 a = c - 'A' + 10; 203 } else { 204 return make_nothing<char16_t>(); 205 } 206 code = (code << 4) | a; 207 } 208 return make_value(code); 209} 210 211StringBuilder& StringBuilder::append(const StringPiece16& str) { 212 if (!mError.empty()) { 213 return *this; 214 } 215 216 const char16_t* const end = str.end(); 217 const char16_t* start = str.begin(); 218 const char16_t* current = start; 219 while (current != end) { 220 if (mLastCharWasEscape) { 221 switch (*current) { 222 case u't': 223 mStr += u'\t'; 224 break; 225 case u'n': 226 mStr += u'\n'; 227 break; 228 case u'#': 229 mStr += u'#'; 230 break; 231 case u'@': 232 mStr += u'@'; 233 break; 234 case u'?': 235 mStr += u'?'; 236 break; 237 case u'"': 238 mStr += u'"'; 239 break; 240 case u'\'': 241 mStr += u'\''; 242 break; 243 case u'\\': 244 mStr += u'\\'; 245 break; 246 case u'u': { 247 current++; 248 Maybe<char16_t> c = parseUnicodeCodepoint(¤t, end); 249 if (!c) { 250 mError = "invalid unicode escape sequence"; 251 return *this; 252 } 253 mStr += c.value(); 254 current -= 1; 255 break; 256 } 257 258 default: 259 // Ignore. 260 break; 261 } 262 mLastCharWasEscape = false; 263 start = current + 1; 264 } else if (*current == u'"') { 265 if (!mQuote && mTrailingSpace) { 266 // We found an opening quote, and we have 267 // trailing space, so we should append that 268 // space now. 269 if (mTrailingSpace) { 270 // We had trailing whitespace, so 271 // replace with a single space. 272 if (!mStr.empty()) { 273 mStr += u' '; 274 } 275 mTrailingSpace = false; 276 } 277 } 278 mQuote = !mQuote; 279 mStr.append(start, current - start); 280 start = current + 1; 281 } else if (*current == u'\'' && !mQuote) { 282 // This should be escaped. 283 mError = "unescaped apostrophe"; 284 return *this; 285 } else if (*current == u'\\') { 286 // This is an escape sequence, convert to the real value. 287 if (!mQuote && mTrailingSpace) { 288 // We had trailing whitespace, so 289 // replace with a single space. 290 if (!mStr.empty()) { 291 mStr += u' '; 292 } 293 mTrailingSpace = false; 294 } 295 mStr.append(start, current - start); 296 start = current + 1; 297 mLastCharWasEscape = true; 298 } else if (!mQuote) { 299 // This is not quoted text, so look for whitespace. 300 if (isspace16(*current)) { 301 // We found whitespace, see if we have seen some 302 // before. 303 if (!mTrailingSpace) { 304 // We didn't see a previous adjacent space, 305 // so mark that we did. 306 mTrailingSpace = true; 307 mStr.append(start, current - start); 308 } 309 310 // Keep skipping whitespace. 311 start = current + 1; 312 } else if (mTrailingSpace) { 313 // We saw trailing space before, so replace all 314 // that trailing space with one space. 315 if (!mStr.empty()) { 316 mStr += u' '; 317 } 318 mTrailingSpace = false; 319 } 320 } 321 current++; 322 } 323 mStr.append(start, end - start); 324 return *this; 325} 326 327std::u16string utf8ToUtf16(const StringPiece& utf8) { 328 ssize_t utf16Length = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(utf8.data()), 329 utf8.length()); 330 if (utf16Length <= 0) { 331 return {}; 332 } 333 334 std::u16string utf16; 335 utf16.resize(utf16Length); 336 utf8_to_utf16(reinterpret_cast<const uint8_t*>(utf8.data()), utf8.length(), &*utf16.begin()); 337 return utf16; 338} 339 340std::string utf16ToUtf8(const StringPiece16& utf16) { 341 ssize_t utf8Length = utf16_to_utf8_length(utf16.data(), utf16.length()); 342 if (utf8Length <= 0) { 343 return {}; 344 } 345 346 std::string utf8; 347 utf8.resize(utf8Length); 348 utf16_to_utf8(utf16.data(), utf16.length(), &*utf8.begin()); 349 return utf8; 350} 351 352bool writeAll(std::ostream& out, const BigBuffer& buffer) { 353 for (const auto& b : buffer) { 354 if (!out.write(reinterpret_cast<const char*>(b.buffer.get()), b.size)) { 355 return false; 356 } 357 } 358 return true; 359} 360 361std::unique_ptr<uint8_t[]> copy(const BigBuffer& buffer) { 362 std::unique_ptr<uint8_t[]> data = std::unique_ptr<uint8_t[]>(new uint8_t[buffer.size()]); 363 uint8_t* p = data.get(); 364 for (const auto& block : buffer) { 365 memcpy(p, block.buffer.get(), block.size); 366 p += block.size; 367 } 368 return data; 369} 370 371Maybe<std::u16string> extractPackageFromNamespace(const std::u16string& namespaceUri) { 372 if (stringStartsWith<char16_t>(namespaceUri, kSchemaPrefix)) { 373 StringPiece16 schemaPrefix = kSchemaPrefix; 374 StringPiece16 package = namespaceUri; 375 return package.substr(schemaPrefix.size(), package.size() - schemaPrefix.size()) 376 .toString(); 377 } else if (namespaceUri == kSchemaAuto) { 378 return std::u16string(); 379 } 380 return {}; 381} 382 383bool extractResFilePathParts(const StringPiece16& path, StringPiece16* outPrefix, 384 StringPiece16* outEntry, StringPiece16* outSuffix) { 385 if (!stringStartsWith<char16_t>(path, u"res/")) { 386 return false; 387 } 388 389 StringPiece16::const_iterator lastOccurence = path.end(); 390 for (auto iter = path.begin() + StringPiece16(u"res/").size(); iter != path.end(); ++iter) { 391 if (*iter == u'/') { 392 lastOccurence = iter; 393 } 394 } 395 396 if (lastOccurence == path.end()) { 397 return false; 398 } 399 400 auto iter = std::find(lastOccurence, path.end(), u'.'); 401 *outSuffix = StringPiece16(iter, path.end() - iter); 402 *outEntry = StringPiece16(lastOccurence + 1, iter - lastOccurence - 1); 403 *outPrefix = StringPiece16(path.begin(), lastOccurence - path.begin() + 1); 404 return true; 405} 406 407} // namespace util 408} // namespace aapt 409