1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "util/Util.h"
18
19#include <utils/Unicode.h>
20#include <algorithm>
21#include <ostream>
22#include <string>
23#include <vector>
24
25#include "androidfw/StringPiece.h"
26
27#include "util/BigBuffer.h"
28#include "util/Maybe.h"
29
30using android::StringPiece;
31using android::StringPiece16;
32
33namespace aapt {
34namespace util {
35
36static std::vector<std::string> SplitAndTransform(
37    const StringPiece& str, char sep, const std::function<char(char)>& f) {
38  std::vector<std::string> parts;
39  const StringPiece::const_iterator end = std::end(str);
40  StringPiece::const_iterator start = std::begin(str);
41  StringPiece::const_iterator current;
42  do {
43    current = std::find(start, end, sep);
44    parts.emplace_back(str.substr(start, current).to_string());
45    if (f) {
46      std::string& part = parts.back();
47      std::transform(part.begin(), part.end(), part.begin(), f);
48    }
49    start = current + 1;
50  } while (current != end);
51  return parts;
52}
53
54std::vector<std::string> Split(const StringPiece& str, char sep) {
55  return SplitAndTransform(str, sep, nullptr);
56}
57
58std::vector<std::string> SplitAndLowercase(const StringPiece& str, char sep) {
59  return SplitAndTransform(str, sep, ::tolower);
60}
61
62bool StartsWith(const StringPiece& str, const StringPiece& prefix) {
63  if (str.size() < prefix.size()) {
64    return false;
65  }
66  return str.substr(0, prefix.size()) == prefix;
67}
68
69bool EndsWith(const StringPiece& str, const StringPiece& suffix) {
70  if (str.size() < suffix.size()) {
71    return false;
72  }
73  return str.substr(str.size() - suffix.size(), suffix.size()) == suffix;
74}
75
76StringPiece TrimWhitespace(const StringPiece& str) {
77  if (str.size() == 0 || str.data() == nullptr) {
78    return str;
79  }
80
81  const char* start = str.data();
82  const char* end = str.data() + str.length();
83
84  while (start != end && isspace(*start)) {
85    start++;
86  }
87
88  while (end != start && isspace(*(end - 1))) {
89    end--;
90  }
91
92  return StringPiece(start, end - start);
93}
94
95StringPiece::const_iterator FindNonAlphaNumericAndNotInSet(
96    const StringPiece& str, const StringPiece& allowed_chars) {
97  const auto end_iter = str.end();
98  for (auto iter = str.begin(); iter != end_iter; ++iter) {
99    char c = *iter;
100    if ((c >= u'a' && c <= u'z') || (c >= u'A' && c <= u'Z') ||
101        (c >= u'0' && c <= u'9')) {
102      continue;
103    }
104
105    bool match = false;
106    for (char i : allowed_chars) {
107      if (c == i) {
108        match = true;
109        break;
110      }
111    }
112
113    if (!match) {
114      return iter;
115    }
116  }
117  return end_iter;
118}
119
120bool IsJavaClassName(const StringPiece& str) {
121  size_t pieces = 0;
122  for (const StringPiece& piece : Tokenize(str, '.')) {
123    pieces++;
124    if (piece.empty()) {
125      return false;
126    }
127
128    // Can't have starting or trailing $ character.
129    if (piece.data()[0] == '$' || piece.data()[piece.size() - 1] == '$') {
130      return false;
131    }
132
133    if (FindNonAlphaNumericAndNotInSet(piece, "$_") != piece.end()) {
134      return false;
135    }
136  }
137  return pieces >= 2;
138}
139
140bool IsJavaPackageName(const StringPiece& str) {
141  if (str.empty()) {
142    return false;
143  }
144
145  size_t pieces = 0;
146  for (const StringPiece& piece : Tokenize(str, '.')) {
147    pieces++;
148    if (piece.empty()) {
149      return false;
150    }
151
152    if (piece.data()[0] == '_' || piece.data()[piece.size() - 1] == '_') {
153      return false;
154    }
155
156    if (FindNonAlphaNumericAndNotInSet(piece, "_") != piece.end()) {
157      return false;
158    }
159  }
160  return pieces >= 1;
161}
162
163Maybe<std::string> GetFullyQualifiedClassName(const StringPiece& package,
164                                              const StringPiece& classname) {
165  if (classname.empty()) {
166    return {};
167  }
168
169  if (util::IsJavaClassName(classname)) {
170    return classname.to_string();
171  }
172
173  if (package.empty()) {
174    return {};
175  }
176
177  std::string result(package.data(), package.size());
178  if (classname.data()[0] != '.') {
179    result += '.';
180  }
181
182  result.append(classname.data(), classname.size());
183  if (!IsJavaClassName(result)) {
184    return {};
185  }
186  return result;
187}
188
// Returns the number of consecutive ASCII digits ('0'..'9') at the beginning of
// the range [start, end).
static size_t ConsumeDigits(const char* start, const char* end) {
  const char* const first_non_digit = std::find_if(
      start, end, [](char ch) { return !(ch >= '0' && ch <= '9'); });
  return static_cast<size_t>(first_non_digit - start);
}
195
196bool VerifyJavaStringFormat(const StringPiece& str) {
197  const char* c = str.begin();
198  const char* const end = str.end();
199
200  size_t arg_count = 0;
201  bool nonpositional = false;
202  while (c != end) {
203    if (*c == '%' && c + 1 < end) {
204      c++;
205
206      if (*c == '%') {
207        c++;
208        continue;
209      }
210
211      arg_count++;
212
213      size_t num_digits = ConsumeDigits(c, end);
214      if (num_digits > 0) {
215        c += num_digits;
216        if (c != end && *c != '$') {
217          // The digits were a size, but not a positional argument.
218          nonpositional = true;
219        }
220      } else if (*c == '<') {
221        // Reusing last argument, bad idea since positions can be moved around
222        // during translation.
223        nonpositional = true;
224
225        c++;
226
227        // Optionally we can have a $ after
228        if (c != end && *c == '$') {
229          c++;
230        }
231      } else {
232        nonpositional = true;
233      }
234
235      // Ignore size, width, flags, etc.
236      while (c != end && (*c == '-' || *c == '#' || *c == '+' || *c == ' ' ||
237                          *c == ',' || *c == '(' || (*c >= '0' && *c <= '9'))) {
238        c++;
239      }
240
241      /*
242       * This is a shortcut to detect strings that are going to Time.format()
243       * instead of String.format()
244       *
245       * Comparison of String.format() and Time.format() args:
246       *
247       * String: ABC E GH  ST X abcdefgh  nost x
248       *   Time:    DEFGHKMS W Za  d   hkm  s w yz
249       *
250       * Therefore we know it's definitely Time if we have:
251       *     DFKMWZkmwyz
252       */
253      if (c != end) {
254        switch (*c) {
255          case 'D':
256          case 'F':
257          case 'K':
258          case 'M':
259          case 'W':
260          case 'Z':
261          case 'k':
262          case 'm':
263          case 'w':
264          case 'y':
265          case 'z':
266            return true;
267        }
268      }
269    }
270
271    if (c != end) {
272      c++;
273    }
274  }
275
276  if (arg_count > 1 && nonpositional) {
277    // Multiple arguments were specified, but some or all were non positional.
278    // Translated
279    // strings may rearrange the order of the arguments, which will break the
280    // string.
281    return false;
282  }
283  return true;
284}
285
286static Maybe<std::string> ParseUnicodeCodepoint(const char** start,
287                                                const char* end) {
288  char32_t code = 0;
289  for (size_t i = 0; i < 4 && *start != end; i++, (*start)++) {
290    char c = **start;
291    char32_t a;
292    if (c >= '0' && c <= '9') {
293      a = c - '0';
294    } else if (c >= 'a' && c <= 'f') {
295      a = c - 'a' + 10;
296    } else if (c >= 'A' && c <= 'F') {
297      a = c - 'A' + 10;
298    } else {
299      return {};
300    }
301    code = (code << 4) | a;
302  }
303
304  ssize_t len = utf32_to_utf8_length(&code, 1);
305  if (len < 0) {
306    return {};
307  }
308
309  std::string result_utf8;
310  result_utf8.resize(len);
311  utf32_to_utf8(&code, 1, &*result_utf8.begin(), len + 1);
312  return result_utf8;
313}
314
315StringBuilder& StringBuilder::Append(const StringPiece& str) {
316  if (!error_.empty()) {
317    return *this;
318  }
319
320  // Where the new data will be appended to.
321  size_t new_data_index = str_.size();
322
323  const char* const end = str.end();
324  const char* start = str.begin();
325  const char* current = start;
326  while (current != end) {
327    if (last_char_was_escape_) {
328      switch (*current) {
329        case 't':
330          str_ += '\t';
331          break;
332        case 'n':
333          str_ += '\n';
334          break;
335        case '#':
336          str_ += '#';
337          break;
338        case '@':
339          str_ += '@';
340          break;
341        case '?':
342          str_ += '?';
343          break;
344        case '"':
345          str_ += '"';
346          break;
347        case '\'':
348          str_ += '\'';
349          break;
350        case '\\':
351          str_ += '\\';
352          break;
353        case 'u': {
354          current++;
355          Maybe<std::string> c = ParseUnicodeCodepoint(&current, end);
356          if (!c) {
357            error_ = "invalid unicode escape sequence";
358            return *this;
359          }
360          str_ += c.value();
361          current -= 1;
362          break;
363        }
364
365        default:
366          // Ignore.
367          break;
368      }
369      last_char_was_escape_ = false;
370      start = current + 1;
371    } else if (*current == '"') {
372      if (!quote_ && trailing_space_) {
373        // We found an opening quote, and we have
374        // trailing space, so we should append that
375        // space now.
376        if (trailing_space_) {
377          // We had trailing whitespace, so
378          // replace with a single space.
379          if (!str_.empty()) {
380            str_ += ' ';
381          }
382          trailing_space_ = false;
383        }
384      }
385      quote_ = !quote_;
386      str_.append(start, current - start);
387      start = current + 1;
388    } else if (*current == '\'' && !quote_) {
389      // This should be escaped.
390      error_ = "unescaped apostrophe";
391      return *this;
392    } else if (*current == '\\') {
393      // This is an escape sequence, convert to the real value.
394      if (!quote_ && trailing_space_) {
395        // We had trailing whitespace, so
396        // replace with a single space.
397        if (!str_.empty()) {
398          str_ += ' ';
399        }
400        trailing_space_ = false;
401      }
402      str_.append(start, current - start);
403      start = current + 1;
404      last_char_was_escape_ = true;
405    } else if (!quote_) {
406      // This is not quoted text, so look for whitespace.
407      if (isspace(*current)) {
408        // We found whitespace, see if we have seen some
409        // before.
410        if (!trailing_space_) {
411          // We didn't see a previous adjacent space,
412          // so mark that we did.
413          trailing_space_ = true;
414          str_.append(start, current - start);
415        }
416
417        // Keep skipping whitespace.
418        start = current + 1;
419      } else if (trailing_space_) {
420        // We saw trailing space before, so replace all
421        // that trailing space with one space.
422        if (!str_.empty()) {
423          str_ += ' ';
424        }
425        trailing_space_ = false;
426      }
427    }
428    current++;
429  }
430  str_.append(start, end - start);
431
432  // Accumulate the added string's UTF-16 length.
433  ssize_t len = utf8_to_utf16_length(
434      reinterpret_cast<const uint8_t*>(str_.data()) + new_data_index,
435      str_.size() - new_data_index);
436  if (len < 0) {
437    error_ = "invalid unicode code point";
438    return *this;
439  }
440  utf16_len_ += len;
441  return *this;
442}
443
444std::u16string Utf8ToUtf16(const StringPiece& utf8) {
445  ssize_t utf16_length = utf8_to_utf16_length(
446      reinterpret_cast<const uint8_t*>(utf8.data()), utf8.length());
447  if (utf16_length <= 0) {
448    return {};
449  }
450
451  std::u16string utf16;
452  utf16.resize(utf16_length);
453  utf8_to_utf16(reinterpret_cast<const uint8_t*>(utf8.data()), utf8.length(),
454                &*utf16.begin(), utf16_length + 1);
455  return utf16;
456}
457
458std::string Utf16ToUtf8(const StringPiece16& utf16) {
459  ssize_t utf8_length = utf16_to_utf8_length(utf16.data(), utf16.length());
460  if (utf8_length <= 0) {
461    return {};
462  }
463
464  std::string utf8;
465  utf8.resize(utf8_length);
466  utf16_to_utf8(utf16.data(), utf16.length(), &*utf8.begin(), utf8_length + 1);
467  return utf8;
468}
469
470bool WriteAll(std::ostream& out, const BigBuffer& buffer) {
471  for (const auto& b : buffer) {
472    if (!out.write(reinterpret_cast<const char*>(b.buffer.get()), b.size)) {
473      return false;
474    }
475  }
476  return true;
477}
478
479std::unique_ptr<uint8_t[]> Copy(const BigBuffer& buffer) {
480  std::unique_ptr<uint8_t[]> data =
481      std::unique_ptr<uint8_t[]>(new uint8_t[buffer.size()]);
482  uint8_t* p = data.get();
483  for (const auto& block : buffer) {
484    memcpy(p, block.buffer.get(), block.size);
485    p += block.size;
486  }
487  return data;
488}
489
490typename Tokenizer::iterator& Tokenizer::iterator::operator++() {
491  const char* start = token_.end();
492  const char* end = str_.end();
493  if (start == end) {
494    end_ = true;
495    token_.assign(token_.end(), 0);
496    return *this;
497  }
498
499  start += 1;
500  const char* current = start;
501  while (current != end) {
502    if (*current == separator_) {
503      token_.assign(start, current - start);
504      return *this;
505    }
506    ++current;
507  }
508  token_.assign(start, end - start);
509  return *this;
510}
511
512bool Tokenizer::iterator::operator==(const iterator& rhs) const {
513  // We check equality here a bit differently.
514  // We need to know that the addresses are the same.
515  return token_.begin() == rhs.token_.begin() &&
516         token_.end() == rhs.token_.end() && end_ == rhs.end_;
517}
518
519bool Tokenizer::iterator::operator!=(const iterator& rhs) const {
520  return !(*this == rhs);
521}
522
523Tokenizer::iterator::iterator(StringPiece s, char sep, StringPiece tok,
524                              bool end)
525    : str_(s), separator_(sep), token_(tok), end_(end) {}
526
527Tokenizer::Tokenizer(StringPiece str, char sep)
528    : begin_(++iterator(str, sep, StringPiece(str.begin() - 1, 0), false)),
529      end_(str, sep, StringPiece(str.end(), 0), true) {}
530
531bool ExtractResFilePathParts(const StringPiece& path, StringPiece* out_prefix,
532                             StringPiece* out_entry, StringPiece* out_suffix) {
533  const StringPiece res_prefix("res/");
534  if (!StartsWith(path, res_prefix)) {
535    return false;
536  }
537
538  StringPiece::const_iterator last_occurence = path.end();
539  for (auto iter = path.begin() + res_prefix.size(); iter != path.end();
540       ++iter) {
541    if (*iter == '/') {
542      last_occurence = iter;
543    }
544  }
545
546  if (last_occurence == path.end()) {
547    return false;
548  }
549
550  auto iter = std::find(last_occurence, path.end(), '.');
551  *out_suffix = StringPiece(iter, path.end() - iter);
552  *out_entry = StringPiece(last_occurence + 1, iter - last_occurence - 1);
553  *out_prefix = StringPiece(path.begin(), last_occurence - path.begin() + 1);
554  return true;
555}
556
557StringPiece16 GetString16(const android::ResStringPool& pool, size_t idx) {
558  size_t len;
559  const char16_t* str = pool.stringAt(idx, &len);
560  if (str != nullptr) {
561    return StringPiece16(str, len);
562  }
563  return StringPiece16();
564}
565
566std::string GetString(const android::ResStringPool& pool, size_t idx) {
567  size_t len;
568  const char* str = pool.string8At(idx, &len);
569  if (str != nullptr) {
570    return std::string(str, len);
571  }
572  return Utf16ToUtf8(GetString16(pool, idx));
573}
574
575}  // namespace util
576}  // namespace aapt
577