1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "utils.h"
18
19#include <inttypes.h>
20#include <pthread.h>
21#include <sys/stat.h>
22#include <sys/syscall.h>
23#include <sys/types.h>
24#include <sys/wait.h>
25#include <unistd.h>
26#include <memory>
27
28#include "art_field-inl.h"
29#include "art_method-inl.h"
30#include "base/stl_util.h"
31#include "base/unix_file/fd_file.h"
32#include "dex_file-inl.h"
33#include "dex_instruction.h"
34#include "mirror/class-inl.h"
35#include "mirror/class_loader.h"
36#include "mirror/object-inl.h"
37#include "mirror/object_array-inl.h"
38#include "mirror/string.h"
39#include "oat_quick_method_header.h"
40#include "os.h"
41#include "scoped_thread_state_change.h"
42#include "utf-inl.h"
43
44#if defined(__APPLE__)
45#include "AvailabilityMacros.h"  // For MAC_OS_X_VERSION_MAX_ALLOWED
46#include <sys/syscall.h>
47#endif
48
49// For DumpNativeStack.
50#include <backtrace/Backtrace.h>
51#include <backtrace/BacktraceMap.h>
52
53#if defined(__linux__)
54#include <linux/unistd.h>
55#endif
56
57namespace art {
58
#if defined(__linux__)
// Compile-time switch that is true only when this is not a target build (!kIsTargetBuild).
// NOTE(review): presumably selects addr2line-based symbolization when dumping native stacks;
// the use site is not visible in this part of the file - confirm.
static constexpr bool kUseAddr2line = !kIsTargetBuild;
#endif
62
// Returns an identifier for the calling thread, obtained in a platform-specific way:
//  - Apple:     the 64-bit id reported by pthread_threadid_np(), converted to pid_t on return.
//  - Bionic:    gettid().
//  - Otherwise: the raw gettid system call, syscall(__NR_gettid).
pid_t GetTid() {
#if defined(__APPLE__)
  uint64_t owner;
  CHECK_PTHREAD_CALL(pthread_threadid_np, (nullptr, &owner), __FUNCTION__);  // Requires Mac OS 10.6
  return owner;
#elif defined(__BIONIC__)
  return gettid();
#else
  return syscall(__NR_gettid);
#endif
}
74
75std::string GetThreadName(pid_t tid) {
76  std::string result;
77  if (ReadFileToString(StringPrintf("/proc/self/task/%d/comm", tid), &result)) {
78    result.resize(result.size() - 1);  // Lose the trailing '\n'.
79  } else {
80    result = "<unknown>";
81  }
82  return result;
83}
84
// Reports the stack region of `thread` through the out-parameters:
//   *stack_base - base (low end) of the stack,
//   *stack_size - size of the stack in bytes,
//   *guard_size - guard size as reported by pthread_attr_getguardsize().
// Any failing pthread call goes through CHECK_PTHREAD_CALL.
void GetThreadStack(pthread_t thread, void** stack_base, size_t* stack_size, size_t* guard_size) {
#if defined(__APPLE__)
  *stack_size = pthread_get_stacksize_np(thread);
  void* stack_addr = pthread_get_stackaddr_np(thread);

  // Check whether stack_addr is the base or end of the stack.
  // (On Mac OS 10.7, it's the end.)
  // The address of a local in the current frame is used as the reference point for the
  // comparison.
  int stack_variable;
  if (stack_addr > &stack_variable) {
    *stack_base = reinterpret_cast<uint8_t*>(stack_addr) - *stack_size;
  } else {
    *stack_base = stack_addr;
  }

  // This is wrong, but there doesn't seem to be a way to get the actual value on the Mac.
  // (The guard size is read from freshly initialized attributes, not from `thread` itself.)
  pthread_attr_t attributes;
  CHECK_PTHREAD_CALL(pthread_attr_init, (&attributes), __FUNCTION__);
  CHECK_PTHREAD_CALL(pthread_attr_getguardsize, (&attributes, guard_size), __FUNCTION__);
  CHECK_PTHREAD_CALL(pthread_attr_destroy, (&attributes), __FUNCTION__);
#else
  // Query the attributes of the thread itself and read stack and guard size from them.
  pthread_attr_t attributes;
  CHECK_PTHREAD_CALL(pthread_getattr_np, (thread, &attributes), __FUNCTION__);
  CHECK_PTHREAD_CALL(pthread_attr_getstack, (&attributes, stack_base, stack_size), __FUNCTION__);
  CHECK_PTHREAD_CALL(pthread_attr_getguardsize, (&attributes, guard_size), __FUNCTION__);
  CHECK_PTHREAD_CALL(pthread_attr_destroy, (&attributes), __FUNCTION__);

#if defined(__GLIBC__)
  // If we're the main thread, check whether we were run with an unlimited stack. In that case,
  // glibc will have reported a 2GB stack for our 32-bit process, and our stack overflow detection
  // will be broken because we'll die long before we get close to 2GB.
  // The main thread is recognized by its thread id being equal to the process id.
  bool is_main_thread = (::art::GetTid() == getpid());
  if (is_main_thread) {
    rlimit stack_limit;
    if (getrlimit(RLIMIT_STACK, &stack_limit) == -1) {
      PLOG(FATAL) << "getrlimit(RLIMIT_STACK) failed";
    }
    if (stack_limit.rlim_cur == RLIM_INFINITY) {
      size_t old_stack_size = *stack_size;

      // Use the kernel default limit as our size, and adjust the base to match.
      // The base moves up by the number of bytes the size shrank, so base + size is unchanged.
      *stack_size = 8 * MB;
      *stack_base = reinterpret_cast<uint8_t*>(*stack_base) + (old_stack_size - *stack_size);

      VLOG(threads) << "Limiting unlimited stack (reported as " << PrettySize(old_stack_size) << ")"
                    << " to " << PrettySize(*stack_size)
                    << " with base " << *stack_base;
    }
  }
#endif

#endif
}
137
138bool ReadFileToString(const std::string& file_name, std::string* result) {
139  File file;
140  if (!file.Open(file_name, O_RDONLY)) {
141    return false;
142  }
143
144  std::vector<char> buf(8 * KB);
145  while (true) {
146    int64_t n = TEMP_FAILURE_RETRY(read(file.Fd(), &buf[0], buf.size()));
147    if (n == -1) {
148      return false;
149    }
150    if (n == 0) {
151      return true;
152    }
153    result->append(&buf[0], n);
154  }
155}
156
157bool PrintFileToLog(const std::string& file_name, LogSeverity level) {
158  File file;
159  if (!file.Open(file_name, O_RDONLY)) {
160    return false;
161  }
162
163  constexpr size_t kBufSize = 256;  // Small buffer. Avoid stack overflow and stack size warnings.
164  char buf[kBufSize + 1];           // +1 for terminator.
165  size_t filled_to = 0;
166  while (true) {
167    DCHECK_LT(filled_to, kBufSize);
168    int64_t n = TEMP_FAILURE_RETRY(read(file.Fd(), &buf[filled_to], kBufSize - filled_to));
169    if (n <= 0) {
170      // Print the rest of the buffer, if it exists.
171      if (filled_to > 0) {
172        buf[filled_to] = 0;
173        LOG(level) << buf;
174      }
175      return n == 0;
176    }
177    // Scan for '\n'.
178    size_t i = filled_to;
179    bool found_newline = false;
180    for (; i < filled_to + n; ++i) {
181      if (buf[i] == '\n') {
182        // Found a line break, that's something to print now.
183        buf[i] = 0;
184        LOG(level) << buf;
185        // Copy the rest to the front.
186        if (i + 1 < filled_to + n) {
187          memmove(&buf[0], &buf[i + 1], filled_to + n - i - 1);
188          filled_to = filled_to + n - i - 1;
189        } else {
190          filled_to = 0;
191        }
192        found_newline = true;
193        break;
194      }
195    }
196    if (found_newline) {
197      continue;
198    } else {
199      filled_to += n;
200      // Check if we must flush now.
201      if (filled_to == kBufSize) {
202        buf[kBufSize] = 0;
203        LOG(level) << buf;
204        filled_to = 0;
205      }
206    }
207  }
208}
209
210std::string PrettyDescriptor(mirror::String* java_descriptor) {
211  if (java_descriptor == nullptr) {
212    return "null";
213  }
214  return PrettyDescriptor(java_descriptor->ToModifiedUtf8().c_str());
215}
216
217std::string PrettyDescriptor(mirror::Class* klass) {
218  if (klass == nullptr) {
219    return "null";
220  }
221  std::string temp;
222  return PrettyDescriptor(klass->GetDescriptor(&temp));
223}
224
// Converts a type descriptor into a human-readable type name:
//   "[[La/b/C;" -> "a.b.C[][]",  "[[B" -> "byte[][]",  "V" -> "void".
// A descriptor whose element type is neither a reference ('L...') nor a known primitive is
// returned unchanged. A reference descriptor that lacks the terminating ';' is converted up to
// the end of the string instead of reading past it.
std::string PrettyDescriptor(const char* descriptor) {
  // Count the number of '['s to get the dimensionality.
  const char* c = descriptor;
  size_t dim = 0;
  while (*c == '[') {
    dim++;
    c++;
  }

  // Reference or primitive?
  if (*c == 'L') {
    // "[[La/b/C;" -> "a.b.C[][]".
    c++;  // Skip the 'L'.
  } else {
    // "[[B" -> "byte[][]".
    // To make life easier, we make primitives look like unqualified
    // reference types.
    switch (*c) {
    case 'B': c = "byte;"; break;
    case 'C': c = "char;"; break;
    case 'D': c = "double;"; break;
    case 'F': c = "float;"; break;
    case 'I': c = "int;"; break;
    case 'J': c = "long;"; break;
    case 'S': c = "short;"; break;
    case 'Z': c = "boolean;"; break;
    case 'V': c = "void;"; break;  // Used when decoding return types.
    default: return descriptor;
    }
  }

  // At this point, 'c' is a string of the form "fully/qualified/Type;"
  // or "primitive;". Rewrite the type with '.' instead of '/'. Also stop at the terminating
  // NUL so that a malformed descriptor without ';' cannot run the scan off the end.
  std::string result;
  for (const char* p = c; *p != ';' && *p != '\0'; ++p) {
    result.push_back(*p == '/' ? '.' : *p);
  }
  // ...and replace the semicolon with 'dim' "[]" pairs:
  for (size_t i = 0; i < dim; ++i) {
    result += "[]";
  }
  return result;
}
273
274std::string PrettyField(ArtField* f, bool with_type) {
275  if (f == nullptr) {
276    return "null";
277  }
278  std::string result;
279  if (with_type) {
280    result += PrettyDescriptor(f->GetTypeDescriptor());
281    result += ' ';
282  }
283  std::string temp;
284  result += PrettyDescriptor(f->GetDeclaringClass()->GetDescriptor(&temp));
285  result += '.';
286  result += f->GetName();
287  return result;
288}
289
290std::string PrettyField(uint32_t field_idx, const DexFile& dex_file, bool with_type) {
291  if (field_idx >= dex_file.NumFieldIds()) {
292    return StringPrintf("<<invalid-field-idx-%d>>", field_idx);
293  }
294  const DexFile::FieldId& field_id = dex_file.GetFieldId(field_idx);
295  std::string result;
296  if (with_type) {
297    result += dex_file.GetFieldTypeDescriptor(field_id);
298    result += ' ';
299  }
300  result += PrettyDescriptor(dex_file.GetFieldDeclaringClassDescriptor(field_id));
301  result += '.';
302  result += dex_file.GetFieldName(field_id);
303  return result;
304}
305
306std::string PrettyType(uint32_t type_idx, const DexFile& dex_file) {
307  if (type_idx >= dex_file.NumTypeIds()) {
308    return StringPrintf("<<invalid-type-idx-%d>>", type_idx);
309  }
310  const DexFile::TypeId& type_id = dex_file.GetTypeId(type_idx);
311  return PrettyDescriptor(dex_file.GetTypeDescriptor(type_id));
312}
313
314std::string PrettyArguments(const char* signature) {
315  std::string result;
316  result += '(';
317  CHECK_EQ(*signature, '(');
318  ++signature;  // Skip the '('.
319  while (*signature != ')') {
320    size_t argument_length = 0;
321    while (signature[argument_length] == '[') {
322      ++argument_length;
323    }
324    if (signature[argument_length] == 'L') {
325      argument_length = (strchr(signature, ';') - signature + 1);
326    } else {
327      ++argument_length;
328    }
329    {
330      std::string argument_descriptor(signature, argument_length);
331      result += PrettyDescriptor(argument_descriptor.c_str());
332    }
333    if (signature[argument_length] != ')') {
334      result += ", ";
335    }
336    signature += argument_length;
337  }
338  CHECK_EQ(*signature, ')');
339  ++signature;  // Skip the ')'.
340  result += ')';
341  return result;
342}
343
344std::string PrettyReturnType(const char* signature) {
345  const char* return_type = strchr(signature, ')');
346  CHECK(return_type != nullptr);
347  ++return_type;  // Skip ')'.
348  return PrettyDescriptor(return_type);
349}
350
351std::string PrettyMethod(ArtMethod* m, bool with_signature) {
352  if (m == nullptr) {
353    return "null";
354  }
355  if (!m->IsRuntimeMethod()) {
356    m = m->GetInterfaceMethodIfProxy(Runtime::Current()->GetClassLinker()->GetImagePointerSize());
357  }
358  std::string result(PrettyDescriptor(m->GetDeclaringClassDescriptor()));
359  result += '.';
360  result += m->GetName();
361  if (UNLIKELY(m->IsFastNative())) {
362    result += "!";
363  }
364  if (with_signature) {
365    const Signature signature = m->GetSignature();
366    std::string sig_as_string(signature.ToString());
367    if (signature == Signature::NoSignature()) {
368      return result + sig_as_string;
369    }
370    result = PrettyReturnType(sig_as_string.c_str()) + " " + result +
371        PrettyArguments(sig_as_string.c_str());
372  }
373  return result;
374}
375
376std::string PrettyMethod(uint32_t method_idx, const DexFile& dex_file, bool with_signature) {
377  if (method_idx >= dex_file.NumMethodIds()) {
378    return StringPrintf("<<invalid-method-idx-%d>>", method_idx);
379  }
380  const DexFile::MethodId& method_id = dex_file.GetMethodId(method_idx);
381  std::string result(PrettyDescriptor(dex_file.GetMethodDeclaringClassDescriptor(method_id)));
382  result += '.';
383  result += dex_file.GetMethodName(method_id);
384  if (with_signature) {
385    const Signature signature = dex_file.GetMethodSignature(method_id);
386    std::string sig_as_string(signature.ToString());
387    if (signature == Signature::NoSignature()) {
388      return result + sig_as_string;
389    }
390    result = PrettyReturnType(sig_as_string.c_str()) + " " + result +
391        PrettyArguments(sig_as_string.c_str());
392  }
393  return result;
394}
395
396std::string PrettyTypeOf(mirror::Object* obj) {
397  if (obj == nullptr) {
398    return "null";
399  }
400  if (obj->GetClass() == nullptr) {
401    return "(raw)";
402  }
403  std::string temp;
404  std::string result(PrettyDescriptor(obj->GetClass()->GetDescriptor(&temp)));
405  if (obj->IsClass()) {
406    result += "<" + PrettyDescriptor(obj->AsClass()->GetDescriptor(&temp)) + ">";
407  }
408  return result;
409}
410
411std::string PrettyClass(mirror::Class* c) {
412  if (c == nullptr) {
413    return "null";
414  }
415  std::string result;
416  result += "java.lang.Class<";
417  result += PrettyDescriptor(c);
418  result += ">";
419  return result;
420}
421
422std::string PrettyClassAndClassLoader(mirror::Class* c) {
423  if (c == nullptr) {
424    return "null";
425  }
426  std::string result;
427  result += "java.lang.Class<";
428  result += PrettyDescriptor(c);
429  result += ",";
430  result += PrettyTypeOf(c->GetClassLoader());
431  // TODO: add an identifying hash value for the loader
432  result += ">";
433  return result;
434}
435
436std::string PrettyJavaAccessFlags(uint32_t access_flags) {
437  std::string result;
438  if ((access_flags & kAccPublic) != 0) {
439    result += "public ";
440  }
441  if ((access_flags & kAccProtected) != 0) {
442    result += "protected ";
443  }
444  if ((access_flags & kAccPrivate) != 0) {
445    result += "private ";
446  }
447  if ((access_flags & kAccFinal) != 0) {
448    result += "final ";
449  }
450  if ((access_flags & kAccStatic) != 0) {
451    result += "static ";
452  }
453  if ((access_flags & kAccTransient) != 0) {
454    result += "transient ";
455  }
456  if ((access_flags & kAccVolatile) != 0) {
457    result += "volatile ";
458  }
459  if ((access_flags & kAccSynchronized) != 0) {
460    result += "synchronized ";
461  }
462  return result;
463}
464
465std::string PrettySize(int64_t byte_count) {
466  // The byte thresholds at which we display amounts.  A byte count is displayed
467  // in unit U when kUnitThresholds[U] <= bytes < kUnitThresholds[U+1].
468  static const int64_t kUnitThresholds[] = {
469    0,              // B up to...
470    3*1024,         // KB up to...
471    2*1024*1024,    // MB up to...
472    1024*1024*1024  // GB from here.
473  };
474  static const int64_t kBytesPerUnit[] = { 1, KB, MB, GB };
475  static const char* const kUnitStrings[] = { "B", "KB", "MB", "GB" };
476  const char* negative_str = "";
477  if (byte_count < 0) {
478    negative_str = "-";
479    byte_count = -byte_count;
480  }
481  int i = arraysize(kUnitThresholds);
482  while (--i > 0) {
483    if (byte_count >= kUnitThresholds[i]) {
484      break;
485    }
486  }
487  return StringPrintf("%s%" PRId64 "%s",
488                      negative_str, byte_count / kBytesPerUnit[i], kUnitStrings[i]);
489}
490
491std::string PrintableChar(uint16_t ch) {
492  std::string result;
493  result += '\'';
494  if (NeedsEscaping(ch)) {
495    StringAppendF(&result, "\\u%04x", ch);
496  } else {
497    result += ch;
498  }
499  result += '\'';
500  return result;
501}
502
503std::string PrintableString(const char* utf) {
504  std::string result;
505  result += '"';
506  const char* p = utf;
507  size_t char_count = CountModifiedUtf8Chars(p);
508  for (size_t i = 0; i < char_count; ++i) {
509    uint32_t ch = GetUtf16FromUtf8(&p);
510    if (ch == '\\') {
511      result += "\\\\";
512    } else if (ch == '\n') {
513      result += "\\n";
514    } else if (ch == '\r') {
515      result += "\\r";
516    } else if (ch == '\t') {
517      result += "\\t";
518    } else {
519      const uint16_t leading = GetLeadingUtf16Char(ch);
520
521      if (NeedsEscaping(leading)) {
522        StringAppendF(&result, "\\u%04x", leading);
523      } else {
524        result += leading;
525      }
526
527      const uint32_t trailing = GetTrailingUtf16Char(ch);
528      if (trailing != 0) {
529        // All high surrogates will need escaping.
530        StringAppendF(&result, "\\u%04x", trailing);
531      }
532    }
533  }
534  result += '"';
535  return result;
536}
537
538// See http://java.sun.com/j2se/1.5.0/docs/guide/jni/spec/design.html#wp615 for the full rules.
539std::string MangleForJni(const std::string& s) {
540  std::string result;
541  size_t char_count = CountModifiedUtf8Chars(s.c_str());
542  const char* cp = &s[0];
543  for (size_t i = 0; i < char_count; ++i) {
544    uint32_t ch = GetUtf16FromUtf8(&cp);
545    if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9')) {
546      result.push_back(ch);
547    } else if (ch == '.' || ch == '/') {
548      result += "_";
549    } else if (ch == '_') {
550      result += "_1";
551    } else if (ch == ';') {
552      result += "_2";
553    } else if (ch == '[') {
554      result += "_3";
555    } else {
556      const uint16_t leading = GetLeadingUtf16Char(ch);
557      const uint32_t trailing = GetTrailingUtf16Char(ch);
558
559      StringAppendF(&result, "_0%04x", leading);
560      if (trailing != 0) {
561        StringAppendF(&result, "_0%04x", trailing);
562      }
563    }
564  }
565  return result;
566}
567
// Converts a dotted class name into a descriptor: every '.' becomes '/', and unless the name is
// empty or starts with '[' the result is wrapped in 'L' ... ';'.
//   "java.lang.String" -> "Ljava/lang/String;",  "[I" -> "[I".
std::string DotToDescriptor(const char* class_name) {
  std::string slashed(class_name);
  for (char& ch : slashed) {
    if (ch == '.') {
      ch = '/';
    }
  }
  if (slashed.empty() || slashed[0] == '[') {
    return slashed;
  }
  return "L" + slashed + ";";
}
576
// Converts a descriptor into a dotted name. Descriptors of the form "L...;" lose the leading
// 'L' and trailing ';'; any other descriptor longer than one character (e.g. an array) keeps
// all its characters. In both cases every '/' becomes '.'. Descriptors of length 0 or 1 are
// returned unchanged.
std::string DescriptorToDot(const char* descriptor) {
  const size_t length = strlen(descriptor);
  if (length <= 1) {
    // Do nothing for non-class/array descriptors.
    return descriptor;
  }

  const bool is_class = (descriptor[0] == 'L' && descriptor[length - 1] == ';');
  std::string dotted =
      is_class ? std::string(descriptor + 1, length - 2) : std::string(descriptor);
  std::replace(dotted.begin(), dotted.end(), '/', '.');
  return dotted;
}
595
// Strips the leading 'L' and trailing ';' from a class descriptor, leaving the '/' separators
// in place. Any other descriptor is returned unchanged.
std::string DescriptorToName(const char* descriptor) {
  const size_t length = strlen(descriptor);
  // The first test fails for an empty string, so the last-character access is never reached
  // with length == 0.
  const bool is_class = (descriptor[0] == 'L' && descriptor[length - 1] == ';');
  return is_class ? std::string(descriptor + 1, length - 2) : std::string(descriptor);
}
604
605std::string JniShortName(ArtMethod* m) {
606  std::string class_name(m->GetDeclaringClassDescriptor());
607  // Remove the leading 'L' and trailing ';'...
608  CHECK_EQ(class_name[0], 'L') << class_name;
609  CHECK_EQ(class_name[class_name.size() - 1], ';') << class_name;
610  class_name.erase(0, 1);
611  class_name.erase(class_name.size() - 1, 1);
612
613  std::string method_name(m->GetName());
614
615  std::string short_name;
616  short_name += "Java_";
617  short_name += MangleForJni(class_name);
618  short_name += "_";
619  short_name += MangleForJni(method_name);
620  return short_name;
621}
622
623std::string JniLongName(ArtMethod* m) {
624  std::string long_name;
625  long_name += JniShortName(m);
626  long_name += "__";
627
628  std::string signature(m->GetSignature().ToString());
629  signature.erase(0, 1);
630  signature.erase(signature.begin() + signature.find(')'), signature.end());
631
632  long_name += MangleForJni(signature);
633
634  return long_name;
635}
636
// Helper for IsValidPartOfMemberNameUtf8(), a bit vector indicating valid low ascii.
// Each word covers 32 consecutive character codes; character c is looked up as bit (c & 0x1f)
// of word (c >> 5), and a set bit marks the character as valid in a member name.
uint32_t DEX_MEMBER_VALID_LOW_ASCII[4] = {
  0x00000000,  // 00..1f low control characters; nothing valid
  0x03ff2010,  // 20..3f digits and symbols; valid: '0'..'9', '$', '-'
  0x87fffffe,  // 40..5f uppercase etc.; valid: 'A'..'Z', '_'
  0x07fffffe   // 60..7f lowercase etc.; valid: 'a'..'z'
};
644
// Helper for IsValidPartOfMemberNameUtf8(); do not call directly.
// Decodes the multibyte character at *pUtf8Ptr (advancing the pointer past it, and past a
// second three-byte sequence when a leading surrogate half is found) and returns whether it is
// acceptable as part of a member name.
bool IsValidPartOfMemberNameUtf8Slow(const char** pUtf8Ptr) {
  /*
   * It's a multibyte encoded character. Decode it and analyze. We
   * accept anything that isn't (a) an improperly encoded low value,
   * (b) an improper surrogate pair, (c) an encoded '\0', (d) a high
   * control character, or (e) a high space, layout, or special
   * character (U+00a0, U+2000..U+200f, U+2028..U+202f,
   * U+fff0..U+ffff). This is all specified in the dex format
   * document.
   */

  const uint32_t pair = GetUtf16FromUtf8(pUtf8Ptr);
  const uint16_t leading = GetLeadingUtf16Char(pair);

  // We have a surrogate pair resulting from a valid 4 byte UTF sequence.
  // No further checks are necessary because 4 byte sequences span code
  // points [U+10000, U+1FFFFF], which are valid codepoints in a dex
  // identifier. Furthermore, GetUtf16FromUtf8 guarantees that each of
  // the surrogate halves are valid and well formed in this instance.
  if (GetTrailingUtf16Char(pair) != 0) {
    return true;
  }


  // We've encountered a one, two or three byte UTF-8 sequence. The
  // three byte UTF-8 sequence could be one half of a surrogate pair.
  // Dispatch on the high byte of the decoded UTF-16 unit.
  switch (leading >> 8) {
    case 0x00:
      // It's only valid if it's above the ISO-8859-1 high space (0xa0).
      return (leading > 0x00a0);
    case 0xd8:
    case 0xd9:
    case 0xda:
    case 0xdb:
      {
        // We found a three byte sequence encoding one half of a surrogate.
        // Look for the other half.
        const uint32_t pair2 = GetUtf16FromUtf8(pUtf8Ptr);
        const uint16_t trailing = GetLeadingUtf16Char(pair2);

        // The follow-up must be a single unit in the trailing surrogate range 0xdc00..0xdfff.
        return (GetTrailingUtf16Char(pair2) == 0) && (0xdc00 <= trailing && trailing <= 0xdfff);
      }
    case 0xdc:
    case 0xdd:
    case 0xde:
    case 0xdf:
      // It's a trailing surrogate, which is not valid at this point.
      return false;
    case 0x20:
    case 0xff:
      // It's in the range that has spaces, controls, and specials.
      // Masking with 0xfff8 groups the value into blocks of eight code units; the rejected
      // blocks are 0x2000..0x200f, 0x2028..0x202f and 0xfff0..0xffff.
      switch (leading & 0xfff8) {
        case 0x2000:
        case 0x2008:
        case 0x2028:
        case 0xfff0:
        case 0xfff8:
          return false;
      }
      return true;
    default:
      return true;
  }

  UNREACHABLE();
}
712
713/* Return whether the pointed-at modified-UTF-8 encoded character is
714 * valid as part of a member name, updating the pointer to point past
715 * the consumed character. This will consume two encoded UTF-16 code
716 * points if the character is encoded as a surrogate pair. Also, if
717 * this function returns false, then the given pointer may only have
718 * been partially advanced.
719 */
720static bool IsValidPartOfMemberNameUtf8(const char** pUtf8Ptr) {
721  uint8_t c = (uint8_t) **pUtf8Ptr;
722  if (LIKELY(c <= 0x7f)) {
723    // It's low-ascii, so check the table.
724    uint32_t wordIdx = c >> 5;
725    uint32_t bitIdx = c & 0x1f;
726    (*pUtf8Ptr)++;
727    return (DEX_MEMBER_VALID_LOW_ASCII[wordIdx] & (1 << bitIdx)) != 0;
728  }
729
730  // It's a multibyte encoded character. Call a non-inline function
731  // for the heavy lifting.
732  return IsValidPartOfMemberNameUtf8Slow(pUtf8Ptr);
733}
734
735bool IsValidMemberName(const char* s) {
736  bool angle_name = false;
737
738  switch (*s) {
739    case '\0':
740      // The empty string is not a valid name.
741      return false;
742    case '<':
743      angle_name = true;
744      s++;
745      break;
746  }
747
748  while (true) {
749    switch (*s) {
750      case '\0':
751        return !angle_name;
752      case '>':
753        return angle_name && s[1] == '\0';
754    }
755
756    if (!IsValidPartOfMemberNameUtf8(&s)) {
757      return false;
758    }
759  }
760}
761
// Selects what IsValidClassName() validates: a plain class name (kName) or a type descriptor
// (kDescriptor).
enum ClassNameType { kName, kDescriptor };

// Returns whether `s` is a well-formed class name or type descriptor, as selected by kType,
// whose package components are separated by kSeparator ('/' or '.'). Leading '[' characters
// (at most 255) mark array dimensions; when any are present the remainder is always validated
// as a descriptor.
template<ClassNameType kType, char kSeparator>
static bool IsValidClassName(const char* s) {
  // Consume and count the array dimensions.
  int arrayCount = 0;
  while (*s == '[') {
    arrayCount++;
    s++;
  }

  if (arrayCount > 255) {
    // Arrays may have no more than 255 dimensions.
    return false;
  }

  ClassNameType type = kType;
  if (type != kDescriptor && arrayCount != 0) {
    /*
     * If we're looking at an array of some sort, then it doesn't
     * matter if what is being asked for is a class name; the
     * format looks the same as a type descriptor in that case, so
     * treat it as such.
     */
    type = kDescriptor;
  }

  if (type == kDescriptor) {
    /*
     * We are looking for a descriptor. Either validate it as a
     * single-character primitive type, or continue on to check the
     * embedded class name (bracketed by "L" and ";").
     */
    switch (*(s++)) {
    case 'B':
    case 'C':
    case 'D':
    case 'F':
    case 'I':
    case 'J':
    case 'S':
    case 'Z':
      // These are all single-character descriptors for primitive types.
      return (*s == '\0');
    case 'V':
      // Non-array void is valid, but you can't have an array of void.
      return (arrayCount == 0) && (*s == '\0');
    case 'L':
      // Class name: Break out and continue below.
      break;
    default:
      // Oddball descriptor character.
      return false;
    }
  }

  /*
   * We just consumed the 'L' that introduces a class name as part
   * of a type descriptor, or we are looking for an unadorned class
   * name.
   */

  bool sepOrFirst = true;  // first character or just encountered a separator.
  for (;;) {
    uint8_t c = (uint8_t) *s;
    switch (c) {
    case '\0':
      /*
       * Premature end for a type descriptor, but valid for
       * a class name as long as we haven't encountered an
       * empty component (including the degenerate case of
       * the empty string "").
       */
      return (type == kName) && !sepOrFirst;
    case ';':
      /*
       * Invalid character for a class name, but the
       * legitimate end of a type descriptor. In the latter
       * case, make sure that this is the end of the string
       * and that it doesn't end with an empty component
       * (including the degenerate case of "L;").
       */
      return (type == kDescriptor) && !sepOrFirst && (s[1] == '\0');
    case '/':
    case '.':
      if (c != kSeparator) {
        // The wrong separator character.
        return false;
      }
      if (sepOrFirst) {
        // Separator at start or two separators in a row.
        return false;
      }
      sepOrFirst = true;
      s++;
      break;
    default:
      // Any other character must be a valid member-name character; the helper advances s.
      if (!IsValidPartOfMemberNameUtf8(&s)) {
        return false;
      }
      sepOrFirst = false;
      break;
    }
  }
}
865
// Returns whether `s` is a valid class name whose components are separated by '.'
// (e.g. "java.lang.String"); validated as a name (kName), not as a descriptor.
bool IsValidBinaryClassName(const char* s) {
  return IsValidClassName<kName, '.'>(s);
}
869
// Returns whether `s` is a valid class name whose components are separated by '/'
// (e.g. "java/lang/String"); validated as a name (kName), not as a descriptor.
bool IsValidJniClassName(const char* s) {
  return IsValidClassName<kName, '/'>(s);
}
873
// Returns whether `s` is a valid type descriptor (e.g. "I", "[J" or "Ljava/lang/String;") with
// '/' as the component separator.
bool IsValidDescriptor(const char* s) {
  return IsValidClassName<kDescriptor, '/'>(s);
}
877
// Splits `s` at every occurrence of `separator` and appends the non-empty pieces to `*result`.
// Empty pieces (leading, trailing or between adjacent separators) are dropped; `*result` is not
// cleared first.
void Split(const std::string& s, char separator, std::vector<std::string>* result) {
  const size_t size = s.size();
  size_t pos = 0;
  while (pos < size) {
    if (s[pos] == separator) {
      ++pos;
      continue;
    }
    // `pos` is at the first character of a token; the token runs up to the next separator or
    // the end of the string.
    size_t token_end = s.find(separator, pos);
    if (token_end == std::string::npos) {
      token_end = size;
    }
    result->push_back(s.substr(pos, token_end - pos));
    pos = token_end;
  }
}
893
// Returns a copy of `s` with leading and trailing whitespace (as classified by isspace())
// removed. An empty or all-whitespace input yields the empty string.
std::string Trim(const std::string& s) {
  // [start_index, end_index) is the range still considered part of the result. Using a
  // half-open range of size_t avoids wrap-around for the empty string.
  size_t start_index = 0;
  size_t end_index = s.size();

  // Skip initial whitespace. The cast keeps isspace() away from negative char values.
  while (start_index < end_index && isspace(static_cast<unsigned char>(s[start_index]))) {
    ++start_index;
  }

  // Skip terminating whitespace.
  while (end_index > start_index && isspace(static_cast<unsigned char>(s[end_index - 1]))) {
    --end_index;
  }

  // All spaces (or empty input) leaves an empty range and therefore an empty result.
  return s.substr(start_index, end_index - start_index);
}
922
// Concatenates the elements of `strings`, placing `separator` between neighbouring elements.
// An empty vector yields the empty string.
template <typename StringT>
std::string Join(const std::vector<StringT>& strings, char separator) {
  std::string joined;
  bool first = true;
  for (const StringT& piece : strings) {
    if (!first) {
      joined += separator;
    }
    joined += piece;
    first = false;
  }
  return joined;
}

// Explicit instantiations.
template std::string Join<std::string>(const std::vector<std::string>& strings, char separator);
template std::string Join<const char*>(const std::vector<const char*>& strings, char separator);
940
// Returns whether `s` begins with the NUL-terminated string `prefix`. Every string starts with
// the empty prefix.
bool StartsWith(const std::string& s, const char* prefix) {
  const size_t prefix_length = strlen(prefix);
  if (s.size() < prefix_length) {
    return false;
  }
  return s.compare(0, prefix_length, prefix, prefix_length) == 0;
}
944
// Returns whether `s` ends with the NUL-terminated string `suffix`. Every string ends with the
// empty suffix.
bool EndsWith(const std::string& s, const char* suffix) {
  const size_t suffix_length = strlen(suffix);
  const size_t string_length = s.size();
  return suffix_length <= string_length &&
      s.compare(string_length - suffix_length, suffix_length, suffix) == 0;
}
954
// Sets the name of the calling thread to (a possibly shortened form of) `thread_name`.
// On Linux the name is limited to 15 characters plus the terminator: a name of 15 or more
// characters that contains a '.' and no '@' is reduced to its last 15 characters, any other
// name is cut off after its first 15 characters. A failure is logged as a warning only.
// On other platforms the unmodified name is passed to pthread_setname_np().
void SetThreadName(const char* thread_name) {
  // Scan the whole name once, noting whether it contains '@' or '.'; afterwards `s` points at
  // the terminator.
  int hasAt = 0;
  int hasDot = 0;
  const char* s = thread_name;
  while (*s) {
    if (*s == '.') {
      hasDot = 1;
    } else if (*s == '@') {
      hasAt = 1;
    }
    s++;
  }
  int len = s - thread_name;
  // Choose which part of the name to keep: the start, or the last 15 characters.
  if (len < 15 || hasAt || !hasDot) {
    s = thread_name;
  } else {
    s = thread_name + len - 15;
  }
#if defined(__linux__)
  // pthread_setname_np fails rather than truncating long strings.
  char buf[16];       // MAX_TASK_COMM_LEN=16 is hard-coded in the kernel.
  strncpy(buf, s, sizeof(buf)-1);
  buf[sizeof(buf)-1] = '\0';
  // The call returns its error code; it is stored in errno so that PLOG can describe it.
  errno = pthread_setname_np(pthread_self(), buf);
  if (errno != 0) {
    PLOG(WARNING) << "Unable to set the name of current thread to '" << buf << "'";
  }
#else  // __APPLE__
  pthread_setname_np(thread_name);
#endif
}
986
987void GetTaskStats(pid_t tid, char* state, int* utime, int* stime, int* task_cpu) {
988  *utime = *stime = *task_cpu = 0;
989  std::string stats;
990  if (!ReadFileToString(StringPrintf("/proc/self/task/%d/stat", tid), &stats)) {
991    return;
992  }
993  // Skip the command, which may contain spaces.
994  stats = stats.substr(stats.find(')') + 2);
995  // Extract the three fields we care about.
996  std::vector<std::string> fields;
997  Split(stats, ' ', &fields);
998  *state = fields[0][0];
999  *utime = strtoull(fields[11].c_str(), nullptr, 10);
1000  *stime = strtoull(fields[12].c_str(), nullptr, 10);
1001  *task_cpu = strtoull(fields[36].c_str(), nullptr, 10);
1002}
1003
1004std::string GetSchedulerGroupName(pid_t tid) {
1005  // /proc/<pid>/cgroup looks like this:
1006  // 2:devices:/
1007  // 1:cpuacct,cpu:/
1008  // We want the third field from the line whose second field contains the "cpu" token.
1009  std::string cgroup_file;
1010  if (!ReadFileToString(StringPrintf("/proc/self/task/%d/cgroup", tid), &cgroup_file)) {
1011    return "";
1012  }
1013  std::vector<std::string> cgroup_lines;
1014  Split(cgroup_file, '\n', &cgroup_lines);
1015  for (size_t i = 0; i < cgroup_lines.size(); ++i) {
1016    std::vector<std::string> cgroup_fields;
1017    Split(cgroup_lines[i], ':', &cgroup_fields);
1018    std::vector<std::string> cgroups;
1019    Split(cgroup_fields[1], ',', &cgroups);
1020    for (size_t j = 0; j < cgroups.size(); ++j) {
1021      if (cgroups[j] == "cpu") {
1022        return cgroup_fields[2].substr(1);  // Skip the leading slash.
1023      }
1024    }
1025  }
1026  return "";
1027}
1028
1029#if defined(__linux__)
1030
1031ALWAYS_INLINE
1032static inline void WritePrefix(std::ostream* os, const char* prefix, bool odd) {
1033  if (prefix != nullptr) {
1034    *os << prefix;
1035  }
1036  *os << "  ";
1037  if (!odd) {
1038    *os << " ";
1039  }
1040}
1041
1042static bool RunCommand(std::string cmd, std::ostream* os, const char* prefix) {
1043  FILE* stream = popen(cmd.c_str(), "r");
1044  if (stream) {
1045    if (os != nullptr) {
1046      bool odd_line = true;               // We indent them differently.
1047      bool wrote_prefix = false;          // Have we already written a prefix?
1048      constexpr size_t kMaxBuffer = 128;  // Relatively small buffer. Should be OK as we're on an
1049                                          // alt stack, but just to be sure...
1050      char buffer[kMaxBuffer];
1051      while (!feof(stream)) {
1052        if (fgets(buffer, kMaxBuffer, stream) != nullptr) {
1053          // Split on newlines.
1054          char* tmp = buffer;
1055          for (;;) {
1056            char* new_line = strchr(tmp, '\n');
1057            if (new_line == nullptr) {
1058              // Print the rest.
1059              if (*tmp != 0) {
1060                if (!wrote_prefix) {
1061                  WritePrefix(os, prefix, odd_line);
1062                }
1063                wrote_prefix = true;
1064                *os << tmp;
1065              }
1066              break;
1067            }
1068            if (!wrote_prefix) {
1069              WritePrefix(os, prefix, odd_line);
1070            }
1071            char saved = *(new_line + 1);
1072            *(new_line + 1) = 0;
1073            *os << tmp;
1074            *(new_line + 1) = saved;
1075            tmp = new_line + 1;
1076            odd_line = !odd_line;
1077            wrote_prefix = false;
1078          }
1079        }
1080      }
1081    }
1082    pclose(stream);
1083    return true;
1084  } else {
1085    return false;
1086  }
1087}
1088
1089static void Addr2line(const std::string& map_src, uintptr_t offset, std::ostream& os,
1090                      const char* prefix) {
1091  std::string cmdline(StringPrintf("addr2line --functions --inlines --demangle -e %s %zx",
1092                                   map_src.c_str(), offset));
1093  RunCommand(cmdline.c_str(), &os, prefix);
1094}
1095
1096static bool PcIsWithinQuickCode(ArtMethod* method, uintptr_t pc) NO_THREAD_SAFETY_ANALYSIS {
1097  uintptr_t code = reinterpret_cast<uintptr_t>(EntryPointToCodePointer(
1098      method->GetEntryPointFromQuickCompiledCode()));
1099  if (code == 0) {
1100    return pc == 0;
1101  }
1102  uintptr_t code_size = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].code_size_;
1103  return code <= pc && pc <= (code + code_size);
1104}
1105#endif
1106
1107void DumpNativeStack(std::ostream& os, pid_t tid, BacktraceMap* existing_map, const char* prefix,
1108    ArtMethod* current_method, void* ucontext_ptr) {
1109#if __linux__
1110  // b/18119146
1111  if (RUNNING_ON_MEMORY_TOOL != 0) {
1112    return;
1113  }
1114
1115  BacktraceMap* map = existing_map;
1116  std::unique_ptr<BacktraceMap> tmp_map;
1117  if (map == nullptr) {
1118    tmp_map.reset(BacktraceMap::Create(getpid()));
1119    map = tmp_map.get();
1120  }
1121  std::unique_ptr<Backtrace> backtrace(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, tid, map));
1122  if (!backtrace->Unwind(0, reinterpret_cast<ucontext*>(ucontext_ptr))) {
1123    os << prefix << "(backtrace::Unwind failed for thread " << tid
1124       << ": " <<  backtrace->GetErrorString(backtrace->GetError()) << ")\n";
1125    return;
1126  } else if (backtrace->NumFrames() == 0) {
1127    os << prefix << "(no native stack frames for thread " << tid << ")\n";
1128    return;
1129  }
1130
1131  // Check whether we have and should use addr2line.
1132  bool use_addr2line;
1133  if (kUseAddr2line) {
1134    // Try to run it to see whether we have it. Push an argument so that it doesn't assume a.out
1135    // and print to stderr.
1136    use_addr2line = (gAborting > 0) && RunCommand("addr2line -h", nullptr, nullptr);
1137  } else {
1138    use_addr2line = false;
1139  }
1140
1141  for (Backtrace::const_iterator it = backtrace->begin();
1142       it != backtrace->end(); ++it) {
1143    // We produce output like this:
1144    // ]    #00 pc 000075bb8  /system/lib/libc.so (unwind_backtrace_thread+536)
1145    // In order for parsing tools to continue to function, the stack dump
1146    // format must at least adhere to this format:
1147    //  #XX pc <RELATIVE_ADDR>  <FULL_PATH_TO_SHARED_LIBRARY> ...
1148    // The parsers require a single space before and after pc, and two spaces
1149    // after the <RELATIVE_ADDR>. There can be any prefix data before the
1150    // #XX. <RELATIVE_ADDR> has to be a hex number but with no 0x prefix.
1151    os << prefix << StringPrintf("#%02zu pc ", it->num);
1152    bool try_addr2line = false;
1153    if (!BacktraceMap::IsValid(it->map)) {
1154      os << StringPrintf(Is64BitInstructionSet(kRuntimeISA) ? "%016" PRIxPTR "  ???"
1155                                                            : "%08" PRIxPTR "  ???",
1156                         it->pc);
1157    } else {
1158      os << StringPrintf(Is64BitInstructionSet(kRuntimeISA) ? "%016" PRIxPTR "  "
1159                                                            : "%08" PRIxPTR "  ",
1160                         BacktraceMap::GetRelativePc(it->map, it->pc));
1161      os << it->map.name;
1162      os << " (";
1163      if (!it->func_name.empty()) {
1164        os << it->func_name;
1165        if (it->func_offset != 0) {
1166          os << "+" << it->func_offset;
1167        }
1168        try_addr2line = true;
1169      } else if (current_method != nullptr &&
1170          Locks::mutator_lock_->IsSharedHeld(Thread::Current()) &&
1171          PcIsWithinQuickCode(current_method, it->pc)) {
1172        const void* start_of_code = current_method->GetEntryPointFromQuickCompiledCode();
1173        os << JniLongName(current_method) << "+"
1174           << (it->pc - reinterpret_cast<uintptr_t>(start_of_code));
1175      } else {
1176        os << "???";
1177      }
1178      os << ")";
1179    }
1180    os << "\n";
1181    if (try_addr2line && use_addr2line) {
1182      Addr2line(it->map.name, it->pc - it->map.start, os, prefix);
1183    }
1184  }
1185#else
1186  UNUSED(os, tid, existing_map, prefix, current_method, ucontext_ptr);
1187#endif
1188}
1189
1190#if defined(__APPLE__)
1191
// TODO: is there any way to get the kernel stack on Mac OS?
// Mac OS stub: accepts the same arguments as the non-Apple implementation, ignores all of them
// and writes nothing.
void DumpKernelStack(std::ostream&, pid_t, const char*, bool) {}
1194
1195#else
1196
1197void DumpKernelStack(std::ostream& os, pid_t tid, const char* prefix, bool include_count) {
1198  if (tid == GetTid()) {
1199    // There's no point showing that we're reading our stack out of /proc!
1200    return;
1201  }
1202
1203  std::string kernel_stack_filename(StringPrintf("/proc/self/task/%d/stack", tid));
1204  std::string kernel_stack;
1205  if (!ReadFileToString(kernel_stack_filename, &kernel_stack)) {
1206    os << prefix << "(couldn't read " << kernel_stack_filename << ")\n";
1207    return;
1208  }
1209
1210  std::vector<std::string> kernel_stack_frames;
1211  Split(kernel_stack, '\n', &kernel_stack_frames);
1212  // We skip the last stack frame because it's always equivalent to "[<ffffffff>] 0xffffffff",
1213  // which looking at the source appears to be the kernel's way of saying "that's all, folks!".
1214  kernel_stack_frames.pop_back();
1215  for (size_t i = 0; i < kernel_stack_frames.size(); ++i) {
1216    // Turn "[<ffffffff8109156d>] futex_wait_queue_me+0xcd/0x110"
1217    // into "futex_wait_queue_me+0xcd/0x110".
1218    const char* text = kernel_stack_frames[i].c_str();
1219    const char* close_bracket = strchr(text, ']');
1220    if (close_bracket != nullptr) {
1221      text = close_bracket + 2;
1222    }
1223    os << prefix;
1224    if (include_count) {
1225      os << StringPrintf("#%02zd ", i);
1226    }
1227    os << text << "\n";
1228  }
1229}
1230
1231#endif
1232
1233const char* GetAndroidRoot() {
1234  const char* android_root = getenv("ANDROID_ROOT");
1235  if (android_root == nullptr) {
1236    if (OS::DirectoryExists("/system")) {
1237      android_root = "/system";
1238    } else {
1239      LOG(FATAL) << "ANDROID_ROOT not set and /system does not exist";
1240      return "";
1241    }
1242  }
1243  if (!OS::DirectoryExists(android_root)) {
1244    LOG(FATAL) << "Failed to find ANDROID_ROOT directory " << android_root;
1245    return "";
1246  }
1247  return android_root;
1248}
1249
1250const char* GetAndroidData() {
1251  std::string error_msg;
1252  const char* dir = GetAndroidDataSafe(&error_msg);
1253  if (dir != nullptr) {
1254    return dir;
1255  } else {
1256    LOG(FATAL) << error_msg;
1257    return "";
1258  }
1259}
1260
1261const char* GetAndroidDataSafe(std::string* error_msg) {
1262  const char* android_data = getenv("ANDROID_DATA");
1263  if (android_data == nullptr) {
1264    if (OS::DirectoryExists("/data")) {
1265      android_data = "/data";
1266    } else {
1267      *error_msg = "ANDROID_DATA not set and /data does not exist";
1268      return nullptr;
1269    }
1270  }
1271  if (!OS::DirectoryExists(android_data)) {
1272    *error_msg = StringPrintf("Failed to find ANDROID_DATA directory %s", android_data);
1273    return nullptr;
1274  }
1275  return android_data;
1276}
1277
1278void GetDalvikCache(const char* subdir, const bool create_if_absent, std::string* dalvik_cache,
1279                    bool* have_android_data, bool* dalvik_cache_exists, bool* is_global_cache) {
1280  CHECK(subdir != nullptr);
1281  std::string error_msg;
1282  const char* android_data = GetAndroidDataSafe(&error_msg);
1283  if (android_data == nullptr) {
1284    *have_android_data = false;
1285    *dalvik_cache_exists = false;
1286    *is_global_cache = false;
1287    return;
1288  } else {
1289    *have_android_data = true;
1290  }
1291  const std::string dalvik_cache_root(StringPrintf("%s/dalvik-cache/", android_data));
1292  *dalvik_cache = dalvik_cache_root + subdir;
1293  *dalvik_cache_exists = OS::DirectoryExists(dalvik_cache->c_str());
1294  *is_global_cache = strcmp(android_data, "/data") == 0;
1295  if (create_if_absent && !*dalvik_cache_exists && !*is_global_cache) {
1296    // Don't create the system's /data/dalvik-cache/... because it needs special permissions.
1297    *dalvik_cache_exists = ((mkdir(dalvik_cache_root.c_str(), 0700) == 0 || errno == EEXIST) &&
1298                            (mkdir(dalvik_cache->c_str(), 0700) == 0 || errno == EEXIST));
1299  }
1300}
1301
1302static std::string GetDalvikCacheImpl(const char* subdir,
1303                                      const bool create_if_absent,
1304                                      const bool abort_on_error) {
1305  CHECK(subdir != nullptr);
1306  const char* android_data = GetAndroidData();
1307  const std::string dalvik_cache_root(StringPrintf("%s/dalvik-cache/", android_data));
1308  const std::string dalvik_cache = dalvik_cache_root + subdir;
1309  if (!OS::DirectoryExists(dalvik_cache.c_str())) {
1310    if (!create_if_absent) {
1311      // TODO: Check callers. Traditional behavior is to not to abort, even when abort_on_error.
1312      return "";
1313    }
1314
1315    // Don't create the system's /data/dalvik-cache/... because it needs special permissions.
1316    if (strcmp(android_data, "/data") == 0) {
1317      if (abort_on_error) {
1318        LOG(FATAL) << "Failed to find dalvik-cache directory " << dalvik_cache
1319                   << ", cannot create /data dalvik-cache.";
1320        UNREACHABLE();
1321      }
1322      return "";
1323    }
1324
1325    int result = mkdir(dalvik_cache_root.c_str(), 0700);
1326    if (result != 0 && errno != EEXIST) {
1327      if (abort_on_error) {
1328        PLOG(FATAL) << "Failed to create dalvik-cache root directory " << dalvik_cache_root;
1329        UNREACHABLE();
1330      }
1331      return "";
1332    }
1333
1334    result = mkdir(dalvik_cache.c_str(), 0700);
1335    if (result != 0) {
1336      if (abort_on_error) {
1337        PLOG(FATAL) << "Failed to create dalvik-cache directory " << dalvik_cache;
1338        UNREACHABLE();
1339      }
1340      return "";
1341    }
1342  }
1343  return dalvik_cache;
1344}
1345
1346std::string GetDalvikCache(const char* subdir, const bool create_if_absent) {
1347  return GetDalvikCacheImpl(subdir, create_if_absent, false);
1348}
1349
1350std::string GetDalvikCacheOrDie(const char* subdir, const bool create_if_absent) {
1351  return GetDalvikCacheImpl(subdir, create_if_absent, true);
1352}
1353
1354bool GetDalvikCacheFilename(const char* location, const char* cache_location,
1355                            std::string* filename, std::string* error_msg) {
1356  if (location[0] != '/') {
1357    *error_msg = StringPrintf("Expected path in location to be absolute: %s", location);
1358    return false;
1359  }
1360  std::string cache_file(&location[1]);  // skip leading slash
1361  if (!EndsWith(location, ".dex") && !EndsWith(location, ".art") && !EndsWith(location, ".oat")) {
1362    cache_file += "/";
1363    cache_file += DexFile::kClassesDex;
1364  }
1365  std::replace(cache_file.begin(), cache_file.end(), '/', '@');
1366  *filename = StringPrintf("%s/%s", cache_location, cache_file.c_str());
1367  return true;
1368}
1369
1370std::string GetDalvikCacheFilenameOrDie(const char* location, const char* cache_location) {
1371  std::string ret;
1372  std::string error_msg;
1373  if (!GetDalvikCacheFilename(location, cache_location, &ret, &error_msg)) {
1374    LOG(FATAL) << error_msg;
1375  }
1376  return ret;
1377}
1378
1379static void InsertIsaDirectory(const InstructionSet isa, std::string* filename) {
1380  // in = /foo/bar/baz
1381  // out = /foo/bar/<isa>/baz
1382  size_t pos = filename->rfind('/');
1383  CHECK_NE(pos, std::string::npos) << *filename << " " << isa;
1384  filename->insert(pos, "/", 1);
1385  filename->insert(pos + 1, GetInstructionSetString(isa));
1386}
1387
1388std::string GetSystemImageFilename(const char* location, const InstructionSet isa) {
1389  // location = /system/framework/boot.art
1390  // filename = /system/framework/<isa>/boot.art
1391  std::string filename(location);
1392  InsertIsaDirectory(isa, &filename);
1393  return filename;
1394}
1395
1396int ExecAndReturnCode(std::vector<std::string>& arg_vector, std::string* error_msg) {
1397  const std::string command_line(Join(arg_vector, ' '));
1398  CHECK_GE(arg_vector.size(), 1U) << command_line;
1399
1400  // Convert the args to char pointers.
1401  const char* program = arg_vector[0].c_str();
1402  std::vector<char*> args;
1403  for (size_t i = 0; i < arg_vector.size(); ++i) {
1404    const std::string& arg = arg_vector[i];
1405    char* arg_str = const_cast<char*>(arg.c_str());
1406    CHECK(arg_str != nullptr) << i;
1407    args.push_back(arg_str);
1408  }
1409  args.push_back(nullptr);
1410
1411  // fork and exec
1412  pid_t pid = fork();
1413  if (pid == 0) {
1414    // no allocation allowed between fork and exec
1415
1416    // change process groups, so we don't get reaped by ProcessManager
1417    setpgid(0, 0);
1418
1419    // (b/30160149): protect subprocesses from modifications to LD_LIBRARY_PATH, etc.
1420    // Use the snapshot of the environment from the time the runtime was created.
1421    char** envp = (Runtime::Current() == nullptr) ? nullptr : Runtime::Current()->GetEnvSnapshot();
1422    if (envp == nullptr) {
1423      execv(program, &args[0]);
1424    } else {
1425      execve(program, &args[0], envp);
1426    }
1427    PLOG(ERROR) << "Failed to execve(" << command_line << ")";
1428    // _exit to avoid atexit handlers in child.
1429    _exit(1);
1430  } else {
1431    if (pid == -1) {
1432      *error_msg = StringPrintf("Failed to execv(%s) because fork failed: %s",
1433                                command_line.c_str(), strerror(errno));
1434      return -1;
1435    }
1436
1437    // wait for subprocess to finish
1438    int status = -1;
1439    pid_t got_pid = TEMP_FAILURE_RETRY(waitpid(pid, &status, 0));
1440    if (got_pid != pid) {
1441      *error_msg = StringPrintf("Failed after fork for execv(%s) because waitpid failed: "
1442                                "wanted %d, got %d: %s",
1443                                command_line.c_str(), pid, got_pid, strerror(errno));
1444      return -1;
1445    }
1446    if (WIFEXITED(status)) {
1447      return WEXITSTATUS(status);
1448    }
1449    return -1;
1450  }
1451}
1452
1453bool Exec(std::vector<std::string>& arg_vector, std::string* error_msg) {
1454  int status = ExecAndReturnCode(arg_vector, error_msg);
1455  if (status != 0) {
1456    const std::string command_line(Join(arg_vector, ' '));
1457    *error_msg = StringPrintf("Failed execv(%s) because non-0 exit status",
1458                              command_line.c_str());
1459    return false;
1460  }
1461  return true;
1462}
1463
// Returns true if stat() succeeds on |filename|, i.e. something exists at that path.
bool FileExists(const std::string& filename) {
  struct stat file_info;
  const int rc = stat(filename.c_str(), &file_info);
  return rc == 0;
}
1468
// Returns true if stat() succeeds on |filename| and reports a size greater than zero.
bool FileExistsAndNotEmpty(const std::string& filename) {
  struct stat file_info;
  return stat(filename.c_str(), &file_info) == 0 && file_info.st_size > 0;
}
1476
1477std::string PrettyDescriptor(Primitive::Type type) {
1478  return PrettyDescriptor(Primitive::Descriptor(type));
1479}
1480
// Writes a "digraph { ... }" description of the control-flow graph of one method to |os|.
//
// |dex_file| and |dex_method_idx| identify the method (used for the header comment and for
// dumping instructions); |code_item| supplies the instruction stream that is analyzed.
//
// The output is produced in three steps:
//   1. Collect every dex pc that is the target of a branch or switch.
//   2. Emit one record-shaped node per block of instructions, with one port ("p<dex_pc>") per
//      instruction.
//   3. Emit the edges, grouped into three subgraphs: regular (fall-through) edges, taken
//      (branch/switch) edges and exception edges.
static void DumpMethodCFGImpl(const DexFile* dex_file,
                              uint32_t dex_method_idx,
                              const DexFile::CodeItem* code_item,
                              std::ostream& os) {
  os << "digraph {\n";
  os << "  # /* " << PrettyMethod(dex_method_idx, *dex_file, true) << " */\n";

  // Step 1: the set of dex pcs that some branch or switch instruction can jump to.
  std::set<uint32_t> dex_pc_is_branch_target;
  {
    // Go and populate.
    const Instruction* inst = Instruction::At(code_item->insns_);
    for (uint32_t dex_pc = 0;
         dex_pc < code_item->insns_size_in_code_units_;
         dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) {
      if (inst->IsBranch()) {
        dex_pc_is_branch_target.insert(dex_pc + inst->GetTargetOffset());
      } else if (inst->IsSwitch()) {
        const uint16_t* insns = code_item->insns_ + dex_pc;
        // The 32-bit offset to the switch table is read from the two code units that follow the
        // first code unit of the instruction; it is relative to the instruction itself.
        int32_t switch_offset = insns[1] | (static_cast<int32_t>(insns[2]) << 16);
        const uint16_t* switch_insns = insns + switch_offset;
        uint32_t switch_count = switch_insns[1];
        int32_t targets_offset;
        if ((*insns & 0xff) == Instruction::PACKED_SWITCH) {
          /* 0=sig, 1=count, 2/3=firstKey */
          targets_offset = 4;
        } else {
          /* 0=sig, 1=count, 2..count*2 = keys */
          targets_offset = 2 + 2 * switch_count;
        }
        // Each target is a 32-bit offset stored as two consecutive code units, low half first.
        for (uint32_t targ = 0; targ < switch_count; targ++) {
          int32_t offset =
              static_cast<int32_t>(switch_insns[targets_offset + targ * 2]) |
              static_cast<int32_t>(switch_insns[targets_offset + targ * 2 + 1] << 16);
          dex_pc_is_branch_target.insert(dex_pc + offset);
        }
      }
    }
  }

  // Create nodes for "basic blocks."
  std::map<uint32_t, uint32_t> dex_pc_to_node_id;  // This only has entries for block starts.
  std::map<uint32_t, uint32_t> dex_pc_to_incl_id;  // This has entries for all dex pcs.

  // Step 2: emit the nodes. Node ids are assigned in order of appearance, so the current node's
  // id is always dex_pc_to_node_id.size() - 1.
  {
    const Instruction* inst = Instruction::At(code_item->insns_);
    bool first_in_block = true;
    bool force_new_block = false;
    for (uint32_t dex_pc = 0;
         dex_pc < code_item->insns_size_in_code_units_;
         dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) {
      // A new block starts at the first instruction, at every branch target, and right after an
      // instruction that forced a block end (see force_new_block below).
      if (dex_pc == 0 ||
          (dex_pc_is_branch_target.find(dex_pc) != dex_pc_is_branch_target.end()) ||
          force_new_block) {
        uint32_t id = dex_pc_to_node_id.size();
        if (id > 0) {
          // End last node.
          os << "}\"];\n";
        }
        // Start next node.
        os << "  node" << id << " [shape=record,label=\"{";
        dex_pc_to_node_id.insert(std::make_pair(dex_pc, id));
        first_in_block = true;
        force_new_block = false;
      }

      // Register instruction.
      dex_pc_to_incl_id.insert(std::make_pair(dex_pc, dex_pc_to_node_id.size() - 1));

      // Print instruction.
      if (!first_in_block) {
        os << " | ";
      } else {
        first_in_block = false;
      }

      // Dump the instruction. Need to escape '"', '<', '>', '{' and '}'.
      os << "<" << "p" << dex_pc << ">";
      os << " 0x" << std::hex << dex_pc << std::dec << ": ";
      std::string inst_str = inst->DumpString(dex_file);
      size_t cur_start = 0;  // It's OK to start at zero, instruction dumps don't start with chars
                             // we need to escape.
      // Alternate between copying a run of ordinary characters and escaping a run of special
      // characters until the whole string has been written.
      while (cur_start != std::string::npos) {
        size_t next_escape = inst_str.find_first_of("\"{}<>", cur_start + 1);
        if (next_escape == std::string::npos) {
          os << inst_str.substr(cur_start, inst_str.size() - cur_start);
          break;
        } else {
          os << inst_str.substr(cur_start, next_escape - cur_start);
          // Escape all necessary characters.
          while (next_escape < inst_str.size()) {
            char c = inst_str.at(next_escape);
            if (c == '"' || c == '{' || c == '}' || c == '<' || c == '>') {
              os << '\\' << c;
            } else {
              break;
            }
            next_escape++;
          }
          if (next_escape >= inst_str.size()) {
            next_escape = std::string::npos;
          }
          cur_start = next_escape;
        }
      }

      // Force a new block for some fall-throughs and some instructions that terminate the "local"
      // control flow.
      force_new_block = inst->IsSwitch() || inst->IsBasicBlockEnd();
    }
    // Close last node.
    if (dex_pc_to_node_id.size() > 0) {
      os << "}\"];\n";
    }
  }

  // Create edges between them.
  // Step 3: edges are buffered per category so each category can be written as its own subgraph.
  {
    std::ostringstream regular_edges;
    std::ostringstream taken_edges;
    std::ostringstream exception_edges;

    // Common set of exception edges.
    std::set<uint32_t> exception_targets;

    // These blocks (given by the first dex pc) need exception per dex-pc handling in a second
    // pass. In the first pass we try and see whether we can use a common set of edges.
    std::set<uint32_t> blocks_with_detailed_exceptions;

    {
      // max() is used as the "no node" / "no block" marker for the two variables below.
      uint32_t last_node_id = std::numeric_limits<uint32_t>::max();
      uint32_t old_dex_pc = 0;
      uint32_t block_start_dex_pc = std::numeric_limits<uint32_t>::max();
      const Instruction* inst = Instruction::At(code_item->insns_);
      for (uint32_t dex_pc = 0;
          dex_pc < code_item->insns_size_in_code_units_;
          old_dex_pc = dex_pc, dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) {
        {
          auto it = dex_pc_to_node_id.find(dex_pc);
          if (it != dex_pc_to_node_id.end()) {
            // This dex pc starts a new block.
            if (!exception_targets.empty()) {
              // It seems the last block had common exception handlers. Add the exception edges now.
              uint32_t node_id = dex_pc_to_node_id.find(block_start_dex_pc)->second;
              for (uint32_t handler_pc : exception_targets) {
                auto node_id_it = dex_pc_to_incl_id.find(handler_pc);
                if (node_id_it != dex_pc_to_incl_id.end()) {
                  exception_edges << "  node" << node_id
                      << " -> node" << node_id_it->second << ":p" << handler_pc
                      << ";\n";
                }
              }
              exception_targets.clear();
            }

            block_start_dex_pc = dex_pc;

            // Seems to be a fall-through, connect to last_node_id. May be spurious edges for things
            // like switch data.
            uint32_t old_last = last_node_id;
            last_node_id = it->second;
            if (old_last != std::numeric_limits<uint32_t>::max()) {
              regular_edges << "  node" << old_last << ":p" << old_dex_pc
                  << " -> node" << last_node_id << ":p" << dex_pc
                  << ";\n";
            }
          }

          // Look at the exceptions of the first entry.
          CatchHandlerIterator catch_it(*code_item, dex_pc);
          for (; catch_it.HasNext(); catch_it.Next()) {
            exception_targets.insert(catch_it.GetHandlerAddress());
          }
        }

        // Handle instruction.

        // Branch: something with at most two targets.
        if (inst->IsBranch()) {
          const int32_t offset = inst->GetTargetOffset();
          const bool conditional = !inst->IsUnconditional();

          auto target_it = dex_pc_to_node_id.find(dex_pc + offset);
          if (target_it != dex_pc_to_node_id.end()) {
            taken_edges << "  node" << last_node_id << ":p" << dex_pc
                << " -> node" << target_it->second << ":p" << (dex_pc + offset)
                << ";\n";
          }
          if (!conditional) {
            // No fall-through.
            last_node_id = std::numeric_limits<uint32_t>::max();
          }
        } else if (inst->IsSwitch()) {
          // TODO: Iterate through all switch targets.
          const uint16_t* insns = code_item->insns_ + dex_pc;
          /* make sure the start of the switch is in range */
          int32_t switch_offset = insns[1] | (static_cast<int32_t>(insns[2]) << 16);
          /* offset to switch table is a relative branch-style offset */
          const uint16_t* switch_insns = insns + switch_offset;
          uint32_t switch_count = switch_insns[1];
          int32_t targets_offset;
          if ((*insns & 0xff) == Instruction::PACKED_SWITCH) {
            /* 0=sig, 1=count, 2/3=firstKey */
            targets_offset = 4;
          } else {
            /* 0=sig, 1=count, 2..count*2 = keys */
            targets_offset = 2 + 2 * switch_count;
          }
          /* make sure the end of the switch is in range */
          /* verify each switch target */
          for (uint32_t targ = 0; targ < switch_count; targ++) {
            int32_t offset =
                static_cast<int32_t>(switch_insns[targets_offset + targ * 2]) |
                static_cast<int32_t>(switch_insns[targets_offset + targ * 2 + 1] << 16);
            int32_t abs_offset = dex_pc + offset;
            auto target_it = dex_pc_to_node_id.find(abs_offset);
            if (target_it != dex_pc_to_node_id.end()) {
              // TODO: value label.
              taken_edges << "  node" << last_node_id << ":p" << dex_pc
                  << " -> node" << target_it->second << ":p" << (abs_offset)
                  << ";\n";
            }
          }
        }

        // Exception edges. If this is not the first instruction in the block
        // and its handlers differ from the ones collected so far, the block cannot use a common
        // set of exception edges and is queued for the per-instruction second pass.
        if (block_start_dex_pc != dex_pc) {
          std::set<uint32_t> current_handler_pcs;
          CatchHandlerIterator catch_it(*code_item, dex_pc);
          for (; catch_it.HasNext(); catch_it.Next()) {
            current_handler_pcs.insert(catch_it.GetHandlerAddress());
          }
          if (current_handler_pcs != exception_targets) {
            exception_targets.clear();  // Clear so we don't do something at the end.
            blocks_with_detailed_exceptions.insert(block_start_dex_pc);
          }
        }

        if (inst->IsReturn() ||
            (inst->Opcode() == Instruction::THROW) ||
            (inst->IsBranch() && inst->IsUnconditional())) {
          // No fall-through.
          last_node_id = std::numeric_limits<uint32_t>::max();
        }
      }
      // Finish up the last block, if it had common exceptions.
      if (!exception_targets.empty()) {
        // It seems the last block had common exception handlers. Add the exception edges now.
        uint32_t node_id = dex_pc_to_node_id.find(block_start_dex_pc)->second;
        for (uint32_t handler_pc : exception_targets) {
          auto node_id_it = dex_pc_to_incl_id.find(handler_pc);
          if (node_id_it != dex_pc_to_incl_id.end()) {
            exception_edges << "  node" << node_id
                << " -> node" << node_id_it->second << ":p" << handler_pc
                << ";\n";
          }
        }
        exception_targets.clear();
      }
    }

    // Second pass for detailed exception blocks.
    // TODO
    // Exception edges. If this is not the first instruction in the block
    // Walks each queued block instruction by instruction and emits one exception edge per
    // distinct handler of every instruction, from that instruction's port.
    for (uint32_t dex_pc : blocks_with_detailed_exceptions) {
      const Instruction* inst = Instruction::At(&code_item->insns_[dex_pc]);
      uint32_t this_node_id = dex_pc_to_incl_id.find(dex_pc)->second;
      while (true) {
        CatchHandlerIterator catch_it(*code_item, dex_pc);
        if (catch_it.HasNext()) {
          // Handlers already emitted for this instruction, to avoid duplicate edges.
          std::set<uint32_t> handled_targets;
          for (; catch_it.HasNext(); catch_it.Next()) {
            uint32_t handler_pc = catch_it.GetHandlerAddress();
            auto it = handled_targets.find(handler_pc);
            if (it == handled_targets.end()) {
              auto node_id_it = dex_pc_to_incl_id.find(handler_pc);
              if (node_id_it != dex_pc_to_incl_id.end()) {
                exception_edges << "  node" << this_node_id << ":p" << dex_pc
                    << " -> node" << node_id_it->second << ":p" << handler_pc
                    << ";\n";
              }

              // Mark as done.
              handled_targets.insert(handler_pc);
            }
          }
        }
        if (inst->IsBasicBlockEnd()) {
          break;
        }

        // Loop update. Have a break-out if the next instruction is a branch target and thus in
        // another block.
        dex_pc += inst->SizeInCodeUnits();
        if (dex_pc >= code_item->insns_size_in_code_units_) {
          break;
        }
        if (dex_pc_to_node_id.find(dex_pc) != dex_pc_to_node_id.end()) {
          break;
        }
        inst = inst->Next();
      }
    }

    // Write out the sub-graphs to make edges styled.
    os << "\n";
    os << "  subgraph regular_edges {\n";
    os << "    edge [color=\"#000000\",weight=.3,len=3];\n\n";
    os << "    " << regular_edges.str() << "\n";
    os << "  }\n\n";

    os << "  subgraph taken_edges {\n";
    os << "    edge [color=\"#00FF00\",weight=.3,len=3];\n\n";
    os << "    " << taken_edges.str() << "\n";
    os << "  }\n\n";

    os << "  subgraph exception_edges {\n";
    os << "    edge [color=\"#FF0000\",weight=.3,len=3];\n\n";
    os << "    " << exception_edges.str() << "\n";
    os << "  }\n\n";
  }

  os << "}\n";
}
1803
1804void DumpMethodCFG(ArtMethod* method, std::ostream& os) {
1805  const DexFile* dex_file = method->GetDexFile();
1806  const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset());
1807
1808  DumpMethodCFGImpl(dex_file, method->GetDexMethodIndex(), code_item, os);
1809}
1810
1811void DumpMethodCFG(const DexFile* dex_file, uint32_t dex_method_idx, std::ostream& os) {
1812  // This is painful, we need to find the code item. That means finding the class, and then
1813  // iterating the table.
1814  if (dex_method_idx >= dex_file->NumMethodIds()) {
1815    os << "Could not find method-idx.";
1816    return;
1817  }
1818  const DexFile::MethodId& method_id = dex_file->GetMethodId(dex_method_idx);
1819
1820  const DexFile::ClassDef* class_def = dex_file->FindClassDef(method_id.class_idx_);
1821  if (class_def == nullptr) {
1822    os << "Could not find class-def.";
1823    return;
1824  }
1825
1826  const uint8_t* class_data = dex_file->GetClassData(*class_def);
1827  if (class_data == nullptr) {
1828    os << "No class data.";
1829    return;
1830  }
1831
1832  ClassDataItemIterator it(*dex_file, class_data);
1833  // Skip fields
1834  while (it.HasNextStaticField() || it.HasNextInstanceField()) {
1835    it.Next();
1836  }
1837
1838  // Find method, and dump it.
1839  while (it.HasNextDirectMethod() || it.HasNextVirtualMethod()) {
1840    uint32_t method_idx = it.GetMemberIndex();
1841    if (method_idx == dex_method_idx) {
1842      DumpMethodCFGImpl(dex_file, dex_method_idx, it.GetMethodCodeItem(), os);
1843      return;
1844    }
1845    it.Next();
1846  }
1847
1848  // Otherwise complain.
1849  os << "Something went wrong, didn't find the method in the class data.";
1850}
1851
1852static void ParseStringAfterChar(const std::string& s,
1853                                 char c,
1854                                 std::string* parsed_value,
1855                                 UsageFn Usage) {
1856  std::string::size_type colon = s.find(c);
1857  if (colon == std::string::npos) {
1858    Usage("Missing char %c in option %s\n", c, s.c_str());
1859  }
1860  // Add one to remove the char we were trimming until.
1861  *parsed_value = s.substr(colon + 1);
1862}
1863
1864void ParseDouble(const std::string& option,
1865                 char after_char,
1866                 double min,
1867                 double max,
1868                 double* parsed_value,
1869                 UsageFn Usage) {
1870  std::string substring;
1871  ParseStringAfterChar(option, after_char, &substring, Usage);
1872  bool sane_val = true;
1873  double value;
1874  if ((false)) {
1875    // TODO: this doesn't seem to work on the emulator.  b/15114595
1876    std::stringstream iss(substring);
1877    iss >> value;
1878    // Ensure that we have a value, there was no cruft after it and it satisfies a sensible range.
1879    sane_val = iss.eof() && (value >= min) && (value <= max);
1880  } else {
1881    char* end = nullptr;
1882    value = strtod(substring.c_str(), &end);
1883    sane_val = *end == '\0' && value >= min && value <= max;
1884  }
1885  if (!sane_val) {
1886    Usage("Invalid double value %s for option %s\n", substring.c_str(), option.c_str());
1887  }
1888  *parsed_value = value;
1889}
1890
// Returns the size in bytes that stat() reports for `filename`, or -1 if the stat() call fails.
int64_t GetFileSizeBytes(const std::string& filename) {
  struct stat file_info;
  if (stat(filename.c_str(), &file_info) != 0) {
    return -1;
  }
  return file_info.st_size;
}
1896
// Never returns: repeatedly calls usleep() with a 1,000,000 microsecond interval.
void SleepForever() {
  for (;;) {
    usleep(1000000);
  }
}
1902
1903}  // namespace art
1904