/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "thread_list.h"

#include <dirent.h>
#include <sys/types.h>
#include <unistd.h>

#include <sstream>
#include <vector>

#include "android-base/stringprintf.h"
#include "backtrace/BacktraceMap.h"
#include "nativehelper/scoped_local_ref.h"
#include "nativehelper/scoped_utf_chars.h"

#include "base/aborting.h"
#include "base/histogram-inl.h"
#include "base/mutex-inl.h"
#include "base/systrace.h"
#include "base/time_utils.h"
#include "base/timing_logger.h"
#include "debugger.h"
#include "gc/collector/concurrent_copying.h"
#include "gc/gc_pause_listener.h"
#include "gc/heap.h"
#include "gc/reference_processor.h"
#include "gc_root.h"
#include "jni_internal.h"
#include "lock_word.h"
#include "monitor.h"
#include "native_stack_dump.h"
#include "scoped_thread_state_change-inl.h"
#include "thread.h"
#include "trace.h"
#include "well_known_classes.h"

#if ART_USE_FUTEXES
#include "linux/futex.h"
#include "sys/syscall.h"
#ifndef SYS_futex
#define SYS_futex __NR_futex
#endif
#endif  // ART_USE_FUTEXES

namespace art {

using android::base::StringPrintf;

static constexpr uint64_t kLongThreadSuspendThreshold = MsToNs(5);
// Use 0 since we want to yield to prevent blocking for an unpredictable amount of time.
static constexpr useconds_t kThreadSuspendInitialSleepUs = 0;
static constexpr useconds_t kThreadSuspendMaxYieldUs = 3000;
static constexpr useconds_t kThreadSuspendMaxSleepUs = 5000;

// Whether we should try to dump the native stack of unattached threads. See commit ed8b723 for
// some history.
static constexpr bool kDumpUnattachedThreadNativeStackForSigQuit = true;

ThreadList::ThreadList(uint64_t thread_suspend_timeout_ns)
    : suspend_all_count_(0),
      debug_suspend_all_count_(0),
      unregistering_count_(0),
      suspend_all_historam_("suspend all histogram", 16, 64),
      long_suspend_(false),
      shut_down_(false),
      thread_suspend_timeout_ns_(thread_suspend_timeout_ns),
      empty_checkpoint_barrier_(new Barrier(0)) {
  CHECK(Monitor::IsValidLockWord(LockWord::FromThinLockId(kMaxThreadId, 1, 0U)));
}

ThreadList::~ThreadList() {
  CHECK(shut_down_);
}

void ThreadList::ShutDown() {
  ScopedTrace trace(__PRETTY_FUNCTION__);
  // Detach the current thread if necessary. If we failed to start, there might not be any threads.
  // We need to detach the current thread here in case there's another thread waiting to join with
  // us.
  bool contains = false;
  Thread* self = Thread::Current();
  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    contains = Contains(self);
  }
  if (contains) {
    Runtime::Current()->DetachCurrentThread();
  }
  WaitForOtherNonDaemonThreadsToExit();
  // Disable GC and wait for GC to complete in case there are still daemon threads doing
  // allocations.
  gc::Heap* const heap = Runtime::Current()->GetHeap();
  heap->DisableGCForShutdown();
  // In case a GC is in progress, wait for it to finish.
  heap->WaitForGcToComplete(gc::kGcCauseBackground, Thread::Current());
  // TODO: there's an unaddressed race here where a thread may attach during shutdown, see
  //       Thread::Init.
  SuspendAllDaemonThreadsForShutdown();

  shut_down_ = true;
}

bool ThreadList::Contains(Thread* thread) {
  return find(list_.begin(), list_.end(), thread) != list_.end();
}

bool ThreadList::Contains(pid_t tid) {
  for (const auto& thread : list_) {
    if (thread->GetTid() == tid) {
      return true;
    }
  }
  return false;
}

pid_t ThreadList::GetLockOwner() {
  return Locks::thread_list_lock_->GetExclusiveOwnerTid();
}

void ThreadList::DumpNativeStacks(std::ostream& os) {
  MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
  std::unique_ptr<BacktraceMap> map(BacktraceMap::Create(getpid()));
  for (const auto& thread : list_) {
    os << "DUMPING THREAD " << thread->GetTid() << "\n";
    DumpNativeStack(os, thread->GetTid(), map.get(), "\t");
    os << "\n";
  }
}

void ThreadList::DumpForSigQuit(std::ostream& os) {
  {
    ScopedObjectAccess soa(Thread::Current());
    // Only print if we have samples.
    if (suspend_all_historam_.SampleSize() > 0) {
      Histogram<uint64_t>::CumulativeData data;
      suspend_all_historam_.CreateHistogram(&data);
      suspend_all_historam_.PrintConfidenceIntervals(os, 0.99, data);  // Dump time to suspend.
    }
  }
  bool dump_native_stack = Runtime::Current()->GetDumpNativeStackOnSigQuit();
  Dump(os, dump_native_stack);
  DumpUnattachedThreads(os, dump_native_stack && kDumpUnattachedThreadNativeStackForSigQuit);
}

static void DumpUnattachedThread(std::ostream& os, pid_t tid, bool dump_native_stack)
    NO_THREAD_SAFETY_ANALYSIS {
  // TODO: No thread safety analysis because DumpState with a null thread won't access fields;
  // refactor DumpState to avoid skipping analysis.
  Thread::DumpState(os, nullptr, tid);
  DumpKernelStack(os, tid, "  kernel: ", false);
  if (dump_native_stack) {
    DumpNativeStack(os, tid, nullptr, "  native: ");
  }
  os << std::endl;
}

void ThreadList::DumpUnattachedThreads(std::ostream& os, bool dump_native_stack) {
  DIR* d = opendir("/proc/self/task");
  if (!d) {
    return;
  }

  Thread* self = Thread::Current();
  dirent* e;
  while ((e = readdir(d)) != nullptr) {
    char* end;
    pid_t tid = strtol(e->d_name, &end, 10);
    if (!*end) {
      bool contains;
      {
        MutexLock mu(self, *Locks::thread_list_lock_);
        contains = Contains(tid);
      }
      if (!contains) {
        DumpUnattachedThread(os, tid, dump_native_stack);
      }
    }
  }
  closedir(d);
}

// Dump checkpoint timeout in milliseconds. Larger amount on the target, since the device could be
// overloaded with ANR dumps.
static constexpr uint32_t kDumpWaitTimeout = kIsTargetBuild ? 100000 : 20000;

// A closure used by Thread::Dump.
class DumpCheckpoint FINAL : public Closure {
 public:
  DumpCheckpoint(std::ostream* os, bool dump_native_stack)
      : os_(os),
        barrier_(0),
        backtrace_map_(dump_native_stack ? BacktraceMap::Create(getpid()) : nullptr),
        dump_native_stack_(dump_native_stack) {
    if (backtrace_map_ != nullptr) {
      backtrace_map_->SetSuffixesToIgnore(std::vector<std::string> { "oat", "odex" });
    }
  }

  void Run(Thread* thread) OVERRIDE {
    // Note thread and self may not be equal if thread was already suspended at the point of the
    // request.
    Thread* self = Thread::Current();
    CHECK(self != nullptr);
    std::ostringstream local_os;
    {
      ScopedObjectAccess soa(self);
      thread->Dump(local_os, dump_native_stack_, backtrace_map_.get());
    }
    {
      // Use the logging lock to ensure serialization when writing to the common ostream.
      MutexLock mu(self, *Locks::logging_lock_);
      *os_ << local_os.str() << std::endl;
    }
    barrier_.Pass(self);
  }

  void WaitForThreadsToRunThroughCheckpoint(size_t threads_running_checkpoint) {
    Thread* self = Thread::Current();
    ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
    bool timed_out = barrier_.Increment(self, threads_running_checkpoint, kDumpWaitTimeout);
    if (timed_out) {
      // Avoid a recursive abort.
      LOG((kIsDebugBuild && (gAborting == 0)) ? ::android::base::FATAL : ::android::base::ERROR)
          << "Unexpected time out during dump checkpoint.";
    }
  }

 private:
  // The common stream that will accumulate all the dumps.
  std::ostream* const os_;
  // The barrier to be passed through and for the requestor to wait upon.
  Barrier barrier_;
  // A backtrace map, so that all threads use a shared info and don't reacquire/parse separately.
  std::unique_ptr<BacktraceMap> backtrace_map_;
  // Whether we should dump the native stack.
  const bool dump_native_stack_;
};

void ThreadList::Dump(std::ostream& os, bool dump_native_stack) {
  Thread* self = Thread::Current();
  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    os << "DALVIK THREADS (" << list_.size() << "):\n";
  }
  if (self != nullptr) {
    DumpCheckpoint checkpoint(&os, dump_native_stack);
    size_t threads_running_checkpoint;
    {
      // Use SOA to prevent deadlocks if multiple threads are calling Dump() at the same time.
      ScopedObjectAccess soa(self);
      threads_running_checkpoint = RunCheckpoint(&checkpoint);
    }
    if (threads_running_checkpoint != 0) {
      checkpoint.WaitForThreadsToRunThroughCheckpoint(threads_running_checkpoint);
    }
  } else {
    DumpUnattachedThreads(os, dump_native_stack);
  }
}

void ThreadList::AssertThreadsAreSuspended(Thread* self, Thread* ignore1, Thread* ignore2) {
  MutexLock mu(self, *Locks::thread_list_lock_);
  MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
  for (const auto& thread : list_) {
    if (thread != ignore1 && thread != ignore2) {
      CHECK(thread->IsSuspended())
            << "\nUnsuspended thread: <<" << *thread << "\n"
            << "self: <<" << *Thread::Current();
    }
  }
}

#if HAVE_TIMED_RWLOCK
// Attempt to rectify locks so that we dump thread list with required locks before exiting.
NO_RETURN static void UnsafeLogFatalForThreadSuspendAllTimeout() {
  // Increment gAborting before doing the thread list dump since we don't want any failures from
  // AssertThreadSuspensionIsAllowable in cases where thread suspension is not allowed.
  // See b/69044468.
  ++gAborting;
  Runtime* runtime = Runtime::Current();
  std::ostringstream ss;
  ss << "Thread suspend timeout\n";
  Locks::mutator_lock_->Dump(ss);
  ss << "\n";
  runtime->GetThreadList()->Dump(ss);
  --gAborting;
  LOG(FATAL) << ss.str();
  exit(0);
}
#endif

// Unlike suspending all threads where we can wait to acquire the mutator_lock_, suspending an
// individual thread requires polling. delay_us is the requested sleep wait. If delay_us is 0 then
// we use sched_yield instead of calling usleep.
// Although there is the possibility, here and elsewhere, that usleep could return -1 and
// errno = EINTR, there should be no problem if interrupted, so we do not check.
static void ThreadSuspendSleep(useconds_t delay_us) {
  if (delay_us == 0) {
    sched_yield();
  } else {
    usleep(delay_us);
  }
}

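// Runs `checkpoint_function` once for every thread in the list, including the caller. Runnable
// threads have a checkpoint request flag set and run the closure themselves at their next suspend
// check; threads already observed suspended have their suspend count raised and the closure is
// run on their behalf below, after which they are resumed. The optional `callback` runs while
// both thread_list_lock_ and thread_suspend_count_lock_ are held. The return value is the number
// of threads the closure runs for; callers that need to wait for completion typically feed it
// into a Barrier, as the DumpCheckpoint closure above does. (Summary comment added for
// readability; thread_list.h remains the authoritative contract.)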
size_t ThreadList::RunCheckpoint(Closure* checkpoint_function, Closure* callback) {
  Thread* self = Thread::Current();
  Locks::mutator_lock_->AssertNotExclusiveHeld(self);
  Locks::thread_list_lock_->AssertNotHeld(self);
  Locks::thread_suspend_count_lock_->AssertNotHeld(self);

  std::vector<Thread*> suspended_count_modified_threads;
  size_t count = 0;
  {
    // Call a checkpoint function for each thread. Threads which are suspended get their checkpoint
    // called manually.
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    count = list_.size();
    for (const auto& thread : list_) {
      if (thread != self) {
        while (true) {
          if (thread->RequestCheckpoint(checkpoint_function)) {
            // This thread will run its checkpoint some time in the near future.
            break;
          } else {
            // The thread is probably suspended; try to make sure that it stays suspended.
            if (thread->GetState() == kRunnable) {
              // The thread switched back to runnable - a spurious failure, try again.
              continue;
            }
            bool updated = thread->ModifySuspendCount(self, +1, nullptr, SuspendReason::kInternal);
            DCHECK(updated);
            suspended_count_modified_threads.push_back(thread);
            break;
          }
        }
      }
    }
    // Run the callback to be called inside this critical section.
    if (callback != nullptr) {
      callback->Run(self);
    }
  }

  // Run the checkpoint on ourself while we wait for threads to suspend.
  checkpoint_function->Run(self);

  // Run the checkpoint on the suspended threads.
  for (const auto& thread : suspended_count_modified_threads) {
    if (!thread->IsSuspended()) {
      ScopedTrace trace([&]() {
        std::ostringstream oss;
        thread->ShortDump(oss);
        return std::string("Waiting for suspension of thread ") + oss.str();
      });
      // Busy wait until the thread is suspended.
      const uint64_t start_time = NanoTime();
      do {
        ThreadSuspendSleep(kThreadSuspendInitialSleepUs);
      } while (!thread->IsSuspended());
      const uint64_t total_delay = NanoTime() - start_time;
      // Shouldn't need to wait for longer than 1000 microseconds.
      constexpr uint64_t kLongWaitThreshold = MsToNs(1);
      if (UNLIKELY(total_delay > kLongWaitThreshold)) {
        LOG(WARNING) << "Long wait of " << PrettyDuration(total_delay) << " for "
            << *thread << " suspension!";
      }
    }
    // We know for sure that the thread is suspended at this point.
    checkpoint_function->Run(thread);
    {
      MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
      bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
      DCHECK(updated);
    }
  }

  {
    // Imitate ResumeAll, threads may be waiting on Thread::resume_cond_ since we raised their
    // suspend count. Now the suspend_count_ is lowered so we must do the broadcast.
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    Thread::resume_cond_->Broadcast(self);
  }

  return count;
}

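// Runs an "empty" checkpoint on the other threads: no closure is executed, the point is only that
// every thread which was runnable at the time of the request passes the empty checkpoint barrier,
// proving it has reached a suspend check since then. The concurrent copying collector uses this
// (see gc/collector/concurrent_copying.cc) to make sure all mutators have observed a state
// change, e.g. around weak reference access. (Summary comment added for readability.)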
void ThreadList::RunEmptyCheckpoint() {
  Thread* self = Thread::Current();
  Locks::mutator_lock_->AssertNotExclusiveHeld(self);
  Locks::thread_list_lock_->AssertNotHeld(self);
  Locks::thread_suspend_count_lock_->AssertNotHeld(self);
  std::vector<uint32_t> runnable_thread_ids;
  size_t count = 0;
  Barrier* barrier = empty_checkpoint_barrier_.get();
  barrier->Init(self, 0);
  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    for (Thread* thread : list_) {
      if (thread != self) {
        while (true) {
          if (thread->RequestEmptyCheckpoint()) {
            // This thread will run an empty checkpoint (decrement the empty checkpoint barrier)
            // some time in the near future.
            ++count;
            if (kIsDebugBuild) {
              runnable_thread_ids.push_back(thread->GetThreadId());
            }
            break;
          }
          if (thread->GetState() != kRunnable) {
            // The thread was seen suspended, so we are done: it cannot be in the middle of a
            // mutator heap access.
            break;
          }
        }
      }
    }
  }

  // Wake up the threads blocking for weak ref access so that they will respond to the empty
  // checkpoint request. Otherwise we will hang as they are blocking in the kRunnable state.
  Runtime::Current()->GetHeap()->GetReferenceProcessor()->BroadcastForSlowPath(self);
  Runtime::Current()->BroadcastForNewSystemWeaks(/*broadcast_for_checkpoint*/true);
  {
    ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
    uint64_t total_wait_time = 0;
    bool first_iter = true;
    while (true) {
      // Wake up the runnable threads blocked on the mutexes that another thread, which is blocked
      // on a weak ref access, holds (indirectly blocking for weak ref access through another thread
      // and a mutex.) This needs to be done periodically because the thread may be preempted
      // between the CheckEmptyCheckpointFromMutex call and the subsequent futex wait in
      // Mutex::ExclusiveLock, etc. when the wakeup via WakeupToRespondToEmptyCheckpoint
      // arrives. This could cause a *very rare* deadlock, if not repeated. Most of the cases are
      // handled in the first iteration.
      for (BaseMutex* mutex : Locks::expected_mutexes_on_weak_ref_access_) {
        mutex->WakeupToRespondToEmptyCheckpoint();
      }
      static constexpr uint64_t kEmptyCheckpointPeriodicTimeoutMs = 100;  // 100ms
      static constexpr uint64_t kEmptyCheckpointTotalTimeoutMs = 600 * 1000;  // 10 minutes.
      size_t barrier_count = first_iter ? count : 0;
      first_iter = false;  // Don't add to the barrier count from the second iteration on.
      bool timed_out = barrier->Increment(self, barrier_count, kEmptyCheckpointPeriodicTimeoutMs);
      if (!timed_out) {
        break;  // Success
      }
      // This is a very rare case.
      total_wait_time += kEmptyCheckpointPeriodicTimeoutMs;
      if (kIsDebugBuild && total_wait_time > kEmptyCheckpointTotalTimeoutMs) {
        std::ostringstream ss;
        ss << "Empty checkpoint timeout\n";
        ss << "Barrier count " << barrier->GetCount(self) << "\n";
        ss << "Runnable thread IDs";
        for (uint32_t tid : runnable_thread_ids) {
          ss << " " << tid;
        }
        ss << "\n";
        Locks::mutator_lock_->Dump(ss);
        ss << "\n";
        LOG(FATAL_WITHOUT_ABORT) << ss.str();
        // Some threads in 'runnable_thread_ids' are probably stuck. Try to dump their stacks.
        // Avoid using ThreadList::Dump() initially because it is likely to get stuck as well.
        {
          ScopedObjectAccess soa(self);
          MutexLock mu1(self, *Locks::thread_list_lock_);
          for (Thread* thread : GetList()) {
            uint32_t tid = thread->GetThreadId();
            bool is_in_runnable_thread_ids =
                std::find(runnable_thread_ids.begin(), runnable_thread_ids.end(), tid) !=
                runnable_thread_ids.end();
            if (is_in_runnable_thread_ids &&
                thread->ReadFlag(kEmptyCheckpointRequest)) {
              // Found a runnable thread that hasn't responded to the empty checkpoint request.
              // Assume it's stuck and safe to dump its stack.
              thread->Dump(LOG_STREAM(FATAL_WITHOUT_ABORT),
                           /*dump_native_stack*/ true,
                           /*backtrace_map*/ nullptr,
                           /*force_dump_stack*/ true);
            }
          }
        }
        LOG(FATAL_WITHOUT_ABORT)
            << "Dumped runnable threads that haven't responded to empty checkpoint.";
        // Now use ThreadList::Dump() to dump more threads, noting it may get stuck.
        Dump(LOG_STREAM(FATAL_WITHOUT_ABORT));
        LOG(FATAL) << "Dumped all threads.";
      }
    }
  }
}

// Request that a checkpoint function be run on all active (non-suspended)
// threads.  Returns the number of successful requests.
size_t ThreadList::RunCheckpointOnRunnableThreads(Closure* checkpoint_function) {
  Thread* self = Thread::Current();
  Locks::mutator_lock_->AssertNotExclusiveHeld(self);
  Locks::thread_list_lock_->AssertNotHeld(self);
  Locks::thread_suspend_count_lock_->AssertNotHeld(self);
  CHECK_NE(self->GetState(), kRunnable);

  size_t count = 0;
  {
    // Call a checkpoint function for each non-suspended thread.
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    for (const auto& thread : list_) {
      if (thread != self) {
        if (thread->RequestCheckpoint(checkpoint_function)) {
          // This thread will run its checkpoint some time in the near future.
          count++;
        }
      }
    }
  }

  // Return the number of threads that will run the checkpoint function.
  return count;
}

// A checkpoint/suspend-all hybrid to switch thread roots from
// from-space to to-space refs. Used to synchronize threads at a point
// to mark the initiation of marking while maintaining the to-space
// invariant.
size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor,
                                   Closure* flip_callback,
                                   gc::collector::GarbageCollector* collector,
                                   gc::GcPauseListener* pause_listener) {
  TimingLogger::ScopedTiming split("ThreadListFlip", collector->GetTimings());
  Thread* self = Thread::Current();
  Locks::mutator_lock_->AssertNotHeld(self);
  Locks::thread_list_lock_->AssertNotHeld(self);
  Locks::thread_suspend_count_lock_->AssertNotHeld(self);
  CHECK_NE(self->GetState(), kRunnable);

  collector->GetHeap()->ThreadFlipBegin(self);  // Sync with JNI critical calls.

  // ThreadFlipBegin happens before we suspend all the threads, so it does not count towards the
  // pause.
  const uint64_t suspend_start_time = NanoTime();
  SuspendAllInternal(self, self, nullptr);
  if (pause_listener != nullptr) {
    pause_listener->StartPause();
  }

  // Run the flip callback for the collector.
  Locks::mutator_lock_->ExclusiveLock(self);
  suspend_all_historam_.AdjustAndAddValue(NanoTime() - suspend_start_time);
  flip_callback->Run(self);
  Locks::mutator_lock_->ExclusiveUnlock(self);
  collector->RegisterPause(NanoTime() - suspend_start_time);
  if (pause_listener != nullptr) {
    pause_listener->EndPause();
  }

  // Resume runnable threads.
  size_t runnable_thread_count = 0;
  std::vector<Thread*> other_threads;
  {
    TimingLogger::ScopedTiming split2("ResumeRunnableThreads", collector->GetTimings());
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    --suspend_all_count_;
    for (const auto& thread : list_) {
      // Set the flip function for all threads because Thread::DumpState/DumpJavaStack() (invoked by
      // a checkpoint) may cause the flip function to be run for a runnable/suspended thread before
      // a runnable thread runs it for itself or we run it for a suspended thread below.
      thread->SetFlipFunction(thread_flip_visitor);
      if (thread == self) {
        continue;
      }
      // Resume early the threads that were runnable but are suspended just for this thread flip, or
      // that are about to transition from non-runnable (e.g. kNative at the SOA entry in a JNI
      // function) to runnable (both cases waiting inside Thread::TransitionFromSuspendedToRunnable),
      // or that are waiting for the thread flip to end at the JNI critical section entry
      // (kWaitingForGcThreadFlip).
      ThreadState state = thread->GetState();
      if ((state == kWaitingForGcThreadFlip || thread->IsTransitioningToRunnable()) &&
          thread->GetSuspendCount() == 1) {
        // The thread will resume right after the broadcast.
        bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
        DCHECK(updated);
        ++runnable_thread_count;
      } else {
        other_threads.push_back(thread);
      }
    }
    Thread::resume_cond_->Broadcast(self);
  }

  collector->GetHeap()->ThreadFlipEnd(self);

  // Run the closure on the other threads and let them resume.
  {
    TimingLogger::ScopedTiming split3("FlipOtherThreads", collector->GetTimings());
    ReaderMutexLock mu(self, *Locks::mutator_lock_);
    for (const auto& thread : other_threads) {
      Closure* flip_func = thread->GetFlipFunction();
      if (flip_func != nullptr) {
        flip_func->Run(thread);
      }
    }
    // Run it for self.
    Closure* flip_func = self->GetFlipFunction();
    if (flip_func != nullptr) {
      flip_func->Run(self);
    }
  }

  // Resume other threads.
  {
    TimingLogger::ScopedTiming split4("ResumeOtherThreads", collector->GetTimings());
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    for (const auto& thread : other_threads) {
      bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
      DCHECK(updated);
    }
    Thread::resume_cond_->Broadcast(self);
  }

  return runnable_thread_count + other_threads.size() + 1;  // +1 for self.
}

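// Illustrative usage sketch (callers normally use the ScopedSuspendAll RAII helper declared in
// thread_list.h rather than pairing these calls by hand):
//
//   {
//     ScopedSuspendAll ssa(__FUNCTION__);  // Calls SuspendAll(cause) here.
//     // ... mutate state that requires every mutator thread to be stopped ...
//   }                                      // Calls ResumeAll() in the destructor.
//
// SuspendAll() leaves the caller holding mutator_lock_ exclusively; the matching ResumeAll()
// releases it.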
void ThreadList::SuspendAll(const char* cause, bool long_suspend) {
  Thread* self = Thread::Current();

  if (self != nullptr) {
    VLOG(threads) << *self << " SuspendAll for " << cause << " starting...";
  } else {
    VLOG(threads) << "Thread[null] SuspendAll for " << cause << " starting...";
  }
  {
    ScopedTrace trace("Suspending mutator threads");
    const uint64_t start_time = NanoTime();

    SuspendAllInternal(self, self);
    // All threads are known to have suspended (but a thread may still own the mutator lock)
    // Make sure this thread grabs exclusive access to the mutator lock and its protected data.
#if HAVE_TIMED_RWLOCK
    while (true) {
      if (Locks::mutator_lock_->ExclusiveLockWithTimeout(self,
                                                         NsToMs(thread_suspend_timeout_ns_),
                                                         0)) {
        break;
      } else if (!long_suspend_) {
        // Reading long_suspend without the mutator lock is slightly racy, in some rare cases, this
        // could result in a thread suspend timeout.
        // Timeout if we wait more than thread_suspend_timeout_ns_ nanoseconds.
        UnsafeLogFatalForThreadSuspendAllTimeout();
      }
    }
#else
    Locks::mutator_lock_->ExclusiveLock(self);
#endif

    long_suspend_ = long_suspend;

    const uint64_t end_time = NanoTime();
    const uint64_t suspend_time = end_time - start_time;
    suspend_all_historam_.AdjustAndAddValue(suspend_time);
    if (suspend_time > kLongThreadSuspendThreshold) {
      LOG(WARNING) << "Suspending all threads took: " << PrettyDuration(suspend_time);
    }

    if (kDebugLocking) {
      // Debug check that all threads are suspended.
      AssertThreadsAreSuspended(self, self);
    }
  }
  ATRACE_BEGIN((std::string("Mutator threads suspended for ") + cause).c_str());

  if (self != nullptr) {
    VLOG(threads) << *self << " SuspendAll complete";
  } else {
    VLOG(threads) << "Thread[null] SuspendAll complete";
  }
}

// Ensures all threads running Java suspend and that those not running Java don't start.
// Debugger thread might be set to kRunnable for a short period of time after the
// SuspendAllInternal. This is safe because it will be set back to suspended state before
// the SuspendAll returns.
void ThreadList::SuspendAllInternal(Thread* self,
                                    Thread* ignore1,
                                    Thread* ignore2,
                                    SuspendReason reason) {
  Locks::mutator_lock_->AssertNotExclusiveHeld(self);
  Locks::thread_list_lock_->AssertNotHeld(self);
  Locks::thread_suspend_count_lock_->AssertNotHeld(self);
  if (kDebugLocking && self != nullptr) {
    CHECK_NE(self->GetState(), kRunnable);
  }

  // First request that all threads suspend, then wait for them to suspend before
  // returning. This suspension scheme also relies on other behaviour:
  // 1. Threads cannot be deleted while they are suspended or have a suspend-
  //    request flag set - (see Unregister() below).
  // 2. When threads are created, they are created in a suspended state (actually
  //    kNative) and will never begin executing Java code without first checking
  //    the suspend-request flag.

  // The atomic counter for number of threads that need to pass the barrier.
  AtomicInteger pending_threads;
  uint32_t num_ignored = 0;
  if (ignore1 != nullptr) {
    ++num_ignored;
  }
  if (ignore2 != nullptr && ignore1 != ignore2) {
    ++num_ignored;
  }
  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    // Update global suspend all state for attaching threads.
    ++suspend_all_count_;
    if (reason == SuspendReason::kForDebugger) {
      ++debug_suspend_all_count_;
    }
    pending_threads.StoreRelaxed(list_.size() - num_ignored);
    // Increment everybody's suspend count (except those that should be ignored).
    for (const auto& thread : list_) {
      if (thread == ignore1 || thread == ignore2) {
        continue;
      }
      VLOG(threads) << "requesting thread suspend: " << *thread;
      bool updated = thread->ModifySuspendCount(self, +1, &pending_threads, reason);
      DCHECK(updated);

      // Must install the pending_threads counter first, then check thread->IsSuspended() and clear
      // the counter. Otherwise there's a race with Thread::TransitionFromRunnableToSuspended()
      // that can lead a thread to miss a call to PassActiveSuspendBarriers().
      if (thread->IsSuspended()) {
        // Only clear the counter for the current thread.
        thread->ClearSuspendBarrier(&pending_threads);
        pending_threads.FetchAndSubSequentiallyConsistent(1);
      }
    }
  }

  // Wait for the barrier to be passed by all runnable threads. This wait
  // is done with a timeout so that we can detect problems.
#if ART_USE_FUTEXES
  timespec wait_timeout;
  InitTimeSpec(false, CLOCK_MONOTONIC, NsToMs(thread_suspend_timeout_ns_), 0, &wait_timeout);
#endif
  const uint64_t start_time = NanoTime();
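  // Roughly: each target thread decrements pending_threads (through the suspend barrier installed
  // above) once it has actually suspended, and wakes this thread via a futex wake on the counter;
  // already-suspended threads were counted out directly in the loop above. We wait here until the
  // counter hits zero, tolerating spurious wake-ups, and treat an expired timeout as fatal in
  // debug builds.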
  while (true) {
    int32_t cur_val = pending_threads.LoadRelaxed();
    if (LIKELY(cur_val > 0)) {
#if ART_USE_FUTEXES
      if (futex(pending_threads.Address(), FUTEX_WAIT, cur_val, &wait_timeout, nullptr, 0) != 0) {
        // EAGAIN and EINTR both indicate a spurious failure, try again from the beginning.
        if ((errno != EAGAIN) && (errno != EINTR)) {
          if (errno == ETIMEDOUT) {
            LOG(kIsDebugBuild ? ::android::base::FATAL : ::android::base::ERROR)
                << "Timed out waiting for threads to suspend, waited for "
                << PrettyDuration(NanoTime() - start_time);
          } else {
            PLOG(FATAL) << "futex wait failed for SuspendAllInternal()";
          }
        }
      }  // else re-check pending_threads in the next iteration (this may be a spurious wake-up).
#else
      // Spin wait. This is likely to be slow, but ART_USE_FUTEXES is set on most architectures.
      UNUSED(start_time);
#endif
    } else {
      CHECK_EQ(cur_val, 0);
      break;
    }
  }
}

void ThreadList::ResumeAll() {
  Thread* self = Thread::Current();

  if (self != nullptr) {
    VLOG(threads) << *self << " ResumeAll starting";
  } else {
    VLOG(threads) << "Thread[null] ResumeAll starting";
  }

  ATRACE_END();

  ScopedTrace trace("Resuming mutator threads");

  if (kDebugLocking) {
    // Debug check that all threads are suspended.
    AssertThreadsAreSuspended(self, self);
  }

  long_suspend_ = false;

  Locks::mutator_lock_->ExclusiveUnlock(self);
  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    // Update global suspend all state for attaching threads.
    --suspend_all_count_;
    // Decrement the suspend counts for all threads.
    for (const auto& thread : list_) {
      if (thread == self) {
        continue;
      }
      bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
      DCHECK(updated);
    }

    // Broadcast a notification to all suspended threads, some or all of
    // which may choose to wake up.  No need to wait for them.
    if (self != nullptr) {
      VLOG(threads) << *self << " ResumeAll waking others";
    } else {
      VLOG(threads) << "Thread[null] ResumeAll waking others";
    }
    Thread::resume_cond_->Broadcast(self);
  }

  if (self != nullptr) {
    VLOG(threads) << *self << " ResumeAll complete";
  } else {
    VLOG(threads) << "Thread[null] ResumeAll complete";
  }
}

bool ThreadList::Resume(Thread* thread, SuspendReason reason) {
  // This assumes there was an ATRACE_BEGIN when we suspended the thread.
  ATRACE_END();

  Thread* self = Thread::Current();
  DCHECK_NE(thread, self);
  VLOG(threads) << "Resume(" << reinterpret_cast<void*>(thread) << ") starting..." << reason;

  {
    // To check Contains.
    MutexLock mu(self, *Locks::thread_list_lock_);
    // To check IsSuspended.
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    if (UNLIKELY(!thread->IsSuspended())) {
      LOG(ERROR) << "Resume(" << reinterpret_cast<void*>(thread)
          << ") thread not suspended";
      return false;
    }
    if (!Contains(thread)) {
      // We only expect threads within the thread list to have been suspended; otherwise we can't
      // stop such threads from deleting themselves.
      LOG(ERROR) << "Resume(" << reinterpret_cast<void*>(thread)
          << ") thread not within thread list";
      return false;
    }
    if (UNLIKELY(!thread->ModifySuspendCount(self, -1, nullptr, reason))) {
      LOG(ERROR) << "Resume(" << reinterpret_cast<void*>(thread)
                 << ") could not modify suspend count.";
      return false;
    }
  }

  {
    VLOG(threads) << "Resume(" << reinterpret_cast<void*>(thread) << ") waking others";
    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
    Thread::resume_cond_->Broadcast(self);
  }

  VLOG(threads) << "Resume(" << reinterpret_cast<void*>(thread) << ") complete";
  return true;
}

static void ThreadSuspendByPeerWarning(Thread* self,
                                       LogSeverity severity,
                                       const char* message,
                                       jobject peer) {
  JNIEnvExt* env = self->GetJniEnv();
  ScopedLocalRef<jstring>
      scoped_name_string(env, static_cast<jstring>(env->GetObjectField(
          peer, WellKnownClasses::java_lang_Thread_name)));
  ScopedUtfChars scoped_name_chars(env, scoped_name_string.get());
  if (scoped_name_chars.c_str() == nullptr) {
      LOG(severity) << message << ": " << peer;
      env->ExceptionClear();
  } else {
      LOG(severity) << message << ": " << peer << ":" << scoped_name_chars.c_str();
  }
}

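// Suspends the thread whose java.lang.Thread peer is `peer`. If `request_suspension` is true the
// suspend count is raised here; otherwise a suspension request must already be in place. The loop
// then polls (sched_yield first, backing off to exponentially growing usleep calls) until the
// thread is observed suspended, in which case it is returned, or until thread_suspend_timeout_ns_
// elapses, in which case *timed_out is set and nullptr is returned. (Summary comment added;
// thread_list.h remains the authoritative contract.)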
Thread* ThreadList::SuspendThreadByPeer(jobject peer,
                                        bool request_suspension,
                                        SuspendReason reason,
                                        bool* timed_out) {
  const uint64_t start_time = NanoTime();
  useconds_t sleep_us = kThreadSuspendInitialSleepUs;
  *timed_out = false;
  Thread* const self = Thread::Current();
  Thread* suspended_thread = nullptr;
  VLOG(threads) << "SuspendThreadByPeer starting";
  while (true) {
    Thread* thread;
    {
      // Note: this will transition to runnable and potentially suspend. We ensure only one thread
      // is requesting another suspend, to avoid deadlock, by requiring this function be called
      // holding Locks::thread_list_suspend_thread_lock_. It's important that this thread suspend
      // rather than request thread suspension, to avoid potential cycles in threads requesting
      // each other's suspension.
      ScopedObjectAccess soa(self);
      MutexLock thread_list_mu(self, *Locks::thread_list_lock_);
      thread = Thread::FromManagedThread(soa, peer);
      if (thread == nullptr) {
        if (suspended_thread != nullptr) {
          MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
          // If we incremented the suspend count but the thread reset its peer, we need to
          // re-decrement it since it is shutting down and may deadlock the runtime in
          // ThreadList::WaitForOtherNonDaemonThreadsToExit.
          bool updated = suspended_thread->ModifySuspendCount(soa.Self(),
                                                              -1,
                                                              nullptr,
                                                              reason);
          DCHECK(updated);
        }
        ThreadSuspendByPeerWarning(self,
                                   ::android::base::WARNING,
                                   "No such thread for suspend",
                                   peer);
        return nullptr;
      }
      if (!Contains(thread)) {
        CHECK(suspended_thread == nullptr);
        VLOG(threads) << "SuspendThreadByPeer failed for unattached thread: "
            << reinterpret_cast<void*>(thread);
        return nullptr;
      }
      VLOG(threads) << "SuspendThreadByPeer found thread: " << *thread;
      {
        MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
        if (request_suspension) {
          if (self->GetSuspendCount() > 0) {
            // We hold the suspend count lock but another thread is trying to suspend us. It's not
            // safe to try to suspend another thread in case we get a cycle. Start the loop again,
            // which will allow this thread to be suspended.
            continue;
          }
          CHECK(suspended_thread == nullptr);
          suspended_thread = thread;
          bool updated = suspended_thread->ModifySuspendCount(self, +1, nullptr, reason);
          DCHECK(updated);
          request_suspension = false;
        } else {
          // If the caller isn't requesting suspension, a suspension should have already occurred.
          CHECK_GT(thread->GetSuspendCount(), 0);
        }
        // IsSuspended on the current thread will fail as the current thread is changed into
        // Runnable above. As the suspend count is now raised if this is the current thread
        // it will self suspend on transition to Runnable, making it hard to work with. It's simpler
        // to just explicitly handle the current thread in the callers to this code.
        CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger";
        // If thread is suspended (perhaps it was already not Runnable but didn't have a suspend
        // count, or else we've waited and it has self suspended) or is the current thread, we're
        // done.
        if (thread->IsSuspended()) {
          VLOG(threads) << "SuspendThreadByPeer thread suspended: " << *thread;
          if (ATRACE_ENABLED()) {
            std::string name;
            thread->GetThreadName(name);
            ATRACE_BEGIN(StringPrintf("SuspendThreadByPeer suspended %s for peer=%p", name.c_str(),
                                      peer).c_str());
          }
          return thread;
        }
        const uint64_t total_delay = NanoTime() - start_time;
        if (total_delay >= thread_suspend_timeout_ns_) {
          ThreadSuspendByPeerWarning(self,
                                     ::android::base::FATAL,
                                     "Thread suspension timed out",
                                     peer);
          if (suspended_thread != nullptr) {
            CHECK_EQ(suspended_thread, thread);
            bool updated = suspended_thread->ModifySuspendCount(soa.Self(),
                                                                -1,
                                                                nullptr,
                                                                reason);
            DCHECK(updated);
          }
          *timed_out = true;
          return nullptr;
        } else if (sleep_us == 0 &&
            total_delay > static_cast<uint64_t>(kThreadSuspendMaxYieldUs) * 1000) {
          // We have spun for kThreadSuspendMaxYieldUs time, switch to sleeps to prevent
          // excessive CPU usage.
          sleep_us = kThreadSuspendMaxYieldUs / 2;
        }
      }
      // Release locks and come out of runnable state.
    }
    VLOG(threads) << "SuspendThreadByPeer waiting to allow thread chance to suspend";
    ThreadSuspendSleep(sleep_us);
    // This may stay at 0 if sleep_us == 0, but this is WAI since we want to avoid using usleep at
    // all if possible. This shouldn't be an issue since time to suspend should always be small.
    sleep_us = std::min(sleep_us * 2, kThreadSuspendMaxSleepUs);
  }
}

static void ThreadSuspendByThreadIdWarning(LogSeverity severity,
                                           const char* message,
                                           uint32_t thread_id) {
  LOG(severity) << StringPrintf("%s: %d", message, thread_id);
}

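// Like SuspendThreadByPeer(), but identifies the target by its thin-lock thread id. Used, for
// example, when inflating a monitor owned by another thread, where only the owner's thread id is
// known. (Summary comment added.)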
Thread* ThreadList::SuspendThreadByThreadId(uint32_t thread_id,
                                            SuspendReason reason,
                                            bool* timed_out) {
  const uint64_t start_time = NanoTime();
  useconds_t sleep_us = kThreadSuspendInitialSleepUs;
  *timed_out = false;
  Thread* suspended_thread = nullptr;
  Thread* const self = Thread::Current();
  CHECK_NE(thread_id, kInvalidThreadId);
  VLOG(threads) << "SuspendThreadByThreadId starting";
  while (true) {
    {
      // Note: this will transition to runnable and potentially suspend. We ensure only one thread
      // is requesting another suspend, to avoid deadlock, by requiring this function be called
      // holding Locks::thread_list_suspend_thread_lock_. It's important that this thread suspend
      // rather than request thread suspension, to avoid potential cycles in threads requesting
      // each other's suspension.
      ScopedObjectAccess soa(self);
      MutexLock thread_list_mu(self, *Locks::thread_list_lock_);
      Thread* thread = nullptr;
      for (const auto& it : list_) {
        if (it->GetThreadId() == thread_id) {
          thread = it;
          break;
        }
      }
      if (thread == nullptr) {
        CHECK(suspended_thread == nullptr) << "Suspended thread " << suspended_thread
            << " no longer in thread list";
        // There's a race in inflating a lock and the owner giving up ownership and then dying.
        ThreadSuspendByThreadIdWarning(::android::base::WARNING,
                                       "No such thread id for suspend",
                                       thread_id);
        return nullptr;
      }
      VLOG(threads) << "SuspendThreadByThreadId found thread: " << *thread;
      DCHECK(Contains(thread));
      {
        MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
        if (suspended_thread == nullptr) {
          if (self->GetSuspendCount() > 0) {
            // We hold the suspend count lock but another thread is trying to suspend us. It's not
            // safe to try to suspend another thread in case we get a cycle. Start the loop again,
            // which will allow this thread to be suspended.
            continue;
          }
          bool updated = thread->ModifySuspendCount(self, +1, nullptr, reason);
          DCHECK(updated);
          suspended_thread = thread;
        } else {
          CHECK_EQ(suspended_thread, thread);
          // If the caller isn't requesting suspension, a suspension should have already occurred.
          CHECK_GT(thread->GetSuspendCount(), 0);
        }
        // IsSuspended on the current thread will fail as the current thread is changed into
        // Runnable above. As the suspend count is now raised if this is the current thread
        // it will self suspend on transition to Runnable, making it hard to work with. It's simpler
        // to just explicitly handle the current thread in the callers to this code.
        CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger";
        // If thread is suspended (perhaps it was already not Runnable but didn't have a suspend
        // count, or else we've waited and it has self suspended) or is the current thread, we're
        // done.
        if (thread->IsSuspended()) {
          if (ATRACE_ENABLED()) {
            std::string name;
            thread->GetThreadName(name);
            ATRACE_BEGIN(StringPrintf("SuspendThreadByThreadId suspended %s id=%d",
                                      name.c_str(), thread_id).c_str());
          }
          VLOG(threads) << "SuspendThreadByThreadId thread suspended: " << *thread;
          return thread;
        }
        const uint64_t total_delay = NanoTime() - start_time;
        if (total_delay >= thread_suspend_timeout_ns_) {
          ThreadSuspendByThreadIdWarning(::android::base::WARNING,
                                         "Thread suspension timed out",
                                         thread_id);
          if (suspended_thread != nullptr) {
            bool updated = thread->ModifySuspendCount(soa.Self(), -1, nullptr, reason);
            DCHECK(updated);
          }
          *timed_out = true;
          return nullptr;
        } else if (sleep_us == 0 &&
            total_delay > static_cast<uint64_t>(kThreadSuspendMaxYieldUs) * 1000) {
          // We have spun for kThreadSuspendMaxYieldUs time, switch to sleeps to prevent
          // excessive CPU usage.
          sleep_us = kThreadSuspendMaxYieldUs / 2;
        }
      }
      // Release locks and come out of runnable state.
    }
    VLOG(threads) << "SuspendThreadByThreadId waiting to allow thread chance to suspend";
    ThreadSuspendSleep(sleep_us);
    sleep_us = std::min(sleep_us * 2, kThreadSuspendMaxSleepUs);
  }
}

Thread* ThreadList::FindThreadByThreadId(uint32_t thread_id) {
  for (const auto& thread : list_) {
    if (thread->GetThreadId() == thread_id) {
      return thread;
    }
  }
  return nullptr;
}

void ThreadList::SuspendAllForDebugger() {
  Thread* self = Thread::Current();
  Thread* debug_thread = Dbg::GetDebugThread();

  VLOG(threads) << *self << " SuspendAllForDebugger starting...";

  SuspendAllInternal(self, self, debug_thread, SuspendReason::kForDebugger);
  // Block on the mutator lock until all Runnable threads release their share of access then
  // immediately unlock again.
#if HAVE_TIMED_RWLOCK
  // Timeout if we wait more than 30 seconds.
  if (!Locks::mutator_lock_->ExclusiveLockWithTimeout(self, 30 * 1000, 0)) {
    UnsafeLogFatalForThreadSuspendAllTimeout();
  } else {
    Locks::mutator_lock_->ExclusiveUnlock(self);
  }
#else
  Locks::mutator_lock_->ExclusiveLock(self);
  Locks::mutator_lock_->ExclusiveUnlock(self);
#endif
  // Disabled for the following race condition:
  // Thread 1 calls SuspendAllForDebugger, gets preempted after pulsing the mutator lock.
  // Thread 2 calls SuspendAll and SetStateUnsafe (perhaps from Dbg::Disconnected).
  // Thread 1 fails assertion that all threads are suspended due to thread 2 being in a runnable
  // state (from SetStateUnsafe).
  // AssertThreadsAreSuspended(self, self, debug_thread);

  VLOG(threads) << *self << " SuspendAllForDebugger complete";
}

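// Called on the event thread itself when it must suspend for the debugger: raises the thread's
// own debug suspend count (unless the debugger detached during an invoke request), tells JDWP
// that the suspension is complete, then blocks on Thread::resume_cond_ until the suspend count
// drops back to zero. (Summary comment added.)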
void ThreadList::SuspendSelfForDebugger() {
  Thread* const self = Thread::Current();
  self->SetReadyForDebugInvoke(true);

  // The debugger thread must not suspend itself due to debugger activity!
  Thread* debug_thread = Dbg::GetDebugThread();
  CHECK(self != debug_thread);
  CHECK_NE(self->GetState(), kRunnable);
  Locks::mutator_lock_->AssertNotHeld(self);

  // The debugger may have detached while we were executing an invoke request. In that case, we
  // must not suspend ourself.
  DebugInvokeReq* pReq = self->GetInvokeReq();
  const bool skip_thread_suspension = (pReq != nullptr && !Dbg::IsDebuggerActive());
  if (!skip_thread_suspension) {
    // Collisions with other suspends aren't really interesting. We want
    // to ensure that we're the only one fiddling with the suspend count
    // though.
    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
    bool updated = self->ModifySuspendCount(self, +1, nullptr, SuspendReason::kForDebugger);
    DCHECK(updated);
    CHECK_GT(self->GetSuspendCount(), 0);

    VLOG(threads) << *self << " self-suspending (debugger)";
  } else {
    // We must no longer be subject to debugger suspension.
    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
    CHECK_EQ(self->GetDebugSuspendCount(), 0) << "Debugger detached without resuming us";

    VLOG(threads) << *self << " not self-suspending because debugger detached during invoke";
  }

  // If the debugger requested an invoke, we need to send the reply and clear the request.
  if (pReq != nullptr) {
    Dbg::FinishInvokeMethod(pReq);
    self->ClearDebugInvokeReq();
    pReq = nullptr;  // object has been deleted, clear it for safety.
  }

  // Tell JDWP that we've completed suspension. The JDWP thread can't
  // tell us to resume before we're fully asleep because we hold the
  // suspend count lock.
  Dbg::ClearWaitForEventThread();

  {
    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
    while (self->GetSuspendCount() != 0) {
      Thread::resume_cond_->Wait(self);
      if (self->GetSuspendCount() != 0) {
        // The condition was signaled but we're still suspended. This
        // can happen when we suspend then resume all threads to
        // update instrumentation or compute monitor info. This can
        // also happen if the debugger lets go while a SIGQUIT thread
        // dump event is pending (assuming SignalCatcher was resumed for
        // just long enough to try to grab the thread-suspend lock).
        VLOG(jdwp) << *self << " still suspended after undo "
                   << "(suspend count=" << self->GetSuspendCount() << ", "
                   << "debug suspend count=" << self->GetDebugSuspendCount() << ")";
      }
    }
    CHECK_EQ(self->GetSuspendCount(), 0);
  }

  self->SetReadyForDebugInvoke(false);
  VLOG(threads) << *self << " self-reviving (debugger)";
}

void ThreadList::ResumeAllForDebugger() {
  Thread* self = Thread::Current();
  Thread* debug_thread = Dbg::GetDebugThread();

  VLOG(threads) << *self << " ResumeAllForDebugger starting...";

  // Threads can't resume if we exclusively hold the mutator lock.
  Locks::mutator_lock_->AssertNotExclusiveHeld(self);

  {
    MutexLock thread_list_mu(self, *Locks::thread_list_lock_);
    {
      MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
      // Update global suspend all state for attaching threads.
      DCHECK_GE(suspend_all_count_, debug_suspend_all_count_);
      if (debug_suspend_all_count_ > 0) {
        --suspend_all_count_;
        --debug_suspend_all_count_;
      } else {
        // We've been asked to resume all threads without being asked to
        // suspend them all before. That may happen if a debugger tries
        // to resume some suspended threads (with suspend count == 1)
        // at once with a VirtualMachine.Resume command. Let's print a
        // warning.
        LOG(WARNING) << "Debugger attempted to resume all threads without "
                     << "having suspended them all before.";
      }
      // Decrement everybody's suspend count (except our own).
      for (const auto& thread : list_) {
        if (thread == self || thread == debug_thread) {
          continue;
        }
        if (thread->GetDebugSuspendCount() == 0) {
          // This thread may have been individually resumed with ThreadReference.Resume.
          continue;
        }
        VLOG(threads) << "requesting thread resume: " << *thread;
        bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kForDebugger);
        DCHECK(updated);
      }
    }
  }

  {
    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
    Thread::resume_cond_->Broadcast(self);
  }

  VLOG(threads) << *self << " ResumeAllForDebugger complete";
}

void ThreadList::UndoDebuggerSuspensions() {
  Thread* self = Thread::Current();

  VLOG(threads) << *self << " UndoDebuggerSuspensions starting";

  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    // Update global suspend all state for attaching threads.
    suspend_all_count_ -= debug_suspend_all_count_;
    debug_suspend_all_count_ = 0;
    // Update running threads.
    for (const auto& thread : list_) {
      if (thread == self || thread->GetDebugSuspendCount() == 0) {
        continue;
      }
      bool suspended = thread->ModifySuspendCount(self,
                                                  -thread->GetDebugSuspendCount(),
                                                  nullptr,
                                                  SuspendReason::kForDebugger);
      DCHECK(suspended);
    }
  }

  {
    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
    Thread::resume_cond_->Broadcast(self);
  }

  VLOG(threads) << "UndoDebuggerSuspensions(" << *self << ") complete";
}

void ThreadList::WaitForOtherNonDaemonThreadsToExit() {
  ScopedTrace trace(__PRETTY_FUNCTION__);
  Thread* self = Thread::Current();
  Locks::mutator_lock_->AssertNotHeld(self);
  while (true) {
    {
      // No more threads can be born after we start to shut down.
      MutexLock mu(self, *Locks::runtime_shutdown_lock_);
      CHECK(Runtime::Current()->IsShuttingDownLocked());
      CHECK_EQ(Runtime::Current()->NumberOfThreadsBeingBorn(), 0U);
    }
    MutexLock mu(self, *Locks::thread_list_lock_);
    // Also wait for any threads that are unregistering to finish. This is required so that no
    // threads access the thread list after it is deleted. TODO: This may not work for user daemon
    // threads since they could unregister at the wrong time.
    bool done = unregistering_count_ == 0;
    if (done) {
      for (const auto& thread : list_) {
        if (thread != self && !thread->IsDaemon()) {
          done = false;
          break;
        }
      }
    }
    if (done) {
      break;
    }
    // Wait for another thread to exit before re-checking.
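    // Note: the wait releases thread_list_lock_ while blocked and reacquires it before returning.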
    Locks::thread_exit_cond_->Wait(self);
  }
}

void ThreadList::SuspendAllDaemonThreadsForShutdown() {
  ScopedTrace trace(__PRETTY_FUNCTION__);
  Thread* self = Thread::Current();
  size_t daemons_left = 0;
  {
    // Tell all the daemons it's time to suspend.
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    for (const auto& thread : list_) {
      // This is only run after all non-daemon threads have exited, so the remainder should all be
      // daemons.
      CHECK(thread->IsDaemon()) << *thread;
      if (thread != self) {
        bool updated = thread->ModifySuspendCount(self, +1, nullptr, SuspendReason::kInternal);
        DCHECK(updated);
        ++daemons_left;
      }
      // We are shutting down the runtime, so set the JNI functions of all the JNIEnvs to be
      // the sleep forever one.
      thread->GetJniEnv()->SetFunctionsToRuntimeShutdownFunctions();
    }
  }
  // If we have any daemons left, wait 200ms to reduce the chance that they are stuck at a point
  // where they are about to access runtime state but are not yet in a runnable state (e.g. in
  // monitor code, or waking up from a condition variable). TODO: Try and see if there is a better
  // way to wait for daemon threads to be in a blocked state.
  if (daemons_left > 0) {
    static constexpr size_t kDaemonSleepTime = 200 * 1000;
    usleep(kDaemonSleepTime);
  }
  // Give the threads a chance to suspend, complaining if they're slow.
  bool have_complained = false;
  static constexpr size_t kTimeoutMicroseconds = 2000 * 1000;
  static constexpr size_t kSleepMicroseconds = 1000;
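  // Poll once per millisecond for up to two seconds, returning as soon as no daemon (other than
  // self) is still in the kRunnable state.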
  for (size_t i = 0; i < kTimeoutMicroseconds / kSleepMicroseconds; ++i) {
    bool all_suspended = true;
    {
      MutexLock mu(self, *Locks::thread_list_lock_);
      for (const auto& thread : list_) {
        if (thread != self && thread->GetState() == kRunnable) {
          if (!have_complained) {
            LOG(WARNING) << "daemon thread not yet suspended: " << *thread;
            have_complained = true;
          }
          all_suspended = false;
        }
      }
    }
    if (all_suspended) {
      return;
    }
    usleep(kSleepMicroseconds);
  }
  LOG(WARNING) << "timed out suspending all daemon threads";
}

void ThreadList::Register(Thread* self) {
  DCHECK_EQ(self, Thread::Current());
  CHECK(!shut_down_);

  if (VLOG_IS_ON(threads)) {
    std::ostringstream oss;
    self->ShortDump(oss);  // We don't hold the mutator_lock_ yet and so cannot call Dump.
    LOG(INFO) << "ThreadList::Register() " << *self << "\n" << oss.str();
  }

  // Atomically add self to the thread list and make its thread_suspend_count_ reflect ongoing
  // SuspendAll requests.
  MutexLock mu(self, *Locks::thread_list_lock_);
  MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
  CHECK_GE(suspend_all_count_, debug_suspend_all_count_);
  // Modify the suspend count in increments of 1 to maintain invariants in ModifySuspendCount.
  // While this isn't particularly efficient, the suspend counts are most commonly 0 or 1.
  for (int delta = debug_suspend_all_count_; delta > 0; delta--) {
    bool updated = self->ModifySuspendCount(self, +1, nullptr, SuspendReason::kForDebugger);
    DCHECK(updated);
  }
  for (int delta = suspend_all_count_ - debug_suspend_all_count_; delta > 0; delta--) {
    bool updated = self->ModifySuspendCount(self, +1, nullptr, SuspendReason::kInternal);
    DCHECK(updated);
  }
  CHECK(!Contains(self));
  list_.push_back(self);
  if (kUseReadBarrier) {
    gc::collector::ConcurrentCopying* const cc =
        Runtime::Current()->GetHeap()->ConcurrentCopyingCollector();
    // Initialize according to the state of the CC collector.
    self->SetIsGcMarkingAndUpdateEntrypoints(cc->IsMarking());
    if (cc->IsUsingReadBarrierEntrypoints()) {
      self->SetReadBarrierEntrypoints();
    }
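    // Match the collector's current weak reference access state so the new thread starts out
    // consistent with the ongoing GC phase.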
    self->SetWeakRefAccessEnabled(cc->IsWeakRefAccessEnabled());
  }
}

void ThreadList::Unregister(Thread* self) {
  DCHECK_EQ(self, Thread::Current());
  CHECK_NE(self->GetState(), kRunnable);
  Locks::mutator_lock_->AssertNotHeld(self);

  VLOG(threads) << "ThreadList::Unregister() " << *self;

  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    ++unregistering_count_;
  }

  // Any time-consuming destruction, plus anything that can call back into managed code or
  // suspend and so on, must happen at this point, and not in ~Thread. The self->Destroy is what
  // causes the threads to join. It is important to do this after incrementing unregistering_count_
  // since we want the runtime to wait for the daemon threads to exit before deleting the thread
  // list.
  self->Destroy();

  // If tracing, remember thread id and name before thread exits.
  Trace::StoreExitingThreadInfo(self);

  uint32_t thin_lock_id = self->GetThreadId();
  while (true) {
    // Remove and delete the Thread* while holding the thread_list_lock_ and
    // thread_suspend_count_lock_ so that the unregistering thread cannot be suspended.
    // Note: |self| is still a valid pointer here; it is only deleted after this loop exits.
    MutexLock mu(self, *Locks::thread_list_lock_);
    if (!Contains(self)) {
      std::string thread_name;
      self->GetThreadName(thread_name);
      std::ostringstream os;
      DumpNativeStack(os, GetTid(), nullptr, "  native: ", nullptr);
      LOG(ERROR) << "Request to unregister unattached thread " << thread_name << "\n" << os.str();
      break;
    } else {
      MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
      if (!self->IsSuspended()) {
        list_.remove(self);
        break;
      }
    }
    // We failed to remove the thread due to a suspend request, so loop and try again.
  }
  delete self;

  // Release the thread ID after the thread is finished and deleted to avoid cases where we can
  // temporarily have multiple threads with the same thread id. When this occurs, it causes
  // problems in FindThreadByThreadId / SuspendThreadByThreadId.
  ReleaseThreadId(nullptr, thin_lock_id);

  // Clear the TLS data, so that the underlying native thread is recognizably detached.
  // (It may wish to reattach later.)
#ifdef ART_TARGET_ANDROID
  __get_tls()[TLS_SLOT_ART_THREAD_SELF] = nullptr;
#else
  CHECK_PTHREAD_CALL(pthread_setspecific, (Thread::pthread_key_self_, nullptr), "detach self");
#endif

  // Signal that a thread just detached.
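  // |self| has already been deleted above, so pass nullptr rather than a stale Thread* to the
  // lock and the broadcast below.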
  MutexLock mu(nullptr, *Locks::thread_list_lock_);
  --unregistering_count_;
  Locks::thread_exit_cond_->Broadcast(nullptr);
}

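// Note: iterates list_ without taking thread_list_lock_; callers are responsible for making sure
// the thread list cannot change underneath them.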
void ThreadList::ForEach(void (*callback)(Thread*, void*), void* context) {
  for (const auto& thread : list_) {
    callback(thread, context);
  }
}

void ThreadList::VisitRootsForSuspendedThreads(RootVisitor* visitor) {
  Thread* const self = Thread::Current();
  std::vector<Thread*> threads_to_visit;

  // Tell threads to suspend and copy them into list.
  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    for (Thread* thread : list_) {
      bool suspended = thread->ModifySuspendCount(self, +1, nullptr, SuspendReason::kInternal);
      DCHECK(suspended);
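      // Only visit threads that are already suspended (or |self|); rather than waiting for the
      // rest to suspend, undo the increment and skip them.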
      if (thread == self || thread->IsSuspended()) {
        threads_to_visit.push_back(thread);
      } else {
        bool resumed = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
        DCHECK(resumed);
      }
    }
  }

  // Visit roots without holding thread_list_lock_ and thread_suspend_count_lock_ to prevent lock
  // order violations.
  for (Thread* thread : threads_to_visit) {
    thread->VisitRoots(visitor, kVisitRootFlagAllRoots);
  }

  // Restore suspend counts.
  {
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    for (Thread* thread : threads_to_visit) {
      bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
      DCHECK(updated);
    }
  }
}

void ThreadList::VisitRoots(RootVisitor* visitor, VisitRootFlags flags) const {
  MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
  for (const auto& thread : list_) {
    thread->VisitRoots(visitor, flags);
  }
}

uint32_t ThreadList::AllocThreadId(Thread* self) {
  MutexLock mu(self, *Locks::allocated_thread_ids_lock_);
  for (size_t i = 0; i < allocated_ids_.size(); ++i) {
    if (!allocated_ids_[i]) {
      allocated_ids_.set(i);
      return i + 1;  // Zero is reserved to mean "invalid".
    }
  }
  LOG(FATAL) << "Out of internal thread ids";
  return 0;
}

void ThreadList::ReleaseThreadId(Thread* self, uint32_t id) {
  MutexLock mu(self, *Locks::allocated_thread_ids_lock_);
  --id;  // Zero is reserved to mean "invalid".
  DCHECK(allocated_ids_[id]) << id;
  allocated_ids_.reset(id);
}

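// RAII helper that suspends all threads in the constructor and resumes them in the destructor.
// A minimal usage sketch (the cause string and scope are illustrative, not from a real call site):
//
//   {
//     ScopedSuspendAll ssa("example cause");
//     // All other threads are suspended here; safe to inspect or mutate runtime-wide state.
//   }  // ~ScopedSuspendAll() resumes all threads.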
ScopedSuspendAll::ScopedSuspendAll(const char* cause, bool long_suspend) {
  Runtime::Current()->GetThreadList()->SuspendAll(cause, long_suspend);
}

ScopedSuspendAll::~ScopedSuspendAll() {
  Runtime::Current()->GetThreadList()->ResumeAll();
}

}  // namespace art