thread_watcher.cc revision a1401311d1ab56c4ed0a474bd38c108f75cb0cd9
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/metrics/thread_watcher.h"
6
7#include <math.h>  // ceil
8
9#include "base/bind.h"
10#include "base/compiler_specific.h"
11#include "base/debug/alias.h"
12#include "base/debug/dump_without_crashing.h"
13#include "base/lazy_instance.h"
14#include "base/strings/string_number_conversions.h"
15#include "base/strings/string_split.h"
16#include "base/strings/string_tokenizer.h"
17#include "base/strings/stringprintf.h"
18#include "base/threading/thread_restrictions.h"
19#include "build/build_config.h"
20#include "chrome/browser/metrics/metrics_service.h"
21#include "chrome/common/chrome_switches.h"
22#include "chrome/common/chrome_version_info.h"
23#include "chrome/common/logging_chrome.h"
24
25#if defined(OS_WIN)
26#include "base/win/windows_version.h"
27#endif
28
29using content::BrowserThread;
30
31namespace {
32
33// The following are unique function names for forcing the crash when a thread
34// is unresponsive. This makes it possible to tell from the callstack alone what
35// thread was unresponsive.
36//
37// We disable optimizations for this block of functions so the compiler doesn't
38// merge them all together.
39MSVC_DISABLE_OPTIMIZE()
40MSVC_PUSH_DISABLE_WARNING(4748)
41
42#ifndef NDEBUG
// Returns a null int pointer. NullPointerCrash() writes through it in debug
// builds to force a crash.
int* NullPointer() {
  int* const null_address = reinterpret_cast<int*>(NULL);
  return null_address;
}
46#endif
47
48void NullPointerCrash(int line_number) {
49#ifndef NDEBUG
50  *NullPointer() = line_number;  // Crash.
51#else
52  base::debug::DumpWithoutCrashing();
53#endif
54}
55
// Reports a hung shutdown through NullPointerCrash(). Kept as a distinct
// NOINLINE function so that its name shows up on the call stack.
NOINLINE void ShutdownCrash() {
  NullPointerCrash(__LINE__);
}
59
// Reports an unresponsive UI thread through NullPointerCrash(); the function
// name identifies the thread on the call stack.
NOINLINE void ThreadUnresponsive_UI() {
  NullPointerCrash(__LINE__);
}
63
// Reports an unresponsive DB thread through NullPointerCrash(); the function
// name identifies the thread on the call stack.
NOINLINE void ThreadUnresponsive_DB() {
  NullPointerCrash(__LINE__);
}
67
// Reports an unresponsive FILE thread through NullPointerCrash(); the function
// name identifies the thread on the call stack.
NOINLINE void ThreadUnresponsive_FILE() {
  NullPointerCrash(__LINE__);
}
71
// Reports an unresponsive FILE_USER_BLOCKING thread through
// NullPointerCrash(); the function name identifies the thread on the call
// stack.
NOINLINE void ThreadUnresponsive_FILE_USER_BLOCKING() {
  NullPointerCrash(__LINE__);
}
75
// Reports an unresponsive PROCESS_LAUNCHER thread through NullPointerCrash();
// the function name identifies the thread on the call stack.
NOINLINE void ThreadUnresponsive_PROCESS_LAUNCHER() {
  NullPointerCrash(__LINE__);
}
79
// Reports an unresponsive CACHE thread through NullPointerCrash(); the
// function name identifies the thread on the call stack.
NOINLINE void ThreadUnresponsive_CACHE() {
  NullPointerCrash(__LINE__);
}
83
// Reports an unresponsive IO thread through NullPointerCrash(); the function
// name identifies the thread on the call stack.
NOINLINE void ThreadUnresponsive_IO() {
  NullPointerCrash(__LINE__);
}
87
88MSVC_POP_WARNING()
89MSVC_ENABLE_OPTIMIZE();
90
91void CrashBecauseThreadWasUnresponsive(BrowserThread::ID thread_id) {
92  base::debug::Alias(&thread_id);
93
94  switch (thread_id) {
95    case BrowserThread::UI:
96      return ThreadUnresponsive_UI();
97    case BrowserThread::DB:
98      return ThreadUnresponsive_DB();
99    case BrowserThread::FILE:
100      return ThreadUnresponsive_FILE();
101    case BrowserThread::FILE_USER_BLOCKING:
102      return ThreadUnresponsive_FILE_USER_BLOCKING();
103    case BrowserThread::PROCESS_LAUNCHER:
104      return ThreadUnresponsive_PROCESS_LAUNCHER();
105    case BrowserThread::CACHE:
106      return ThreadUnresponsive_CACHE();
107    case BrowserThread::IO:
108      return ThreadUnresponsive_IO();
109    case BrowserThread::ID_COUNT:
110      CHECK(false);  // This shouldn't actually be reached!
111      break;
112
113    // Omission of the default hander is intentional -- that way the compiler
114    // should warn if our switch becomes outdated.
115  }
116
117  CHECK(false) << "Unknown thread was unresponsive.";  // Shouldn't be reached.
118}
119
120}  // namespace
121
122// ThreadWatcher methods and members.
123ThreadWatcher::ThreadWatcher(const WatchingParams& params)
124    : thread_id_(params.thread_id),
125      thread_name_(params.thread_name),
126      watched_loop_(
127          BrowserThread::GetMessageLoopProxyForThread(params.thread_id)),
128      sleep_time_(params.sleep_time),
129      unresponsive_time_(params.unresponsive_time),
130      ping_time_(base::TimeTicks::Now()),
131      pong_time_(ping_time_),
132      ping_sequence_number_(0),
133      active_(false),
134      ping_count_(params.unresponsive_threshold),
135      response_time_histogram_(NULL),
136      unresponsive_time_histogram_(NULL),
137      unresponsive_count_(0),
138      hung_processing_complete_(false),
139      unresponsive_threshold_(params.unresponsive_threshold),
140      crash_on_hang_(params.crash_on_hang),
141      live_threads_threshold_(params.live_threads_threshold),
142      weak_ptr_factory_(this) {
143  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
144  Initialize();
145}
146
// Nothing to release explicitly; members clean up after themselves.
ThreadWatcher::~ThreadWatcher() {}
148
149// static
150void ThreadWatcher::StartWatching(const WatchingParams& params) {
151  DCHECK_GE(params.sleep_time.InMilliseconds(), 0);
152  DCHECK_GE(params.unresponsive_time.InMilliseconds(),
153            params.sleep_time.InMilliseconds());
154
155  // If we are not on WatchDogThread, then post a task to call StartWatching on
156  // WatchDogThread.
157  if (!WatchDogThread::CurrentlyOnWatchDogThread()) {
158    WatchDogThread::PostTask(
159        FROM_HERE,
160        base::Bind(&ThreadWatcher::StartWatching, params));
161    return;
162  }
163
164  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
165
166  // Create a new thread watcher object for the given thread and activate it.
167  ThreadWatcher* watcher = new ThreadWatcher(params);
168
169  DCHECK(watcher);
170  // If we couldn't register the thread watcher object, we are shutting down,
171  // then don't activate thread watching.
172  if (!ThreadWatcherList::IsRegistered(params.thread_id))
173    return;
174  watcher->ActivateThreadWatching();
175}
176
177void ThreadWatcher::ActivateThreadWatching() {
178  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
179  if (active_) return;
180  active_ = true;
181  ping_count_ = unresponsive_threshold_;
182  ResetHangCounters();
183  base::MessageLoop::current()->PostTask(
184      FROM_HERE,
185      base::Bind(&ThreadWatcher::PostPingMessage,
186                 weak_ptr_factory_.GetWeakPtr()));
187}
188
189void ThreadWatcher::DeActivateThreadWatching() {
190  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
191  active_ = false;
192  ping_count_ = 0;
193  weak_ptr_factory_.InvalidateWeakPtrs();
194}
195
196void ThreadWatcher::WakeUp() {
197  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
198  // There is some user activity, PostPingMessage task of thread watcher if
199  // needed.
200  if (!active_) return;
201
202  // Throw away the previous |unresponsive_count_| and start over again. Just
203  // before going to sleep, |unresponsive_count_| could be very close to
204  // |unresponsive_threshold_| and when user becomes active,
205  // |unresponsive_count_| can go over |unresponsive_threshold_| if there was no
206  // response for ping messages. Reset |unresponsive_count_| to start measuring
207  // the unresponsiveness of the threads when system becomes active.
208  unresponsive_count_ = 0;
209
210  if (ping_count_ <= 0) {
211    ping_count_ = unresponsive_threshold_;
212    ResetHangCounters();
213    PostPingMessage();
214  } else {
215    ping_count_ = unresponsive_threshold_;
216  }
217}
218
219void ThreadWatcher::PostPingMessage() {
220  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
221  // If we have stopped watching or if the user is idle, then stop sending
222  // ping messages.
223  if (!active_ || ping_count_ <= 0)
224    return;
225
226  // Save the current time when we have sent ping message.
227  ping_time_ = base::TimeTicks::Now();
228
229  // Send a ping message to the watched thread. Callback will be called on
230  // the WatchDogThread.
231  base::Closure callback(
232      base::Bind(&ThreadWatcher::OnPongMessage, weak_ptr_factory_.GetWeakPtr(),
233                 ping_sequence_number_));
234  if (watched_loop_->PostTask(
235          FROM_HERE,
236          base::Bind(&ThreadWatcher::OnPingMessage, thread_id_,
237                     callback))) {
238      // Post a task to check the responsiveness of watched thread.
239      base::MessageLoop::current()->PostDelayedTask(
240          FROM_HERE,
241          base::Bind(&ThreadWatcher::OnCheckResponsiveness,
242                     weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_),
243          unresponsive_time_);
244  } else {
245    // Watched thread might have gone away, stop watching it.
246    DeActivateThreadWatching();
247  }
248}
249
250void ThreadWatcher::OnPongMessage(uint64 ping_sequence_number) {
251  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
252
253  // Record watched thread's response time.
254  base::TimeTicks now = base::TimeTicks::Now();
255  base::TimeDelta response_time = now - ping_time_;
256  response_time_histogram_->AddTime(response_time);
257
258  // Save the current time when we have got pong message.
259  pong_time_ = now;
260
261  // Check if there are any extra pings in flight.
262  DCHECK_EQ(ping_sequence_number_, ping_sequence_number);
263  if (ping_sequence_number_ != ping_sequence_number)
264    return;
265
266  // Increment sequence number for the next ping message to indicate watched
267  // thread is responsive.
268  ++ping_sequence_number_;
269
270  // If we have stopped watching or if the user is idle, then stop sending
271  // ping messages.
272  if (!active_ || --ping_count_ <= 0)
273    return;
274
275  base::MessageLoop::current()->PostDelayedTask(
276      FROM_HERE,
277      base::Bind(&ThreadWatcher::PostPingMessage,
278                 weak_ptr_factory_.GetWeakPtr()),
279      sleep_time_);
280}
281
282void ThreadWatcher::OnCheckResponsiveness(uint64 ping_sequence_number) {
283  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
284  // If we have stopped watching then consider thread as responding.
285  if (!active_) {
286    responsive_ = true;
287    return;
288  }
289  // If the latest ping_sequence_number_ is not same as the ping_sequence_number
290  // that is passed in, then we can assume OnPongMessage was called.
291  // OnPongMessage increments ping_sequence_number_.
292  if (ping_sequence_number_ != ping_sequence_number) {
293    // Reset unresponsive_count_ to zero because we got a response from the
294    // watched thread.
295    ResetHangCounters();
296
297    responsive_ = true;
298    return;
299  }
300  // Record that we got no response from watched thread.
301  GotNoResponse();
302
303  // Post a task to check the responsiveness of watched thread.
304  base::MessageLoop::current()->PostDelayedTask(
305      FROM_HERE,
306      base::Bind(&ThreadWatcher::OnCheckResponsiveness,
307                 weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_),
308      unresponsive_time_);
309  responsive_ = false;
310}
311
312void ThreadWatcher::Initialize() {
313  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
314  ThreadWatcherList::Register(this);
315
316  const std::string response_time_histogram_name =
317      "ThreadWatcher.ResponseTime." + thread_name_;
318  response_time_histogram_ = base::Histogram::FactoryTimeGet(
319      response_time_histogram_name,
320      base::TimeDelta::FromMilliseconds(1),
321      base::TimeDelta::FromSeconds(100), 50,
322      base::Histogram::kUmaTargetedHistogramFlag);
323
324  const std::string unresponsive_time_histogram_name =
325      "ThreadWatcher.Unresponsive." + thread_name_;
326  unresponsive_time_histogram_ = base::Histogram::FactoryTimeGet(
327      unresponsive_time_histogram_name,
328      base::TimeDelta::FromMilliseconds(1),
329      base::TimeDelta::FromSeconds(100), 50,
330      base::Histogram::kUmaTargetedHistogramFlag);
331
332  const std::string responsive_count_histogram_name =
333      "ThreadWatcher.ResponsiveThreads." + thread_name_;
334  responsive_count_histogram_ = base::LinearHistogram::FactoryGet(
335      responsive_count_histogram_name, 1, 10, 11,
336      base::Histogram::kUmaTargetedHistogramFlag);
337
338  const std::string unresponsive_count_histogram_name =
339      "ThreadWatcher.UnresponsiveThreads." + thread_name_;
340  unresponsive_count_histogram_ = base::LinearHistogram::FactoryGet(
341      unresponsive_count_histogram_name, 1, 10, 11,
342      base::Histogram::kUmaTargetedHistogramFlag);
343}
344
// static
// Ping handler that runs on the watched thread |thread_id|. It answers the
// ping by posting |callback_task| back to the WatchDogThread.
void ThreadWatcher::OnPingMessage(const BrowserThread::ID& thread_id,
                                  const base::Closure& callback_task) {
  // This method is called on watched thread.
  DCHECK(BrowserThread::CurrentlyOn(thread_id));
  WatchDogThread::PostTask(FROM_HERE, callback_task);
}
352
353void ThreadWatcher::ResetHangCounters() {
354  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
355  unresponsive_count_ = 0;
356  hung_processing_complete_ = false;
357}
358
359void ThreadWatcher::GotNoResponse() {
360  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
361
362  ++unresponsive_count_;
363  if (!IsVeryUnresponsive())
364    return;
365
366  // Record total unresponsive_time since last pong message.
367  base::TimeDelta unresponse_time = base::TimeTicks::Now() - pong_time_;
368  unresponsive_time_histogram_->AddTime(unresponse_time);
369
370  // We have already collected stats for the non-responding watched thread.
371  if (hung_processing_complete_)
372    return;
373
374  // Record how other threads are responding.
375  uint32 responding_thread_count = 0;
376  uint32 unresponding_thread_count = 0;
377  ThreadWatcherList::GetStatusOfThreads(&responding_thread_count,
378                                        &unresponding_thread_count);
379
380  // Record how many watched threads are responding.
381  responsive_count_histogram_->Add(responding_thread_count);
382
383  // Record how many watched threads are not responding.
384  unresponsive_count_histogram_->Add(unresponding_thread_count);
385
386  // Crash the browser if the watched thread is to be crashed on hang and if the
387  // number of other threads responding is less than or equal to
388  // live_threads_threshold_ and at least one other thread is responding.
389  if (crash_on_hang_ &&
390      responding_thread_count > 0 &&
391      responding_thread_count <= live_threads_threshold_) {
392    static bool crashed_once = false;
393    if (!crashed_once) {
394      crashed_once = true;
395      CrashBecauseThreadWasUnresponsive(thread_id_);
396    }
397  }
398
399  hung_processing_complete_ = true;
400}
401
402bool ThreadWatcher::IsVeryUnresponsive() {
403  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
404  return unresponsive_count_ >= unresponsive_threshold_;
405}
406
407// ThreadWatcherList methods and members.
408//
409// static
// The single ThreadWatcherList instance; set by the constructor and cleared by
// the destructor.
ThreadWatcherList* ThreadWatcherList::g_thread_watcher_list_ = NULL;
// static
// Written through SetStopped(). While true, InitializeAndStartWatching() does
// nothing.
bool ThreadWatcherList::g_stopped_ = false;
// static
// Seconds a watcher waits after a pong before sending the next ping.
const int ThreadWatcherList::kSleepSeconds = 1;
// static
// Seconds between sending a ping and checking whether it was answered.
const int ThreadWatcherList::kUnresponsiveSeconds = 2;
// static
// Default unresponsive threshold (number of missed checks) before any
// per-channel or per-OS scaling in ParseCommandLine().
const int ThreadWatcherList::kUnresponsiveCount = 9;
// static
// Default live-threads threshold used for crash-on-hang decisions.
const int ThreadWatcherList::kLiveThreadsThreshold = 2;
// static, non-const for tests.
// Seconds StartWatchingAll() waits before running
// InitializeAndStartWatching().
int ThreadWatcherList::g_initialize_delay_seconds = 120;
423
// Stores the two given crash thresholds as-is.
ThreadWatcherList::CrashDataThresholds::CrashDataThresholds(
    uint32 live_threads_threshold,
    uint32 unresponsive_threshold)
    : live_threads_threshold(live_threads_threshold),
      unresponsive_threshold(unresponsive_threshold) {
}
430
// Default thresholds: kLiveThreadsThreshold and kUnresponsiveCount.
ThreadWatcherList::CrashDataThresholds::CrashDataThresholds()
    : live_threads_threshold(kLiveThreadsThreshold),
      unresponsive_threshold(kUnresponsiveCount) {
}
435
436// static
437void ThreadWatcherList::StartWatchingAll(const CommandLine& command_line) {
438  // TODO(rtenneti): Enable ThreadWatcher.
439  uint32 unresponsive_threshold;
440  CrashOnHangThreadMap crash_on_hang_threads;
441  ParseCommandLine(command_line,
442                   &unresponsive_threshold,
443                   &crash_on_hang_threads);
444
445  ThreadWatcherObserver::SetupNotifications(
446      base::TimeDelta::FromSeconds(kSleepSeconds * unresponsive_threshold));
447
448  WatchDogThread::PostTask(
449      FROM_HERE,
450      base::Bind(&ThreadWatcherList::SetStopped, false));
451
452  WatchDogThread::PostDelayedTask(
453      FROM_HERE,
454      base::Bind(&ThreadWatcherList::InitializeAndStartWatching,
455                 unresponsive_threshold,
456                 crash_on_hang_threads),
457      base::TimeDelta::FromSeconds(g_initialize_delay_seconds));
458}
459
// static
// Removes the user-activity observer, then deletes all watchers and the list
// itself (DeleteAll() re-posts itself to the WatchDogThread when needed).
void ThreadWatcherList::StopWatchingAll() {
  // TODO(rtenneti): Enable ThreadWatcher.
  ThreadWatcherObserver::RemoveNotifications();
  DeleteAll();
}
466
467// static
468void ThreadWatcherList::Register(ThreadWatcher* watcher) {
469  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
470  if (!g_thread_watcher_list_)
471    return;
472  DCHECK(!g_thread_watcher_list_->Find(watcher->thread_id()));
473  g_thread_watcher_list_->registered_[watcher->thread_id()] = watcher;
474}
475
476// static
477bool ThreadWatcherList::IsRegistered(const BrowserThread::ID thread_id) {
478  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
479  return NULL != ThreadWatcherList::Find(thread_id);
480}
481
482// static
483void ThreadWatcherList::GetStatusOfThreads(uint32* responding_thread_count,
484                                           uint32* unresponding_thread_count) {
485  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
486  *responding_thread_count = 0;
487  *unresponding_thread_count = 0;
488  if (!g_thread_watcher_list_)
489    return;
490
491  for (RegistrationList::iterator it =
492           g_thread_watcher_list_->registered_.begin();
493       g_thread_watcher_list_->registered_.end() != it;
494       ++it) {
495    if (it->second->IsVeryUnresponsive())
496      ++(*unresponding_thread_count);
497    else
498      ++(*responding_thread_count);
499  }
500}
501
502// static
503void ThreadWatcherList::WakeUpAll() {
504  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
505  if (!g_thread_watcher_list_)
506    return;
507
508  for (RegistrationList::iterator it =
509           g_thread_watcher_list_->registered_.begin();
510       g_thread_watcher_list_->registered_.end() != it;
511       ++it)
512    it->second->WakeUp();
513}
514
// Installs this object as the global list. Only one list may exist at a time
// (enforced with CHECK), and it must be created on the WatchDogThread.
ThreadWatcherList::ThreadWatcherList() {
  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
  CHECK(!g_thread_watcher_list_);
  g_thread_watcher_list_ = this;
}
520
// Clears the global list pointer. Does not delete the registered watchers;
// DeleteAll() does that before deleting the list.
ThreadWatcherList::~ThreadWatcherList() {
  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
  DCHECK(this == g_thread_watcher_list_);
  g_thread_watcher_list_ = NULL;
}
526
// static
// Computes the default |unresponsive_threshold| (scaled by release channel
// and, on Windows, by OS version) and fills |crash_on_hang_threads| with the
// per-thread crash thresholds. The thread list comes from the
// switches::kCrashOnHangThreads switch when present; otherwise a built-in
// default is used on every channel except stable. On canary without a
// command-line override, the thresholds for non-FILE threads are then replaced
// by the field-trial values below.
void ThreadWatcherList::ParseCommandLine(
    const CommandLine& command_line,
    uint32* unresponsive_threshold,
    CrashOnHangThreadMap* crash_on_hang_threads) {
  // Initialize |unresponsive_threshold| to a default value.
  *unresponsive_threshold = kUnresponsiveCount;

  // Increase the unresponsive_threshold on the Stable and Beta channels to
  // reduce the number of crashes due to ThreadWatcher.
  chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel();
  if (channel == chrome::VersionInfo::CHANNEL_STABLE) {
    *unresponsive_threshold *= 4;
  } else if (channel == chrome::VersionInfo::CHANNEL_BETA) {
    *unresponsive_threshold *= 2;
  }

#if defined(OS_WIN)
  // For Windows XP (old systems), double the unresponsive_threshold to give
  // the OS a chance to schedule UI/IO threads a time slice to respond with a
  // pong message (to get around limitations with the OS).
  if (base::win::GetVersion() <= base::win::VERSION_XP)
    *unresponsive_threshold *= 2;
#endif

  // Express the threshold in seconds: each missed check accounts for
  // kUnresponsiveSeconds.
  uint32 crash_seconds = *unresponsive_threshold * kUnresponsiveSeconds;
  std::string crash_on_hang_thread_names;
  bool has_command_line_overwrite = false;
  if (command_line.HasSwitch(switches::kCrashOnHangThreads)) {
    crash_on_hang_thread_names =
        command_line.GetSwitchValueASCII(switches::kCrashOnHangThreads);
    has_command_line_overwrite = true;
  } else if (channel != chrome::VersionInfo::CHANNEL_STABLE) {
    // Default to crashing the browser if UI or IO or FILE threads are not
    // responsive except in stable channel.
    // Each entry is "name:live_threads_threshold:crash_seconds"; the FILE
    // thread is given five times the crash time of UI and IO.
    crash_on_hang_thread_names = base::StringPrintf(
        "UI:%d:%d,IO:%d:%d,FILE:%d:%d",
        kLiveThreadsThreshold, crash_seconds,
        kLiveThreadsThreshold, crash_seconds,
        kLiveThreadsThreshold, crash_seconds * 5);
  }

  ParseCommandLineCrashOnHangThreads(crash_on_hang_thread_names,
                                     kLiveThreadsThreshold,
                                     crash_seconds,
                                     crash_on_hang_threads);

  // The field trial below applies only to canary builds that did not get an
  // explicit thread list on the command line.
  if (channel != chrome::VersionInfo::CHANNEL_CANARY ||
      has_command_line_overwrite) {
    return;
  }

  // Set up a field trial for 100% of the users to crash if either UI or IO
  // thread is not responsive for 30 seconds (or 15 pings).
  scoped_refptr<base::FieldTrial> field_trial(
      base::FieldTrialList::FactoryGetFieldTrial(
          "ThreadWatcher", 100, "default_hung_threads",
          2014, 10, 30, base::FieldTrial::SESSION_RANDOMIZED, NULL));
  int hung_thread_group = field_trial->AppendGroup("hung_thread", 100);
  if (field_trial->group() == hung_thread_group) {
    for (CrashOnHangThreadMap::iterator it = crash_on_hang_threads->begin();
         crash_on_hang_threads->end() != it;
         ++it) {
      // The FILE thread keeps the thresholds parsed above.
      if (it->first == "FILE")
        continue;
      // With INT_MAX the "responding threads <= threshold" part of the crash
      // condition in ThreadWatcher::GotNoResponse() no longer restricts it.
      it->second.live_threads_threshold = INT_MAX;
      if (it->first == "UI") {
        // TODO(rtenneti): set unresponsive threshold to 120 seconds to catch
        // the worst UI hangs and for fewer crashes due to ThreadWatcher. Reduce
        // it to a more reasonable time ala IO thread.
        it->second.unresponsive_threshold = 60;
      } else {
        it->second.unresponsive_threshold = 15;
      }
    }
  }
}
604
605// static
606void ThreadWatcherList::ParseCommandLineCrashOnHangThreads(
607    const std::string& crash_on_hang_thread_names,
608    uint32 default_live_threads_threshold,
609    uint32 default_crash_seconds,
610    CrashOnHangThreadMap* crash_on_hang_threads) {
611  base::StringTokenizer tokens(crash_on_hang_thread_names, ",");
612  std::vector<std::string> values;
613  while (tokens.GetNext()) {
614    const std::string& token = tokens.token();
615    base::SplitString(token, ':', &values);
616    std::string thread_name = values[0];
617
618    uint32 live_threads_threshold = default_live_threads_threshold;
619    uint32 crash_seconds = default_crash_seconds;
620    if (values.size() >= 2 &&
621        (!base::StringToUint(values[1], &live_threads_threshold))) {
622      continue;
623    }
624    if (values.size() >= 3 &&
625        (!base::StringToUint(values[2], &crash_seconds))) {
626      continue;
627    }
628    uint32 unresponsive_threshold = static_cast<uint32>(
629        ceil(static_cast<float>(crash_seconds) / kUnresponsiveSeconds));
630
631    CrashDataThresholds crash_data(live_threads_threshold,
632                                   unresponsive_threshold);
633    // Use the last specifier.
634    (*crash_on_hang_threads)[thread_name] = crash_data;
635  }
636}
637
638// static
639void ThreadWatcherList::InitializeAndStartWatching(
640    uint32 unresponsive_threshold,
641    const CrashOnHangThreadMap& crash_on_hang_threads) {
642  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
643
644  // This method is deferred in relationship to its StopWatchingAll()
645  // counterpart. If a previous initialization has already happened, or if
646  // stop has been called, there's nothing left to do here.
647  if (g_thread_watcher_list_ || g_stopped_)
648    return;
649
650  ThreadWatcherList* thread_watcher_list = new ThreadWatcherList();
651  CHECK(thread_watcher_list);
652
653  BrowserThread::PostTask(
654      BrowserThread::UI,
655      FROM_HERE,
656      base::Bind(&StartupTimeBomb::DisarmStartupTimeBomb));
657
658  const base::TimeDelta kSleepTime =
659      base::TimeDelta::FromSeconds(kSleepSeconds);
660  const base::TimeDelta kUnresponsiveTime =
661      base::TimeDelta::FromSeconds(kUnresponsiveSeconds);
662
663  StartWatching(BrowserThread::UI, "UI", kSleepTime, kUnresponsiveTime,
664                unresponsive_threshold, crash_on_hang_threads);
665  StartWatching(BrowserThread::IO, "IO", kSleepTime, kUnresponsiveTime,
666                unresponsive_threshold, crash_on_hang_threads);
667  StartWatching(BrowserThread::DB, "DB", kSleepTime, kUnresponsiveTime,
668                unresponsive_threshold, crash_on_hang_threads);
669  StartWatching(BrowserThread::FILE, "FILE", kSleepTime, kUnresponsiveTime,
670                unresponsive_threshold, crash_on_hang_threads);
671  StartWatching(BrowserThread::CACHE, "CACHE", kSleepTime, kUnresponsiveTime,
672                unresponsive_threshold, crash_on_hang_threads);
673}
674
675// static
676void ThreadWatcherList::StartWatching(
677    const BrowserThread::ID& thread_id,
678    const std::string& thread_name,
679    const base::TimeDelta& sleep_time,
680    const base::TimeDelta& unresponsive_time,
681    uint32 unresponsive_threshold,
682    const CrashOnHangThreadMap& crash_on_hang_threads) {
683  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
684
685  CrashOnHangThreadMap::const_iterator it =
686      crash_on_hang_threads.find(thread_name);
687  bool crash_on_hang = false;
688  uint32 live_threads_threshold = 0;
689  if (it != crash_on_hang_threads.end()) {
690    crash_on_hang = true;
691    live_threads_threshold = it->second.live_threads_threshold;
692    unresponsive_threshold = it->second.unresponsive_threshold;
693  }
694
695  ThreadWatcher::StartWatching(
696      ThreadWatcher::WatchingParams(thread_id,
697                                    thread_name,
698                                    sleep_time,
699                                    unresponsive_time,
700                                    unresponsive_threshold,
701                                    crash_on_hang,
702                                    live_threads_threshold));
703}
704
705// static
706void ThreadWatcherList::DeleteAll() {
707  if (!WatchDogThread::CurrentlyOnWatchDogThread()) {
708    WatchDogThread::PostTask(
709        FROM_HERE,
710        base::Bind(&ThreadWatcherList::DeleteAll));
711    return;
712  }
713
714  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
715
716  SetStopped(true);
717
718  if (!g_thread_watcher_list_)
719    return;
720
721  // Delete all thread watcher objects.
722  while (!g_thread_watcher_list_->registered_.empty()) {
723    RegistrationList::iterator it = g_thread_watcher_list_->registered_.begin();
724    delete it->second;
725    g_thread_watcher_list_->registered_.erase(it);
726  }
727
728  delete g_thread_watcher_list_;
729}
730
731// static
732ThreadWatcher* ThreadWatcherList::Find(const BrowserThread::ID& thread_id) {
733  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
734  if (!g_thread_watcher_list_)
735    return NULL;
736  RegistrationList::iterator it =
737      g_thread_watcher_list_->registered_.find(thread_id);
738  if (g_thread_watcher_list_->registered_.end() == it)
739    return NULL;
740  return it->second;
741}
742
// static
// Sets |g_stopped_|, the flag that makes InitializeAndStartWatching() a no-op.
// Must be called on the WatchDogThread.
void ThreadWatcherList::SetStopped(bool stopped) {
  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
  g_stopped_ = stopped;
}
748
749// ThreadWatcherObserver methods and members.
750//
751// static
752ThreadWatcherObserver* ThreadWatcherObserver::g_thread_watcher_observer_ = NULL;
753
// Installs this object as the global observer (only one may exist, enforced
// with CHECK). |wakeup_interval| is the minimum time between two wake-ups that
// Observe() forwards to the watchers.
ThreadWatcherObserver::ThreadWatcherObserver(
    const base::TimeDelta& wakeup_interval)
    : last_wakeup_time_(base::TimeTicks::Now()),
      wakeup_interval_(wakeup_interval) {
  CHECK(!g_thread_watcher_observer_);
  g_thread_watcher_observer_ = this;
}
761
// Clears the global observer pointer.
ThreadWatcherObserver::~ThreadWatcherObserver() {
  DCHECK(this == g_thread_watcher_observer_);
  g_thread_watcher_observer_ = NULL;
}
766
// static
// Creates the global observer on the UI thread and registers it for
// notifications through MetricsService::SetUpNotifications(). The object is
// deleted in RemoveNotifications().
void ThreadWatcherObserver::SetupNotifications(
    const base::TimeDelta& wakeup_interval) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
  ThreadWatcherObserver* observer = new ThreadWatcherObserver(wakeup_interval);
  MetricsService::SetUpNotifications(&observer->registrar_, observer);
}
774
775// static
776void ThreadWatcherObserver::RemoveNotifications() {
777  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
778  if (!g_thread_watcher_observer_)
779    return;
780  g_thread_watcher_observer_->registrar_.RemoveAll();
781  delete g_thread_watcher_observer_;
782}
783
784void ThreadWatcherObserver::Observe(
785    int type,
786    const content::NotificationSource& source,
787    const content::NotificationDetails& details) {
788  // There is some user activity, see if thread watchers are to be awakened.
789  base::TimeTicks now = base::TimeTicks::Now();
790  if ((now - last_wakeup_time_) < wakeup_interval_)
791    return;
792  last_wakeup_time_ = now;
793  WatchDogThread::PostTask(
794      FROM_HERE,
795      base::Bind(&ThreadWatcherList::WakeUpAll));
796}
797
798// WatchDogThread methods and members.
799
800// This lock protects g_watchdog_thread.
// Lazily created and intentionally leaked (LazyInstance<...>::Leaky).
static base::LazyInstance<base::Lock>::Leaky
    g_watchdog_lock = LAZY_INSTANCE_INITIALIZER;

// The singleton of this class.
// Set in WatchDogThread::Init() and cleared in WatchDogThread::CleanUp(), both
// while holding |g_watchdog_lock|.
static WatchDogThread* g_watchdog_thread = NULL;
806
// Names the underlying thread "BrowserWatchdog".
WatchDogThread::WatchDogThread() : Thread("BrowserWatchdog") {
}
809
// Stops the thread on destruction.
// NOTE(review): Stop() is presumably inherited from the Thread base class --
// confirm.
WatchDogThread::~WatchDogThread() {
  Stop();
}
813
814// static
815bool WatchDogThread::CurrentlyOnWatchDogThread() {
816  base::AutoLock lock(g_watchdog_lock.Get());
817  return g_watchdog_thread &&
818      g_watchdog_thread->message_loop() == base::MessageLoop::current();
819}
820
// static
// Posts |task| to the watchdog thread with no delay. Returns false when the
// task could not be posted (see PostTaskHelper()).
bool WatchDogThread::PostTask(const tracked_objects::Location& from_here,
                              const base::Closure& task) {
  return PostTaskHelper(from_here, task, base::TimeDelta());
}
826
// static
// Posts |task| to the watchdog thread to run after |delay|. Returns false when
// the task could not be posted (see PostTaskHelper()).
bool WatchDogThread::PostDelayedTask(const tracked_objects::Location& from_here,
                                     const base::Closure& task,
                                     base::TimeDelta delay) {
  return PostTaskHelper(from_here, task, delay);
}
833
834// static
835bool WatchDogThread::PostTaskHelper(
836    const tracked_objects::Location& from_here,
837    const base::Closure& task,
838    base::TimeDelta delay) {
839  {
840    base::AutoLock lock(g_watchdog_lock.Get());
841
842    base::MessageLoop* message_loop = g_watchdog_thread ?
843        g_watchdog_thread->message_loop() : NULL;
844    if (message_loop) {
845      message_loop->PostDelayedTask(from_here, task, delay);
846      return true;
847    }
848  }
849
850  return false;
851}
852
// Disallows blocking I/O on this thread and publishes it as the singleton
// |g_watchdog_thread| (only one may exist, enforced with CHECK).
void WatchDogThread::Init() {
  // This thread shouldn't be allowed to perform any blocking disk I/O.
  base::ThreadRestrictions::SetIOAllowed(false);

  base::AutoLock lock(g_watchdog_lock.Get());
  CHECK(!g_watchdog_thread);
  g_watchdog_thread = this;
}
861
// Clears the singleton pointer under |g_watchdog_lock|. After this,
// PostTaskHelper() returns false and CurrentlyOnWatchDogThread() returns
// false.
void WatchDogThread::CleanUp() {
  base::AutoLock lock(g_watchdog_lock.Get());
  g_watchdog_thread = NULL;
}
866
867namespace {
868
869// StartupWatchDogThread methods and members.
870//
871// Class for detecting hangs during startup.
872class StartupWatchDogThread : public base::Watchdog {
873 public:
874  // Constructor specifies how long the StartupWatchDogThread will wait before
875  // alarming.
876  explicit StartupWatchDogThread(const base::TimeDelta& duration)
877      : base::Watchdog(duration, "Startup watchdog thread", true) {
878  }
879
880  // Alarm is called if the time expires after an Arm() without someone calling
881  // Disarm(). When Alarm goes off, in release mode we get the crash dump
882  // without crashing and in debug mode we break into the debugger.
883  virtual void Alarm() OVERRIDE {
884#ifndef NDEBUG
885    DCHECK(false);
886#else
887    base::debug::DumpWithoutCrashing();
888#endif
889  }
890
891  DISALLOW_COPY_AND_ASSIGN(StartupWatchDogThread);
892};
893
894// ShutdownWatchDogThread methods and members.
895//
896// Class for detecting hangs during shutdown.
897class ShutdownWatchDogThread : public base::Watchdog {
898 public:
899  // Constructor specifies how long the ShutdownWatchDogThread will wait before
900  // alarming.
901  explicit ShutdownWatchDogThread(const base::TimeDelta& duration)
902      : base::Watchdog(duration, "Shutdown watchdog thread", true) {
903  }
904
905  // Alarm is called if the time expires after an Arm() without someone calling
906  // Disarm(). We crash the browser if this method is called.
907  virtual void Alarm() OVERRIDE {
908    ShutdownCrash();
909  }
910
911  DISALLOW_COPY_AND_ASSIGN(ShutdownWatchDogThread);
912};
913}  // namespace
914
915// StartupTimeBomb methods and members.
916//
917// static
918StartupTimeBomb* StartupTimeBomb::g_startup_timebomb_ = NULL;
919
920StartupTimeBomb::StartupTimeBomb()
921    : startup_watchdog_(NULL),
922      thread_id_(base::PlatformThread::CurrentId()) {
923  CHECK(!g_startup_timebomb_);
924  g_startup_timebomb_ = this;
925}
926
927StartupTimeBomb::~StartupTimeBomb() {
928  DCHECK(this == g_startup_timebomb_);
929  DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
930  if (startup_watchdog_)
931    Disarm();
932  g_startup_timebomb_ = NULL;
933}
934
935void StartupTimeBomb::Arm(const base::TimeDelta& duration) {
936  DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
937  DCHECK(!startup_watchdog_);
938  startup_watchdog_ = new StartupWatchDogThread(duration);
939  startup_watchdog_->Arm();
940  return;
941}
942
943void StartupTimeBomb::Disarm() {
944  DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
945  if (startup_watchdog_) {
946    startup_watchdog_->Disarm();
947    startup_watchdog_->Cleanup();
948    DeleteStartupWatchdog();
949  }
950}
951
952void StartupTimeBomb::DeleteStartupWatchdog() {
953  DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
954  if (startup_watchdog_->IsJoinable()) {
955    // Allow the watchdog thread to shutdown on UI. Watchdog thread shutdowns
956    // very fast.
957    base::ThreadRestrictions::SetIOAllowed(true);
958    delete startup_watchdog_;
959    startup_watchdog_ = NULL;
960    return;
961  }
962  base::MessageLoop::current()->PostDelayedTask(
963      FROM_HERE,
964      base::Bind(&StartupTimeBomb::DeleteStartupWatchdog,
965                 base::Unretained(this)),
966      base::TimeDelta::FromSeconds(10));
967}
968
969// static
970void StartupTimeBomb::DisarmStartupTimeBomb() {
971  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
972  if (g_startup_timebomb_)
973    g_startup_timebomb_->Disarm();
974}
975
976// ShutdownWatcherHelper methods and members.
977//
978// ShutdownWatcherHelper is a wrapper class for detecting hangs during
979// shutdown.
980ShutdownWatcherHelper::ShutdownWatcherHelper()
981    : shutdown_watchdog_(NULL),
982      thread_id_(base::PlatformThread::CurrentId()) {
983}
984
985ShutdownWatcherHelper::~ShutdownWatcherHelper() {
986  DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
987  if (shutdown_watchdog_) {
988    shutdown_watchdog_->Disarm();
989    delete shutdown_watchdog_;
990    shutdown_watchdog_ = NULL;
991  }
992}
993
994void ShutdownWatcherHelper::Arm(const base::TimeDelta& duration) {
995  DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
996  DCHECK(!shutdown_watchdog_);
997  base::TimeDelta actual_duration = duration;
998
999  chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel();
1000  if (channel == chrome::VersionInfo::CHANNEL_STABLE) {
1001    actual_duration *= 20;
1002  } else if (channel == chrome::VersionInfo::CHANNEL_BETA ||
1003             channel == chrome::VersionInfo::CHANNEL_DEV) {
1004    actual_duration *= 10;
1005  }
1006
1007#if defined(OS_WIN)
1008  // On Windows XP, give twice the time for shutdown.
1009  if (base::win::GetVersion() <= base::win::VERSION_XP)
1010    actual_duration *= 2;
1011#endif
1012
1013  shutdown_watchdog_ = new ShutdownWatchDogThread(actual_duration);
1014  shutdown_watchdog_->Arm();
1015}
1016