thread_watcher.cc revision c2e0dbddbe15c98d52c4786dac06cb8952a8ae6d
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/metrics/thread_watcher.h"
6
7#include <math.h>  // ceil
8
9#include "base/bind.h"
10#include "base/compiler_specific.h"
11#include "base/debug/alias.h"
12#include "base/lazy_instance.h"
13#include "base/stringprintf.h"
14#include "base/strings/string_number_conversions.h"
15#include "base/strings/string_split.h"
16#include "base/strings/string_tokenizer.h"
17#include "base/threading/thread_restrictions.h"
18#include "build/build_config.h"
19#include "chrome/browser/metrics/metrics_service.h"
20#include "chrome/common/chrome_switches.h"
21#include "chrome/common/chrome_version_info.h"
22#include "chrome/common/dump_without_crashing.h"
23#include "chrome/common/logging_chrome.h"
24
25#if defined(OS_WIN)
26#include "base/win/windows_version.h"
27#endif
28
29using content::BrowserThread;
30
31namespace {
32
33// The following are unique function names for forcing the crash when a thread
34// is unresponsive. This makes it possible to tell from the callstack alone what
35// thread was unresponsive.
36//
37// We disable optimizations for this block of functions so the compiler doesn't
38// merge them all together.
39MSVC_DISABLE_OPTIMIZE()
40MSVC_PUSH_DISABLE_WARNING(4748)
41
// Returns a null int pointer. NullPointerCrash() writes through the result
// when it wants to bring the process down.
int* NullPointer() {
  int* const null_target = static_cast<int*>(NULL);
  return null_target;
}
45
46void NullPointerCrash(int line_number) {
47#ifndef NDEBUG
48  *NullPointer() = line_number;  // Crash.
49#else
50  logging::DumpWithoutCrashing();
51#endif
52}
53
54NOINLINE void ShutdownCrash() {
55  NullPointerCrash(__LINE__);
56}
57
58NOINLINE void ThreadUnresponsive_UI() {
59  NullPointerCrash(__LINE__);
60}
61
62NOINLINE void ThreadUnresponsive_DB() {
63  NullPointerCrash(__LINE__);
64}
65
66NOINLINE void ThreadUnresponsive_WEBKIT() {
67  NullPointerCrash(__LINE__);
68}
69
70NOINLINE void ThreadUnresponsive_FILE() {
71  NullPointerCrash(__LINE__);
72}
73
74NOINLINE void ThreadUnresponsive_FILE_USER_BLOCKING() {
75  NullPointerCrash(__LINE__);
76}
77
78NOINLINE void ThreadUnresponsive_PROCESS_LAUNCHER() {
79  NullPointerCrash(__LINE__);
80}
81
82NOINLINE void ThreadUnresponsive_CACHE() {
83  NullPointerCrash(__LINE__);
84}
85
86NOINLINE void ThreadUnresponsive_IO() {
87  NullPointerCrash(__LINE__);
88}
89
90MSVC_POP_WARNING()
91MSVC_ENABLE_OPTIMIZE();
92
93void CrashBecauseThreadWasUnresponsive(BrowserThread::ID thread_id) {
94  base::debug::Alias(&thread_id);
95
96  switch (thread_id) {
97    case BrowserThread::UI:
98      return ThreadUnresponsive_UI();
99    case BrowserThread::DB:
100      return ThreadUnresponsive_DB();
101    case BrowserThread::WEBKIT_DEPRECATED:
102      return ThreadUnresponsive_WEBKIT();
103    case BrowserThread::FILE:
104      return ThreadUnresponsive_FILE();
105    case BrowserThread::FILE_USER_BLOCKING:
106      return ThreadUnresponsive_FILE_USER_BLOCKING();
107    case BrowserThread::PROCESS_LAUNCHER:
108      return ThreadUnresponsive_PROCESS_LAUNCHER();
109    case BrowserThread::CACHE:
110      return ThreadUnresponsive_CACHE();
111    case BrowserThread::IO:
112      return ThreadUnresponsive_IO();
113    case BrowserThread::ID_COUNT:
114      CHECK(false);  // This shouldn't actually be reached!
115      break;
116
117    // Omission of the default hander is intentional -- that way the compiler
118    // should warn if our switch becomes outdated.
119  }
120
121  CHECK(false) << "Unknown thread was unresponsive.";  // Shouldn't be reached.
122}
123
124}  // namespace
125
126// ThreadWatcher methods and members.
127ThreadWatcher::ThreadWatcher(const WatchingParams& params)
128    : thread_id_(params.thread_id),
129      thread_name_(params.thread_name),
130      watched_loop_(
131          BrowserThread::GetMessageLoopProxyForThread(params.thread_id)),
132      sleep_time_(params.sleep_time),
133      unresponsive_time_(params.unresponsive_time),
134      ping_time_(base::TimeTicks::Now()),
135      pong_time_(ping_time_),
136      ping_sequence_number_(0),
137      active_(false),
138      ping_count_(params.unresponsive_threshold),
139      response_time_histogram_(NULL),
140      unresponsive_time_histogram_(NULL),
141      unresponsive_count_(0),
142      hung_processing_complete_(false),
143      unresponsive_threshold_(params.unresponsive_threshold),
144      crash_on_hang_(params.crash_on_hang),
145      live_threads_threshold_(params.live_threads_threshold),
146      weak_ptr_factory_(this) {
147  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
148  Initialize();
149}
150
151ThreadWatcher::~ThreadWatcher() {}
152
153// static
154void ThreadWatcher::StartWatching(const WatchingParams& params) {
155  DCHECK_GE(params.sleep_time.InMilliseconds(), 0);
156  DCHECK_GE(params.unresponsive_time.InMilliseconds(),
157            params.sleep_time.InMilliseconds());
158
159  // If we are not on WatchDogThread, then post a task to call StartWatching on
160  // WatchDogThread.
161  if (!WatchDogThread::CurrentlyOnWatchDogThread()) {
162    WatchDogThread::PostTask(
163        FROM_HERE,
164        base::Bind(&ThreadWatcher::StartWatching, params));
165    return;
166  }
167
168  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
169
170  // Create a new thread watcher object for the given thread and activate it.
171  ThreadWatcher* watcher = new ThreadWatcher(params);
172
173  DCHECK(watcher);
174  // If we couldn't register the thread watcher object, we are shutting down,
175  // then don't activate thread watching.
176  if (!ThreadWatcherList::IsRegistered(params.thread_id))
177    return;
178  watcher->ActivateThreadWatching();
179}
180
181void ThreadWatcher::ActivateThreadWatching() {
182  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
183  if (active_) return;
184  active_ = true;
185  ping_count_ = unresponsive_threshold_;
186  ResetHangCounters();
187  MessageLoop::current()->PostTask(
188      FROM_HERE,
189      base::Bind(&ThreadWatcher::PostPingMessage,
190                 weak_ptr_factory_.GetWeakPtr()));
191}
192
193void ThreadWatcher::DeActivateThreadWatching() {
194  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
195  active_ = false;
196  ping_count_ = 0;
197  weak_ptr_factory_.InvalidateWeakPtrs();
198}
199
200void ThreadWatcher::WakeUp() {
201  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
202  // There is some user activity, PostPingMessage task of thread watcher if
203  // needed.
204  if (!active_) return;
205
206  // Throw away the previous |unresponsive_count_| and start over again. Just
207  // before going to sleep, |unresponsive_count_| could be very close to
208  // |unresponsive_threshold_| and when user becomes active,
209  // |unresponsive_count_| can go over |unresponsive_threshold_| if there was no
210  // response for ping messages. Reset |unresponsive_count_| to start measuring
211  // the unresponsiveness of the threads when system becomes active.
212  unresponsive_count_ = 0;
213
214  if (ping_count_ <= 0) {
215    ping_count_ = unresponsive_threshold_;
216    ResetHangCounters();
217    PostPingMessage();
218  } else {
219    ping_count_ = unresponsive_threshold_;
220  }
221}
222
223void ThreadWatcher::PostPingMessage() {
224  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
225  // If we have stopped watching or if the user is idle, then stop sending
226  // ping messages.
227  if (!active_ || ping_count_ <= 0)
228    return;
229
230  // Save the current time when we have sent ping message.
231  ping_time_ = base::TimeTicks::Now();
232
233  // Send a ping message to the watched thread. Callback will be called on
234  // the WatchDogThread.
235  base::Closure callback(
236      base::Bind(&ThreadWatcher::OnPongMessage, weak_ptr_factory_.GetWeakPtr(),
237                 ping_sequence_number_));
238  if (watched_loop_->PostTask(
239          FROM_HERE,
240          base::Bind(&ThreadWatcher::OnPingMessage, thread_id_,
241                     callback))) {
242      // Post a task to check the responsiveness of watched thread.
243      MessageLoop::current()->PostDelayedTask(
244          FROM_HERE,
245          base::Bind(&ThreadWatcher::OnCheckResponsiveness,
246                     weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_),
247          unresponsive_time_);
248  } else {
249    // Watched thread might have gone away, stop watching it.
250    DeActivateThreadWatching();
251  }
252}
253
254void ThreadWatcher::OnPongMessage(uint64 ping_sequence_number) {
255  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
256
257  // Record watched thread's response time.
258  base::TimeTicks now = base::TimeTicks::Now();
259  base::TimeDelta response_time = now - ping_time_;
260  response_time_histogram_->AddTime(response_time);
261
262  // Save the current time when we have got pong message.
263  pong_time_ = now;
264
265  // Check if there are any extra pings in flight.
266  DCHECK_EQ(ping_sequence_number_, ping_sequence_number);
267  if (ping_sequence_number_ != ping_sequence_number)
268    return;
269
270  // Increment sequence number for the next ping message to indicate watched
271  // thread is responsive.
272  ++ping_sequence_number_;
273
274  // If we have stopped watching or if the user is idle, then stop sending
275  // ping messages.
276  if (!active_ || --ping_count_ <= 0)
277    return;
278
279  MessageLoop::current()->PostDelayedTask(
280      FROM_HERE,
281      base::Bind(&ThreadWatcher::PostPingMessage,
282                 weak_ptr_factory_.GetWeakPtr()),
283      sleep_time_);
284}
285
286void ThreadWatcher::OnCheckResponsiveness(uint64 ping_sequence_number) {
287  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
288  // If we have stopped watching then consider thread as responding.
289  if (!active_) {
290    responsive_ = true;
291    return;
292  }
293  // If the latest ping_sequence_number_ is not same as the ping_sequence_number
294  // that is passed in, then we can assume OnPongMessage was called.
295  // OnPongMessage increments ping_sequence_number_.
296  if (ping_sequence_number_ != ping_sequence_number) {
297    // Reset unresponsive_count_ to zero because we got a response from the
298    // watched thread.
299    ResetHangCounters();
300
301    responsive_ = true;
302    return;
303  }
304  // Record that we got no response from watched thread.
305  GotNoResponse();
306
307  // Post a task to check the responsiveness of watched thread.
308  MessageLoop::current()->PostDelayedTask(
309      FROM_HERE,
310      base::Bind(&ThreadWatcher::OnCheckResponsiveness,
311                 weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_),
312      unresponsive_time_);
313  responsive_ = false;
314}
315
316void ThreadWatcher::Initialize() {
317  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
318  ThreadWatcherList::Register(this);
319
320  const std::string response_time_histogram_name =
321      "ThreadWatcher.ResponseTime." + thread_name_;
322  response_time_histogram_ = base::Histogram::FactoryTimeGet(
323      response_time_histogram_name,
324      base::TimeDelta::FromMilliseconds(1),
325      base::TimeDelta::FromSeconds(100), 50,
326      base::Histogram::kUmaTargetedHistogramFlag);
327
328  const std::string unresponsive_time_histogram_name =
329      "ThreadWatcher.Unresponsive." + thread_name_;
330  unresponsive_time_histogram_ = base::Histogram::FactoryTimeGet(
331      unresponsive_time_histogram_name,
332      base::TimeDelta::FromMilliseconds(1),
333      base::TimeDelta::FromSeconds(100), 50,
334      base::Histogram::kUmaTargetedHistogramFlag);
335
336  const std::string responsive_count_histogram_name =
337      "ThreadWatcher.ResponsiveThreads." + thread_name_;
338  responsive_count_histogram_ = base::LinearHistogram::FactoryGet(
339      responsive_count_histogram_name, 1, 10, 11,
340      base::Histogram::kUmaTargetedHistogramFlag);
341
342  const std::string unresponsive_count_histogram_name =
343      "ThreadWatcher.UnresponsiveThreads." + thread_name_;
344  unresponsive_count_histogram_ = base::LinearHistogram::FactoryGet(
345      unresponsive_count_histogram_name, 1, 10, 11,
346      base::Histogram::kUmaTargetedHistogramFlag);
347}
348
349// static
350void ThreadWatcher::OnPingMessage(const BrowserThread::ID& thread_id,
351                                  const base::Closure& callback_task) {
352  // This method is called on watched thread.
353  DCHECK(BrowserThread::CurrentlyOn(thread_id));
354  WatchDogThread::PostTask(FROM_HERE, callback_task);
355}
356
357void ThreadWatcher::ResetHangCounters() {
358  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
359  unresponsive_count_ = 0;
360  hung_processing_complete_ = false;
361}
362
363void ThreadWatcher::GotNoResponse() {
364  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
365
366  ++unresponsive_count_;
367  if (!IsVeryUnresponsive())
368    return;
369
370  // Record total unresponsive_time since last pong message.
371  base::TimeDelta unresponse_time = base::TimeTicks::Now() - pong_time_;
372  unresponsive_time_histogram_->AddTime(unresponse_time);
373
374  // We have already collected stats for the non-responding watched thread.
375  if (hung_processing_complete_)
376    return;
377
378  // Record how other threads are responding.
379  uint32 responding_thread_count = 0;
380  uint32 unresponding_thread_count = 0;
381  ThreadWatcherList::GetStatusOfThreads(&responding_thread_count,
382                                        &unresponding_thread_count);
383
384  // Record how many watched threads are responding.
385  responsive_count_histogram_->Add(responding_thread_count);
386
387  // Record how many watched threads are not responding.
388  unresponsive_count_histogram_->Add(unresponding_thread_count);
389
390  // Crash the browser if the watched thread is to be crashed on hang and if the
391  // number of other threads responding is less than or equal to
392  // live_threads_threshold_ and at least one other thread is responding.
393  if (crash_on_hang_ &&
394      responding_thread_count > 0 &&
395      responding_thread_count <= live_threads_threshold_) {
396    static bool crashed_once = false;
397    if (!crashed_once) {
398      crashed_once = true;
399      CrashBecauseThreadWasUnresponsive(thread_id_);
400    }
401  }
402
403  hung_processing_complete_ = true;
404}
405
406bool ThreadWatcher::IsVeryUnresponsive() {
407  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
408  return unresponsive_count_ >= unresponsive_threshold_;
409}
410
411// ThreadWatcherList methods and members.
412//
413// static
414ThreadWatcherList* ThreadWatcherList::g_thread_watcher_list_ = NULL;
415// static
416const int ThreadWatcherList::kSleepSeconds = 1;
417// static
418const int ThreadWatcherList::kUnresponsiveSeconds = 2;
419// static
420const int ThreadWatcherList::kUnresponsiveCount = 9;
421// static
422const int ThreadWatcherList::kLiveThreadsThreshold = 2;
423
424ThreadWatcherList::CrashDataThresholds::CrashDataThresholds(
425    uint32 live_threads_threshold,
426    uint32 unresponsive_threshold)
427    : live_threads_threshold(live_threads_threshold),
428      unresponsive_threshold(unresponsive_threshold) {
429}
430
431ThreadWatcherList::CrashDataThresholds::CrashDataThresholds()
432    : live_threads_threshold(kLiveThreadsThreshold),
433      unresponsive_threshold(kUnresponsiveCount) {
434}
435
436// static
437void ThreadWatcherList::StartWatchingAll(const CommandLine& command_line) {
438  uint32 unresponsive_threshold;
439  CrashOnHangThreadMap crash_on_hang_threads;
440  ParseCommandLine(command_line,
441                   &unresponsive_threshold,
442                   &crash_on_hang_threads);
443
444  ThreadWatcherObserver::SetupNotifications(
445      base::TimeDelta::FromSeconds(kSleepSeconds * unresponsive_threshold));
446
447  WatchDogThread::PostDelayedTask(
448      FROM_HERE,
449      base::Bind(&ThreadWatcherList::InitializeAndStartWatching,
450                 unresponsive_threshold,
451                 crash_on_hang_threads),
452      base::TimeDelta::FromSeconds(120));
453}
454
455// static
456void ThreadWatcherList::StopWatchingAll() {
457  ThreadWatcherObserver::RemoveNotifications();
458  DeleteAll();
459}
460
461// static
462void ThreadWatcherList::Register(ThreadWatcher* watcher) {
463  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
464  if (!g_thread_watcher_list_)
465    return;
466  DCHECK(!g_thread_watcher_list_->Find(watcher->thread_id()));
467  g_thread_watcher_list_->registered_[watcher->thread_id()] = watcher;
468}
469
470// static
471bool ThreadWatcherList::IsRegistered(const BrowserThread::ID thread_id) {
472  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
473  return NULL != ThreadWatcherList::Find(thread_id);
474}
475
476// static
477void ThreadWatcherList::GetStatusOfThreads(uint32* responding_thread_count,
478                                           uint32* unresponding_thread_count) {
479  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
480  *responding_thread_count = 0;
481  *unresponding_thread_count = 0;
482  if (!g_thread_watcher_list_)
483    return;
484
485  for (RegistrationList::iterator it =
486           g_thread_watcher_list_->registered_.begin();
487       g_thread_watcher_list_->registered_.end() != it;
488       ++it) {
489    if (it->second->IsVeryUnresponsive())
490      ++(*unresponding_thread_count);
491    else
492      ++(*responding_thread_count);
493  }
494}
495
496// static
497void ThreadWatcherList::WakeUpAll() {
498  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
499  if (!g_thread_watcher_list_)
500    return;
501
502  for (RegistrationList::iterator it =
503           g_thread_watcher_list_->registered_.begin();
504       g_thread_watcher_list_->registered_.end() != it;
505       ++it)
506    it->second->WakeUp();
507}
508
509ThreadWatcherList::ThreadWatcherList() {
510  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
511  CHECK(!g_thread_watcher_list_);
512  g_thread_watcher_list_ = this;
513}
514
515ThreadWatcherList::~ThreadWatcherList() {
516  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
517  DCHECK(this == g_thread_watcher_list_);
518  g_thread_watcher_list_ = NULL;
519}
520
521// static
522void ThreadWatcherList::ParseCommandLine(
523    const CommandLine& command_line,
524    uint32* unresponsive_threshold,
525    CrashOnHangThreadMap* crash_on_hang_threads) {
526  // Initialize |unresponsive_threshold| to a default value.
527  *unresponsive_threshold = kUnresponsiveCount;
528
529  // Increase the unresponsive_threshold on the Stable and Beta channels to
530  // reduce the number of crashes due to ThreadWatcher.
531  chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel();
532  if (channel == chrome::VersionInfo::CHANNEL_STABLE) {
533    *unresponsive_threshold *= 4;
534  } else if (channel == chrome::VersionInfo::CHANNEL_BETA) {
535    *unresponsive_threshold *= 2;
536  }
537
538#if defined(OS_WIN)
539  // For Windows XP (old systems), double the unresponsive_threshold to give
540  // the OS a chance to schedule UI/IO threads a time slice to respond with a
541  // pong message (to get around limitations with the OS).
542  if (base::win::GetVersion() <= base::win::VERSION_XP)
543    *unresponsive_threshold *= 2;
544#endif
545
546  uint32 crash_seconds = *unresponsive_threshold * kUnresponsiveSeconds;
547  std::string crash_on_hang_thread_names;
548  bool has_command_line_overwrite = false;
549  if (command_line.HasSwitch(switches::kCrashOnHangThreads)) {
550    crash_on_hang_thread_names =
551        command_line.GetSwitchValueASCII(switches::kCrashOnHangThreads);
552    has_command_line_overwrite = true;
553  } else if (channel != chrome::VersionInfo::CHANNEL_STABLE) {
554    // Default to crashing the browser if UI or IO or FILE threads are not
555    // responsive except in stable channel.
556    crash_on_hang_thread_names = base::StringPrintf(
557        "UI:%d:%d,IO:%d:%d,FILE:%d:%d",
558        kLiveThreadsThreshold, crash_seconds,
559        kLiveThreadsThreshold, crash_seconds,
560        kLiveThreadsThreshold, crash_seconds * 5);
561  }
562
563  ParseCommandLineCrashOnHangThreads(crash_on_hang_thread_names,
564                                     kLiveThreadsThreshold,
565                                     crash_seconds,
566                                     crash_on_hang_threads);
567
568  if (channel != chrome::VersionInfo::CHANNEL_CANARY ||
569      has_command_line_overwrite) {
570    return;
571  }
572
573  // Set up a field trial for 100% of the users to crash if either UI or IO
574  // thread is not responsive for 30 seconds (or 15 pings).
575  scoped_refptr<base::FieldTrial> field_trial(
576      base::FieldTrialList::FactoryGetFieldTrial(
577          "ThreadWatcher", 100, "default_hung_threads",
578          2013, 10, 30, NULL));
579  int hung_thread_group = field_trial->AppendGroup("hung_thread", 100);
580  if (field_trial->group() == hung_thread_group) {
581    for (CrashOnHangThreadMap::iterator it = crash_on_hang_threads->begin();
582         crash_on_hang_threads->end() != it;
583         ++it) {
584      if (it->first != "IO")
585        continue;
586      it->second.live_threads_threshold = INT_MAX;
587      it->second.unresponsive_threshold = 15;
588    }
589  }
590}
591
592// static
593void ThreadWatcherList::ParseCommandLineCrashOnHangThreads(
594    const std::string& crash_on_hang_thread_names,
595    uint32 default_live_threads_threshold,
596    uint32 default_crash_seconds,
597    CrashOnHangThreadMap* crash_on_hang_threads) {
598  base::StringTokenizer tokens(crash_on_hang_thread_names, ",");
599  std::vector<std::string> values;
600  while (tokens.GetNext()) {
601    const std::string& token = tokens.token();
602    base::SplitString(token, ':', &values);
603    std::string thread_name = values[0];
604
605    uint32 live_threads_threshold = default_live_threads_threshold;
606    uint32 crash_seconds = default_crash_seconds;
607    if (values.size() >= 2 &&
608        (!base::StringToUint(values[1], &live_threads_threshold))) {
609      continue;
610    }
611    if (values.size() >= 3 &&
612        (!base::StringToUint(values[2], &crash_seconds))) {
613      continue;
614    }
615    uint32 unresponsive_threshold = static_cast<uint32>(
616        ceil(static_cast<float>(crash_seconds) / kUnresponsiveSeconds));
617
618    CrashDataThresholds crash_data(live_threads_threshold,
619                                   unresponsive_threshold);
620    // Use the last specifier.
621    (*crash_on_hang_threads)[thread_name] = crash_data;
622  }
623}
624
625// static
626void ThreadWatcherList::InitializeAndStartWatching(
627    uint32 unresponsive_threshold,
628    const CrashOnHangThreadMap& crash_on_hang_threads) {
629  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
630
631  ThreadWatcherList* thread_watcher_list = new ThreadWatcherList();
632  CHECK(thread_watcher_list);
633
634  BrowserThread::PostTask(
635      BrowserThread::UI,
636      FROM_HERE,
637      base::Bind(&StartupTimeBomb::DisarmStartupTimeBomb));
638
639  const base::TimeDelta kSleepTime =
640      base::TimeDelta::FromSeconds(kSleepSeconds);
641  const base::TimeDelta kUnresponsiveTime =
642      base::TimeDelta::FromSeconds(kUnresponsiveSeconds);
643
644  StartWatching(BrowserThread::UI, "UI", kSleepTime, kUnresponsiveTime,
645                unresponsive_threshold, crash_on_hang_threads);
646  StartWatching(BrowserThread::IO, "IO", kSleepTime, kUnresponsiveTime,
647                unresponsive_threshold, crash_on_hang_threads);
648  StartWatching(BrowserThread::DB, "DB", kSleepTime, kUnresponsiveTime,
649                unresponsive_threshold, crash_on_hang_threads);
650  StartWatching(BrowserThread::FILE, "FILE", kSleepTime, kUnresponsiveTime,
651                unresponsive_threshold, crash_on_hang_threads);
652  StartWatching(BrowserThread::CACHE, "CACHE", kSleepTime, kUnresponsiveTime,
653                unresponsive_threshold, crash_on_hang_threads);
654}
655
656// static
657void ThreadWatcherList::StartWatching(
658    const BrowserThread::ID& thread_id,
659    const std::string& thread_name,
660    const base::TimeDelta& sleep_time,
661    const base::TimeDelta& unresponsive_time,
662    uint32 unresponsive_threshold,
663    const CrashOnHangThreadMap& crash_on_hang_threads) {
664  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
665
666  CrashOnHangThreadMap::const_iterator it =
667      crash_on_hang_threads.find(thread_name);
668  bool crash_on_hang = false;
669  uint32 live_threads_threshold = 0;
670  if (it != crash_on_hang_threads.end()) {
671    crash_on_hang = true;
672    live_threads_threshold = it->second.live_threads_threshold;
673    unresponsive_threshold = it->second.unresponsive_threshold;
674  }
675
676  ThreadWatcher::StartWatching(
677      ThreadWatcher::WatchingParams(thread_id,
678                                    thread_name,
679                                    sleep_time,
680                                    unresponsive_time,
681                                    unresponsive_threshold,
682                                    crash_on_hang,
683                                    live_threads_threshold));
684}
685
686// static
687void ThreadWatcherList::DeleteAll() {
688  if (!WatchDogThread::CurrentlyOnWatchDogThread()) {
689    WatchDogThread::PostTask(
690        FROM_HERE,
691        base::Bind(&ThreadWatcherList::DeleteAll));
692    return;
693  }
694
695  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
696  if (!g_thread_watcher_list_)
697    return;
698
699  // Delete all thread watcher objects.
700  while (!g_thread_watcher_list_->registered_.empty()) {
701    RegistrationList::iterator it = g_thread_watcher_list_->registered_.begin();
702    delete it->second;
703    g_thread_watcher_list_->registered_.erase(it);
704  }
705
706  delete g_thread_watcher_list_;
707}
708
709// static
710ThreadWatcher* ThreadWatcherList::Find(const BrowserThread::ID& thread_id) {
711  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
712  if (!g_thread_watcher_list_)
713    return NULL;
714  RegistrationList::iterator it =
715      g_thread_watcher_list_->registered_.find(thread_id);
716  if (g_thread_watcher_list_->registered_.end() == it)
717    return NULL;
718  return it->second;
719}
720
721// ThreadWatcherObserver methods and members.
722//
723// static
724ThreadWatcherObserver* ThreadWatcherObserver::g_thread_watcher_observer_ = NULL;
725
726ThreadWatcherObserver::ThreadWatcherObserver(
727    const base::TimeDelta& wakeup_interval)
728    : last_wakeup_time_(base::TimeTicks::Now()),
729      wakeup_interval_(wakeup_interval) {
730  CHECK(!g_thread_watcher_observer_);
731  g_thread_watcher_observer_ = this;
732}
733
734ThreadWatcherObserver::~ThreadWatcherObserver() {
735  DCHECK(this == g_thread_watcher_observer_);
736  g_thread_watcher_observer_ = NULL;
737}
738
739// static
740void ThreadWatcherObserver::SetupNotifications(
741    const base::TimeDelta& wakeup_interval) {
742  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
743  ThreadWatcherObserver* observer = new ThreadWatcherObserver(wakeup_interval);
744  MetricsService::SetUpNotifications(&observer->registrar_, observer);
745}
746
747// static
748void ThreadWatcherObserver::RemoveNotifications() {
749  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
750  if (!g_thread_watcher_observer_)
751    return;
752  g_thread_watcher_observer_->registrar_.RemoveAll();
753  delete g_thread_watcher_observer_;
754}
755
756void ThreadWatcherObserver::Observe(
757    int type,
758    const content::NotificationSource& source,
759    const content::NotificationDetails& details) {
760  // There is some user activity, see if thread watchers are to be awakened.
761  base::TimeTicks now = base::TimeTicks::Now();
762  if ((now - last_wakeup_time_) < wakeup_interval_)
763    return;
764  last_wakeup_time_ = now;
765  WatchDogThread::PostTask(
766      FROM_HERE,
767      base::Bind(&ThreadWatcherList::WakeUpAll));
768}
769
770// WatchDogThread methods and members.
771
772// This lock protects g_watchdog_thread.
773static base::LazyInstance<base::Lock>::Leaky
774    g_watchdog_lock = LAZY_INSTANCE_INITIALIZER;
775
776// The singleton of this class.
777static WatchDogThread* g_watchdog_thread = NULL;
778
779WatchDogThread::WatchDogThread() : Thread("BrowserWatchdog") {
780}
781
782WatchDogThread::~WatchDogThread() {
783  Stop();
784}
785
786// static
787bool WatchDogThread::CurrentlyOnWatchDogThread() {
788  base::AutoLock lock(g_watchdog_lock.Get());
789  return g_watchdog_thread &&
790    g_watchdog_thread->message_loop() == MessageLoop::current();
791}
792
793// static
794bool WatchDogThread::PostTask(const tracked_objects::Location& from_here,
795                              const base::Closure& task) {
796  return PostTaskHelper(from_here, task, base::TimeDelta());
797}
798
799// static
800bool WatchDogThread::PostDelayedTask(const tracked_objects::Location& from_here,
801                                     const base::Closure& task,
802                                     base::TimeDelta delay) {
803  return PostTaskHelper(from_here, task, delay);
804}
805
806// static
807bool WatchDogThread::PostTaskHelper(
808    const tracked_objects::Location& from_here,
809    const base::Closure& task,
810    base::TimeDelta delay) {
811  {
812    base::AutoLock lock(g_watchdog_lock.Get());
813
814    MessageLoop* message_loop = g_watchdog_thread ?
815        g_watchdog_thread->message_loop() : NULL;
816    if (message_loop) {
817      message_loop->PostDelayedTask(from_here, task, delay);
818      return true;
819    }
820  }
821
822  return false;
823}
824
825void WatchDogThread::Init() {
826  // This thread shouldn't be allowed to perform any blocking disk I/O.
827  base::ThreadRestrictions::SetIOAllowed(false);
828
829  base::AutoLock lock(g_watchdog_lock.Get());
830  CHECK(!g_watchdog_thread);
831  g_watchdog_thread = this;
832}
833
834void WatchDogThread::CleanUp() {
835  base::AutoLock lock(g_watchdog_lock.Get());
836  g_watchdog_thread = NULL;
837}
838
839namespace {
840
841// StartupWatchDogThread methods and members.
842//
843// Class for detecting hangs during startup.
844class StartupWatchDogThread : public base::Watchdog {
845 public:
846  // Constructor specifies how long the StartupWatchDogThread will wait before
847  // alarming.
848  explicit StartupWatchDogThread(const base::TimeDelta& duration)
849      : base::Watchdog(duration, "Startup watchdog thread", true) {
850  }
851
852  // Alarm is called if the time expires after an Arm() without someone calling
853  // Disarm(). When Alarm goes off, in release mode we get the crash dump
854  // without crashing and in debug mode we break into the debugger.
855  virtual void Alarm() OVERRIDE {
856#ifndef NDEBUG
857    DCHECK(false);
858#else
859    logging::DumpWithoutCrashing();
860#endif
861  }
862
863  DISALLOW_COPY_AND_ASSIGN(StartupWatchDogThread);
864};
865
866// ShutdownWatchDogThread methods and members.
867//
868// Class for detecting hangs during shutdown.
869class ShutdownWatchDogThread : public base::Watchdog {
870 public:
871  // Constructor specifies how long the ShutdownWatchDogThread will wait before
872  // alarming.
873  explicit ShutdownWatchDogThread(const base::TimeDelta& duration)
874      : base::Watchdog(duration, "Shutdown watchdog thread", true) {
875  }
876
877  // Alarm is called if the time expires after an Arm() without someone calling
878  // Disarm(). We crash the browser if this method is called.
879  virtual void Alarm() OVERRIDE {
880    ShutdownCrash();
881  }
882
883  DISALLOW_COPY_AND_ASSIGN(ShutdownWatchDogThread);
884};
885}  // namespace
886
887// StartupTimeBomb methods and members.
888//
889// static
890StartupTimeBomb* StartupTimeBomb::g_startup_timebomb_ = NULL;
891
892StartupTimeBomb::StartupTimeBomb()
893    : startup_watchdog_(NULL),
894      thread_id_(base::PlatformThread::CurrentId()) {
895  CHECK(!g_startup_timebomb_);
896  g_startup_timebomb_ = this;
897}
898
899StartupTimeBomb::~StartupTimeBomb() {
900  DCHECK(this == g_startup_timebomb_);
901  DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
902  if (startup_watchdog_)
903    Disarm();
904  g_startup_timebomb_ = NULL;
905}
906
907void StartupTimeBomb::Arm(const base::TimeDelta& duration) {
908  DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
909  DCHECK(!startup_watchdog_);
910  // TODO(rtenneti): http://crbug.com/112970. Don't arm the startup timebomb
911  // until we fix breakpad code not to crash in logging::DumpWithoutCrashing().
912  // startup_watchdog_ = new StartupWatchDogThread(duration);
913  // startup_watchdog_->Arm();
914  return;
915}
916
917void StartupTimeBomb::Disarm() {
918  DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
919  if (startup_watchdog_) {
920    startup_watchdog_->Disarm();
921    startup_watchdog_->Cleanup();
922    DeleteStartupWatchdog();
923  }
924}
925
926void StartupTimeBomb::DeleteStartupWatchdog() {
927  DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
928  if (startup_watchdog_->IsJoinable()) {
929    // Allow the watchdog thread to shutdown on UI. Watchdog thread shutdowns
930    // very fast.
931    base::ThreadRestrictions::SetIOAllowed(true);
932    delete startup_watchdog_;
933    startup_watchdog_ = NULL;
934    return;
935  }
936  MessageLoop::current()->PostDelayedTask(
937      FROM_HERE,
938      base::Bind(&StartupTimeBomb::DeleteStartupWatchdog,
939                 base::Unretained(this)),
940      base::TimeDelta::FromSeconds(10));
941}
942
943// static
944void StartupTimeBomb::DisarmStartupTimeBomb() {
945  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
946  if (g_startup_timebomb_)
947    g_startup_timebomb_->Disarm();
948}
949
950// ShutdownWatcherHelper methods and members.
951//
952// ShutdownWatcherHelper is a wrapper class for detecting hangs during
953// shutdown.
954ShutdownWatcherHelper::ShutdownWatcherHelper()
955    : shutdown_watchdog_(NULL),
956      thread_id_(base::PlatformThread::CurrentId()) {
957}
958
959ShutdownWatcherHelper::~ShutdownWatcherHelper() {
960  DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
961  if (shutdown_watchdog_) {
962    shutdown_watchdog_->Disarm();
963    delete shutdown_watchdog_;
964    shutdown_watchdog_ = NULL;
965  }
966}
967
968void ShutdownWatcherHelper::Arm(const base::TimeDelta& duration) {
969  DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
970  DCHECK(!shutdown_watchdog_);
971  base::TimeDelta actual_duration = duration;
972
973  chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel();
974  if (channel == chrome::VersionInfo::CHANNEL_STABLE) {
975    actual_duration *= 20;
976  } else if (channel == chrome::VersionInfo::CHANNEL_BETA ||
977             channel == chrome::VersionInfo::CHANNEL_DEV) {
978    actual_duration *= 10;
979  }
980
981#if defined(OS_WIN)
982  // On Windows XP, give twice the time for shutdown.
983  if (base::win::GetVersion() <= base::win::VERSION_XP)
984    actual_duration *= 2;
985#endif
986
987  shutdown_watchdog_ = new ShutdownWatchDogThread(actual_duration);
988  shutdown_watchdog_->Arm();
989}
990