thread_watcher.cc revision 4e180b6a0b4720a9b8e9e959a882386f690f08ff
15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2012 The Chromium Authors. All rights reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/metrics/thread_watcher.h"
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <math.h>  // ceil
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/bind.h"
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/compiler_specific.h"
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/debug/alias.h"
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/lazy_instance.h"
132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "base/strings/string_number_conversions.h"
142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "base/strings/string_split.h"
152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "base/strings/string_tokenizer.h"
16868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "base/strings/stringprintf.h"
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/threading/thread_restrictions.h"
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "build/build_config.h"
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/metrics/metrics_service.h"
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/common/chrome_switches.h"
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/common/chrome_version_info.h"
222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "chrome/common/dump_without_crashing.h"
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/common/logging_chrome.h"
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if defined(OS_WIN)
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/win/windows_version.h"
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using content::BrowserThread;
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace {
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The following are unique function names for forcing the crash when a thread
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// is unresponsive. This makes it possible to tell from the callstack alone what
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// thread was unresponsive.
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// We disable optimizations for this block of functions so the compiler doesn't
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// merge them all together.
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)MSVC_DISABLE_OPTIMIZE()
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)MSVC_PUSH_DISABLE_WARNING(4748)
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Produces a null int*. NullPointerCrash() stores through the result to force
// a crash.
int* NullPointer() {
  int* const null_target = reinterpret_cast<int*>(NULL);
  return null_target;
}
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)void NullPointerCrash(int line_number) {
472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#ifndef NDEBUG
482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  *NullPointer() = line_number;  // Crash.
492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#else
502a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  logging::DumpWithoutCrashing();
512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#endif
522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
54c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)NOINLINE void ShutdownCrash() {
55c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  NullPointerCrash(__LINE__);
56c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
57c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)NOINLINE void ThreadUnresponsive_UI() {
592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  NullPointerCrash(__LINE__);
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)NOINLINE void ThreadUnresponsive_DB() {
632a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  NullPointerCrash(__LINE__);
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)NOINLINE void ThreadUnresponsive_FILE() {
672a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  NullPointerCrash(__LINE__);
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)NOINLINE void ThreadUnresponsive_FILE_USER_BLOCKING() {
712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  NullPointerCrash(__LINE__);
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)NOINLINE void ThreadUnresponsive_PROCESS_LAUNCHER() {
752a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  NullPointerCrash(__LINE__);
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)NOINLINE void ThreadUnresponsive_CACHE() {
792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  NullPointerCrash(__LINE__);
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)NOINLINE void ThreadUnresponsive_IO() {
832a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  NullPointerCrash(__LINE__);
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)MSVC_POP_WARNING()
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)MSVC_ENABLE_OPTIMIZE();
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void CrashBecauseThreadWasUnresponsive(BrowserThread::ID thread_id) {
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::debug::Alias(&thread_id);
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  switch (thread_id) {
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case BrowserThread::UI:
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return ThreadUnresponsive_UI();
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case BrowserThread::DB:
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return ThreadUnresponsive_DB();
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case BrowserThread::FILE:
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return ThreadUnresponsive_FILE();
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case BrowserThread::FILE_USER_BLOCKING:
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return ThreadUnresponsive_FILE_USER_BLOCKING();
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case BrowserThread::PROCESS_LAUNCHER:
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return ThreadUnresponsive_PROCESS_LAUNCHER();
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case BrowserThread::CACHE:
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return ThreadUnresponsive_CACHE();
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case BrowserThread::IO:
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return ThreadUnresponsive_IO();
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case BrowserThread::ID_COUNT:
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      CHECK(false);  // This shouldn't actually be reached!
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Omission of the default hander is intentional -- that way the compiler
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // should warn if our switch becomes outdated.
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CHECK(false) << "Unknown thread was unresponsive.";  // Shouldn't be reached.
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ThreadWatcher methods and members.
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)ThreadWatcher::ThreadWatcher(const WatchingParams& params)
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    : thread_id_(params.thread_id),
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      thread_name_(params.thread_name),
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      watched_loop_(
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          BrowserThread::GetMessageLoopProxyForThread(params.thread_id)),
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      sleep_time_(params.sleep_time),
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      unresponsive_time_(params.unresponsive_time),
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ping_time_(base::TimeTicks::Now()),
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      pong_time_(ping_time_),
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ping_sequence_number_(0),
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      active_(false),
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ping_count_(params.unresponsive_threshold),
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      response_time_histogram_(NULL),
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      unresponsive_time_histogram_(NULL),
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      unresponsive_count_(0),
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      hung_processing_complete_(false),
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      unresponsive_threshold_(params.unresponsive_threshold),
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      crash_on_hang_(params.crash_on_hang),
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      live_threads_threshold_(params.live_threads_threshold),
140c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      weak_ptr_factory_(this) {
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Initialize();
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)ThreadWatcher::~ThreadWatcher() {}
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ThreadWatcher::StartWatching(const WatchingParams& params) {
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK_GE(params.sleep_time.InMilliseconds(), 0);
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK_GE(params.unresponsive_time.InMilliseconds(),
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            params.sleep_time.InMilliseconds());
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // If we are not on WatchDogThread, then post a task to call StartWatching on
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // WatchDogThread.
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!WatchDogThread::CurrentlyOnWatchDogThread()) {
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    WatchDogThread::PostTask(
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        FROM_HERE,
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        base::Bind(&ThreadWatcher::StartWatching, params));
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Create a new thread watcher object for the given thread and activate it.
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ThreadWatcher* watcher = new ThreadWatcher(params);
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(watcher);
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // If we couldn't register the thread watcher object, we are shutting down,
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // then don't activate thread watching.
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!ThreadWatcherList::IsRegistered(params.thread_id))
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  watcher->ActivateThreadWatching();
1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ThreadWatcher::ActivateThreadWatching() {
1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (active_) return;
1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  active_ = true;
1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ping_count_ = unresponsive_threshold_;
1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ResetHangCounters();
18190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  base::MessageLoop::current()->PostTask(
1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      FROM_HERE,
1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::Bind(&ThreadWatcher::PostPingMessage,
1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                 weak_ptr_factory_.GetWeakPtr()));
1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ThreadWatcher::DeActivateThreadWatching() {
1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  active_ = false;
1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ping_count_ = 0;
1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  weak_ptr_factory_.InvalidateWeakPtrs();
1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ThreadWatcher::WakeUp() {
1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // There is some user activity, PostPingMessage task of thread watcher if
1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // needed.
1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!active_) return;
1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Throw away the previous |unresponsive_count_| and start over again. Just
2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // before going to sleep, |unresponsive_count_| could be very close to
2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // |unresponsive_threshold_| and when user becomes active,
2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // |unresponsive_count_| can go over |unresponsive_threshold_| if there was no
2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // response for ping messages. Reset |unresponsive_count_| to start measuring
2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // the unresponsiveness of the threads when system becomes active.
2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  unresponsive_count_ = 0;
2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (ping_count_ <= 0) {
2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ping_count_ = unresponsive_threshold_;
2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ResetHangCounters();
2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    PostPingMessage();
2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ping_count_ = unresponsive_threshold_;
2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ThreadWatcher::PostPingMessage() {
2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // If we have stopped watching or if the user is idle, then stop sending
2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // ping messages.
2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!active_ || ping_count_ <= 0)
2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Save the current time when we have sent ping message.
2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ping_time_ = base::TimeTicks::Now();
2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Send a ping message to the watched thread. Callback will be called on
2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // the WatchDogThread.
2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::Closure callback(
2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::Bind(&ThreadWatcher::OnPongMessage, weak_ptr_factory_.GetWeakPtr(),
2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                 ping_sequence_number_));
2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (watched_loop_->PostTask(
2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          FROM_HERE,
2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          base::Bind(&ThreadWatcher::OnPingMessage, thread_id_,
2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                     callback))) {
2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Post a task to check the responsiveness of watched thread.
23790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      base::MessageLoop::current()->PostDelayedTask(
2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          FROM_HERE,
2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          base::Bind(&ThreadWatcher::OnCheckResponsiveness,
2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                     weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_),
2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          unresponsive_time_);
2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Watched thread might have gone away, stop watching it.
2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    DeActivateThreadWatching();
2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ThreadWatcher::OnPongMessage(uint64 ping_sequence_number) {
2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Record watched thread's response time.
2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::TimeTicks now = base::TimeTicks::Now();
2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::TimeDelta response_time = now - ping_time_;
2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  response_time_histogram_->AddTime(response_time);
2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Save the current time when we have got pong message.
2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  pong_time_ = now;
2585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Check if there are any extra pings in flight.
2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK_EQ(ping_sequence_number_, ping_sequence_number);
2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (ping_sequence_number_ != ping_sequence_number)
2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Increment sequence number for the next ping message to indicate watched
2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // thread is responsive.
2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ++ping_sequence_number_;
2675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // If we have stopped watching or if the user is idle, then stop sending
2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // ping messages.
2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!active_ || --ping_count_ <= 0)
2715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
2725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
27390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  base::MessageLoop::current()->PostDelayedTask(
2745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      FROM_HERE,
2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::Bind(&ThreadWatcher::PostPingMessage,
2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                 weak_ptr_factory_.GetWeakPtr()),
2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      sleep_time_);
2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ThreadWatcher::OnCheckResponsiveness(uint64 ping_sequence_number) {
2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // If we have stopped watching then consider thread as responding.
2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!active_) {
2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    responsive_ = true;
2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // If the latest ping_sequence_number_ is not same as the ping_sequence_number
2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // that is passed in, then we can assume OnPongMessage was called.
2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // OnPongMessage increments ping_sequence_number_.
2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (ping_sequence_number_ != ping_sequence_number) {
2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Reset unresponsive_count_ to zero because we got a response from the
2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // watched thread.
2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ResetHangCounters();
2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    responsive_ = true;
2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Record that we got no response from watched thread.
2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  GotNoResponse();
3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Post a task to check the responsiveness of watched thread.
30290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  base::MessageLoop::current()->PostDelayedTask(
3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      FROM_HERE,
3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::Bind(&ThreadWatcher::OnCheckResponsiveness,
3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                 weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_),
3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      unresponsive_time_);
3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  responsive_ = false;
3085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ThreadWatcher::Initialize() {
3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
3125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ThreadWatcherList::Register(this);
3135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const std::string response_time_histogram_name =
3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      "ThreadWatcher.ResponseTime." + thread_name_;
3165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  response_time_histogram_ = base::Histogram::FactoryTimeGet(
3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      response_time_histogram_name,
3185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::TimeDelta::FromMilliseconds(1),
3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::TimeDelta::FromSeconds(100), 50,
3205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::Histogram::kUmaTargetedHistogramFlag);
3215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const std::string unresponsive_time_histogram_name =
3235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      "ThreadWatcher.Unresponsive." + thread_name_;
3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  unresponsive_time_histogram_ = base::Histogram::FactoryTimeGet(
3255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      unresponsive_time_histogram_name,
3265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::TimeDelta::FromMilliseconds(1),
3275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::TimeDelta::FromSeconds(100), 50,
3285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::Histogram::kUmaTargetedHistogramFlag);
3295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const std::string responsive_count_histogram_name =
3315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      "ThreadWatcher.ResponsiveThreads." + thread_name_;
3325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  responsive_count_histogram_ = base::LinearHistogram::FactoryGet(
3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      responsive_count_histogram_name, 1, 10, 11,
3345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::Histogram::kUmaTargetedHistogramFlag);
3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const std::string unresponsive_count_histogram_name =
3375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      "ThreadWatcher.UnresponsiveThreads." + thread_name_;
3385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  unresponsive_count_histogram_ = base::LinearHistogram::FactoryGet(
3395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      unresponsive_count_histogram_name, 1, 10, 11,
3405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::Histogram::kUmaTargetedHistogramFlag);
3415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
3445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ThreadWatcher::OnPingMessage(const BrowserThread::ID& thread_id,
3455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                  const base::Closure& callback_task) {
3465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // This method is called on watched thread.
3475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(BrowserThread::CurrentlyOn(thread_id));
3485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  WatchDogThread::PostTask(FROM_HERE, callback_task);
3495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ThreadWatcher::ResetHangCounters() {
3525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
3535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  unresponsive_count_ = 0;
3545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  hung_processing_complete_ = false;
3555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ThreadWatcher::GotNoResponse() {
3585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
3595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ++unresponsive_count_;
3615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!IsVeryUnresponsive())
3625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
3635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Record total unresponsive_time since last pong message.
3655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::TimeDelta unresponse_time = base::TimeTicks::Now() - pong_time_;
3665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  unresponsive_time_histogram_->AddTime(unresponse_time);
3675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We have already collected stats for the non-responding watched thread.
3695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (hung_processing_complete_)
3705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
3715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Record how other threads are responding.
3735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint32 responding_thread_count = 0;
3745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint32 unresponding_thread_count = 0;
3755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ThreadWatcherList::GetStatusOfThreads(&responding_thread_count,
3765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                        &unresponding_thread_count);
3775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Record how many watched threads are responding.
3795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  responsive_count_histogram_->Add(responding_thread_count);
3805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Record how many watched threads are not responding.
3825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  unresponsive_count_histogram_->Add(unresponding_thread_count);
3835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Crash the browser if the watched thread is to be crashed on hang and if the
3855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // number of other threads responding is less than or equal to
3865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // live_threads_threshold_ and at least one other thread is responding.
3875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (crash_on_hang_ &&
3885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      responding_thread_count > 0 &&
3895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      responding_thread_count <= live_threads_threshold_) {
3905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    static bool crashed_once = false;
3915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!crashed_once) {
3925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      crashed_once = true;
3935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      CrashBecauseThreadWasUnresponsive(thread_id_);
3945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
3955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  hung_processing_complete_ = true;
3985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool ThreadWatcher::IsVeryUnresponsive() {
4015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
4025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return unresponsive_count_ >= unresponsive_threshold_;
4035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ThreadWatcherList methods and members.
4065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
4075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
4085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)ThreadWatcherList* ThreadWatcherList::g_thread_watcher_list_ = NULL;
4095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
4105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const int ThreadWatcherList::kSleepSeconds = 1;
4115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
4125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const int ThreadWatcherList::kUnresponsiveSeconds = 2;
4135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
41458e6fbe4ee35d65e14b626c557d37565bf8ad179Ben Murdochconst int ThreadWatcherList::kUnresponsiveCount = 9;
4155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
416c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)const int ThreadWatcherList::kLiveThreadsThreshold = 2;
4175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4182a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)ThreadWatcherList::CrashDataThresholds::CrashDataThresholds(
4192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    uint32 live_threads_threshold,
4202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    uint32 unresponsive_threshold)
4212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    : live_threads_threshold(live_threads_threshold),
4222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      unresponsive_threshold(unresponsive_threshold) {
4232a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
4242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
4252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)ThreadWatcherList::CrashDataThresholds::CrashDataThresholds()
4262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    : live_threads_threshold(kLiveThreadsThreshold),
4272a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      unresponsive_threshold(kUnresponsiveCount) {
4282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
4292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
4305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
4315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ThreadWatcherList::StartWatchingAll(const CommandLine& command_line) {
4324e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  // TODO(rtenneti): Enable ThreadWatcher.
4335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint32 unresponsive_threshold;
4342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  CrashOnHangThreadMap crash_on_hang_threads;
4355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ParseCommandLine(command_line,
4365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                   &unresponsive_threshold,
4372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                   &crash_on_hang_threads);
4385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ThreadWatcherObserver::SetupNotifications(
4405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::TimeDelta::FromSeconds(kSleepSeconds * unresponsive_threshold));
4415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  WatchDogThread::PostDelayedTask(
4435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      FROM_HERE,
4445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::Bind(&ThreadWatcherList::InitializeAndStartWatching,
4455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                 unresponsive_threshold,
4462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                 crash_on_hang_threads),
4475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::TimeDelta::FromSeconds(120));
4485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
4515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ThreadWatcherList::StopWatchingAll() {
4524e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  // TODO(rtenneti): Enable ThreadWatcher.
4535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ThreadWatcherObserver::RemoveNotifications();
4545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DeleteAll();
4555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
4585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ThreadWatcherList::Register(ThreadWatcher* watcher) {
4595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
4605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!g_thread_watcher_list_)
4615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
4625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(!g_thread_watcher_list_->Find(watcher->thread_id()));
4635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  g_thread_watcher_list_->registered_[watcher->thread_id()] = watcher;
4645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
4675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool ThreadWatcherList::IsRegistered(const BrowserThread::ID thread_id) {
4685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
4695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return NULL != ThreadWatcherList::Find(thread_id);
4705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
4735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ThreadWatcherList::GetStatusOfThreads(uint32* responding_thread_count,
4745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                           uint32* unresponding_thread_count) {
4755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
4765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  *responding_thread_count = 0;
4775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  *unresponding_thread_count = 0;
4785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!g_thread_watcher_list_)
4795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
4805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (RegistrationList::iterator it =
4825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)           g_thread_watcher_list_->registered_.begin();
4835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       g_thread_watcher_list_->registered_.end() != it;
4845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       ++it) {
4855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (it->second->IsVeryUnresponsive())
4865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ++(*unresponding_thread_count);
4875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    else
4885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ++(*responding_thread_count);
4895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
4905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
4935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ThreadWatcherList::WakeUpAll() {
4945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
4955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!g_thread_watcher_list_)
4965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
4975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (RegistrationList::iterator it =
4995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)           g_thread_watcher_list_->registered_.begin();
5005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       g_thread_watcher_list_->registered_.end() != it;
5015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       ++it)
5025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    it->second->WakeUp();
5035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
5045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)ThreadWatcherList::ThreadWatcherList() {
5065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
5075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CHECK(!g_thread_watcher_list_);
5085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  g_thread_watcher_list_ = this;
5095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
5105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)ThreadWatcherList::~ThreadWatcherList() {
5125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
5135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(this == g_thread_watcher_list_);
5145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  g_thread_watcher_list_ = NULL;
5155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
5165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
5185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ThreadWatcherList::ParseCommandLine(
5195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const CommandLine& command_line,
5205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    uint32* unresponsive_threshold,
5212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    CrashOnHangThreadMap* crash_on_hang_threads) {
5222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Initialize |unresponsive_threshold| to a default value.
5235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  *unresponsive_threshold = kUnresponsiveCount;
5245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Increase the unresponsive_threshold on the Stable and Beta channels to
5265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // reduce the number of crashes due to ThreadWatcher.
5275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel();
5285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (channel == chrome::VersionInfo::CHANNEL_STABLE) {
5295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    *unresponsive_threshold *= 4;
5305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else if (channel == chrome::VersionInfo::CHANNEL_BETA) {
5315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    *unresponsive_threshold *= 2;
5325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if defined(OS_WIN)
5355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // For Windows XP (old systems), double the unresponsive_threshold to give
5365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // the OS a chance to schedule UI/IO threads a time slice to respond with a
5375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // pong message (to get around limitations with the OS).
5385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (base::win::GetVersion() <= base::win::VERSION_XP)
5395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    *unresponsive_threshold *= 2;
5405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
5415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5422a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  uint32 crash_seconds = *unresponsive_threshold * kUnresponsiveSeconds;
5432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  std::string crash_on_hang_thread_names;
5442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  bool has_command_line_overwrite = false;
5452a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (command_line.HasSwitch(switches::kCrashOnHangThreads)) {
5462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    crash_on_hang_thread_names =
5472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        command_line.GetSwitchValueASCII(switches::kCrashOnHangThreads);
5482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    has_command_line_overwrite = true;
5492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  } else if (channel != chrome::VersionInfo::CHANNEL_STABLE) {
5502a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // Default to crashing the browser if UI or IO or FILE threads are not
5512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // responsive except in stable channel.
5522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    crash_on_hang_thread_names = base::StringPrintf(
5532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        "UI:%d:%d,IO:%d:%d,FILE:%d:%d",
5542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        kLiveThreadsThreshold, crash_seconds,
5552a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        kLiveThreadsThreshold, crash_seconds,
5562a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        kLiveThreadsThreshold, crash_seconds * 5);
5575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  ParseCommandLineCrashOnHangThreads(crash_on_hang_thread_names,
5602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                     kLiveThreadsThreshold,
5612a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                     crash_seconds,
5622a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                     crash_on_hang_threads);
5635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5642a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (channel != chrome::VersionInfo::CHANNEL_CANARY ||
5652a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      has_command_line_overwrite) {
5662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    return;
5672a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
5685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5692a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Set up a field trial for 100% of the users to crash if either UI or IO
5702a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // thread is not responsive for 30 seconds (or 15 pings).
5712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  scoped_refptr<base::FieldTrial> field_trial(
5722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      base::FieldTrialList::FactoryGetFieldTrial(
5732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)          "ThreadWatcher", 100, "default_hung_threads",
5744e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)          2014, 10, 30, base::FieldTrial::SESSION_RANDOMIZED, NULL));
5752a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  int hung_thread_group = field_trial->AppendGroup("hung_thread", 100);
5762a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (field_trial->group() == hung_thread_group) {
5772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    for (CrashOnHangThreadMap::iterator it = crash_on_hang_threads->begin();
5782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)         crash_on_hang_threads->end() != it;
5792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)         ++it) {
5804e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)      if (it->first == "FILE")
5812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        continue;
5822a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      it->second.live_threads_threshold = INT_MAX;
5834e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)      if (it->first == "UI") {
5844e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)        // TODO(rtenneti): set unresponsive threshold to 120 seconds to catch
5854e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)        // the worst UI hangs and for fewer crashes due to ThreadWatcher. Reduce
5864e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)        // it to a more reasonable time ala IO thread.
5874e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)        it->second.unresponsive_threshold = 60;
5884e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)      } else {
5894e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)        it->second.unresponsive_threshold = 15;
5904e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)      }
5912a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    }
5925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5932a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
5942a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
5952a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// static
5962a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)void ThreadWatcherList::ParseCommandLineCrashOnHangThreads(
5972a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const std::string& crash_on_hang_thread_names,
5982a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    uint32 default_live_threads_threshold,
5992a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    uint32 default_crash_seconds,
6002a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    CrashOnHangThreadMap* crash_on_hang_threads) {
6012a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  base::StringTokenizer tokens(crash_on_hang_thread_names, ",");
6022a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  std::vector<std::string> values;
6032a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  while (tokens.GetNext()) {
6042a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const std::string& token = tokens.token();
6052a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    base::SplitString(token, ':', &values);
6062a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    std::string thread_name = values[0];
6072a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
6082a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    uint32 live_threads_threshold = default_live_threads_threshold;
6092a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    uint32 crash_seconds = default_crash_seconds;
6102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (values.size() >= 2 &&
6112a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        (!base::StringToUint(values[1], &live_threads_threshold))) {
6122a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      continue;
6132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    }
6142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (values.size() >= 3 &&
6152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        (!base::StringToUint(values[2], &crash_seconds))) {
6162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      continue;
6172a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    }
6182a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    uint32 unresponsive_threshold = static_cast<uint32>(
6192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        ceil(static_cast<float>(crash_seconds) / kUnresponsiveSeconds));
6202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
6212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    CrashDataThresholds crash_data(live_threads_threshold,
6222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                   unresponsive_threshold);
6232a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // Use the last specifier.
6242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    (*crash_on_hang_threads)[thread_name] = crash_data;
6255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
6265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
6275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
6295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ThreadWatcherList::InitializeAndStartWatching(
6305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    uint32 unresponsive_threshold,
6312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const CrashOnHangThreadMap& crash_on_hang_threads) {
6325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
6335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ThreadWatcherList* thread_watcher_list = new ThreadWatcherList();
6355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CHECK(thread_watcher_list);
6365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  BrowserThread::PostTask(
6385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      BrowserThread::UI,
6395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      FROM_HERE,
6405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::Bind(&StartupTimeBomb::DisarmStartupTimeBomb));
6415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const base::TimeDelta kSleepTime =
6435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::TimeDelta::FromSeconds(kSleepSeconds);
6445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const base::TimeDelta kUnresponsiveTime =
6455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::TimeDelta::FromSeconds(kUnresponsiveSeconds);
6465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  StartWatching(BrowserThread::UI, "UI", kSleepTime, kUnresponsiveTime,
6482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                unresponsive_threshold, crash_on_hang_threads);
6495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  StartWatching(BrowserThread::IO, "IO", kSleepTime, kUnresponsiveTime,
6502a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                unresponsive_threshold, crash_on_hang_threads);
6515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  StartWatching(BrowserThread::DB, "DB", kSleepTime, kUnresponsiveTime,
6522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                unresponsive_threshold, crash_on_hang_threads);
6535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  StartWatching(BrowserThread::FILE, "FILE", kSleepTime, kUnresponsiveTime,
6542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                unresponsive_threshold, crash_on_hang_threads);
6555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  StartWatching(BrowserThread::CACHE, "CACHE", kSleepTime, kUnresponsiveTime,
6562a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                unresponsive_threshold, crash_on_hang_threads);
6575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
6585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
6605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ThreadWatcherList::StartWatching(
6615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const BrowserThread::ID& thread_id,
6625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::string& thread_name,
6635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const base::TimeDelta& sleep_time,
6645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const base::TimeDelta& unresponsive_time,
6655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    uint32 unresponsive_threshold,
6662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const CrashOnHangThreadMap& crash_on_hang_threads) {
6675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
6685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6692a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  CrashOnHangThreadMap::const_iterator it =
6702a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      crash_on_hang_threads.find(thread_name);
6712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  bool crash_on_hang = false;
6722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  uint32 live_threads_threshold = 0;
6732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (it != crash_on_hang_threads.end()) {
6742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    crash_on_hang = true;
6752a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    live_threads_threshold = it->second.live_threads_threshold;
6762a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    unresponsive_threshold = it->second.unresponsive_threshold;
6772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
6785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ThreadWatcher::StartWatching(
6805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ThreadWatcher::WatchingParams(thread_id,
6815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                    thread_name,
6825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                    sleep_time,
6835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                    unresponsive_time,
6845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                    unresponsive_threshold,
6855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                    crash_on_hang,
6865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                    live_threads_threshold));
6875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
6885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
6905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ThreadWatcherList::DeleteAll() {
6915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!WatchDogThread::CurrentlyOnWatchDogThread()) {
6925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    WatchDogThread::PostTask(
6935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        FROM_HERE,
6945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        base::Bind(&ThreadWatcherList::DeleteAll));
6955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
6965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
6975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
6995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!g_thread_watcher_list_)
7005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
7015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Delete all thread watcher objects.
7035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  while (!g_thread_watcher_list_->registered_.empty()) {
7045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    RegistrationList::iterator it = g_thread_watcher_list_->registered_.begin();
7055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    delete it->second;
7065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    g_thread_watcher_list_->registered_.erase(it);
7075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
7085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  delete g_thread_watcher_list_;
7105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
7115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
7135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)ThreadWatcher* ThreadWatcherList::Find(const BrowserThread::ID& thread_id) {
7145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
7155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!g_thread_watcher_list_)
7165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return NULL;
7175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  RegistrationList::iterator it =
7185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      g_thread_watcher_list_->registered_.find(thread_id);
7195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (g_thread_watcher_list_->registered_.end() == it)
7205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return NULL;
7215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return it->second;
7225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
7235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ThreadWatcherObserver methods and members.
7255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
7265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
7275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)ThreadWatcherObserver* ThreadWatcherObserver::g_thread_watcher_observer_ = NULL;
7285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)ThreadWatcherObserver::ThreadWatcherObserver(
7305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const base::TimeDelta& wakeup_interval)
7315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    : last_wakeup_time_(base::TimeTicks::Now()),
7325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      wakeup_interval_(wakeup_interval) {
7335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CHECK(!g_thread_watcher_observer_);
7345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  g_thread_watcher_observer_ = this;
7355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
7365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)ThreadWatcherObserver::~ThreadWatcherObserver() {
7385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(this == g_thread_watcher_observer_);
7395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  g_thread_watcher_observer_ = NULL;
7405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
7415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
7435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ThreadWatcherObserver::SetupNotifications(
7445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const base::TimeDelta& wakeup_interval) {
7455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
7465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ThreadWatcherObserver* observer = new ThreadWatcherObserver(wakeup_interval);
7475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  MetricsService::SetUpNotifications(&observer->registrar_, observer);
7485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
7495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
7515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ThreadWatcherObserver::RemoveNotifications() {
7525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
7535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!g_thread_watcher_observer_)
7545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
7555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  g_thread_watcher_observer_->registrar_.RemoveAll();
7565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  delete g_thread_watcher_observer_;
7575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
7585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ThreadWatcherObserver::Observe(
7605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int type,
7615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const content::NotificationSource& source,
7625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const content::NotificationDetails& details) {
7635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // There is some user activity, see if thread watchers are to be awakened.
7645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::TimeTicks now = base::TimeTicks::Now();
7655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if ((now - last_wakeup_time_) < wakeup_interval_)
7665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
7675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  last_wakeup_time_ = now;
7685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  WatchDogThread::PostTask(
7695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      FROM_HERE,
7705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::Bind(&ThreadWatcherList::WakeUpAll));
7715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
7725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// WatchDogThread methods and members.
7745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This lock protects g_watchdog_thread.
7765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static base::LazyInstance<base::Lock>::Leaky
7775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    g_watchdog_lock = LAZY_INSTANCE_INITIALIZER;
7785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The singleton of this class.
7805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static WatchDogThread* g_watchdog_thread = NULL;
7815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)WatchDogThread::WatchDogThread() : Thread("BrowserWatchdog") {
7835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
7845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)WatchDogThread::~WatchDogThread() {
7865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Stop();
7875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
7885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
7905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool WatchDogThread::CurrentlyOnWatchDogThread() {
7915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::AutoLock lock(g_watchdog_lock.Get());
7925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return g_watchdog_thread &&
79390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      g_watchdog_thread->message_loop() == base::MessageLoop::current();
7945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
7955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
7975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool WatchDogThread::PostTask(const tracked_objects::Location& from_here,
7985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                              const base::Closure& task) {
7995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return PostTaskHelper(from_here, task, base::TimeDelta());
8005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
8015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
8035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool WatchDogThread::PostDelayedTask(const tracked_objects::Location& from_here,
8045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                     const base::Closure& task,
8055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                     base::TimeDelta delay) {
8065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return PostTaskHelper(from_here, task, delay);
8075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
8085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
8105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool WatchDogThread::PostTaskHelper(
8115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const tracked_objects::Location& from_here,
8125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const base::Closure& task,
8135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    base::TimeDelta delay) {
8145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  {
8155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    base::AutoLock lock(g_watchdog_lock.Get());
8165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
81790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)    base::MessageLoop* message_loop = g_watchdog_thread ?
8185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        g_watchdog_thread->message_loop() : NULL;
8195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (message_loop) {
8205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      message_loop->PostDelayedTask(from_here, task, delay);
8215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return true;
8225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
8235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
8245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return false;
8265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
8275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void WatchDogThread::Init() {
8295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // This thread shouldn't be allowed to perform any blocking disk I/O.
8305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::ThreadRestrictions::SetIOAllowed(false);
8315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::AutoLock lock(g_watchdog_lock.Get());
8335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CHECK(!g_watchdog_thread);
8345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  g_watchdog_thread = this;
8355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
8365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void WatchDogThread::CleanUp() {
8385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::AutoLock lock(g_watchdog_lock.Get());
8395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  g_watchdog_thread = NULL;
8405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
8415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace {
8435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// StartupWatchDogThread methods and members.
8455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
8465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Class for detecting hangs during startup.
8475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class StartupWatchDogThread : public base::Watchdog {
8485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
8495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Constructor specifies how long the StartupWatchDogThread will wait before
8505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // alarming.
8515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  explicit StartupWatchDogThread(const base::TimeDelta& duration)
8525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      : base::Watchdog(duration, "Startup watchdog thread", true) {
8535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
8545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Alarm is called if the time expires after an Arm() without someone calling
8565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Disarm(). When Alarm goes off, in release mode we get the crash dump
8575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // without crashing and in debug mode we break into the debugger.
8582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  virtual void Alarm() OVERRIDE {
8595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef NDEBUG
8605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    DCHECK(false);
8615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#else
8625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    logging::DumpWithoutCrashing();
8635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
8645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
8655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DISALLOW_COPY_AND_ASSIGN(StartupWatchDogThread);
8675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
8685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ShutdownWatchDogThread methods and members.
8705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
8715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Class for detecting hangs during shutdown.
8725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class ShutdownWatchDogThread : public base::Watchdog {
8735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
8745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Constructor specifies how long the ShutdownWatchDogThread will wait before
8755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // alarming.
8765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  explicit ShutdownWatchDogThread(const base::TimeDelta& duration)
8775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      : base::Watchdog(duration, "Shutdown watchdog thread", true) {
8785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
8795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Alarm is called if the time expires after an Arm() without someone calling
8815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Disarm(). We crash the browser if this method is called.
8822a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  virtual void Alarm() OVERRIDE {
883c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    ShutdownCrash();
8845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
8855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DISALLOW_COPY_AND_ASSIGN(ShutdownWatchDogThread);
8875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
8885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace
8895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// StartupTimeBomb methods and members.
8915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
8925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
8935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)StartupTimeBomb* StartupTimeBomb::g_startup_timebomb_ = NULL;
8945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)StartupTimeBomb::StartupTimeBomb()
8965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    : startup_watchdog_(NULL),
8975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      thread_id_(base::PlatformThread::CurrentId()) {
8985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CHECK(!g_startup_timebomb_);
8995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  g_startup_timebomb_ = this;
9005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
9015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)StartupTimeBomb::~StartupTimeBomb() {
9035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(this == g_startup_timebomb_);
9045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
9055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (startup_watchdog_)
9065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Disarm();
9075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  g_startup_timebomb_ = NULL;
9085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
9095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void StartupTimeBomb::Arm(const base::TimeDelta& duration) {
9115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
9125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(!startup_watchdog_);
9133551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  startup_watchdog_ = new StartupWatchDogThread(duration);
9143551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  startup_watchdog_->Arm();
9155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return;
9165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
9175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void StartupTimeBomb::Disarm() {
9195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
9205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (startup_watchdog_) {
9215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    startup_watchdog_->Disarm();
9225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    startup_watchdog_->Cleanup();
9235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    DeleteStartupWatchdog();
9245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
9255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
9265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void StartupTimeBomb::DeleteStartupWatchdog() {
9285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
9295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (startup_watchdog_->IsJoinable()) {
9305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Allow the watchdog thread to shutdown on UI. Watchdog thread shutdowns
9315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // very fast.
9325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    base::ThreadRestrictions::SetIOAllowed(true);
9335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    delete startup_watchdog_;
9345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    startup_watchdog_ = NULL;
9355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
9365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
93790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  base::MessageLoop::current()->PostDelayedTask(
9385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      FROM_HERE,
9395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::Bind(&StartupTimeBomb::DeleteStartupWatchdog,
9405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                 base::Unretained(this)),
9415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      base::TimeDelta::FromSeconds(10));
9425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
9435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
9455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void StartupTimeBomb::DisarmStartupTimeBomb() {
9465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
9475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (g_startup_timebomb_)
9485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    g_startup_timebomb_->Disarm();
9495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
9505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ShutdownWatcherHelper methods and members.
9525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
9535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ShutdownWatcherHelper is a wrapper class for detecting hangs during
9545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// shutdown.
9555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)ShutdownWatcherHelper::ShutdownWatcherHelper()
9565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    : shutdown_watchdog_(NULL),
9575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      thread_id_(base::PlatformThread::CurrentId()) {
9585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
9595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)ShutdownWatcherHelper::~ShutdownWatcherHelper() {
9615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
9625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (shutdown_watchdog_) {
9635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    shutdown_watchdog_->Disarm();
9645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    delete shutdown_watchdog_;
9655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    shutdown_watchdog_ = NULL;
9665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
9675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
9685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ShutdownWatcherHelper::Arm(const base::TimeDelta& duration) {
9705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
9715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(!shutdown_watchdog_);
9725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::TimeDelta actual_duration = duration;
9735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel();
9755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (channel == chrome::VersionInfo::CHANNEL_STABLE) {
9765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    actual_duration *= 20;
9775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else if (channel == chrome::VersionInfo::CHANNEL_BETA ||
9785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             channel == chrome::VersionInfo::CHANNEL_DEV) {
9795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    actual_duration *= 10;
9805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
9815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if defined(OS_WIN)
9835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // On Windows XP, give twice the time for shutdown.
9845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (base::win::GetVersion() <= base::win::VERSION_XP)
9855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    actual_duration *= 2;
9865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
9875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  shutdown_watchdog_ = new ShutdownWatchDogThread(actual_duration);
9895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  shutdown_watchdog_->Arm();
9905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
991