thread_watcher.cc revision f2477e01787aa58f445919b809d89e252beef54f
1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/metrics/thread_watcher.h" 6 7#include <math.h> // ceil 8 9#include "base/bind.h" 10#include "base/compiler_specific.h" 11#include "base/debug/alias.h" 12#include "base/lazy_instance.h" 13#include "base/strings/string_number_conversions.h" 14#include "base/strings/string_split.h" 15#include "base/strings/string_tokenizer.h" 16#include "base/strings/stringprintf.h" 17#include "base/threading/thread_restrictions.h" 18#include "build/build_config.h" 19#include "chrome/browser/metrics/metrics_service.h" 20#include "chrome/common/chrome_switches.h" 21#include "chrome/common/chrome_version_info.h" 22#include "chrome/common/dump_without_crashing.h" 23#include "chrome/common/logging_chrome.h" 24 25#if defined(OS_WIN) 26#include "base/win/windows_version.h" 27#endif 28 29using content::BrowserThread; 30 31namespace { 32 33// The following are unique function names for forcing the crash when a thread 34// is unresponsive. This makes it possible to tell from the callstack alone what 35// thread was unresponsive. 36// 37// We disable optimizations for this block of functions so the compiler doesn't 38// merge them all together. 39MSVC_DISABLE_OPTIMIZE() 40MSVC_PUSH_DISABLE_WARNING(4748) 41 42#ifndef NDEBUG 43int* NullPointer() { 44 return reinterpret_cast<int*>(NULL); 45} 46#endif 47 48void NullPointerCrash(int line_number) { 49#ifndef NDEBUG 50 *NullPointer() = line_number; // Crash. 51#else 52 logging::DumpWithoutCrashing(); 53#endif 54} 55 56NOINLINE void ShutdownCrash() { 57 NullPointerCrash(__LINE__); 58} 59 60NOINLINE void ThreadUnresponsive_UI() { 61 NullPointerCrash(__LINE__); 62} 63 64NOINLINE void ThreadUnresponsive_DB() { 65 NullPointerCrash(__LINE__); 66} 67 68NOINLINE void ThreadUnresponsive_FILE() { 69 NullPointerCrash(__LINE__); 70} 71 72NOINLINE void ThreadUnresponsive_FILE_USER_BLOCKING() { 73 NullPointerCrash(__LINE__); 74} 75 76NOINLINE void ThreadUnresponsive_PROCESS_LAUNCHER() { 77 NullPointerCrash(__LINE__); 78} 79 80NOINLINE void ThreadUnresponsive_CACHE() { 81 NullPointerCrash(__LINE__); 82} 83 84NOINLINE void ThreadUnresponsive_IO() { 85 NullPointerCrash(__LINE__); 86} 87 88MSVC_POP_WARNING() 89MSVC_ENABLE_OPTIMIZE(); 90 91void CrashBecauseThreadWasUnresponsive(BrowserThread::ID thread_id) { 92 base::debug::Alias(&thread_id); 93 94 switch (thread_id) { 95 case BrowserThread::UI: 96 return ThreadUnresponsive_UI(); 97 case BrowserThread::DB: 98 return ThreadUnresponsive_DB(); 99 case BrowserThread::FILE: 100 return ThreadUnresponsive_FILE(); 101 case BrowserThread::FILE_USER_BLOCKING: 102 return ThreadUnresponsive_FILE_USER_BLOCKING(); 103 case BrowserThread::PROCESS_LAUNCHER: 104 return ThreadUnresponsive_PROCESS_LAUNCHER(); 105 case BrowserThread::CACHE: 106 return ThreadUnresponsive_CACHE(); 107 case BrowserThread::IO: 108 return ThreadUnresponsive_IO(); 109 case BrowserThread::ID_COUNT: 110 CHECK(false); // This shouldn't actually be reached! 111 break; 112 113 // Omission of the default hander is intentional -- that way the compiler 114 // should warn if our switch becomes outdated. 115 } 116 117 CHECK(false) << "Unknown thread was unresponsive."; // Shouldn't be reached. 118} 119 120} // namespace 121 122// ThreadWatcher methods and members. 123ThreadWatcher::ThreadWatcher(const WatchingParams& params) 124 : thread_id_(params.thread_id), 125 thread_name_(params.thread_name), 126 watched_loop_( 127 BrowserThread::GetMessageLoopProxyForThread(params.thread_id)), 128 sleep_time_(params.sleep_time), 129 unresponsive_time_(params.unresponsive_time), 130 ping_time_(base::TimeTicks::Now()), 131 pong_time_(ping_time_), 132 ping_sequence_number_(0), 133 active_(false), 134 ping_count_(params.unresponsive_threshold), 135 response_time_histogram_(NULL), 136 unresponsive_time_histogram_(NULL), 137 unresponsive_count_(0), 138 hung_processing_complete_(false), 139 unresponsive_threshold_(params.unresponsive_threshold), 140 crash_on_hang_(params.crash_on_hang), 141 live_threads_threshold_(params.live_threads_threshold), 142 weak_ptr_factory_(this) { 143 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 144 Initialize(); 145} 146 147ThreadWatcher::~ThreadWatcher() {} 148 149// static 150void ThreadWatcher::StartWatching(const WatchingParams& params) { 151 DCHECK_GE(params.sleep_time.InMilliseconds(), 0); 152 DCHECK_GE(params.unresponsive_time.InMilliseconds(), 153 params.sleep_time.InMilliseconds()); 154 155 // If we are not on WatchDogThread, then post a task to call StartWatching on 156 // WatchDogThread. 157 if (!WatchDogThread::CurrentlyOnWatchDogThread()) { 158 WatchDogThread::PostTask( 159 FROM_HERE, 160 base::Bind(&ThreadWatcher::StartWatching, params)); 161 return; 162 } 163 164 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 165 166 // Create a new thread watcher object for the given thread and activate it. 167 ThreadWatcher* watcher = new ThreadWatcher(params); 168 169 DCHECK(watcher); 170 // If we couldn't register the thread watcher object, we are shutting down, 171 // then don't activate thread watching. 172 if (!ThreadWatcherList::IsRegistered(params.thread_id)) 173 return; 174 watcher->ActivateThreadWatching(); 175} 176 177void ThreadWatcher::ActivateThreadWatching() { 178 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 179 if (active_) return; 180 active_ = true; 181 ping_count_ = unresponsive_threshold_; 182 ResetHangCounters(); 183 base::MessageLoop::current()->PostTask( 184 FROM_HERE, 185 base::Bind(&ThreadWatcher::PostPingMessage, 186 weak_ptr_factory_.GetWeakPtr())); 187} 188 189void ThreadWatcher::DeActivateThreadWatching() { 190 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 191 active_ = false; 192 ping_count_ = 0; 193 weak_ptr_factory_.InvalidateWeakPtrs(); 194} 195 196void ThreadWatcher::WakeUp() { 197 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 198 // There is some user activity, PostPingMessage task of thread watcher if 199 // needed. 200 if (!active_) return; 201 202 // Throw away the previous |unresponsive_count_| and start over again. Just 203 // before going to sleep, |unresponsive_count_| could be very close to 204 // |unresponsive_threshold_| and when user becomes active, 205 // |unresponsive_count_| can go over |unresponsive_threshold_| if there was no 206 // response for ping messages. Reset |unresponsive_count_| to start measuring 207 // the unresponsiveness of the threads when system becomes active. 208 unresponsive_count_ = 0; 209 210 if (ping_count_ <= 0) { 211 ping_count_ = unresponsive_threshold_; 212 ResetHangCounters(); 213 PostPingMessage(); 214 } else { 215 ping_count_ = unresponsive_threshold_; 216 } 217} 218 219void ThreadWatcher::PostPingMessage() { 220 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 221 // If we have stopped watching or if the user is idle, then stop sending 222 // ping messages. 223 if (!active_ || ping_count_ <= 0) 224 return; 225 226 // Save the current time when we have sent ping message. 227 ping_time_ = base::TimeTicks::Now(); 228 229 // Send a ping message to the watched thread. Callback will be called on 230 // the WatchDogThread. 231 base::Closure callback( 232 base::Bind(&ThreadWatcher::OnPongMessage, weak_ptr_factory_.GetWeakPtr(), 233 ping_sequence_number_)); 234 if (watched_loop_->PostTask( 235 FROM_HERE, 236 base::Bind(&ThreadWatcher::OnPingMessage, thread_id_, 237 callback))) { 238 // Post a task to check the responsiveness of watched thread. 239 base::MessageLoop::current()->PostDelayedTask( 240 FROM_HERE, 241 base::Bind(&ThreadWatcher::OnCheckResponsiveness, 242 weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_), 243 unresponsive_time_); 244 } else { 245 // Watched thread might have gone away, stop watching it. 246 DeActivateThreadWatching(); 247 } 248} 249 250void ThreadWatcher::OnPongMessage(uint64 ping_sequence_number) { 251 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 252 253 // Record watched thread's response time. 254 base::TimeTicks now = base::TimeTicks::Now(); 255 base::TimeDelta response_time = now - ping_time_; 256 response_time_histogram_->AddTime(response_time); 257 258 // Save the current time when we have got pong message. 259 pong_time_ = now; 260 261 // Check if there are any extra pings in flight. 262 DCHECK_EQ(ping_sequence_number_, ping_sequence_number); 263 if (ping_sequence_number_ != ping_sequence_number) 264 return; 265 266 // Increment sequence number for the next ping message to indicate watched 267 // thread is responsive. 268 ++ping_sequence_number_; 269 270 // If we have stopped watching or if the user is idle, then stop sending 271 // ping messages. 272 if (!active_ || --ping_count_ <= 0) 273 return; 274 275 base::MessageLoop::current()->PostDelayedTask( 276 FROM_HERE, 277 base::Bind(&ThreadWatcher::PostPingMessage, 278 weak_ptr_factory_.GetWeakPtr()), 279 sleep_time_); 280} 281 282void ThreadWatcher::OnCheckResponsiveness(uint64 ping_sequence_number) { 283 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 284 // If we have stopped watching then consider thread as responding. 285 if (!active_) { 286 responsive_ = true; 287 return; 288 } 289 // If the latest ping_sequence_number_ is not same as the ping_sequence_number 290 // that is passed in, then we can assume OnPongMessage was called. 291 // OnPongMessage increments ping_sequence_number_. 292 if (ping_sequence_number_ != ping_sequence_number) { 293 // Reset unresponsive_count_ to zero because we got a response from the 294 // watched thread. 295 ResetHangCounters(); 296 297 responsive_ = true; 298 return; 299 } 300 // Record that we got no response from watched thread. 301 GotNoResponse(); 302 303 // Post a task to check the responsiveness of watched thread. 304 base::MessageLoop::current()->PostDelayedTask( 305 FROM_HERE, 306 base::Bind(&ThreadWatcher::OnCheckResponsiveness, 307 weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_), 308 unresponsive_time_); 309 responsive_ = false; 310} 311 312void ThreadWatcher::Initialize() { 313 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 314 ThreadWatcherList::Register(this); 315 316 const std::string response_time_histogram_name = 317 "ThreadWatcher.ResponseTime." + thread_name_; 318 response_time_histogram_ = base::Histogram::FactoryTimeGet( 319 response_time_histogram_name, 320 base::TimeDelta::FromMilliseconds(1), 321 base::TimeDelta::FromSeconds(100), 50, 322 base::Histogram::kUmaTargetedHistogramFlag); 323 324 const std::string unresponsive_time_histogram_name = 325 "ThreadWatcher.Unresponsive." + thread_name_; 326 unresponsive_time_histogram_ = base::Histogram::FactoryTimeGet( 327 unresponsive_time_histogram_name, 328 base::TimeDelta::FromMilliseconds(1), 329 base::TimeDelta::FromSeconds(100), 50, 330 base::Histogram::kUmaTargetedHistogramFlag); 331 332 const std::string responsive_count_histogram_name = 333 "ThreadWatcher.ResponsiveThreads." + thread_name_; 334 responsive_count_histogram_ = base::LinearHistogram::FactoryGet( 335 responsive_count_histogram_name, 1, 10, 11, 336 base::Histogram::kUmaTargetedHistogramFlag); 337 338 const std::string unresponsive_count_histogram_name = 339 "ThreadWatcher.UnresponsiveThreads." + thread_name_; 340 unresponsive_count_histogram_ = base::LinearHistogram::FactoryGet( 341 unresponsive_count_histogram_name, 1, 10, 11, 342 base::Histogram::kUmaTargetedHistogramFlag); 343} 344 345// static 346void ThreadWatcher::OnPingMessage(const BrowserThread::ID& thread_id, 347 const base::Closure& callback_task) { 348 // This method is called on watched thread. 349 DCHECK(BrowserThread::CurrentlyOn(thread_id)); 350 WatchDogThread::PostTask(FROM_HERE, callback_task); 351} 352 353void ThreadWatcher::ResetHangCounters() { 354 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 355 unresponsive_count_ = 0; 356 hung_processing_complete_ = false; 357} 358 359void ThreadWatcher::GotNoResponse() { 360 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 361 362 ++unresponsive_count_; 363 if (!IsVeryUnresponsive()) 364 return; 365 366 // Record total unresponsive_time since last pong message. 367 base::TimeDelta unresponse_time = base::TimeTicks::Now() - pong_time_; 368 unresponsive_time_histogram_->AddTime(unresponse_time); 369 370 // We have already collected stats for the non-responding watched thread. 371 if (hung_processing_complete_) 372 return; 373 374 // Record how other threads are responding. 375 uint32 responding_thread_count = 0; 376 uint32 unresponding_thread_count = 0; 377 ThreadWatcherList::GetStatusOfThreads(&responding_thread_count, 378 &unresponding_thread_count); 379 380 // Record how many watched threads are responding. 381 responsive_count_histogram_->Add(responding_thread_count); 382 383 // Record how many watched threads are not responding. 384 unresponsive_count_histogram_->Add(unresponding_thread_count); 385 386 // Crash the browser if the watched thread is to be crashed on hang and if the 387 // number of other threads responding is less than or equal to 388 // live_threads_threshold_ and at least one other thread is responding. 389 if (crash_on_hang_ && 390 responding_thread_count > 0 && 391 responding_thread_count <= live_threads_threshold_) { 392 static bool crashed_once = false; 393 if (!crashed_once) { 394 crashed_once = true; 395 CrashBecauseThreadWasUnresponsive(thread_id_); 396 } 397 } 398 399 hung_processing_complete_ = true; 400} 401 402bool ThreadWatcher::IsVeryUnresponsive() { 403 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 404 return unresponsive_count_ >= unresponsive_threshold_; 405} 406 407// ThreadWatcherList methods and members. 408// 409// static 410ThreadWatcherList* ThreadWatcherList::g_thread_watcher_list_ = NULL; 411// static 412const int ThreadWatcherList::kSleepSeconds = 1; 413// static 414const int ThreadWatcherList::kUnresponsiveSeconds = 2; 415// static 416const int ThreadWatcherList::kUnresponsiveCount = 9; 417// static 418const int ThreadWatcherList::kLiveThreadsThreshold = 2; 419 420ThreadWatcherList::CrashDataThresholds::CrashDataThresholds( 421 uint32 live_threads_threshold, 422 uint32 unresponsive_threshold) 423 : live_threads_threshold(live_threads_threshold), 424 unresponsive_threshold(unresponsive_threshold) { 425} 426 427ThreadWatcherList::CrashDataThresholds::CrashDataThresholds() 428 : live_threads_threshold(kLiveThreadsThreshold), 429 unresponsive_threshold(kUnresponsiveCount) { 430} 431 432// static 433void ThreadWatcherList::StartWatchingAll(const CommandLine& command_line) { 434 // TODO(rtenneti): Enable ThreadWatcher. 435 uint32 unresponsive_threshold; 436 CrashOnHangThreadMap crash_on_hang_threads; 437 ParseCommandLine(command_line, 438 &unresponsive_threshold, 439 &crash_on_hang_threads); 440 441 ThreadWatcherObserver::SetupNotifications( 442 base::TimeDelta::FromSeconds(kSleepSeconds * unresponsive_threshold)); 443 444 WatchDogThread::PostDelayedTask( 445 FROM_HERE, 446 base::Bind(&ThreadWatcherList::InitializeAndStartWatching, 447 unresponsive_threshold, 448 crash_on_hang_threads), 449 base::TimeDelta::FromSeconds(120)); 450} 451 452// static 453void ThreadWatcherList::StopWatchingAll() { 454 // TODO(rtenneti): Enable ThreadWatcher. 455 ThreadWatcherObserver::RemoveNotifications(); 456 DeleteAll(); 457} 458 459// static 460void ThreadWatcherList::Register(ThreadWatcher* watcher) { 461 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 462 if (!g_thread_watcher_list_) 463 return; 464 DCHECK(!g_thread_watcher_list_->Find(watcher->thread_id())); 465 g_thread_watcher_list_->registered_[watcher->thread_id()] = watcher; 466} 467 468// static 469bool ThreadWatcherList::IsRegistered(const BrowserThread::ID thread_id) { 470 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 471 return NULL != ThreadWatcherList::Find(thread_id); 472} 473 474// static 475void ThreadWatcherList::GetStatusOfThreads(uint32* responding_thread_count, 476 uint32* unresponding_thread_count) { 477 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 478 *responding_thread_count = 0; 479 *unresponding_thread_count = 0; 480 if (!g_thread_watcher_list_) 481 return; 482 483 for (RegistrationList::iterator it = 484 g_thread_watcher_list_->registered_.begin(); 485 g_thread_watcher_list_->registered_.end() != it; 486 ++it) { 487 if (it->second->IsVeryUnresponsive()) 488 ++(*unresponding_thread_count); 489 else 490 ++(*responding_thread_count); 491 } 492} 493 494// static 495void ThreadWatcherList::WakeUpAll() { 496 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 497 if (!g_thread_watcher_list_) 498 return; 499 500 for (RegistrationList::iterator it = 501 g_thread_watcher_list_->registered_.begin(); 502 g_thread_watcher_list_->registered_.end() != it; 503 ++it) 504 it->second->WakeUp(); 505} 506 507ThreadWatcherList::ThreadWatcherList() { 508 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 509 CHECK(!g_thread_watcher_list_); 510 g_thread_watcher_list_ = this; 511} 512 513ThreadWatcherList::~ThreadWatcherList() { 514 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 515 DCHECK(this == g_thread_watcher_list_); 516 g_thread_watcher_list_ = NULL; 517} 518 519// static 520void ThreadWatcherList::ParseCommandLine( 521 const CommandLine& command_line, 522 uint32* unresponsive_threshold, 523 CrashOnHangThreadMap* crash_on_hang_threads) { 524 // Initialize |unresponsive_threshold| to a default value. 525 *unresponsive_threshold = kUnresponsiveCount; 526 527 // Increase the unresponsive_threshold on the Stable and Beta channels to 528 // reduce the number of crashes due to ThreadWatcher. 529 chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel(); 530 if (channel == chrome::VersionInfo::CHANNEL_STABLE) { 531 *unresponsive_threshold *= 4; 532 } else if (channel == chrome::VersionInfo::CHANNEL_BETA) { 533 *unresponsive_threshold *= 2; 534 } 535 536#if defined(OS_WIN) 537 // For Windows XP (old systems), double the unresponsive_threshold to give 538 // the OS a chance to schedule UI/IO threads a time slice to respond with a 539 // pong message (to get around limitations with the OS). 540 if (base::win::GetVersion() <= base::win::VERSION_XP) 541 *unresponsive_threshold *= 2; 542#endif 543 544 uint32 crash_seconds = *unresponsive_threshold * kUnresponsiveSeconds; 545 std::string crash_on_hang_thread_names; 546 bool has_command_line_overwrite = false; 547 if (command_line.HasSwitch(switches::kCrashOnHangThreads)) { 548 crash_on_hang_thread_names = 549 command_line.GetSwitchValueASCII(switches::kCrashOnHangThreads); 550 has_command_line_overwrite = true; 551 } else if (channel != chrome::VersionInfo::CHANNEL_STABLE) { 552 // Default to crashing the browser if UI or IO or FILE threads are not 553 // responsive except in stable channel. 554 crash_on_hang_thread_names = base::StringPrintf( 555 "UI:%d:%d,IO:%d:%d,FILE:%d:%d", 556 kLiveThreadsThreshold, crash_seconds, 557 kLiveThreadsThreshold, crash_seconds, 558 kLiveThreadsThreshold, crash_seconds * 5); 559 } 560 561 ParseCommandLineCrashOnHangThreads(crash_on_hang_thread_names, 562 kLiveThreadsThreshold, 563 crash_seconds, 564 crash_on_hang_threads); 565 566 if (channel != chrome::VersionInfo::CHANNEL_CANARY || 567 has_command_line_overwrite) { 568 return; 569 } 570 571 // Set up a field trial for 100% of the users to crash if either UI or IO 572 // thread is not responsive for 30 seconds (or 15 pings). 573 scoped_refptr<base::FieldTrial> field_trial( 574 base::FieldTrialList::FactoryGetFieldTrial( 575 "ThreadWatcher", 100, "default_hung_threads", 576 2014, 10, 30, base::FieldTrial::SESSION_RANDOMIZED, NULL)); 577 int hung_thread_group = field_trial->AppendGroup("hung_thread", 100); 578 if (field_trial->group() == hung_thread_group) { 579 for (CrashOnHangThreadMap::iterator it = crash_on_hang_threads->begin(); 580 crash_on_hang_threads->end() != it; 581 ++it) { 582 if (it->first == "FILE") 583 continue; 584 it->second.live_threads_threshold = INT_MAX; 585 if (it->first == "UI") { 586 // TODO(rtenneti): set unresponsive threshold to 120 seconds to catch 587 // the worst UI hangs and for fewer crashes due to ThreadWatcher. Reduce 588 // it to a more reasonable time ala IO thread. 589 it->second.unresponsive_threshold = 60; 590 } else { 591 it->second.unresponsive_threshold = 15; 592 } 593 } 594 } 595} 596 597// static 598void ThreadWatcherList::ParseCommandLineCrashOnHangThreads( 599 const std::string& crash_on_hang_thread_names, 600 uint32 default_live_threads_threshold, 601 uint32 default_crash_seconds, 602 CrashOnHangThreadMap* crash_on_hang_threads) { 603 base::StringTokenizer tokens(crash_on_hang_thread_names, ","); 604 std::vector<std::string> values; 605 while (tokens.GetNext()) { 606 const std::string& token = tokens.token(); 607 base::SplitString(token, ':', &values); 608 std::string thread_name = values[0]; 609 610 uint32 live_threads_threshold = default_live_threads_threshold; 611 uint32 crash_seconds = default_crash_seconds; 612 if (values.size() >= 2 && 613 (!base::StringToUint(values[1], &live_threads_threshold))) { 614 continue; 615 } 616 if (values.size() >= 3 && 617 (!base::StringToUint(values[2], &crash_seconds))) { 618 continue; 619 } 620 uint32 unresponsive_threshold = static_cast<uint32>( 621 ceil(static_cast<float>(crash_seconds) / kUnresponsiveSeconds)); 622 623 CrashDataThresholds crash_data(live_threads_threshold, 624 unresponsive_threshold); 625 // Use the last specifier. 626 (*crash_on_hang_threads)[thread_name] = crash_data; 627 } 628} 629 630// static 631void ThreadWatcherList::InitializeAndStartWatching( 632 uint32 unresponsive_threshold, 633 const CrashOnHangThreadMap& crash_on_hang_threads) { 634 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 635 636 ThreadWatcherList* thread_watcher_list = new ThreadWatcherList(); 637 CHECK(thread_watcher_list); 638 639 BrowserThread::PostTask( 640 BrowserThread::UI, 641 FROM_HERE, 642 base::Bind(&StartupTimeBomb::DisarmStartupTimeBomb)); 643 644 const base::TimeDelta kSleepTime = 645 base::TimeDelta::FromSeconds(kSleepSeconds); 646 const base::TimeDelta kUnresponsiveTime = 647 base::TimeDelta::FromSeconds(kUnresponsiveSeconds); 648 649 StartWatching(BrowserThread::UI, "UI", kSleepTime, kUnresponsiveTime, 650 unresponsive_threshold, crash_on_hang_threads); 651 StartWatching(BrowserThread::IO, "IO", kSleepTime, kUnresponsiveTime, 652 unresponsive_threshold, crash_on_hang_threads); 653 StartWatching(BrowserThread::DB, "DB", kSleepTime, kUnresponsiveTime, 654 unresponsive_threshold, crash_on_hang_threads); 655 StartWatching(BrowserThread::FILE, "FILE", kSleepTime, kUnresponsiveTime, 656 unresponsive_threshold, crash_on_hang_threads); 657 StartWatching(BrowserThread::CACHE, "CACHE", kSleepTime, kUnresponsiveTime, 658 unresponsive_threshold, crash_on_hang_threads); 659} 660 661// static 662void ThreadWatcherList::StartWatching( 663 const BrowserThread::ID& thread_id, 664 const std::string& thread_name, 665 const base::TimeDelta& sleep_time, 666 const base::TimeDelta& unresponsive_time, 667 uint32 unresponsive_threshold, 668 const CrashOnHangThreadMap& crash_on_hang_threads) { 669 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 670 671 CrashOnHangThreadMap::const_iterator it = 672 crash_on_hang_threads.find(thread_name); 673 bool crash_on_hang = false; 674 uint32 live_threads_threshold = 0; 675 if (it != crash_on_hang_threads.end()) { 676 crash_on_hang = true; 677 live_threads_threshold = it->second.live_threads_threshold; 678 unresponsive_threshold = it->second.unresponsive_threshold; 679 } 680 681 ThreadWatcher::StartWatching( 682 ThreadWatcher::WatchingParams(thread_id, 683 thread_name, 684 sleep_time, 685 unresponsive_time, 686 unresponsive_threshold, 687 crash_on_hang, 688 live_threads_threshold)); 689} 690 691// static 692void ThreadWatcherList::DeleteAll() { 693 if (!WatchDogThread::CurrentlyOnWatchDogThread()) { 694 WatchDogThread::PostTask( 695 FROM_HERE, 696 base::Bind(&ThreadWatcherList::DeleteAll)); 697 return; 698 } 699 700 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 701 if (!g_thread_watcher_list_) 702 return; 703 704 // Delete all thread watcher objects. 705 while (!g_thread_watcher_list_->registered_.empty()) { 706 RegistrationList::iterator it = g_thread_watcher_list_->registered_.begin(); 707 delete it->second; 708 g_thread_watcher_list_->registered_.erase(it); 709 } 710 711 delete g_thread_watcher_list_; 712} 713 714// static 715ThreadWatcher* ThreadWatcherList::Find(const BrowserThread::ID& thread_id) { 716 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 717 if (!g_thread_watcher_list_) 718 return NULL; 719 RegistrationList::iterator it = 720 g_thread_watcher_list_->registered_.find(thread_id); 721 if (g_thread_watcher_list_->registered_.end() == it) 722 return NULL; 723 return it->second; 724} 725 726// ThreadWatcherObserver methods and members. 727// 728// static 729ThreadWatcherObserver* ThreadWatcherObserver::g_thread_watcher_observer_ = NULL; 730 731ThreadWatcherObserver::ThreadWatcherObserver( 732 const base::TimeDelta& wakeup_interval) 733 : last_wakeup_time_(base::TimeTicks::Now()), 734 wakeup_interval_(wakeup_interval) { 735 CHECK(!g_thread_watcher_observer_); 736 g_thread_watcher_observer_ = this; 737} 738 739ThreadWatcherObserver::~ThreadWatcherObserver() { 740 DCHECK(this == g_thread_watcher_observer_); 741 g_thread_watcher_observer_ = NULL; 742} 743 744// static 745void ThreadWatcherObserver::SetupNotifications( 746 const base::TimeDelta& wakeup_interval) { 747 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 748 ThreadWatcherObserver* observer = new ThreadWatcherObserver(wakeup_interval); 749 MetricsService::SetUpNotifications(&observer->registrar_, observer); 750} 751 752// static 753void ThreadWatcherObserver::RemoveNotifications() { 754 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 755 if (!g_thread_watcher_observer_) 756 return; 757 g_thread_watcher_observer_->registrar_.RemoveAll(); 758 delete g_thread_watcher_observer_; 759} 760 761void ThreadWatcherObserver::Observe( 762 int type, 763 const content::NotificationSource& source, 764 const content::NotificationDetails& details) { 765 // There is some user activity, see if thread watchers are to be awakened. 766 base::TimeTicks now = base::TimeTicks::Now(); 767 if ((now - last_wakeup_time_) < wakeup_interval_) 768 return; 769 last_wakeup_time_ = now; 770 WatchDogThread::PostTask( 771 FROM_HERE, 772 base::Bind(&ThreadWatcherList::WakeUpAll)); 773} 774 775// WatchDogThread methods and members. 776 777// This lock protects g_watchdog_thread. 778static base::LazyInstance<base::Lock>::Leaky 779 g_watchdog_lock = LAZY_INSTANCE_INITIALIZER; 780 781// The singleton of this class. 782static WatchDogThread* g_watchdog_thread = NULL; 783 784WatchDogThread::WatchDogThread() : Thread("BrowserWatchdog") { 785} 786 787WatchDogThread::~WatchDogThread() { 788 Stop(); 789} 790 791// static 792bool WatchDogThread::CurrentlyOnWatchDogThread() { 793 base::AutoLock lock(g_watchdog_lock.Get()); 794 return g_watchdog_thread && 795 g_watchdog_thread->message_loop() == base::MessageLoop::current(); 796} 797 798// static 799bool WatchDogThread::PostTask(const tracked_objects::Location& from_here, 800 const base::Closure& task) { 801 return PostTaskHelper(from_here, task, base::TimeDelta()); 802} 803 804// static 805bool WatchDogThread::PostDelayedTask(const tracked_objects::Location& from_here, 806 const base::Closure& task, 807 base::TimeDelta delay) { 808 return PostTaskHelper(from_here, task, delay); 809} 810 811// static 812bool WatchDogThread::PostTaskHelper( 813 const tracked_objects::Location& from_here, 814 const base::Closure& task, 815 base::TimeDelta delay) { 816 { 817 base::AutoLock lock(g_watchdog_lock.Get()); 818 819 base::MessageLoop* message_loop = g_watchdog_thread ? 820 g_watchdog_thread->message_loop() : NULL; 821 if (message_loop) { 822 message_loop->PostDelayedTask(from_here, task, delay); 823 return true; 824 } 825 } 826 827 return false; 828} 829 830void WatchDogThread::Init() { 831 // This thread shouldn't be allowed to perform any blocking disk I/O. 832 base::ThreadRestrictions::SetIOAllowed(false); 833 834 base::AutoLock lock(g_watchdog_lock.Get()); 835 CHECK(!g_watchdog_thread); 836 g_watchdog_thread = this; 837} 838 839void WatchDogThread::CleanUp() { 840 base::AutoLock lock(g_watchdog_lock.Get()); 841 g_watchdog_thread = NULL; 842} 843 844namespace { 845 846// StartupWatchDogThread methods and members. 847// 848// Class for detecting hangs during startup. 849class StartupWatchDogThread : public base::Watchdog { 850 public: 851 // Constructor specifies how long the StartupWatchDogThread will wait before 852 // alarming. 853 explicit StartupWatchDogThread(const base::TimeDelta& duration) 854 : base::Watchdog(duration, "Startup watchdog thread", true) { 855 } 856 857 // Alarm is called if the time expires after an Arm() without someone calling 858 // Disarm(). When Alarm goes off, in release mode we get the crash dump 859 // without crashing and in debug mode we break into the debugger. 860 virtual void Alarm() OVERRIDE { 861#ifndef NDEBUG 862 DCHECK(false); 863#else 864 logging::DumpWithoutCrashing(); 865#endif 866 } 867 868 DISALLOW_COPY_AND_ASSIGN(StartupWatchDogThread); 869}; 870 871// ShutdownWatchDogThread methods and members. 872// 873// Class for detecting hangs during shutdown. 874class ShutdownWatchDogThread : public base::Watchdog { 875 public: 876 // Constructor specifies how long the ShutdownWatchDogThread will wait before 877 // alarming. 878 explicit ShutdownWatchDogThread(const base::TimeDelta& duration) 879 : base::Watchdog(duration, "Shutdown watchdog thread", true) { 880 } 881 882 // Alarm is called if the time expires after an Arm() without someone calling 883 // Disarm(). We crash the browser if this method is called. 884 virtual void Alarm() OVERRIDE { 885 ShutdownCrash(); 886 } 887 888 DISALLOW_COPY_AND_ASSIGN(ShutdownWatchDogThread); 889}; 890} // namespace 891 892// StartupTimeBomb methods and members. 893// 894// static 895StartupTimeBomb* StartupTimeBomb::g_startup_timebomb_ = NULL; 896 897StartupTimeBomb::StartupTimeBomb() 898 : startup_watchdog_(NULL), 899 thread_id_(base::PlatformThread::CurrentId()) { 900 CHECK(!g_startup_timebomb_); 901 g_startup_timebomb_ = this; 902} 903 904StartupTimeBomb::~StartupTimeBomb() { 905 DCHECK(this == g_startup_timebomb_); 906 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 907 if (startup_watchdog_) 908 Disarm(); 909 g_startup_timebomb_ = NULL; 910} 911 912void StartupTimeBomb::Arm(const base::TimeDelta& duration) { 913 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 914 DCHECK(!startup_watchdog_); 915 startup_watchdog_ = new StartupWatchDogThread(duration); 916 startup_watchdog_->Arm(); 917 return; 918} 919 920void StartupTimeBomb::Disarm() { 921 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 922 if (startup_watchdog_) { 923 startup_watchdog_->Disarm(); 924 startup_watchdog_->Cleanup(); 925 DeleteStartupWatchdog(); 926 } 927} 928 929void StartupTimeBomb::DeleteStartupWatchdog() { 930 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 931 if (startup_watchdog_->IsJoinable()) { 932 // Allow the watchdog thread to shutdown on UI. Watchdog thread shutdowns 933 // very fast. 934 base::ThreadRestrictions::SetIOAllowed(true); 935 delete startup_watchdog_; 936 startup_watchdog_ = NULL; 937 return; 938 } 939 base::MessageLoop::current()->PostDelayedTask( 940 FROM_HERE, 941 base::Bind(&StartupTimeBomb::DeleteStartupWatchdog, 942 base::Unretained(this)), 943 base::TimeDelta::FromSeconds(10)); 944} 945 946// static 947void StartupTimeBomb::DisarmStartupTimeBomb() { 948 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 949 if (g_startup_timebomb_) 950 g_startup_timebomb_->Disarm(); 951} 952 953// ShutdownWatcherHelper methods and members. 954// 955// ShutdownWatcherHelper is a wrapper class for detecting hangs during 956// shutdown. 957ShutdownWatcherHelper::ShutdownWatcherHelper() 958 : shutdown_watchdog_(NULL), 959 thread_id_(base::PlatformThread::CurrentId()) { 960} 961 962ShutdownWatcherHelper::~ShutdownWatcherHelper() { 963 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 964 if (shutdown_watchdog_) { 965 shutdown_watchdog_->Disarm(); 966 delete shutdown_watchdog_; 967 shutdown_watchdog_ = NULL; 968 } 969} 970 971void ShutdownWatcherHelper::Arm(const base::TimeDelta& duration) { 972 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 973 DCHECK(!shutdown_watchdog_); 974 base::TimeDelta actual_duration = duration; 975 976 chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel(); 977 if (channel == chrome::VersionInfo::CHANNEL_STABLE) { 978 actual_duration *= 20; 979 } else if (channel == chrome::VersionInfo::CHANNEL_BETA || 980 channel == chrome::VersionInfo::CHANNEL_DEV) { 981 actual_duration *= 10; 982 } 983 984#if defined(OS_WIN) 985 // On Windows XP, give twice the time for shutdown. 986 if (base::win::GetVersion() <= base::win::VERSION_XP) 987 actual_duration *= 2; 988#endif 989 990 shutdown_watchdog_ = new ShutdownWatchDogThread(actual_duration); 991 shutdown_watchdog_->Arm(); 992} 993