// thread_watcher.cc revision 868fa2fe829687343ffae624259930155e16dbd8
1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/metrics/thread_watcher.h" 6 7#include <math.h> // ceil 8 9#include "base/bind.h" 10#include "base/compiler_specific.h" 11#include "base/debug/alias.h" 12#include "base/lazy_instance.h" 13#include "base/strings/string_number_conversions.h" 14#include "base/strings/string_split.h" 15#include "base/strings/string_tokenizer.h" 16#include "base/strings/stringprintf.h" 17#include "base/threading/thread_restrictions.h" 18#include "build/build_config.h" 19#include "chrome/browser/metrics/metrics_service.h" 20#include "chrome/common/chrome_switches.h" 21#include "chrome/common/chrome_version_info.h" 22#include "chrome/common/dump_without_crashing.h" 23#include "chrome/common/logging_chrome.h" 24 25#if defined(OS_WIN) 26#include "base/win/windows_version.h" 27#endif 28 29using content::BrowserThread; 30 31namespace { 32 33// The following are unique function names for forcing the crash when a thread 34// is unresponsive. This makes it possible to tell from the callstack alone what 35// thread was unresponsive. 36// 37// We disable optimizations for this block of functions so the compiler doesn't 38// merge them all together. 39MSVC_DISABLE_OPTIMIZE() 40MSVC_PUSH_DISABLE_WARNING(4748) 41 42int* NullPointer() { 43 return reinterpret_cast<int*>(NULL); 44} 45 46void NullPointerCrash(int line_number) { 47#ifndef NDEBUG 48 *NullPointer() = line_number; // Crash. 
49#else 50 logging::DumpWithoutCrashing(); 51#endif 52} 53 54NOINLINE void ShutdownCrash() { 55 NullPointerCrash(__LINE__); 56} 57 58NOINLINE void ThreadUnresponsive_UI() { 59 NullPointerCrash(__LINE__); 60} 61 62NOINLINE void ThreadUnresponsive_DB() { 63 NullPointerCrash(__LINE__); 64} 65 66NOINLINE void ThreadUnresponsive_WEBKIT() { 67 NullPointerCrash(__LINE__); 68} 69 70NOINLINE void ThreadUnresponsive_FILE() { 71 NullPointerCrash(__LINE__); 72} 73 74NOINLINE void ThreadUnresponsive_FILE_USER_BLOCKING() { 75 NullPointerCrash(__LINE__); 76} 77 78NOINLINE void ThreadUnresponsive_PROCESS_LAUNCHER() { 79 NullPointerCrash(__LINE__); 80} 81 82NOINLINE void ThreadUnresponsive_CACHE() { 83 NullPointerCrash(__LINE__); 84} 85 86NOINLINE void ThreadUnresponsive_IO() { 87 NullPointerCrash(__LINE__); 88} 89 90MSVC_POP_WARNING() 91MSVC_ENABLE_OPTIMIZE(); 92 93void CrashBecauseThreadWasUnresponsive(BrowserThread::ID thread_id) { 94 base::debug::Alias(&thread_id); 95 96 switch (thread_id) { 97 case BrowserThread::UI: 98 return ThreadUnresponsive_UI(); 99 case BrowserThread::DB: 100 return ThreadUnresponsive_DB(); 101 case BrowserThread::WEBKIT_DEPRECATED: 102 return ThreadUnresponsive_WEBKIT(); 103 case BrowserThread::FILE: 104 return ThreadUnresponsive_FILE(); 105 case BrowserThread::FILE_USER_BLOCKING: 106 return ThreadUnresponsive_FILE_USER_BLOCKING(); 107 case BrowserThread::PROCESS_LAUNCHER: 108 return ThreadUnresponsive_PROCESS_LAUNCHER(); 109 case BrowserThread::CACHE: 110 return ThreadUnresponsive_CACHE(); 111 case BrowserThread::IO: 112 return ThreadUnresponsive_IO(); 113 case BrowserThread::ID_COUNT: 114 CHECK(false); // This shouldn't actually be reached! 115 break; 116 117 // Omission of the default hander is intentional -- that way the compiler 118 // should warn if our switch becomes outdated. 119 } 120 121 CHECK(false) << "Unknown thread was unresponsive."; // Shouldn't be reached. 122} 123 124} // namespace 125 126// ThreadWatcher methods and members. 
127ThreadWatcher::ThreadWatcher(const WatchingParams& params) 128 : thread_id_(params.thread_id), 129 thread_name_(params.thread_name), 130 watched_loop_( 131 BrowserThread::GetMessageLoopProxyForThread(params.thread_id)), 132 sleep_time_(params.sleep_time), 133 unresponsive_time_(params.unresponsive_time), 134 ping_time_(base::TimeTicks::Now()), 135 pong_time_(ping_time_), 136 ping_sequence_number_(0), 137 active_(false), 138 ping_count_(params.unresponsive_threshold), 139 response_time_histogram_(NULL), 140 unresponsive_time_histogram_(NULL), 141 unresponsive_count_(0), 142 hung_processing_complete_(false), 143 unresponsive_threshold_(params.unresponsive_threshold), 144 crash_on_hang_(params.crash_on_hang), 145 live_threads_threshold_(params.live_threads_threshold), 146 weak_ptr_factory_(this) { 147 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 148 Initialize(); 149} 150 151ThreadWatcher::~ThreadWatcher() {} 152 153// static 154void ThreadWatcher::StartWatching(const WatchingParams& params) { 155 DCHECK_GE(params.sleep_time.InMilliseconds(), 0); 156 DCHECK_GE(params.unresponsive_time.InMilliseconds(), 157 params.sleep_time.InMilliseconds()); 158 159 // If we are not on WatchDogThread, then post a task to call StartWatching on 160 // WatchDogThread. 161 if (!WatchDogThread::CurrentlyOnWatchDogThread()) { 162 WatchDogThread::PostTask( 163 FROM_HERE, 164 base::Bind(&ThreadWatcher::StartWatching, params)); 165 return; 166 } 167 168 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 169 170 // Create a new thread watcher object for the given thread and activate it. 171 ThreadWatcher* watcher = new ThreadWatcher(params); 172 173 DCHECK(watcher); 174 // If we couldn't register the thread watcher object, we are shutting down, 175 // then don't activate thread watching. 
176 if (!ThreadWatcherList::IsRegistered(params.thread_id)) 177 return; 178 watcher->ActivateThreadWatching(); 179} 180 181void ThreadWatcher::ActivateThreadWatching() { 182 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 183 if (active_) return; 184 active_ = true; 185 ping_count_ = unresponsive_threshold_; 186 ResetHangCounters(); 187 base::MessageLoop::current()->PostTask( 188 FROM_HERE, 189 base::Bind(&ThreadWatcher::PostPingMessage, 190 weak_ptr_factory_.GetWeakPtr())); 191} 192 193void ThreadWatcher::DeActivateThreadWatching() { 194 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 195 active_ = false; 196 ping_count_ = 0; 197 weak_ptr_factory_.InvalidateWeakPtrs(); 198} 199 200void ThreadWatcher::WakeUp() { 201 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 202 // There is some user activity, PostPingMessage task of thread watcher if 203 // needed. 204 if (!active_) return; 205 206 // Throw away the previous |unresponsive_count_| and start over again. Just 207 // before going to sleep, |unresponsive_count_| could be very close to 208 // |unresponsive_threshold_| and when user becomes active, 209 // |unresponsive_count_| can go over |unresponsive_threshold_| if there was no 210 // response for ping messages. Reset |unresponsive_count_| to start measuring 211 // the unresponsiveness of the threads when system becomes active. 212 unresponsive_count_ = 0; 213 214 if (ping_count_ <= 0) { 215 ping_count_ = unresponsive_threshold_; 216 ResetHangCounters(); 217 PostPingMessage(); 218 } else { 219 ping_count_ = unresponsive_threshold_; 220 } 221} 222 223void ThreadWatcher::PostPingMessage() { 224 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 225 // If we have stopped watching or if the user is idle, then stop sending 226 // ping messages. 227 if (!active_ || ping_count_ <= 0) 228 return; 229 230 // Save the current time when we have sent ping message. 231 ping_time_ = base::TimeTicks::Now(); 232 233 // Send a ping message to the watched thread. 
Callback will be called on 234 // the WatchDogThread. 235 base::Closure callback( 236 base::Bind(&ThreadWatcher::OnPongMessage, weak_ptr_factory_.GetWeakPtr(), 237 ping_sequence_number_)); 238 if (watched_loop_->PostTask( 239 FROM_HERE, 240 base::Bind(&ThreadWatcher::OnPingMessage, thread_id_, 241 callback))) { 242 // Post a task to check the responsiveness of watched thread. 243 base::MessageLoop::current()->PostDelayedTask( 244 FROM_HERE, 245 base::Bind(&ThreadWatcher::OnCheckResponsiveness, 246 weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_), 247 unresponsive_time_); 248 } else { 249 // Watched thread might have gone away, stop watching it. 250 DeActivateThreadWatching(); 251 } 252} 253 254void ThreadWatcher::OnPongMessage(uint64 ping_sequence_number) { 255 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 256 257 // Record watched thread's response time. 258 base::TimeTicks now = base::TimeTicks::Now(); 259 base::TimeDelta response_time = now - ping_time_; 260 response_time_histogram_->AddTime(response_time); 261 262 // Save the current time when we have got pong message. 263 pong_time_ = now; 264 265 // Check if there are any extra pings in flight. 266 DCHECK_EQ(ping_sequence_number_, ping_sequence_number); 267 if (ping_sequence_number_ != ping_sequence_number) 268 return; 269 270 // Increment sequence number for the next ping message to indicate watched 271 // thread is responsive. 272 ++ping_sequence_number_; 273 274 // If we have stopped watching or if the user is idle, then stop sending 275 // ping messages. 276 if (!active_ || --ping_count_ <= 0) 277 return; 278 279 base::MessageLoop::current()->PostDelayedTask( 280 FROM_HERE, 281 base::Bind(&ThreadWatcher::PostPingMessage, 282 weak_ptr_factory_.GetWeakPtr()), 283 sleep_time_); 284} 285 286void ThreadWatcher::OnCheckResponsiveness(uint64 ping_sequence_number) { 287 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 288 // If we have stopped watching then consider thread as responding. 
289 if (!active_) { 290 responsive_ = true; 291 return; 292 } 293 // If the latest ping_sequence_number_ is not same as the ping_sequence_number 294 // that is passed in, then we can assume OnPongMessage was called. 295 // OnPongMessage increments ping_sequence_number_. 296 if (ping_sequence_number_ != ping_sequence_number) { 297 // Reset unresponsive_count_ to zero because we got a response from the 298 // watched thread. 299 ResetHangCounters(); 300 301 responsive_ = true; 302 return; 303 } 304 // Record that we got no response from watched thread. 305 GotNoResponse(); 306 307 // Post a task to check the responsiveness of watched thread. 308 base::MessageLoop::current()->PostDelayedTask( 309 FROM_HERE, 310 base::Bind(&ThreadWatcher::OnCheckResponsiveness, 311 weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_), 312 unresponsive_time_); 313 responsive_ = false; 314} 315 316void ThreadWatcher::Initialize() { 317 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 318 ThreadWatcherList::Register(this); 319 320 const std::string response_time_histogram_name = 321 "ThreadWatcher.ResponseTime." + thread_name_; 322 response_time_histogram_ = base::Histogram::FactoryTimeGet( 323 response_time_histogram_name, 324 base::TimeDelta::FromMilliseconds(1), 325 base::TimeDelta::FromSeconds(100), 50, 326 base::Histogram::kUmaTargetedHistogramFlag); 327 328 const std::string unresponsive_time_histogram_name = 329 "ThreadWatcher.Unresponsive." + thread_name_; 330 unresponsive_time_histogram_ = base::Histogram::FactoryTimeGet( 331 unresponsive_time_histogram_name, 332 base::TimeDelta::FromMilliseconds(1), 333 base::TimeDelta::FromSeconds(100), 50, 334 base::Histogram::kUmaTargetedHistogramFlag); 335 336 const std::string responsive_count_histogram_name = 337 "ThreadWatcher.ResponsiveThreads." 
+ thread_name_; 338 responsive_count_histogram_ = base::LinearHistogram::FactoryGet( 339 responsive_count_histogram_name, 1, 10, 11, 340 base::Histogram::kUmaTargetedHistogramFlag); 341 342 const std::string unresponsive_count_histogram_name = 343 "ThreadWatcher.UnresponsiveThreads." + thread_name_; 344 unresponsive_count_histogram_ = base::LinearHistogram::FactoryGet( 345 unresponsive_count_histogram_name, 1, 10, 11, 346 base::Histogram::kUmaTargetedHistogramFlag); 347} 348 349// static 350void ThreadWatcher::OnPingMessage(const BrowserThread::ID& thread_id, 351 const base::Closure& callback_task) { 352 // This method is called on watched thread. 353 DCHECK(BrowserThread::CurrentlyOn(thread_id)); 354 WatchDogThread::PostTask(FROM_HERE, callback_task); 355} 356 357void ThreadWatcher::ResetHangCounters() { 358 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 359 unresponsive_count_ = 0; 360 hung_processing_complete_ = false; 361} 362 363void ThreadWatcher::GotNoResponse() { 364 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 365 366 ++unresponsive_count_; 367 if (!IsVeryUnresponsive()) 368 return; 369 370 // Record total unresponsive_time since last pong message. 371 base::TimeDelta unresponse_time = base::TimeTicks::Now() - pong_time_; 372 unresponsive_time_histogram_->AddTime(unresponse_time); 373 374 // We have already collected stats for the non-responding watched thread. 375 if (hung_processing_complete_) 376 return; 377 378 // Record how other threads are responding. 379 uint32 responding_thread_count = 0; 380 uint32 unresponding_thread_count = 0; 381 ThreadWatcherList::GetStatusOfThreads(&responding_thread_count, 382 &unresponding_thread_count); 383 384 // Record how many watched threads are responding. 385 responsive_count_histogram_->Add(responding_thread_count); 386 387 // Record how many watched threads are not responding. 
388 unresponsive_count_histogram_->Add(unresponding_thread_count); 389 390 // Crash the browser if the watched thread is to be crashed on hang and if the 391 // number of other threads responding is less than or equal to 392 // live_threads_threshold_ and at least one other thread is responding. 393 if (crash_on_hang_ && 394 responding_thread_count > 0 && 395 responding_thread_count <= live_threads_threshold_) { 396 static bool crashed_once = false; 397 if (!crashed_once) { 398 crashed_once = true; 399 CrashBecauseThreadWasUnresponsive(thread_id_); 400 } 401 } 402 403 hung_processing_complete_ = true; 404} 405 406bool ThreadWatcher::IsVeryUnresponsive() { 407 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 408 return unresponsive_count_ >= unresponsive_threshold_; 409} 410 411// ThreadWatcherList methods and members. 412// 413// static 414ThreadWatcherList* ThreadWatcherList::g_thread_watcher_list_ = NULL; 415// static 416const int ThreadWatcherList::kSleepSeconds = 1; 417// static 418const int ThreadWatcherList::kUnresponsiveSeconds = 2; 419// static 420const int ThreadWatcherList::kUnresponsiveCount = 9; 421// static 422const int ThreadWatcherList::kLiveThreadsThreshold = 2; 423 424ThreadWatcherList::CrashDataThresholds::CrashDataThresholds( 425 uint32 live_threads_threshold, 426 uint32 unresponsive_threshold) 427 : live_threads_threshold(live_threads_threshold), 428 unresponsive_threshold(unresponsive_threshold) { 429} 430 431ThreadWatcherList::CrashDataThresholds::CrashDataThresholds() 432 : live_threads_threshold(kLiveThreadsThreshold), 433 unresponsive_threshold(kUnresponsiveCount) { 434} 435 436// static 437void ThreadWatcherList::StartWatchingAll(const CommandLine& command_line) { 438 uint32 unresponsive_threshold; 439 CrashOnHangThreadMap crash_on_hang_threads; 440 ParseCommandLine(command_line, 441 &unresponsive_threshold, 442 &crash_on_hang_threads); 443 444 ThreadWatcherObserver::SetupNotifications( 445 base::TimeDelta::FromSeconds(kSleepSeconds * 
unresponsive_threshold)); 446 447 WatchDogThread::PostDelayedTask( 448 FROM_HERE, 449 base::Bind(&ThreadWatcherList::InitializeAndStartWatching, 450 unresponsive_threshold, 451 crash_on_hang_threads), 452 base::TimeDelta::FromSeconds(120)); 453} 454 455// static 456void ThreadWatcherList::StopWatchingAll() { 457 ThreadWatcherObserver::RemoveNotifications(); 458 DeleteAll(); 459} 460 461// static 462void ThreadWatcherList::Register(ThreadWatcher* watcher) { 463 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 464 if (!g_thread_watcher_list_) 465 return; 466 DCHECK(!g_thread_watcher_list_->Find(watcher->thread_id())); 467 g_thread_watcher_list_->registered_[watcher->thread_id()] = watcher; 468} 469 470// static 471bool ThreadWatcherList::IsRegistered(const BrowserThread::ID thread_id) { 472 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 473 return NULL != ThreadWatcherList::Find(thread_id); 474} 475 476// static 477void ThreadWatcherList::GetStatusOfThreads(uint32* responding_thread_count, 478 uint32* unresponding_thread_count) { 479 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 480 *responding_thread_count = 0; 481 *unresponding_thread_count = 0; 482 if (!g_thread_watcher_list_) 483 return; 484 485 for (RegistrationList::iterator it = 486 g_thread_watcher_list_->registered_.begin(); 487 g_thread_watcher_list_->registered_.end() != it; 488 ++it) { 489 if (it->second->IsVeryUnresponsive()) 490 ++(*unresponding_thread_count); 491 else 492 ++(*responding_thread_count); 493 } 494} 495 496// static 497void ThreadWatcherList::WakeUpAll() { 498 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 499 if (!g_thread_watcher_list_) 500 return; 501 502 for (RegistrationList::iterator it = 503 g_thread_watcher_list_->registered_.begin(); 504 g_thread_watcher_list_->registered_.end() != it; 505 ++it) 506 it->second->WakeUp(); 507} 508 509ThreadWatcherList::ThreadWatcherList() { 510 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 511 
CHECK(!g_thread_watcher_list_); 512 g_thread_watcher_list_ = this; 513} 514 515ThreadWatcherList::~ThreadWatcherList() { 516 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 517 DCHECK(this == g_thread_watcher_list_); 518 g_thread_watcher_list_ = NULL; 519} 520 521// static 522void ThreadWatcherList::ParseCommandLine( 523 const CommandLine& command_line, 524 uint32* unresponsive_threshold, 525 CrashOnHangThreadMap* crash_on_hang_threads) { 526 // Initialize |unresponsive_threshold| to a default value. 527 *unresponsive_threshold = kUnresponsiveCount; 528 529 // Increase the unresponsive_threshold on the Stable and Beta channels to 530 // reduce the number of crashes due to ThreadWatcher. 531 chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel(); 532 if (channel == chrome::VersionInfo::CHANNEL_STABLE) { 533 *unresponsive_threshold *= 4; 534 } else if (channel == chrome::VersionInfo::CHANNEL_BETA) { 535 *unresponsive_threshold *= 2; 536 } 537 538#if defined(OS_WIN) 539 // For Windows XP (old systems), double the unresponsive_threshold to give 540 // the OS a chance to schedule UI/IO threads a time slice to respond with a 541 // pong message (to get around limitations with the OS). 542 if (base::win::GetVersion() <= base::win::VERSION_XP) 543 *unresponsive_threshold *= 2; 544#endif 545 546 uint32 crash_seconds = *unresponsive_threshold * kUnresponsiveSeconds; 547 std::string crash_on_hang_thread_names; 548 bool has_command_line_overwrite = false; 549 if (command_line.HasSwitch(switches::kCrashOnHangThreads)) { 550 crash_on_hang_thread_names = 551 command_line.GetSwitchValueASCII(switches::kCrashOnHangThreads); 552 has_command_line_overwrite = true; 553 } else if (channel != chrome::VersionInfo::CHANNEL_STABLE) { 554 // Default to crashing the browser if UI or IO or FILE threads are not 555 // responsive except in stable channel. 
556 crash_on_hang_thread_names = base::StringPrintf( 557 "UI:%d:%d,IO:%d:%d,FILE:%d:%d", 558 kLiveThreadsThreshold, crash_seconds, 559 kLiveThreadsThreshold, crash_seconds, 560 kLiveThreadsThreshold, crash_seconds * 5); 561 } 562 563 ParseCommandLineCrashOnHangThreads(crash_on_hang_thread_names, 564 kLiveThreadsThreshold, 565 crash_seconds, 566 crash_on_hang_threads); 567 568 if (channel != chrome::VersionInfo::CHANNEL_CANARY || 569 has_command_line_overwrite) { 570 return; 571 } 572 573 // Set up a field trial for 100% of the users to crash if either UI or IO 574 // thread is not responsive for 30 seconds (or 15 pings). 575 scoped_refptr<base::FieldTrial> field_trial( 576 base::FieldTrialList::FactoryGetFieldTrial( 577 "ThreadWatcher", 100, "default_hung_threads", 578 2013, 10, 30, NULL)); 579 int hung_thread_group = field_trial->AppendGroup("hung_thread", 100); 580 if (field_trial->group() == hung_thread_group) { 581 for (CrashOnHangThreadMap::iterator it = crash_on_hang_threads->begin(); 582 crash_on_hang_threads->end() != it; 583 ++it) { 584 if (it->first != "IO") 585 continue; 586 it->second.live_threads_threshold = INT_MAX; 587 it->second.unresponsive_threshold = 15; 588 } 589 } 590} 591 592// static 593void ThreadWatcherList::ParseCommandLineCrashOnHangThreads( 594 const std::string& crash_on_hang_thread_names, 595 uint32 default_live_threads_threshold, 596 uint32 default_crash_seconds, 597 CrashOnHangThreadMap* crash_on_hang_threads) { 598 base::StringTokenizer tokens(crash_on_hang_thread_names, ","); 599 std::vector<std::string> values; 600 while (tokens.GetNext()) { 601 const std::string& token = tokens.token(); 602 base::SplitString(token, ':', &values); 603 std::string thread_name = values[0]; 604 605 uint32 live_threads_threshold = default_live_threads_threshold; 606 uint32 crash_seconds = default_crash_seconds; 607 if (values.size() >= 2 && 608 (!base::StringToUint(values[1], &live_threads_threshold))) { 609 continue; 610 } 611 if (values.size() >= 3 
&& 612 (!base::StringToUint(values[2], &crash_seconds))) { 613 continue; 614 } 615 uint32 unresponsive_threshold = static_cast<uint32>( 616 ceil(static_cast<float>(crash_seconds) / kUnresponsiveSeconds)); 617 618 CrashDataThresholds crash_data(live_threads_threshold, 619 unresponsive_threshold); 620 // Use the last specifier. 621 (*crash_on_hang_threads)[thread_name] = crash_data; 622 } 623} 624 625// static 626void ThreadWatcherList::InitializeAndStartWatching( 627 uint32 unresponsive_threshold, 628 const CrashOnHangThreadMap& crash_on_hang_threads) { 629 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 630 631 ThreadWatcherList* thread_watcher_list = new ThreadWatcherList(); 632 CHECK(thread_watcher_list); 633 634 BrowserThread::PostTask( 635 BrowserThread::UI, 636 FROM_HERE, 637 base::Bind(&StartupTimeBomb::DisarmStartupTimeBomb)); 638 639 const base::TimeDelta kSleepTime = 640 base::TimeDelta::FromSeconds(kSleepSeconds); 641 const base::TimeDelta kUnresponsiveTime = 642 base::TimeDelta::FromSeconds(kUnresponsiveSeconds); 643 644 StartWatching(BrowserThread::UI, "UI", kSleepTime, kUnresponsiveTime, 645 unresponsive_threshold, crash_on_hang_threads); 646 StartWatching(BrowserThread::IO, "IO", kSleepTime, kUnresponsiveTime, 647 unresponsive_threshold, crash_on_hang_threads); 648 StartWatching(BrowserThread::DB, "DB", kSleepTime, kUnresponsiveTime, 649 unresponsive_threshold, crash_on_hang_threads); 650 StartWatching(BrowserThread::FILE, "FILE", kSleepTime, kUnresponsiveTime, 651 unresponsive_threshold, crash_on_hang_threads); 652 StartWatching(BrowserThread::CACHE, "CACHE", kSleepTime, kUnresponsiveTime, 653 unresponsive_threshold, crash_on_hang_threads); 654} 655 656// static 657void ThreadWatcherList::StartWatching( 658 const BrowserThread::ID& thread_id, 659 const std::string& thread_name, 660 const base::TimeDelta& sleep_time, 661 const base::TimeDelta& unresponsive_time, 662 uint32 unresponsive_threshold, 663 const CrashOnHangThreadMap& 
crash_on_hang_threads) { 664 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 665 666 CrashOnHangThreadMap::const_iterator it = 667 crash_on_hang_threads.find(thread_name); 668 bool crash_on_hang = false; 669 uint32 live_threads_threshold = 0; 670 if (it != crash_on_hang_threads.end()) { 671 crash_on_hang = true; 672 live_threads_threshold = it->second.live_threads_threshold; 673 unresponsive_threshold = it->second.unresponsive_threshold; 674 } 675 676 ThreadWatcher::StartWatching( 677 ThreadWatcher::WatchingParams(thread_id, 678 thread_name, 679 sleep_time, 680 unresponsive_time, 681 unresponsive_threshold, 682 crash_on_hang, 683 live_threads_threshold)); 684} 685 686// static 687void ThreadWatcherList::DeleteAll() { 688 if (!WatchDogThread::CurrentlyOnWatchDogThread()) { 689 WatchDogThread::PostTask( 690 FROM_HERE, 691 base::Bind(&ThreadWatcherList::DeleteAll)); 692 return; 693 } 694 695 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 696 if (!g_thread_watcher_list_) 697 return; 698 699 // Delete all thread watcher objects. 700 while (!g_thread_watcher_list_->registered_.empty()) { 701 RegistrationList::iterator it = g_thread_watcher_list_->registered_.begin(); 702 delete it->second; 703 g_thread_watcher_list_->registered_.erase(it); 704 } 705 706 delete g_thread_watcher_list_; 707} 708 709// static 710ThreadWatcher* ThreadWatcherList::Find(const BrowserThread::ID& thread_id) { 711 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 712 if (!g_thread_watcher_list_) 713 return NULL; 714 RegistrationList::iterator it = 715 g_thread_watcher_list_->registered_.find(thread_id); 716 if (g_thread_watcher_list_->registered_.end() == it) 717 return NULL; 718 return it->second; 719} 720 721// ThreadWatcherObserver methods and members. 
722// 723// static 724ThreadWatcherObserver* ThreadWatcherObserver::g_thread_watcher_observer_ = NULL; 725 726ThreadWatcherObserver::ThreadWatcherObserver( 727 const base::TimeDelta& wakeup_interval) 728 : last_wakeup_time_(base::TimeTicks::Now()), 729 wakeup_interval_(wakeup_interval) { 730 CHECK(!g_thread_watcher_observer_); 731 g_thread_watcher_observer_ = this; 732} 733 734ThreadWatcherObserver::~ThreadWatcherObserver() { 735 DCHECK(this == g_thread_watcher_observer_); 736 g_thread_watcher_observer_ = NULL; 737} 738 739// static 740void ThreadWatcherObserver::SetupNotifications( 741 const base::TimeDelta& wakeup_interval) { 742 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 743 ThreadWatcherObserver* observer = new ThreadWatcherObserver(wakeup_interval); 744 MetricsService::SetUpNotifications(&observer->registrar_, observer); 745} 746 747// static 748void ThreadWatcherObserver::RemoveNotifications() { 749 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 750 if (!g_thread_watcher_observer_) 751 return; 752 g_thread_watcher_observer_->registrar_.RemoveAll(); 753 delete g_thread_watcher_observer_; 754} 755 756void ThreadWatcherObserver::Observe( 757 int type, 758 const content::NotificationSource& source, 759 const content::NotificationDetails& details) { 760 // There is some user activity, see if thread watchers are to be awakened. 761 base::TimeTicks now = base::TimeTicks::Now(); 762 if ((now - last_wakeup_time_) < wakeup_interval_) 763 return; 764 last_wakeup_time_ = now; 765 WatchDogThread::PostTask( 766 FROM_HERE, 767 base::Bind(&ThreadWatcherList::WakeUpAll)); 768} 769 770// WatchDogThread methods and members. 771 772// This lock protects g_watchdog_thread. 773static base::LazyInstance<base::Lock>::Leaky 774 g_watchdog_lock = LAZY_INSTANCE_INITIALIZER; 775 776// The singleton of this class. 
777static WatchDogThread* g_watchdog_thread = NULL; 778 779WatchDogThread::WatchDogThread() : Thread("BrowserWatchdog") { 780} 781 782WatchDogThread::~WatchDogThread() { 783 Stop(); 784} 785 786// static 787bool WatchDogThread::CurrentlyOnWatchDogThread() { 788 base::AutoLock lock(g_watchdog_lock.Get()); 789 return g_watchdog_thread && 790 g_watchdog_thread->message_loop() == base::MessageLoop::current(); 791} 792 793// static 794bool WatchDogThread::PostTask(const tracked_objects::Location& from_here, 795 const base::Closure& task) { 796 return PostTaskHelper(from_here, task, base::TimeDelta()); 797} 798 799// static 800bool WatchDogThread::PostDelayedTask(const tracked_objects::Location& from_here, 801 const base::Closure& task, 802 base::TimeDelta delay) { 803 return PostTaskHelper(from_here, task, delay); 804} 805 806// static 807bool WatchDogThread::PostTaskHelper( 808 const tracked_objects::Location& from_here, 809 const base::Closure& task, 810 base::TimeDelta delay) { 811 { 812 base::AutoLock lock(g_watchdog_lock.Get()); 813 814 base::MessageLoop* message_loop = g_watchdog_thread ? 815 g_watchdog_thread->message_loop() : NULL; 816 if (message_loop) { 817 message_loop->PostDelayedTask(from_here, task, delay); 818 return true; 819 } 820 } 821 822 return false; 823} 824 825void WatchDogThread::Init() { 826 // This thread shouldn't be allowed to perform any blocking disk I/O. 827 base::ThreadRestrictions::SetIOAllowed(false); 828 829 base::AutoLock lock(g_watchdog_lock.Get()); 830 CHECK(!g_watchdog_thread); 831 g_watchdog_thread = this; 832} 833 834void WatchDogThread::CleanUp() { 835 base::AutoLock lock(g_watchdog_lock.Get()); 836 g_watchdog_thread = NULL; 837} 838 839namespace { 840 841// StartupWatchDogThread methods and members. 842// 843// Class for detecting hangs during startup. 844class StartupWatchDogThread : public base::Watchdog { 845 public: 846 // Constructor specifies how long the StartupWatchDogThread will wait before 847 // alarming. 
848 explicit StartupWatchDogThread(const base::TimeDelta& duration) 849 : base::Watchdog(duration, "Startup watchdog thread", true) { 850 } 851 852 // Alarm is called if the time expires after an Arm() without someone calling 853 // Disarm(). When Alarm goes off, in release mode we get the crash dump 854 // without crashing and in debug mode we break into the debugger. 855 virtual void Alarm() OVERRIDE { 856#ifndef NDEBUG 857 DCHECK(false); 858#else 859 logging::DumpWithoutCrashing(); 860#endif 861 } 862 863 DISALLOW_COPY_AND_ASSIGN(StartupWatchDogThread); 864}; 865 866// ShutdownWatchDogThread methods and members. 867// 868// Class for detecting hangs during shutdown. 869class ShutdownWatchDogThread : public base::Watchdog { 870 public: 871 // Constructor specifies how long the ShutdownWatchDogThread will wait before 872 // alarming. 873 explicit ShutdownWatchDogThread(const base::TimeDelta& duration) 874 : base::Watchdog(duration, "Shutdown watchdog thread", true) { 875 } 876 877 // Alarm is called if the time expires after an Arm() without someone calling 878 // Disarm(). We crash the browser if this method is called. 879 virtual void Alarm() OVERRIDE { 880 ShutdownCrash(); 881 } 882 883 DISALLOW_COPY_AND_ASSIGN(ShutdownWatchDogThread); 884}; 885} // namespace 886 887// StartupTimeBomb methods and members. 
888// 889// static 890StartupTimeBomb* StartupTimeBomb::g_startup_timebomb_ = NULL; 891 892StartupTimeBomb::StartupTimeBomb() 893 : startup_watchdog_(NULL), 894 thread_id_(base::PlatformThread::CurrentId()) { 895 CHECK(!g_startup_timebomb_); 896 g_startup_timebomb_ = this; 897} 898 899StartupTimeBomb::~StartupTimeBomb() { 900 DCHECK(this == g_startup_timebomb_); 901 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 902 if (startup_watchdog_) 903 Disarm(); 904 g_startup_timebomb_ = NULL; 905} 906 907void StartupTimeBomb::Arm(const base::TimeDelta& duration) { 908 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 909 DCHECK(!startup_watchdog_); 910 // TODO(rtenneti): http://crbug.com/112970. Don't arm the startup timebomb 911 // until we fix breakpad code not to crash in logging::DumpWithoutCrashing(). 912 // startup_watchdog_ = new StartupWatchDogThread(duration); 913 // startup_watchdog_->Arm(); 914 return; 915} 916 917void StartupTimeBomb::Disarm() { 918 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 919 if (startup_watchdog_) { 920 startup_watchdog_->Disarm(); 921 startup_watchdog_->Cleanup(); 922 DeleteStartupWatchdog(); 923 } 924} 925 926void StartupTimeBomb::DeleteStartupWatchdog() { 927 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 928 if (startup_watchdog_->IsJoinable()) { 929 // Allow the watchdog thread to shutdown on UI. Watchdog thread shutdowns 930 // very fast. 
931 base::ThreadRestrictions::SetIOAllowed(true); 932 delete startup_watchdog_; 933 startup_watchdog_ = NULL; 934 return; 935 } 936 base::MessageLoop::current()->PostDelayedTask( 937 FROM_HERE, 938 base::Bind(&StartupTimeBomb::DeleteStartupWatchdog, 939 base::Unretained(this)), 940 base::TimeDelta::FromSeconds(10)); 941} 942 943// static 944void StartupTimeBomb::DisarmStartupTimeBomb() { 945 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 946 if (g_startup_timebomb_) 947 g_startup_timebomb_->Disarm(); 948} 949 950// ShutdownWatcherHelper methods and members. 951// 952// ShutdownWatcherHelper is a wrapper class for detecting hangs during 953// shutdown. 954ShutdownWatcherHelper::ShutdownWatcherHelper() 955 : shutdown_watchdog_(NULL), 956 thread_id_(base::PlatformThread::CurrentId()) { 957} 958 959ShutdownWatcherHelper::~ShutdownWatcherHelper() { 960 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 961 if (shutdown_watchdog_) { 962 shutdown_watchdog_->Disarm(); 963 delete shutdown_watchdog_; 964 shutdown_watchdog_ = NULL; 965 } 966} 967 968void ShutdownWatcherHelper::Arm(const base::TimeDelta& duration) { 969 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 970 DCHECK(!shutdown_watchdog_); 971 base::TimeDelta actual_duration = duration; 972 973 chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel(); 974 if (channel == chrome::VersionInfo::CHANNEL_STABLE) { 975 actual_duration *= 20; 976 } else if (channel == chrome::VersionInfo::CHANNEL_BETA || 977 channel == chrome::VersionInfo::CHANNEL_DEV) { 978 actual_duration *= 10; 979 } 980 981#if defined(OS_WIN) 982 // On Windows XP, give twice the time for shutdown. 983 if (base::win::GetVersion() <= base::win::VERSION_XP) 984 actual_duration *= 2; 985#endif 986 987 shutdown_watchdog_ = new ShutdownWatchDogThread(actual_duration); 988 shutdown_watchdog_->Arm(); 989} 990