thread_watcher.cc revision e5d81f57cb97b3b6b7fccc9c5610d21eb81db09d
1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/metrics/thread_watcher.h" 6 7#include <math.h> // ceil 8 9#include "base/bind.h" 10#include "base/compiler_specific.h" 11#include "base/debug/alias.h" 12#include "base/debug/dump_without_crashing.h" 13#include "base/lazy_instance.h" 14#include "base/strings/string_number_conversions.h" 15#include "base/strings/string_split.h" 16#include "base/strings/string_tokenizer.h" 17#include "base/strings/stringprintf.h" 18#include "base/threading/thread_restrictions.h" 19#include "build/build_config.h" 20#include "chrome/browser/metrics/metrics_service.h" 21#include "chrome/common/chrome_switches.h" 22#include "chrome/common/chrome_version_info.h" 23#include "chrome/common/logging_chrome.h" 24 25#if defined(OS_WIN) 26#include "base/win/windows_version.h" 27#endif 28 29using content::BrowserThread; 30 31namespace { 32 33// The following are unique function names for forcing the crash when a thread 34// is unresponsive. This makes it possible to tell from the callstack alone what 35// thread was unresponsive. 36// 37// We disable optimizations for this block of functions so the compiler doesn't 38// merge them all together. 39MSVC_DISABLE_OPTIMIZE() 40MSVC_PUSH_DISABLE_WARNING(4748) 41 42#ifndef NDEBUG 43int* NullPointer() { 44 return reinterpret_cast<int*>(NULL); 45} 46#endif 47 48void NullPointerCrash(int line_number) { 49#ifndef NDEBUG 50 *NullPointer() = line_number; // Crash. 51#else 52 base::debug::DumpWithoutCrashing(); 53#endif 54} 55 56NOINLINE void ShutdownCrash() { 57 NullPointerCrash(__LINE__); 58} 59 60NOINLINE void ThreadUnresponsive_UI() { 61 NullPointerCrash(__LINE__); 62} 63 64NOINLINE void ThreadUnresponsive_DB() { 65 NullPointerCrash(__LINE__); 66} 67 68NOINLINE void ThreadUnresponsive_FILE() { 69 NullPointerCrash(__LINE__); 70} 71 72NOINLINE void ThreadUnresponsive_FILE_USER_BLOCKING() { 73 NullPointerCrash(__LINE__); 74} 75 76NOINLINE void ThreadUnresponsive_PROCESS_LAUNCHER() { 77 NullPointerCrash(__LINE__); 78} 79 80NOINLINE void ThreadUnresponsive_CACHE() { 81 NullPointerCrash(__LINE__); 82} 83 84NOINLINE void ThreadUnresponsive_IO() { 85 NullPointerCrash(__LINE__); 86} 87 88MSVC_POP_WARNING() 89MSVC_ENABLE_OPTIMIZE(); 90 91void CrashBecauseThreadWasUnresponsive(BrowserThread::ID thread_id) { 92 base::debug::Alias(&thread_id); 93 94 switch (thread_id) { 95 case BrowserThread::UI: 96 return ThreadUnresponsive_UI(); 97 case BrowserThread::DB: 98 return ThreadUnresponsive_DB(); 99 case BrowserThread::FILE: 100 return ThreadUnresponsive_FILE(); 101 case BrowserThread::FILE_USER_BLOCKING: 102 return ThreadUnresponsive_FILE_USER_BLOCKING(); 103 case BrowserThread::PROCESS_LAUNCHER: 104 return ThreadUnresponsive_PROCESS_LAUNCHER(); 105 case BrowserThread::CACHE: 106 return ThreadUnresponsive_CACHE(); 107 case BrowserThread::IO: 108 return ThreadUnresponsive_IO(); 109 case BrowserThread::ID_COUNT: 110 CHECK(false); // This shouldn't actually be reached! 111 break; 112 113 // Omission of the default hander is intentional -- that way the compiler 114 // should warn if our switch becomes outdated. 115 } 116 117 CHECK(false) << "Unknown thread was unresponsive."; // Shouldn't be reached. 118} 119 120} // namespace 121 122// ThreadWatcher methods and members. 123ThreadWatcher::ThreadWatcher(const WatchingParams& params) 124 : thread_id_(params.thread_id), 125 thread_name_(params.thread_name), 126 watched_loop_( 127 BrowserThread::GetMessageLoopProxyForThread(params.thread_id)), 128 sleep_time_(params.sleep_time), 129 unresponsive_time_(params.unresponsive_time), 130 ping_time_(base::TimeTicks::Now()), 131 pong_time_(ping_time_), 132 ping_sequence_number_(0), 133 active_(false), 134 ping_count_(params.unresponsive_threshold), 135 response_time_histogram_(NULL), 136 unresponsive_time_histogram_(NULL), 137 unresponsive_count_(0), 138 hung_processing_complete_(false), 139 unresponsive_threshold_(params.unresponsive_threshold), 140 crash_on_hang_(params.crash_on_hang), 141 live_threads_threshold_(params.live_threads_threshold), 142 weak_ptr_factory_(this) { 143 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 144 Initialize(); 145} 146 147ThreadWatcher::~ThreadWatcher() {} 148 149// static 150void ThreadWatcher::StartWatching(const WatchingParams& params) { 151 DCHECK_GE(params.sleep_time.InMilliseconds(), 0); 152 DCHECK_GE(params.unresponsive_time.InMilliseconds(), 153 params.sleep_time.InMilliseconds()); 154 155 // If we are not on WatchDogThread, then post a task to call StartWatching on 156 // WatchDogThread. 157 if (!WatchDogThread::CurrentlyOnWatchDogThread()) { 158 WatchDogThread::PostTask( 159 FROM_HERE, 160 base::Bind(&ThreadWatcher::StartWatching, params)); 161 return; 162 } 163 164 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 165 166 // Create a new thread watcher object for the given thread and activate it. 167 ThreadWatcher* watcher = new ThreadWatcher(params); 168 169 DCHECK(watcher); 170 // If we couldn't register the thread watcher object, we are shutting down, 171 // then don't activate thread watching. 172 if (!ThreadWatcherList::IsRegistered(params.thread_id)) 173 return; 174 watcher->ActivateThreadWatching(); 175} 176 177void ThreadWatcher::ActivateThreadWatching() { 178 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 179 if (active_) return; 180 active_ = true; 181 ping_count_ = unresponsive_threshold_; 182 ResetHangCounters(); 183 base::MessageLoop::current()->PostTask( 184 FROM_HERE, 185 base::Bind(&ThreadWatcher::PostPingMessage, 186 weak_ptr_factory_.GetWeakPtr())); 187} 188 189void ThreadWatcher::DeActivateThreadWatching() { 190 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 191 active_ = false; 192 ping_count_ = 0; 193 weak_ptr_factory_.InvalidateWeakPtrs(); 194} 195 196void ThreadWatcher::WakeUp() { 197 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 198 // There is some user activity, PostPingMessage task of thread watcher if 199 // needed. 200 if (!active_) return; 201 202 // Throw away the previous |unresponsive_count_| and start over again. Just 203 // before going to sleep, |unresponsive_count_| could be very close to 204 // |unresponsive_threshold_| and when user becomes active, 205 // |unresponsive_count_| can go over |unresponsive_threshold_| if there was no 206 // response for ping messages. Reset |unresponsive_count_| to start measuring 207 // the unresponsiveness of the threads when system becomes active. 208 unresponsive_count_ = 0; 209 210 if (ping_count_ <= 0) { 211 ping_count_ = unresponsive_threshold_; 212 ResetHangCounters(); 213 PostPingMessage(); 214 } else { 215 ping_count_ = unresponsive_threshold_; 216 } 217} 218 219void ThreadWatcher::PostPingMessage() { 220 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 221 // If we have stopped watching or if the user is idle, then stop sending 222 // ping messages. 223 if (!active_ || ping_count_ <= 0) 224 return; 225 226 // Save the current time when we have sent ping message. 227 ping_time_ = base::TimeTicks::Now(); 228 229 // Send a ping message to the watched thread. Callback will be called on 230 // the WatchDogThread. 231 base::Closure callback( 232 base::Bind(&ThreadWatcher::OnPongMessage, weak_ptr_factory_.GetWeakPtr(), 233 ping_sequence_number_)); 234 if (watched_loop_->PostTask( 235 FROM_HERE, 236 base::Bind(&ThreadWatcher::OnPingMessage, thread_id_, 237 callback))) { 238 // Post a task to check the responsiveness of watched thread. 239 base::MessageLoop::current()->PostDelayedTask( 240 FROM_HERE, 241 base::Bind(&ThreadWatcher::OnCheckResponsiveness, 242 weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_), 243 unresponsive_time_); 244 } else { 245 // Watched thread might have gone away, stop watching it. 246 DeActivateThreadWatching(); 247 } 248} 249 250void ThreadWatcher::OnPongMessage(uint64 ping_sequence_number) { 251 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 252 253 // Record watched thread's response time. 254 base::TimeTicks now = base::TimeTicks::Now(); 255 base::TimeDelta response_time = now - ping_time_; 256 response_time_histogram_->AddTime(response_time); 257 258 // Save the current time when we have got pong message. 259 pong_time_ = now; 260 261 // Check if there are any extra pings in flight. 262 DCHECK_EQ(ping_sequence_number_, ping_sequence_number); 263 if (ping_sequence_number_ != ping_sequence_number) 264 return; 265 266 // Increment sequence number for the next ping message to indicate watched 267 // thread is responsive. 268 ++ping_sequence_number_; 269 270 // If we have stopped watching or if the user is idle, then stop sending 271 // ping messages. 272 if (!active_ || --ping_count_ <= 0) 273 return; 274 275 base::MessageLoop::current()->PostDelayedTask( 276 FROM_HERE, 277 base::Bind(&ThreadWatcher::PostPingMessage, 278 weak_ptr_factory_.GetWeakPtr()), 279 sleep_time_); 280} 281 282void ThreadWatcher::OnCheckResponsiveness(uint64 ping_sequence_number) { 283 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 284 // If we have stopped watching then consider thread as responding. 285 if (!active_) { 286 responsive_ = true; 287 return; 288 } 289 // If the latest ping_sequence_number_ is not same as the ping_sequence_number 290 // that is passed in, then we can assume OnPongMessage was called. 291 // OnPongMessage increments ping_sequence_number_. 292 if (ping_sequence_number_ != ping_sequence_number) { 293 // Reset unresponsive_count_ to zero because we got a response from the 294 // watched thread. 295 ResetHangCounters(); 296 297 responsive_ = true; 298 return; 299 } 300 // Record that we got no response from watched thread. 301 GotNoResponse(); 302 303 // Post a task to check the responsiveness of watched thread. 304 base::MessageLoop::current()->PostDelayedTask( 305 FROM_HERE, 306 base::Bind(&ThreadWatcher::OnCheckResponsiveness, 307 weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_), 308 unresponsive_time_); 309 responsive_ = false; 310} 311 312void ThreadWatcher::Initialize() { 313 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 314 ThreadWatcherList::Register(this); 315 316 const std::string response_time_histogram_name = 317 "ThreadWatcher.ResponseTime." + thread_name_; 318 response_time_histogram_ = base::Histogram::FactoryTimeGet( 319 response_time_histogram_name, 320 base::TimeDelta::FromMilliseconds(1), 321 base::TimeDelta::FromSeconds(100), 50, 322 base::Histogram::kUmaTargetedHistogramFlag); 323 324 const std::string unresponsive_time_histogram_name = 325 "ThreadWatcher.Unresponsive." + thread_name_; 326 unresponsive_time_histogram_ = base::Histogram::FactoryTimeGet( 327 unresponsive_time_histogram_name, 328 base::TimeDelta::FromMilliseconds(1), 329 base::TimeDelta::FromSeconds(100), 50, 330 base::Histogram::kUmaTargetedHistogramFlag); 331 332 const std::string responsive_count_histogram_name = 333 "ThreadWatcher.ResponsiveThreads." + thread_name_; 334 responsive_count_histogram_ = base::LinearHistogram::FactoryGet( 335 responsive_count_histogram_name, 1, 10, 11, 336 base::Histogram::kUmaTargetedHistogramFlag); 337 338 const std::string unresponsive_count_histogram_name = 339 "ThreadWatcher.UnresponsiveThreads." + thread_name_; 340 unresponsive_count_histogram_ = base::LinearHistogram::FactoryGet( 341 unresponsive_count_histogram_name, 1, 10, 11, 342 base::Histogram::kUmaTargetedHistogramFlag); 343} 344 345// static 346void ThreadWatcher::OnPingMessage(const BrowserThread::ID& thread_id, 347 const base::Closure& callback_task) { 348 // This method is called on watched thread. 349 DCHECK(BrowserThread::CurrentlyOn(thread_id)); 350 WatchDogThread::PostTask(FROM_HERE, callback_task); 351} 352 353void ThreadWatcher::ResetHangCounters() { 354 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 355 unresponsive_count_ = 0; 356 hung_processing_complete_ = false; 357} 358 359void ThreadWatcher::GotNoResponse() { 360 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 361 362 ++unresponsive_count_; 363 if (!IsVeryUnresponsive()) 364 return; 365 366 // Record total unresponsive_time since last pong message. 367 base::TimeDelta unresponse_time = base::TimeTicks::Now() - pong_time_; 368 unresponsive_time_histogram_->AddTime(unresponse_time); 369 370 // We have already collected stats for the non-responding watched thread. 371 if (hung_processing_complete_) 372 return; 373 374 // Record how other threads are responding. 375 uint32 responding_thread_count = 0; 376 uint32 unresponding_thread_count = 0; 377 ThreadWatcherList::GetStatusOfThreads(&responding_thread_count, 378 &unresponding_thread_count); 379 380 // Record how many watched threads are responding. 381 responsive_count_histogram_->Add(responding_thread_count); 382 383 // Record how many watched threads are not responding. 384 unresponsive_count_histogram_->Add(unresponding_thread_count); 385 386 // Crash the browser if the watched thread is to be crashed on hang and if the 387 // number of other threads responding is less than or equal to 388 // live_threads_threshold_ and at least one other thread is responding. 389 if (crash_on_hang_ && 390 responding_thread_count > 0 && 391 responding_thread_count <= live_threads_threshold_) { 392 static bool crashed_once = false; 393 if (!crashed_once) { 394 crashed_once = true; 395 CrashBecauseThreadWasUnresponsive(thread_id_); 396 } 397 } 398 399 hung_processing_complete_ = true; 400} 401 402bool ThreadWatcher::IsVeryUnresponsive() { 403 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 404 return unresponsive_count_ >= unresponsive_threshold_; 405} 406 407// ThreadWatcherList methods and members. 408// 409// static 410ThreadWatcherList* ThreadWatcherList::g_thread_watcher_list_ = NULL; 411// static 412bool ThreadWatcherList::g_stopped_ = false; 413// static 414const int ThreadWatcherList::kSleepSeconds = 1; 415// static 416const int ThreadWatcherList::kUnresponsiveSeconds = 2; 417// static 418const int ThreadWatcherList::kUnresponsiveCount = 9; 419// static 420const int ThreadWatcherList::kLiveThreadsThreshold = 2; 421// static, non-const for tests. 422int ThreadWatcherList::g_initialize_delay_seconds = 120; 423 424ThreadWatcherList::CrashDataThresholds::CrashDataThresholds( 425 uint32 live_threads_threshold, 426 uint32 unresponsive_threshold) 427 : live_threads_threshold(live_threads_threshold), 428 unresponsive_threshold(unresponsive_threshold) { 429} 430 431ThreadWatcherList::CrashDataThresholds::CrashDataThresholds() 432 : live_threads_threshold(kLiveThreadsThreshold), 433 unresponsive_threshold(kUnresponsiveCount) { 434} 435 436// static 437void ThreadWatcherList::StartWatchingAll(const CommandLine& command_line) { 438 // TODO(rtenneti): Enable ThreadWatcher. 439 uint32 unresponsive_threshold; 440 CrashOnHangThreadMap crash_on_hang_threads; 441 ParseCommandLine(command_line, 442 &unresponsive_threshold, 443 &crash_on_hang_threads); 444 445 ThreadWatcherObserver::SetupNotifications( 446 base::TimeDelta::FromSeconds(kSleepSeconds * unresponsive_threshold)); 447 448 WatchDogThread::PostTask( 449 FROM_HERE, 450 base::Bind(&ThreadWatcherList::SetStopped, false)); 451 452 WatchDogThread::PostDelayedTask( 453 FROM_HERE, 454 base::Bind(&ThreadWatcherList::InitializeAndStartWatching, 455 unresponsive_threshold, 456 crash_on_hang_threads), 457 base::TimeDelta::FromSeconds(g_initialize_delay_seconds)); 458} 459 460// static 461void ThreadWatcherList::StopWatchingAll() { 462 // TODO(rtenneti): Enable ThreadWatcher. 463 ThreadWatcherObserver::RemoveNotifications(); 464 DeleteAll(); 465} 466 467// static 468void ThreadWatcherList::Register(ThreadWatcher* watcher) { 469 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 470 if (!g_thread_watcher_list_) 471 return; 472 DCHECK(!g_thread_watcher_list_->Find(watcher->thread_id())); 473 g_thread_watcher_list_->registered_[watcher->thread_id()] = watcher; 474} 475 476// static 477bool ThreadWatcherList::IsRegistered(const BrowserThread::ID thread_id) { 478 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 479 return NULL != ThreadWatcherList::Find(thread_id); 480} 481 482// static 483void ThreadWatcherList::GetStatusOfThreads(uint32* responding_thread_count, 484 uint32* unresponding_thread_count) { 485 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 486 *responding_thread_count = 0; 487 *unresponding_thread_count = 0; 488 if (!g_thread_watcher_list_) 489 return; 490 491 for (RegistrationList::iterator it = 492 g_thread_watcher_list_->registered_.begin(); 493 g_thread_watcher_list_->registered_.end() != it; 494 ++it) { 495 if (it->second->IsVeryUnresponsive()) 496 ++(*unresponding_thread_count); 497 else 498 ++(*responding_thread_count); 499 } 500} 501 502// static 503void ThreadWatcherList::WakeUpAll() { 504 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 505 if (!g_thread_watcher_list_) 506 return; 507 508 for (RegistrationList::iterator it = 509 g_thread_watcher_list_->registered_.begin(); 510 g_thread_watcher_list_->registered_.end() != it; 511 ++it) 512 it->second->WakeUp(); 513} 514 515ThreadWatcherList::ThreadWatcherList() { 516 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 517 CHECK(!g_thread_watcher_list_); 518 g_thread_watcher_list_ = this; 519} 520 521ThreadWatcherList::~ThreadWatcherList() { 522 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 523 DCHECK(this == g_thread_watcher_list_); 524 g_thread_watcher_list_ = NULL; 525} 526 527// static 528void ThreadWatcherList::ParseCommandLine( 529 const CommandLine& command_line, 530 uint32* unresponsive_threshold, 531 CrashOnHangThreadMap* crash_on_hang_threads) { 532 // Initialize |unresponsive_threshold| to a default value. 533 *unresponsive_threshold = kUnresponsiveCount; 534 535 // Increase the unresponsive_threshold on the Stable and Beta channels to 536 // reduce the number of crashes due to ThreadWatcher. 537 chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel(); 538 if (channel == chrome::VersionInfo::CHANNEL_STABLE) { 539 *unresponsive_threshold *= 4; 540 } else if (channel == chrome::VersionInfo::CHANNEL_BETA) { 541 *unresponsive_threshold *= 2; 542 } 543 544#if defined(OS_WIN) 545 // For Windows XP (old systems), double the unresponsive_threshold to give 546 // the OS a chance to schedule UI/IO threads a time slice to respond with a 547 // pong message (to get around limitations with the OS). 548 if (base::win::GetVersion() <= base::win::VERSION_XP) 549 *unresponsive_threshold *= 2; 550#endif 551 552 uint32 crash_seconds = *unresponsive_threshold * kUnresponsiveSeconds; 553 std::string crash_on_hang_thread_names; 554 bool has_command_line_overwrite = false; 555 if (command_line.HasSwitch(switches::kCrashOnHangThreads)) { 556 crash_on_hang_thread_names = 557 command_line.GetSwitchValueASCII(switches::kCrashOnHangThreads); 558 has_command_line_overwrite = true; 559 } else if (channel != chrome::VersionInfo::CHANNEL_STABLE) { 560 // Default to crashing the browser if UI or IO or FILE threads are not 561 // responsive except in stable channel. 562 crash_on_hang_thread_names = base::StringPrintf( 563 "UI:%d:%d,IO:%d:%d,FILE:%d:%d", 564 kLiveThreadsThreshold, crash_seconds, 565 kLiveThreadsThreshold, crash_seconds, 566 kLiveThreadsThreshold, crash_seconds * 5); 567 } 568 569 ParseCommandLineCrashOnHangThreads(crash_on_hang_thread_names, 570 kLiveThreadsThreshold, 571 crash_seconds, 572 crash_on_hang_threads); 573 574 if (channel != chrome::VersionInfo::CHANNEL_CANARY || 575 has_command_line_overwrite) { 576 return; 577 } 578 579 // Set up a field trial for 100% of the users to crash if either UI or IO 580 // thread is not responsive for 30 seconds (or 15 pings). 581 scoped_refptr<base::FieldTrial> field_trial( 582 base::FieldTrialList::FactoryGetFieldTrial( 583 "ThreadWatcher", 100, "default_hung_threads", 584 2014, 10, 30, base::FieldTrial::SESSION_RANDOMIZED, NULL)); 585 int hung_thread_group = field_trial->AppendGroup("hung_thread", 100); 586 if (field_trial->group() == hung_thread_group) { 587 for (CrashOnHangThreadMap::iterator it = crash_on_hang_threads->begin(); 588 crash_on_hang_threads->end() != it; 589 ++it) { 590 if (it->first == "FILE") 591 continue; 592 it->second.live_threads_threshold = INT_MAX; 593 if (it->first == "UI") { 594 // TODO(rtenneti): set unresponsive threshold to 120 seconds to catch 595 // the worst UI hangs and for fewer crashes due to ThreadWatcher. Reduce 596 // it to a more reasonable time ala IO thread. 597 it->second.unresponsive_threshold = 60; 598 } else { 599 it->second.unresponsive_threshold = 15; 600 } 601 } 602 } 603} 604 605// static 606void ThreadWatcherList::ParseCommandLineCrashOnHangThreads( 607 const std::string& crash_on_hang_thread_names, 608 uint32 default_live_threads_threshold, 609 uint32 default_crash_seconds, 610 CrashOnHangThreadMap* crash_on_hang_threads) { 611 base::StringTokenizer tokens(crash_on_hang_thread_names, ","); 612 std::vector<std::string> values; 613 while (tokens.GetNext()) { 614 const std::string& token = tokens.token(); 615 base::SplitString(token, ':', &values); 616 std::string thread_name = values[0]; 617 618 uint32 live_threads_threshold = default_live_threads_threshold; 619 uint32 crash_seconds = default_crash_seconds; 620 if (values.size() >= 2 && 621 (!base::StringToUint(values[1], &live_threads_threshold))) { 622 continue; 623 } 624 if (values.size() >= 3 && 625 (!base::StringToUint(values[2], &crash_seconds))) { 626 continue; 627 } 628 uint32 unresponsive_threshold = static_cast<uint32>( 629 ceil(static_cast<float>(crash_seconds) / kUnresponsiveSeconds)); 630 631 CrashDataThresholds crash_data(live_threads_threshold, 632 unresponsive_threshold); 633 // Use the last specifier. 634 (*crash_on_hang_threads)[thread_name] = crash_data; 635 } 636} 637 638// static 639void ThreadWatcherList::InitializeAndStartWatching( 640 uint32 unresponsive_threshold, 641 const CrashOnHangThreadMap& crash_on_hang_threads) { 642 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 643 644 // This method is deferred in relationship to its StopWatchingAll() 645 // counterpart. If a previous initialization has already happened, or if 646 // stop has been called, there's nothing left to do here. 647 if (g_thread_watcher_list_ || g_stopped_) 648 return; 649 650 ThreadWatcherList* thread_watcher_list = new ThreadWatcherList(); 651 CHECK(thread_watcher_list); 652 653 BrowserThread::PostTask( 654 BrowserThread::UI, 655 FROM_HERE, 656 base::Bind(&StartupTimeBomb::DisarmStartupTimeBomb)); 657 658 const base::TimeDelta kSleepTime = 659 base::TimeDelta::FromSeconds(kSleepSeconds); 660 const base::TimeDelta kUnresponsiveTime = 661 base::TimeDelta::FromSeconds(kUnresponsiveSeconds); 662 663 StartWatching(BrowserThread::UI, "UI", kSleepTime, kUnresponsiveTime, 664 unresponsive_threshold, crash_on_hang_threads); 665 StartWatching(BrowserThread::IO, "IO", kSleepTime, kUnresponsiveTime, 666 unresponsive_threshold, crash_on_hang_threads); 667 StartWatching(BrowserThread::DB, "DB", kSleepTime, kUnresponsiveTime, 668 unresponsive_threshold, crash_on_hang_threads); 669 StartWatching(BrowserThread::FILE, "FILE", kSleepTime, kUnresponsiveTime, 670 unresponsive_threshold, crash_on_hang_threads); 671 StartWatching(BrowserThread::CACHE, "CACHE", kSleepTime, kUnresponsiveTime, 672 unresponsive_threshold, crash_on_hang_threads); 673} 674 675// static 676void ThreadWatcherList::StartWatching( 677 const BrowserThread::ID& thread_id, 678 const std::string& thread_name, 679 const base::TimeDelta& sleep_time, 680 const base::TimeDelta& unresponsive_time, 681 uint32 unresponsive_threshold, 682 const CrashOnHangThreadMap& crash_on_hang_threads) { 683 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 684 685 CrashOnHangThreadMap::const_iterator it = 686 crash_on_hang_threads.find(thread_name); 687 bool crash_on_hang = false; 688 uint32 live_threads_threshold = 0; 689 if (it != crash_on_hang_threads.end()) { 690 crash_on_hang = true; 691 live_threads_threshold = it->second.live_threads_threshold; 692 unresponsive_threshold = it->second.unresponsive_threshold; 693 } 694 695 ThreadWatcher::StartWatching( 696 ThreadWatcher::WatchingParams(thread_id, 697 thread_name, 698 sleep_time, 699 unresponsive_time, 700 unresponsive_threshold, 701 crash_on_hang, 702 live_threads_threshold)); 703} 704 705// static 706void ThreadWatcherList::DeleteAll() { 707 if (!WatchDogThread::CurrentlyOnWatchDogThread()) { 708 WatchDogThread::PostTask( 709 FROM_HERE, 710 base::Bind(&ThreadWatcherList::DeleteAll)); 711 return; 712 } 713 714 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 715 716 SetStopped(true); 717 718 if (!g_thread_watcher_list_) 719 return; 720 721 // Delete all thread watcher objects. 722 while (!g_thread_watcher_list_->registered_.empty()) { 723 RegistrationList::iterator it = g_thread_watcher_list_->registered_.begin(); 724 delete it->second; 725 g_thread_watcher_list_->registered_.erase(it); 726 } 727 728 delete g_thread_watcher_list_; 729} 730 731// static 732ThreadWatcher* ThreadWatcherList::Find(const BrowserThread::ID& thread_id) { 733 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 734 if (!g_thread_watcher_list_) 735 return NULL; 736 RegistrationList::iterator it = 737 g_thread_watcher_list_->registered_.find(thread_id); 738 if (g_thread_watcher_list_->registered_.end() == it) 739 return NULL; 740 return it->second; 741} 742 743// static 744void ThreadWatcherList::SetStopped(bool stopped) { 745 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 746 g_stopped_ = stopped; 747} 748 749// ThreadWatcherObserver methods and members. 750// 751// static 752ThreadWatcherObserver* ThreadWatcherObserver::g_thread_watcher_observer_ = NULL; 753 754ThreadWatcherObserver::ThreadWatcherObserver( 755 const base::TimeDelta& wakeup_interval) 756 : last_wakeup_time_(base::TimeTicks::Now()), 757 wakeup_interval_(wakeup_interval) { 758 CHECK(!g_thread_watcher_observer_); 759 g_thread_watcher_observer_ = this; 760} 761 762ThreadWatcherObserver::~ThreadWatcherObserver() { 763 DCHECK(this == g_thread_watcher_observer_); 764 g_thread_watcher_observer_ = NULL; 765} 766 767// static 768void ThreadWatcherObserver::SetupNotifications( 769 const base::TimeDelta& wakeup_interval) { 770 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 771 ThreadWatcherObserver* observer = new ThreadWatcherObserver(wakeup_interval); 772 MetricsService::SetUpNotifications(&observer->registrar_, observer); 773} 774 775// static 776void ThreadWatcherObserver::RemoveNotifications() { 777 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 778 if (!g_thread_watcher_observer_) 779 return; 780 g_thread_watcher_observer_->registrar_.RemoveAll(); 781 delete g_thread_watcher_observer_; 782} 783 784void ThreadWatcherObserver::Observe( 785 int type, 786 const content::NotificationSource& source, 787 const content::NotificationDetails& details) { 788 // There is some user activity, see if thread watchers are to be awakened. 789 base::TimeTicks now = base::TimeTicks::Now(); 790 if ((now - last_wakeup_time_) < wakeup_interval_) 791 return; 792 last_wakeup_time_ = now; 793 WatchDogThread::PostTask( 794 FROM_HERE, 795 base::Bind(&ThreadWatcherList::WakeUpAll)); 796} 797 798// WatchDogThread methods and members. 799 800// This lock protects g_watchdog_thread. 801static base::LazyInstance<base::Lock>::Leaky 802 g_watchdog_lock = LAZY_INSTANCE_INITIALIZER; 803 804// The singleton of this class. 805static WatchDogThread* g_watchdog_thread = NULL; 806 807WatchDogThread::WatchDogThread() : Thread("BrowserWatchdog") { 808} 809 810WatchDogThread::~WatchDogThread() { 811 Stop(); 812} 813 814// static 815bool WatchDogThread::CurrentlyOnWatchDogThread() { 816 base::AutoLock lock(g_watchdog_lock.Get()); 817 return g_watchdog_thread && 818 g_watchdog_thread->message_loop() == base::MessageLoop::current(); 819} 820 821// static 822bool WatchDogThread::PostTask(const tracked_objects::Location& from_here, 823 const base::Closure& task) { 824 return PostTaskHelper(from_here, task, base::TimeDelta()); 825} 826 827// static 828bool WatchDogThread::PostDelayedTask(const tracked_objects::Location& from_here, 829 const base::Closure& task, 830 base::TimeDelta delay) { 831 return PostTaskHelper(from_here, task, delay); 832} 833 834// static 835bool WatchDogThread::PostTaskHelper( 836 const tracked_objects::Location& from_here, 837 const base::Closure& task, 838 base::TimeDelta delay) { 839 { 840 base::AutoLock lock(g_watchdog_lock.Get()); 841 842 base::MessageLoop* message_loop = g_watchdog_thread ? 843 g_watchdog_thread->message_loop() : NULL; 844 if (message_loop) { 845 message_loop->PostDelayedTask(from_here, task, delay); 846 return true; 847 } 848 } 849 850 return false; 851} 852 853void WatchDogThread::Init() { 854 // This thread shouldn't be allowed to perform any blocking disk I/O. 855 base::ThreadRestrictions::SetIOAllowed(false); 856 857 base::AutoLock lock(g_watchdog_lock.Get()); 858 CHECK(!g_watchdog_thread); 859 g_watchdog_thread = this; 860} 861 862void WatchDogThread::CleanUp() { 863 base::AutoLock lock(g_watchdog_lock.Get()); 864 g_watchdog_thread = NULL; 865} 866 867namespace { 868 869// StartupWatchDogThread methods and members. 870// 871// Class for detecting hangs during startup. 872class StartupWatchDogThread : public base::Watchdog { 873 public: 874 // Constructor specifies how long the StartupWatchDogThread will wait before 875 // alarming. 876 explicit StartupWatchDogThread(const base::TimeDelta& duration) 877 : base::Watchdog(duration, "Startup watchdog thread", true) { 878 } 879 880 // Alarm is called if the time expires after an Arm() without someone calling 881 // Disarm(). When Alarm goes off, in release mode we get the crash dump 882 // without crashing and in debug mode we break into the debugger. 883 virtual void Alarm() OVERRIDE { 884#ifndef NDEBUG 885 DCHECK(false); 886#else 887 WatchDogThread::PostTask(FROM_HERE, 888 base::Bind(&base::debug::DumpWithoutCrashing)); 889#endif 890 } 891 892 DISALLOW_COPY_AND_ASSIGN(StartupWatchDogThread); 893}; 894 895// ShutdownWatchDogThread methods and members. 896// 897// Class for detecting hangs during shutdown. 898class ShutdownWatchDogThread : public base::Watchdog { 899 public: 900 // Constructor specifies how long the ShutdownWatchDogThread will wait before 901 // alarming. 902 explicit ShutdownWatchDogThread(const base::TimeDelta& duration) 903 : base::Watchdog(duration, "Shutdown watchdog thread", true) { 904 } 905 906 // Alarm is called if the time expires after an Arm() without someone calling 907 // Disarm(). We crash the browser if this method is called. 908 virtual void Alarm() OVERRIDE { 909 ShutdownCrash(); 910 } 911 912 DISALLOW_COPY_AND_ASSIGN(ShutdownWatchDogThread); 913}; 914} // namespace 915 916// StartupTimeBomb methods and members. 917// 918// static 919StartupTimeBomb* StartupTimeBomb::g_startup_timebomb_ = NULL; 920 921StartupTimeBomb::StartupTimeBomb() 922 : startup_watchdog_(NULL), 923 thread_id_(base::PlatformThread::CurrentId()) { 924 CHECK(!g_startup_timebomb_); 925 g_startup_timebomb_ = this; 926} 927 928StartupTimeBomb::~StartupTimeBomb() { 929 DCHECK(this == g_startup_timebomb_); 930 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 931 if (startup_watchdog_) 932 Disarm(); 933 g_startup_timebomb_ = NULL; 934} 935 936void StartupTimeBomb::Arm(const base::TimeDelta& duration) { 937 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 938 DCHECK(!startup_watchdog_); 939 startup_watchdog_ = new StartupWatchDogThread(duration); 940 startup_watchdog_->Arm(); 941 return; 942} 943 944void StartupTimeBomb::Disarm() { 945 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 946 if (startup_watchdog_) { 947 startup_watchdog_->Disarm(); 948 startup_watchdog_->Cleanup(); 949 DeleteStartupWatchdog(); 950 } 951} 952 953void StartupTimeBomb::DeleteStartupWatchdog() { 954 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 955 if (startup_watchdog_->IsJoinable()) { 956 // Allow the watchdog thread to shutdown on UI. Watchdog thread shutdowns 957 // very fast. 958 base::ThreadRestrictions::SetIOAllowed(true); 959 delete startup_watchdog_; 960 startup_watchdog_ = NULL; 961 return; 962 } 963 base::MessageLoop::current()->PostDelayedTask( 964 FROM_HERE, 965 base::Bind(&StartupTimeBomb::DeleteStartupWatchdog, 966 base::Unretained(this)), 967 base::TimeDelta::FromSeconds(10)); 968} 969 970// static 971void StartupTimeBomb::DisarmStartupTimeBomb() { 972 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 973 if (g_startup_timebomb_) 974 g_startup_timebomb_->Disarm(); 975} 976 977// ShutdownWatcherHelper methods and members. 978// 979// ShutdownWatcherHelper is a wrapper class for detecting hangs during 980// shutdown. 981ShutdownWatcherHelper::ShutdownWatcherHelper() 982 : shutdown_watchdog_(NULL), 983 thread_id_(base::PlatformThread::CurrentId()) { 984} 985 986ShutdownWatcherHelper::~ShutdownWatcherHelper() { 987 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 988 if (shutdown_watchdog_) { 989 shutdown_watchdog_->Disarm(); 990 delete shutdown_watchdog_; 991 shutdown_watchdog_ = NULL; 992 } 993} 994 995void ShutdownWatcherHelper::Arm(const base::TimeDelta& duration) { 996 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 997 DCHECK(!shutdown_watchdog_); 998 base::TimeDelta actual_duration = duration; 999 1000 chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel(); 1001 if (channel == chrome::VersionInfo::CHANNEL_STABLE) { 1002 actual_duration *= 20; 1003 } else if (channel == chrome::VersionInfo::CHANNEL_BETA || 1004 channel == chrome::VersionInfo::CHANNEL_DEV) { 1005 actual_duration *= 10; 1006 } 1007 1008#if defined(OS_WIN) 1009 // On Windows XP, give twice the time for shutdown. 1010 if (base::win::GetVersion() <= base::win::VERSION_XP) 1011 actual_duration *= 2; 1012#endif 1013 1014 shutdown_watchdog_ = new ShutdownWatchDogThread(actual_duration); 1015 shutdown_watchdog_->Arm(); 1016} 1017