thread_watcher.cc revision 4e180b6a0b4720a9b8e9e959a882386f690f08ff
1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/metrics/thread_watcher.h" 6 7#include <math.h> // ceil 8 9#include "base/bind.h" 10#include "base/compiler_specific.h" 11#include "base/debug/alias.h" 12#include "base/lazy_instance.h" 13#include "base/strings/string_number_conversions.h" 14#include "base/strings/string_split.h" 15#include "base/strings/string_tokenizer.h" 16#include "base/strings/stringprintf.h" 17#include "base/threading/thread_restrictions.h" 18#include "build/build_config.h" 19#include "chrome/browser/metrics/metrics_service.h" 20#include "chrome/common/chrome_switches.h" 21#include "chrome/common/chrome_version_info.h" 22#include "chrome/common/dump_without_crashing.h" 23#include "chrome/common/logging_chrome.h" 24 25#if defined(OS_WIN) 26#include "base/win/windows_version.h" 27#endif 28 29using content::BrowserThread; 30 31namespace { 32 33// The following are unique function names for forcing the crash when a thread 34// is unresponsive. This makes it possible to tell from the callstack alone what 35// thread was unresponsive. 36// 37// We disable optimizations for this block of functions so the compiler doesn't 38// merge them all together. 39MSVC_DISABLE_OPTIMIZE() 40MSVC_PUSH_DISABLE_WARNING(4748) 41 42int* NullPointer() { 43 return reinterpret_cast<int*>(NULL); 44} 45 46void NullPointerCrash(int line_number) { 47#ifndef NDEBUG 48 *NullPointer() = line_number; // Crash. 49#else 50 logging::DumpWithoutCrashing(); 51#endif 52} 53 54NOINLINE void ShutdownCrash() { 55 NullPointerCrash(__LINE__); 56} 57 58NOINLINE void ThreadUnresponsive_UI() { 59 NullPointerCrash(__LINE__); 60} 61 62NOINLINE void ThreadUnresponsive_DB() { 63 NullPointerCrash(__LINE__); 64} 65 66NOINLINE void ThreadUnresponsive_FILE() { 67 NullPointerCrash(__LINE__); 68} 69 70NOINLINE void ThreadUnresponsive_FILE_USER_BLOCKING() { 71 NullPointerCrash(__LINE__); 72} 73 74NOINLINE void ThreadUnresponsive_PROCESS_LAUNCHER() { 75 NullPointerCrash(__LINE__); 76} 77 78NOINLINE void ThreadUnresponsive_CACHE() { 79 NullPointerCrash(__LINE__); 80} 81 82NOINLINE void ThreadUnresponsive_IO() { 83 NullPointerCrash(__LINE__); 84} 85 86MSVC_POP_WARNING() 87MSVC_ENABLE_OPTIMIZE(); 88 89void CrashBecauseThreadWasUnresponsive(BrowserThread::ID thread_id) { 90 base::debug::Alias(&thread_id); 91 92 switch (thread_id) { 93 case BrowserThread::UI: 94 return ThreadUnresponsive_UI(); 95 case BrowserThread::DB: 96 return ThreadUnresponsive_DB(); 97 case BrowserThread::FILE: 98 return ThreadUnresponsive_FILE(); 99 case BrowserThread::FILE_USER_BLOCKING: 100 return ThreadUnresponsive_FILE_USER_BLOCKING(); 101 case BrowserThread::PROCESS_LAUNCHER: 102 return ThreadUnresponsive_PROCESS_LAUNCHER(); 103 case BrowserThread::CACHE: 104 return ThreadUnresponsive_CACHE(); 105 case BrowserThread::IO: 106 return ThreadUnresponsive_IO(); 107 case BrowserThread::ID_COUNT: 108 CHECK(false); // This shouldn't actually be reached! 109 break; 110 111 // Omission of the default hander is intentional -- that way the compiler 112 // should warn if our switch becomes outdated. 113 } 114 115 CHECK(false) << "Unknown thread was unresponsive."; // Shouldn't be reached. 116} 117 118} // namespace 119 120// ThreadWatcher methods and members. 121ThreadWatcher::ThreadWatcher(const WatchingParams& params) 122 : thread_id_(params.thread_id), 123 thread_name_(params.thread_name), 124 watched_loop_( 125 BrowserThread::GetMessageLoopProxyForThread(params.thread_id)), 126 sleep_time_(params.sleep_time), 127 unresponsive_time_(params.unresponsive_time), 128 ping_time_(base::TimeTicks::Now()), 129 pong_time_(ping_time_), 130 ping_sequence_number_(0), 131 active_(false), 132 ping_count_(params.unresponsive_threshold), 133 response_time_histogram_(NULL), 134 unresponsive_time_histogram_(NULL), 135 unresponsive_count_(0), 136 hung_processing_complete_(false), 137 unresponsive_threshold_(params.unresponsive_threshold), 138 crash_on_hang_(params.crash_on_hang), 139 live_threads_threshold_(params.live_threads_threshold), 140 weak_ptr_factory_(this) { 141 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 142 Initialize(); 143} 144 145ThreadWatcher::~ThreadWatcher() {} 146 147// static 148void ThreadWatcher::StartWatching(const WatchingParams& params) { 149 DCHECK_GE(params.sleep_time.InMilliseconds(), 0); 150 DCHECK_GE(params.unresponsive_time.InMilliseconds(), 151 params.sleep_time.InMilliseconds()); 152 153 // If we are not on WatchDogThread, then post a task to call StartWatching on 154 // WatchDogThread. 155 if (!WatchDogThread::CurrentlyOnWatchDogThread()) { 156 WatchDogThread::PostTask( 157 FROM_HERE, 158 base::Bind(&ThreadWatcher::StartWatching, params)); 159 return; 160 } 161 162 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 163 164 // Create a new thread watcher object for the given thread and activate it. 165 ThreadWatcher* watcher = new ThreadWatcher(params); 166 167 DCHECK(watcher); 168 // If we couldn't register the thread watcher object, we are shutting down, 169 // then don't activate thread watching. 170 if (!ThreadWatcherList::IsRegistered(params.thread_id)) 171 return; 172 watcher->ActivateThreadWatching(); 173} 174 175void ThreadWatcher::ActivateThreadWatching() { 176 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 177 if (active_) return; 178 active_ = true; 179 ping_count_ = unresponsive_threshold_; 180 ResetHangCounters(); 181 base::MessageLoop::current()->PostTask( 182 FROM_HERE, 183 base::Bind(&ThreadWatcher::PostPingMessage, 184 weak_ptr_factory_.GetWeakPtr())); 185} 186 187void ThreadWatcher::DeActivateThreadWatching() { 188 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 189 active_ = false; 190 ping_count_ = 0; 191 weak_ptr_factory_.InvalidateWeakPtrs(); 192} 193 194void ThreadWatcher::WakeUp() { 195 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 196 // There is some user activity, PostPingMessage task of thread watcher if 197 // needed. 198 if (!active_) return; 199 200 // Throw away the previous |unresponsive_count_| and start over again. Just 201 // before going to sleep, |unresponsive_count_| could be very close to 202 // |unresponsive_threshold_| and when user becomes active, 203 // |unresponsive_count_| can go over |unresponsive_threshold_| if there was no 204 // response for ping messages. Reset |unresponsive_count_| to start measuring 205 // the unresponsiveness of the threads when system becomes active. 206 unresponsive_count_ = 0; 207 208 if (ping_count_ <= 0) { 209 ping_count_ = unresponsive_threshold_; 210 ResetHangCounters(); 211 PostPingMessage(); 212 } else { 213 ping_count_ = unresponsive_threshold_; 214 } 215} 216 217void ThreadWatcher::PostPingMessage() { 218 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 219 // If we have stopped watching or if the user is idle, then stop sending 220 // ping messages. 221 if (!active_ || ping_count_ <= 0) 222 return; 223 224 // Save the current time when we have sent ping message. 225 ping_time_ = base::TimeTicks::Now(); 226 227 // Send a ping message to the watched thread. Callback will be called on 228 // the WatchDogThread. 229 base::Closure callback( 230 base::Bind(&ThreadWatcher::OnPongMessage, weak_ptr_factory_.GetWeakPtr(), 231 ping_sequence_number_)); 232 if (watched_loop_->PostTask( 233 FROM_HERE, 234 base::Bind(&ThreadWatcher::OnPingMessage, thread_id_, 235 callback))) { 236 // Post a task to check the responsiveness of watched thread. 237 base::MessageLoop::current()->PostDelayedTask( 238 FROM_HERE, 239 base::Bind(&ThreadWatcher::OnCheckResponsiveness, 240 weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_), 241 unresponsive_time_); 242 } else { 243 // Watched thread might have gone away, stop watching it. 244 DeActivateThreadWatching(); 245 } 246} 247 248void ThreadWatcher::OnPongMessage(uint64 ping_sequence_number) { 249 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 250 251 // Record watched thread's response time. 252 base::TimeTicks now = base::TimeTicks::Now(); 253 base::TimeDelta response_time = now - ping_time_; 254 response_time_histogram_->AddTime(response_time); 255 256 // Save the current time when we have got pong message. 257 pong_time_ = now; 258 259 // Check if there are any extra pings in flight. 260 DCHECK_EQ(ping_sequence_number_, ping_sequence_number); 261 if (ping_sequence_number_ != ping_sequence_number) 262 return; 263 264 // Increment sequence number for the next ping message to indicate watched 265 // thread is responsive. 266 ++ping_sequence_number_; 267 268 // If we have stopped watching or if the user is idle, then stop sending 269 // ping messages. 270 if (!active_ || --ping_count_ <= 0) 271 return; 272 273 base::MessageLoop::current()->PostDelayedTask( 274 FROM_HERE, 275 base::Bind(&ThreadWatcher::PostPingMessage, 276 weak_ptr_factory_.GetWeakPtr()), 277 sleep_time_); 278} 279 280void ThreadWatcher::OnCheckResponsiveness(uint64 ping_sequence_number) { 281 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 282 // If we have stopped watching then consider thread as responding. 283 if (!active_) { 284 responsive_ = true; 285 return; 286 } 287 // If the latest ping_sequence_number_ is not same as the ping_sequence_number 288 // that is passed in, then we can assume OnPongMessage was called. 289 // OnPongMessage increments ping_sequence_number_. 290 if (ping_sequence_number_ != ping_sequence_number) { 291 // Reset unresponsive_count_ to zero because we got a response from the 292 // watched thread. 293 ResetHangCounters(); 294 295 responsive_ = true; 296 return; 297 } 298 // Record that we got no response from watched thread. 299 GotNoResponse(); 300 301 // Post a task to check the responsiveness of watched thread. 302 base::MessageLoop::current()->PostDelayedTask( 303 FROM_HERE, 304 base::Bind(&ThreadWatcher::OnCheckResponsiveness, 305 weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_), 306 unresponsive_time_); 307 responsive_ = false; 308} 309 310void ThreadWatcher::Initialize() { 311 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 312 ThreadWatcherList::Register(this); 313 314 const std::string response_time_histogram_name = 315 "ThreadWatcher.ResponseTime." + thread_name_; 316 response_time_histogram_ = base::Histogram::FactoryTimeGet( 317 response_time_histogram_name, 318 base::TimeDelta::FromMilliseconds(1), 319 base::TimeDelta::FromSeconds(100), 50, 320 base::Histogram::kUmaTargetedHistogramFlag); 321 322 const std::string unresponsive_time_histogram_name = 323 "ThreadWatcher.Unresponsive." + thread_name_; 324 unresponsive_time_histogram_ = base::Histogram::FactoryTimeGet( 325 unresponsive_time_histogram_name, 326 base::TimeDelta::FromMilliseconds(1), 327 base::TimeDelta::FromSeconds(100), 50, 328 base::Histogram::kUmaTargetedHistogramFlag); 329 330 const std::string responsive_count_histogram_name = 331 "ThreadWatcher.ResponsiveThreads." + thread_name_; 332 responsive_count_histogram_ = base::LinearHistogram::FactoryGet( 333 responsive_count_histogram_name, 1, 10, 11, 334 base::Histogram::kUmaTargetedHistogramFlag); 335 336 const std::string unresponsive_count_histogram_name = 337 "ThreadWatcher.UnresponsiveThreads." + thread_name_; 338 unresponsive_count_histogram_ = base::LinearHistogram::FactoryGet( 339 unresponsive_count_histogram_name, 1, 10, 11, 340 base::Histogram::kUmaTargetedHistogramFlag); 341} 342 343// static 344void ThreadWatcher::OnPingMessage(const BrowserThread::ID& thread_id, 345 const base::Closure& callback_task) { 346 // This method is called on watched thread. 347 DCHECK(BrowserThread::CurrentlyOn(thread_id)); 348 WatchDogThread::PostTask(FROM_HERE, callback_task); 349} 350 351void ThreadWatcher::ResetHangCounters() { 352 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 353 unresponsive_count_ = 0; 354 hung_processing_complete_ = false; 355} 356 357void ThreadWatcher::GotNoResponse() { 358 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 359 360 ++unresponsive_count_; 361 if (!IsVeryUnresponsive()) 362 return; 363 364 // Record total unresponsive_time since last pong message. 365 base::TimeDelta unresponse_time = base::TimeTicks::Now() - pong_time_; 366 unresponsive_time_histogram_->AddTime(unresponse_time); 367 368 // We have already collected stats for the non-responding watched thread. 369 if (hung_processing_complete_) 370 return; 371 372 // Record how other threads are responding. 373 uint32 responding_thread_count = 0; 374 uint32 unresponding_thread_count = 0; 375 ThreadWatcherList::GetStatusOfThreads(&responding_thread_count, 376 &unresponding_thread_count); 377 378 // Record how many watched threads are responding. 379 responsive_count_histogram_->Add(responding_thread_count); 380 381 // Record how many watched threads are not responding. 382 unresponsive_count_histogram_->Add(unresponding_thread_count); 383 384 // Crash the browser if the watched thread is to be crashed on hang and if the 385 // number of other threads responding is less than or equal to 386 // live_threads_threshold_ and at least one other thread is responding. 387 if (crash_on_hang_ && 388 responding_thread_count > 0 && 389 responding_thread_count <= live_threads_threshold_) { 390 static bool crashed_once = false; 391 if (!crashed_once) { 392 crashed_once = true; 393 CrashBecauseThreadWasUnresponsive(thread_id_); 394 } 395 } 396 397 hung_processing_complete_ = true; 398} 399 400bool ThreadWatcher::IsVeryUnresponsive() { 401 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 402 return unresponsive_count_ >= unresponsive_threshold_; 403} 404 405// ThreadWatcherList methods and members. 406// 407// static 408ThreadWatcherList* ThreadWatcherList::g_thread_watcher_list_ = NULL; 409// static 410const int ThreadWatcherList::kSleepSeconds = 1; 411// static 412const int ThreadWatcherList::kUnresponsiveSeconds = 2; 413// static 414const int ThreadWatcherList::kUnresponsiveCount = 9; 415// static 416const int ThreadWatcherList::kLiveThreadsThreshold = 2; 417 418ThreadWatcherList::CrashDataThresholds::CrashDataThresholds( 419 uint32 live_threads_threshold, 420 uint32 unresponsive_threshold) 421 : live_threads_threshold(live_threads_threshold), 422 unresponsive_threshold(unresponsive_threshold) { 423} 424 425ThreadWatcherList::CrashDataThresholds::CrashDataThresholds() 426 : live_threads_threshold(kLiveThreadsThreshold), 427 unresponsive_threshold(kUnresponsiveCount) { 428} 429 430// static 431void ThreadWatcherList::StartWatchingAll(const CommandLine& command_line) { 432 // TODO(rtenneti): Enable ThreadWatcher. 433 uint32 unresponsive_threshold; 434 CrashOnHangThreadMap crash_on_hang_threads; 435 ParseCommandLine(command_line, 436 &unresponsive_threshold, 437 &crash_on_hang_threads); 438 439 ThreadWatcherObserver::SetupNotifications( 440 base::TimeDelta::FromSeconds(kSleepSeconds * unresponsive_threshold)); 441 442 WatchDogThread::PostDelayedTask( 443 FROM_HERE, 444 base::Bind(&ThreadWatcherList::InitializeAndStartWatching, 445 unresponsive_threshold, 446 crash_on_hang_threads), 447 base::TimeDelta::FromSeconds(120)); 448} 449 450// static 451void ThreadWatcherList::StopWatchingAll() { 452 // TODO(rtenneti): Enable ThreadWatcher. 453 ThreadWatcherObserver::RemoveNotifications(); 454 DeleteAll(); 455} 456 457// static 458void ThreadWatcherList::Register(ThreadWatcher* watcher) { 459 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 460 if (!g_thread_watcher_list_) 461 return; 462 DCHECK(!g_thread_watcher_list_->Find(watcher->thread_id())); 463 g_thread_watcher_list_->registered_[watcher->thread_id()] = watcher; 464} 465 466// static 467bool ThreadWatcherList::IsRegistered(const BrowserThread::ID thread_id) { 468 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 469 return NULL != ThreadWatcherList::Find(thread_id); 470} 471 472// static 473void ThreadWatcherList::GetStatusOfThreads(uint32* responding_thread_count, 474 uint32* unresponding_thread_count) { 475 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 476 *responding_thread_count = 0; 477 *unresponding_thread_count = 0; 478 if (!g_thread_watcher_list_) 479 return; 480 481 for (RegistrationList::iterator it = 482 g_thread_watcher_list_->registered_.begin(); 483 g_thread_watcher_list_->registered_.end() != it; 484 ++it) { 485 if (it->second->IsVeryUnresponsive()) 486 ++(*unresponding_thread_count); 487 else 488 ++(*responding_thread_count); 489 } 490} 491 492// static 493void ThreadWatcherList::WakeUpAll() { 494 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 495 if (!g_thread_watcher_list_) 496 return; 497 498 for (RegistrationList::iterator it = 499 g_thread_watcher_list_->registered_.begin(); 500 g_thread_watcher_list_->registered_.end() != it; 501 ++it) 502 it->second->WakeUp(); 503} 504 505ThreadWatcherList::ThreadWatcherList() { 506 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 507 CHECK(!g_thread_watcher_list_); 508 g_thread_watcher_list_ = this; 509} 510 511ThreadWatcherList::~ThreadWatcherList() { 512 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 513 DCHECK(this == g_thread_watcher_list_); 514 g_thread_watcher_list_ = NULL; 515} 516 517// static 518void ThreadWatcherList::ParseCommandLine( 519 const CommandLine& command_line, 520 uint32* unresponsive_threshold, 521 CrashOnHangThreadMap* crash_on_hang_threads) { 522 // Initialize |unresponsive_threshold| to a default value. 523 *unresponsive_threshold = kUnresponsiveCount; 524 525 // Increase the unresponsive_threshold on the Stable and Beta channels to 526 // reduce the number of crashes due to ThreadWatcher. 527 chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel(); 528 if (channel == chrome::VersionInfo::CHANNEL_STABLE) { 529 *unresponsive_threshold *= 4; 530 } else if (channel == chrome::VersionInfo::CHANNEL_BETA) { 531 *unresponsive_threshold *= 2; 532 } 533 534#if defined(OS_WIN) 535 // For Windows XP (old systems), double the unresponsive_threshold to give 536 // the OS a chance to schedule UI/IO threads a time slice to respond with a 537 // pong message (to get around limitations with the OS). 538 if (base::win::GetVersion() <= base::win::VERSION_XP) 539 *unresponsive_threshold *= 2; 540#endif 541 542 uint32 crash_seconds = *unresponsive_threshold * kUnresponsiveSeconds; 543 std::string crash_on_hang_thread_names; 544 bool has_command_line_overwrite = false; 545 if (command_line.HasSwitch(switches::kCrashOnHangThreads)) { 546 crash_on_hang_thread_names = 547 command_line.GetSwitchValueASCII(switches::kCrashOnHangThreads); 548 has_command_line_overwrite = true; 549 } else if (channel != chrome::VersionInfo::CHANNEL_STABLE) { 550 // Default to crashing the browser if UI or IO or FILE threads are not 551 // responsive except in stable channel. 552 crash_on_hang_thread_names = base::StringPrintf( 553 "UI:%d:%d,IO:%d:%d,FILE:%d:%d", 554 kLiveThreadsThreshold, crash_seconds, 555 kLiveThreadsThreshold, crash_seconds, 556 kLiveThreadsThreshold, crash_seconds * 5); 557 } 558 559 ParseCommandLineCrashOnHangThreads(crash_on_hang_thread_names, 560 kLiveThreadsThreshold, 561 crash_seconds, 562 crash_on_hang_threads); 563 564 if (channel != chrome::VersionInfo::CHANNEL_CANARY || 565 has_command_line_overwrite) { 566 return; 567 } 568 569 // Set up a field trial for 100% of the users to crash if either UI or IO 570 // thread is not responsive for 30 seconds (or 15 pings). 571 scoped_refptr<base::FieldTrial> field_trial( 572 base::FieldTrialList::FactoryGetFieldTrial( 573 "ThreadWatcher", 100, "default_hung_threads", 574 2014, 10, 30, base::FieldTrial::SESSION_RANDOMIZED, NULL)); 575 int hung_thread_group = field_trial->AppendGroup("hung_thread", 100); 576 if (field_trial->group() == hung_thread_group) { 577 for (CrashOnHangThreadMap::iterator it = crash_on_hang_threads->begin(); 578 crash_on_hang_threads->end() != it; 579 ++it) { 580 if (it->first == "FILE") 581 continue; 582 it->second.live_threads_threshold = INT_MAX; 583 if (it->first == "UI") { 584 // TODO(rtenneti): set unresponsive threshold to 120 seconds to catch 585 // the worst UI hangs and for fewer crashes due to ThreadWatcher. Reduce 586 // it to a more reasonable time ala IO thread. 587 it->second.unresponsive_threshold = 60; 588 } else { 589 it->second.unresponsive_threshold = 15; 590 } 591 } 592 } 593} 594 595// static 596void ThreadWatcherList::ParseCommandLineCrashOnHangThreads( 597 const std::string& crash_on_hang_thread_names, 598 uint32 default_live_threads_threshold, 599 uint32 default_crash_seconds, 600 CrashOnHangThreadMap* crash_on_hang_threads) { 601 base::StringTokenizer tokens(crash_on_hang_thread_names, ","); 602 std::vector<std::string> values; 603 while (tokens.GetNext()) { 604 const std::string& token = tokens.token(); 605 base::SplitString(token, ':', &values); 606 std::string thread_name = values[0]; 607 608 uint32 live_threads_threshold = default_live_threads_threshold; 609 uint32 crash_seconds = default_crash_seconds; 610 if (values.size() >= 2 && 611 (!base::StringToUint(values[1], &live_threads_threshold))) { 612 continue; 613 } 614 if (values.size() >= 3 && 615 (!base::StringToUint(values[2], &crash_seconds))) { 616 continue; 617 } 618 uint32 unresponsive_threshold = static_cast<uint32>( 619 ceil(static_cast<float>(crash_seconds) / kUnresponsiveSeconds)); 620 621 CrashDataThresholds crash_data(live_threads_threshold, 622 unresponsive_threshold); 623 // Use the last specifier. 624 (*crash_on_hang_threads)[thread_name] = crash_data; 625 } 626} 627 628// static 629void ThreadWatcherList::InitializeAndStartWatching( 630 uint32 unresponsive_threshold, 631 const CrashOnHangThreadMap& crash_on_hang_threads) { 632 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 633 634 ThreadWatcherList* thread_watcher_list = new ThreadWatcherList(); 635 CHECK(thread_watcher_list); 636 637 BrowserThread::PostTask( 638 BrowserThread::UI, 639 FROM_HERE, 640 base::Bind(&StartupTimeBomb::DisarmStartupTimeBomb)); 641 642 const base::TimeDelta kSleepTime = 643 base::TimeDelta::FromSeconds(kSleepSeconds); 644 const base::TimeDelta kUnresponsiveTime = 645 base::TimeDelta::FromSeconds(kUnresponsiveSeconds); 646 647 StartWatching(BrowserThread::UI, "UI", kSleepTime, kUnresponsiveTime, 648 unresponsive_threshold, crash_on_hang_threads); 649 StartWatching(BrowserThread::IO, "IO", kSleepTime, kUnresponsiveTime, 650 unresponsive_threshold, crash_on_hang_threads); 651 StartWatching(BrowserThread::DB, "DB", kSleepTime, kUnresponsiveTime, 652 unresponsive_threshold, crash_on_hang_threads); 653 StartWatching(BrowserThread::FILE, "FILE", kSleepTime, kUnresponsiveTime, 654 unresponsive_threshold, crash_on_hang_threads); 655 StartWatching(BrowserThread::CACHE, "CACHE", kSleepTime, kUnresponsiveTime, 656 unresponsive_threshold, crash_on_hang_threads); 657} 658 659// static 660void ThreadWatcherList::StartWatching( 661 const BrowserThread::ID& thread_id, 662 const std::string& thread_name, 663 const base::TimeDelta& sleep_time, 664 const base::TimeDelta& unresponsive_time, 665 uint32 unresponsive_threshold, 666 const CrashOnHangThreadMap& crash_on_hang_threads) { 667 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 668 669 CrashOnHangThreadMap::const_iterator it = 670 crash_on_hang_threads.find(thread_name); 671 bool crash_on_hang = false; 672 uint32 live_threads_threshold = 0; 673 if (it != crash_on_hang_threads.end()) { 674 crash_on_hang = true; 675 live_threads_threshold = it->second.live_threads_threshold; 676 unresponsive_threshold = it->second.unresponsive_threshold; 677 } 678 679 ThreadWatcher::StartWatching( 680 ThreadWatcher::WatchingParams(thread_id, 681 thread_name, 682 sleep_time, 683 unresponsive_time, 684 unresponsive_threshold, 685 crash_on_hang, 686 live_threads_threshold)); 687} 688 689// static 690void ThreadWatcherList::DeleteAll() { 691 if (!WatchDogThread::CurrentlyOnWatchDogThread()) { 692 WatchDogThread::PostTask( 693 FROM_HERE, 694 base::Bind(&ThreadWatcherList::DeleteAll)); 695 return; 696 } 697 698 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 699 if (!g_thread_watcher_list_) 700 return; 701 702 // Delete all thread watcher objects. 703 while (!g_thread_watcher_list_->registered_.empty()) { 704 RegistrationList::iterator it = g_thread_watcher_list_->registered_.begin(); 705 delete it->second; 706 g_thread_watcher_list_->registered_.erase(it); 707 } 708 709 delete g_thread_watcher_list_; 710} 711 712// static 713ThreadWatcher* ThreadWatcherList::Find(const BrowserThread::ID& thread_id) { 714 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 715 if (!g_thread_watcher_list_) 716 return NULL; 717 RegistrationList::iterator it = 718 g_thread_watcher_list_->registered_.find(thread_id); 719 if (g_thread_watcher_list_->registered_.end() == it) 720 return NULL; 721 return it->second; 722} 723 724// ThreadWatcherObserver methods and members. 725// 726// static 727ThreadWatcherObserver* ThreadWatcherObserver::g_thread_watcher_observer_ = NULL; 728 729ThreadWatcherObserver::ThreadWatcherObserver( 730 const base::TimeDelta& wakeup_interval) 731 : last_wakeup_time_(base::TimeTicks::Now()), 732 wakeup_interval_(wakeup_interval) { 733 CHECK(!g_thread_watcher_observer_); 734 g_thread_watcher_observer_ = this; 735} 736 737ThreadWatcherObserver::~ThreadWatcherObserver() { 738 DCHECK(this == g_thread_watcher_observer_); 739 g_thread_watcher_observer_ = NULL; 740} 741 742// static 743void ThreadWatcherObserver::SetupNotifications( 744 const base::TimeDelta& wakeup_interval) { 745 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 746 ThreadWatcherObserver* observer = new ThreadWatcherObserver(wakeup_interval); 747 MetricsService::SetUpNotifications(&observer->registrar_, observer); 748} 749 750// static 751void ThreadWatcherObserver::RemoveNotifications() { 752 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 753 if (!g_thread_watcher_observer_) 754 return; 755 g_thread_watcher_observer_->registrar_.RemoveAll(); 756 delete g_thread_watcher_observer_; 757} 758 759void ThreadWatcherObserver::Observe( 760 int type, 761 const content::NotificationSource& source, 762 const content::NotificationDetails& details) { 763 // There is some user activity, see if thread watchers are to be awakened. 764 base::TimeTicks now = base::TimeTicks::Now(); 765 if ((now - last_wakeup_time_) < wakeup_interval_) 766 return; 767 last_wakeup_time_ = now; 768 WatchDogThread::PostTask( 769 FROM_HERE, 770 base::Bind(&ThreadWatcherList::WakeUpAll)); 771} 772 773// WatchDogThread methods and members. 774 775// This lock protects g_watchdog_thread. 776static base::LazyInstance<base::Lock>::Leaky 777 g_watchdog_lock = LAZY_INSTANCE_INITIALIZER; 778 779// The singleton of this class. 780static WatchDogThread* g_watchdog_thread = NULL; 781 782WatchDogThread::WatchDogThread() : Thread("BrowserWatchdog") { 783} 784 785WatchDogThread::~WatchDogThread() { 786 Stop(); 787} 788 789// static 790bool WatchDogThread::CurrentlyOnWatchDogThread() { 791 base::AutoLock lock(g_watchdog_lock.Get()); 792 return g_watchdog_thread && 793 g_watchdog_thread->message_loop() == base::MessageLoop::current(); 794} 795 796// static 797bool WatchDogThread::PostTask(const tracked_objects::Location& from_here, 798 const base::Closure& task) { 799 return PostTaskHelper(from_here, task, base::TimeDelta()); 800} 801 802// static 803bool WatchDogThread::PostDelayedTask(const tracked_objects::Location& from_here, 804 const base::Closure& task, 805 base::TimeDelta delay) { 806 return PostTaskHelper(from_here, task, delay); 807} 808 809// static 810bool WatchDogThread::PostTaskHelper( 811 const tracked_objects::Location& from_here, 812 const base::Closure& task, 813 base::TimeDelta delay) { 814 { 815 base::AutoLock lock(g_watchdog_lock.Get()); 816 817 base::MessageLoop* message_loop = g_watchdog_thread ? 818 g_watchdog_thread->message_loop() : NULL; 819 if (message_loop) { 820 message_loop->PostDelayedTask(from_here, task, delay); 821 return true; 822 } 823 } 824 825 return false; 826} 827 828void WatchDogThread::Init() { 829 // This thread shouldn't be allowed to perform any blocking disk I/O. 830 base::ThreadRestrictions::SetIOAllowed(false); 831 832 base::AutoLock lock(g_watchdog_lock.Get()); 833 CHECK(!g_watchdog_thread); 834 g_watchdog_thread = this; 835} 836 837void WatchDogThread::CleanUp() { 838 base::AutoLock lock(g_watchdog_lock.Get()); 839 g_watchdog_thread = NULL; 840} 841 842namespace { 843 844// StartupWatchDogThread methods and members. 845// 846// Class for detecting hangs during startup. 847class StartupWatchDogThread : public base::Watchdog { 848 public: 849 // Constructor specifies how long the StartupWatchDogThread will wait before 850 // alarming. 851 explicit StartupWatchDogThread(const base::TimeDelta& duration) 852 : base::Watchdog(duration, "Startup watchdog thread", true) { 853 } 854 855 // Alarm is called if the time expires after an Arm() without someone calling 856 // Disarm(). When Alarm goes off, in release mode we get the crash dump 857 // without crashing and in debug mode we break into the debugger. 858 virtual void Alarm() OVERRIDE { 859#ifndef NDEBUG 860 DCHECK(false); 861#else 862 logging::DumpWithoutCrashing(); 863#endif 864 } 865 866 DISALLOW_COPY_AND_ASSIGN(StartupWatchDogThread); 867}; 868 869// ShutdownWatchDogThread methods and members. 870// 871// Class for detecting hangs during shutdown. 872class ShutdownWatchDogThread : public base::Watchdog { 873 public: 874 // Constructor specifies how long the ShutdownWatchDogThread will wait before 875 // alarming. 876 explicit ShutdownWatchDogThread(const base::TimeDelta& duration) 877 : base::Watchdog(duration, "Shutdown watchdog thread", true) { 878 } 879 880 // Alarm is called if the time expires after an Arm() without someone calling 881 // Disarm(). We crash the browser if this method is called. 882 virtual void Alarm() OVERRIDE { 883 ShutdownCrash(); 884 } 885 886 DISALLOW_COPY_AND_ASSIGN(ShutdownWatchDogThread); 887}; 888} // namespace 889 890// StartupTimeBomb methods and members. 891// 892// static 893StartupTimeBomb* StartupTimeBomb::g_startup_timebomb_ = NULL; 894 895StartupTimeBomb::StartupTimeBomb() 896 : startup_watchdog_(NULL), 897 thread_id_(base::PlatformThread::CurrentId()) { 898 CHECK(!g_startup_timebomb_); 899 g_startup_timebomb_ = this; 900} 901 902StartupTimeBomb::~StartupTimeBomb() { 903 DCHECK(this == g_startup_timebomb_); 904 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 905 if (startup_watchdog_) 906 Disarm(); 907 g_startup_timebomb_ = NULL; 908} 909 910void StartupTimeBomb::Arm(const base::TimeDelta& duration) { 911 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 912 DCHECK(!startup_watchdog_); 913 startup_watchdog_ = new StartupWatchDogThread(duration); 914 startup_watchdog_->Arm(); 915 return; 916} 917 918void StartupTimeBomb::Disarm() { 919 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 920 if (startup_watchdog_) { 921 startup_watchdog_->Disarm(); 922 startup_watchdog_->Cleanup(); 923 DeleteStartupWatchdog(); 924 } 925} 926 927void StartupTimeBomb::DeleteStartupWatchdog() { 928 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 929 if (startup_watchdog_->IsJoinable()) { 930 // Allow the watchdog thread to shutdown on UI. Watchdog thread shutdowns 931 // very fast. 932 base::ThreadRestrictions::SetIOAllowed(true); 933 delete startup_watchdog_; 934 startup_watchdog_ = NULL; 935 return; 936 } 937 base::MessageLoop::current()->PostDelayedTask( 938 FROM_HERE, 939 base::Bind(&StartupTimeBomb::DeleteStartupWatchdog, 940 base::Unretained(this)), 941 base::TimeDelta::FromSeconds(10)); 942} 943 944// static 945void StartupTimeBomb::DisarmStartupTimeBomb() { 946 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 947 if (g_startup_timebomb_) 948 g_startup_timebomb_->Disarm(); 949} 950 951// ShutdownWatcherHelper methods and members. 952// 953// ShutdownWatcherHelper is a wrapper class for detecting hangs during 954// shutdown. 955ShutdownWatcherHelper::ShutdownWatcherHelper() 956 : shutdown_watchdog_(NULL), 957 thread_id_(base::PlatformThread::CurrentId()) { 958} 959 960ShutdownWatcherHelper::~ShutdownWatcherHelper() { 961 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 962 if (shutdown_watchdog_) { 963 shutdown_watchdog_->Disarm(); 964 delete shutdown_watchdog_; 965 shutdown_watchdog_ = NULL; 966 } 967} 968 969void ShutdownWatcherHelper::Arm(const base::TimeDelta& duration) { 970 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 971 DCHECK(!shutdown_watchdog_); 972 base::TimeDelta actual_duration = duration; 973 974 chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel(); 975 if (channel == chrome::VersionInfo::CHANNEL_STABLE) { 976 actual_duration *= 20; 977 } else if (channel == chrome::VersionInfo::CHANNEL_BETA || 978 channel == chrome::VersionInfo::CHANNEL_DEV) { 979 actual_duration *= 10; 980 } 981 982#if defined(OS_WIN) 983 // On Windows XP, give twice the time for shutdown. 984 if (base::win::GetVersion() <= base::win::VERSION_XP) 985 actual_duration *= 2; 986#endif 987 988 shutdown_watchdog_ = new ShutdownWatchDogThread(actual_duration); 989 shutdown_watchdog_->Arm(); 990} 991