1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/metrics/thread_watcher.h" 6 7#include <math.h> // ceil 8 9#include "base/bind.h" 10#include "base/compiler_specific.h" 11#include "base/debug/alias.h" 12#include "base/debug/debugger.h" 13#include "base/debug/dump_without_crashing.h" 14#include "base/lazy_instance.h" 15#include "base/metrics/field_trial.h" 16#include "base/strings/string_number_conversions.h" 17#include "base/strings/string_split.h" 18#include "base/strings/string_tokenizer.h" 19#include "base/strings/stringprintf.h" 20#include "base/threading/thread_restrictions.h" 21#include "build/build_config.h" 22#include "chrome/browser/chrome_notification_types.h" 23#include "chrome/common/chrome_switches.h" 24#include "chrome/common/chrome_version_info.h" 25#include "chrome/common/logging_chrome.h" 26#include "content/public/browser/notification_service.h" 27 28#if defined(OS_WIN) 29#include "base/win/windows_version.h" 30#endif 31 32using content::BrowserThread; 33 34namespace { 35 36// The following are unique function names for forcing the crash when a thread 37// is unresponsive. This makes it possible to tell from the callstack alone what 38// thread was unresponsive. 39// 40// We disable optimizations for this block of functions so the compiler doesn't 41// merge them all together. 42MSVC_DISABLE_OPTIMIZE() 43MSVC_PUSH_DISABLE_WARNING(4748) 44 45void ReportThreadHang() { 46#if defined(NDEBUG) 47 base::debug::DumpWithoutCrashing(); 48#else 49 base::debug::BreakDebugger(); 50#endif 51} 52 53#if !defined(OS_ANDROID) || !defined(NDEBUG) 54// TODO(rtenneti): Enabled crashing, after getting data. 55NOINLINE void StartupHang() { 56 ReportThreadHang(); 57} 58#endif // OS_ANDROID 59 60NOINLINE void ShutdownHang() { 61 ReportThreadHang(); 62} 63 64NOINLINE void ThreadUnresponsive_UI() { 65 ReportThreadHang(); 66} 67 68NOINLINE void ThreadUnresponsive_DB() { 69 ReportThreadHang(); 70} 71 72NOINLINE void ThreadUnresponsive_FILE() { 73 ReportThreadHang(); 74} 75 76NOINLINE void ThreadUnresponsive_FILE_USER_BLOCKING() { 77 ReportThreadHang(); 78} 79 80NOINLINE void ThreadUnresponsive_PROCESS_LAUNCHER() { 81 ReportThreadHang(); 82} 83 84NOINLINE void ThreadUnresponsive_CACHE() { 85 ReportThreadHang(); 86} 87 88NOINLINE void ThreadUnresponsive_IO() { 89 ReportThreadHang(); 90} 91 92MSVC_POP_WARNING() 93MSVC_ENABLE_OPTIMIZE(); 94 95void CrashBecauseThreadWasUnresponsive(BrowserThread::ID thread_id) { 96 base::debug::Alias(&thread_id); 97 98 switch (thread_id) { 99 case BrowserThread::UI: 100 return ThreadUnresponsive_UI(); 101 case BrowserThread::DB: 102 return ThreadUnresponsive_DB(); 103 case BrowserThread::FILE: 104 return ThreadUnresponsive_FILE(); 105 case BrowserThread::FILE_USER_BLOCKING: 106 return ThreadUnresponsive_FILE_USER_BLOCKING(); 107 case BrowserThread::PROCESS_LAUNCHER: 108 return ThreadUnresponsive_PROCESS_LAUNCHER(); 109 case BrowserThread::CACHE: 110 return ThreadUnresponsive_CACHE(); 111 case BrowserThread::IO: 112 return ThreadUnresponsive_IO(); 113 case BrowserThread::ID_COUNT: 114 CHECK(false); // This shouldn't actually be reached! 115 break; 116 117 // Omission of the default hander is intentional -- that way the compiler 118 // should warn if our switch becomes outdated. 119 } 120 121 CHECK(false) << "Unknown thread was unresponsive."; // Shouldn't be reached. 122} 123 124} // namespace 125 126// ThreadWatcher methods and members. 127ThreadWatcher::ThreadWatcher(const WatchingParams& params) 128 : thread_id_(params.thread_id), 129 thread_name_(params.thread_name), 130 watched_loop_( 131 BrowserThread::GetMessageLoopProxyForThread(params.thread_id)), 132 sleep_time_(params.sleep_time), 133 unresponsive_time_(params.unresponsive_time), 134 ping_time_(base::TimeTicks::Now()), 135 pong_time_(ping_time_), 136 ping_sequence_number_(0), 137 active_(false), 138 ping_count_(params.unresponsive_threshold), 139 response_time_histogram_(NULL), 140 unresponsive_time_histogram_(NULL), 141 unresponsive_count_(0), 142 hung_processing_complete_(false), 143 unresponsive_threshold_(params.unresponsive_threshold), 144 crash_on_hang_(params.crash_on_hang), 145 live_threads_threshold_(params.live_threads_threshold), 146 weak_ptr_factory_(this) { 147 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 148 Initialize(); 149} 150 151ThreadWatcher::~ThreadWatcher() {} 152 153// static 154void ThreadWatcher::StartWatching(const WatchingParams& params) { 155 DCHECK_GE(params.sleep_time.InMilliseconds(), 0); 156 DCHECK_GE(params.unresponsive_time.InMilliseconds(), 157 params.sleep_time.InMilliseconds()); 158 159 // If we are not on WatchDogThread, then post a task to call StartWatching on 160 // WatchDogThread. 161 if (!WatchDogThread::CurrentlyOnWatchDogThread()) { 162 WatchDogThread::PostTask( 163 FROM_HERE, 164 base::Bind(&ThreadWatcher::StartWatching, params)); 165 return; 166 } 167 168 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 169 170 // Create a new thread watcher object for the given thread and activate it. 171 ThreadWatcher* watcher = new ThreadWatcher(params); 172 173 DCHECK(watcher); 174 // If we couldn't register the thread watcher object, we are shutting down, 175 // then don't activate thread watching. 176 if (!ThreadWatcherList::IsRegistered(params.thread_id)) 177 return; 178 watcher->ActivateThreadWatching(); 179} 180 181void ThreadWatcher::ActivateThreadWatching() { 182 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 183 if (active_) return; 184 active_ = true; 185 ping_count_ = unresponsive_threshold_; 186 ResetHangCounters(); 187 base::MessageLoop::current()->PostTask( 188 FROM_HERE, 189 base::Bind(&ThreadWatcher::PostPingMessage, 190 weak_ptr_factory_.GetWeakPtr())); 191} 192 193void ThreadWatcher::DeActivateThreadWatching() { 194 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 195 active_ = false; 196 ping_count_ = 0; 197 weak_ptr_factory_.InvalidateWeakPtrs(); 198} 199 200void ThreadWatcher::WakeUp() { 201 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 202 // There is some user activity, PostPingMessage task of thread watcher if 203 // needed. 204 if (!active_) return; 205 206 // Throw away the previous |unresponsive_count_| and start over again. Just 207 // before going to sleep, |unresponsive_count_| could be very close to 208 // |unresponsive_threshold_| and when user becomes active, 209 // |unresponsive_count_| can go over |unresponsive_threshold_| if there was no 210 // response for ping messages. Reset |unresponsive_count_| to start measuring 211 // the unresponsiveness of the threads when system becomes active. 212 unresponsive_count_ = 0; 213 214 if (ping_count_ <= 0) { 215 ping_count_ = unresponsive_threshold_; 216 ResetHangCounters(); 217 PostPingMessage(); 218 } else { 219 ping_count_ = unresponsive_threshold_; 220 } 221} 222 223void ThreadWatcher::PostPingMessage() { 224 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 225 // If we have stopped watching or if the user is idle, then stop sending 226 // ping messages. 227 if (!active_ || ping_count_ <= 0) 228 return; 229 230 // Save the current time when we have sent ping message. 231 ping_time_ = base::TimeTicks::Now(); 232 233 // Send a ping message to the watched thread. Callback will be called on 234 // the WatchDogThread. 235 base::Closure callback( 236 base::Bind(&ThreadWatcher::OnPongMessage, weak_ptr_factory_.GetWeakPtr(), 237 ping_sequence_number_)); 238 if (watched_loop_->PostTask( 239 FROM_HERE, 240 base::Bind(&ThreadWatcher::OnPingMessage, thread_id_, 241 callback))) { 242 // Post a task to check the responsiveness of watched thread. 243 base::MessageLoop::current()->PostDelayedTask( 244 FROM_HERE, 245 base::Bind(&ThreadWatcher::OnCheckResponsiveness, 246 weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_), 247 unresponsive_time_); 248 } else { 249 // Watched thread might have gone away, stop watching it. 250 DeActivateThreadWatching(); 251 } 252} 253 254void ThreadWatcher::OnPongMessage(uint64 ping_sequence_number) { 255 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 256 257 // Record watched thread's response time. 258 base::TimeTicks now = base::TimeTicks::Now(); 259 base::TimeDelta response_time = now - ping_time_; 260 response_time_histogram_->AddTime(response_time); 261 262 // Save the current time when we have got pong message. 263 pong_time_ = now; 264 265 // Check if there are any extra pings in flight. 266 DCHECK_EQ(ping_sequence_number_, ping_sequence_number); 267 if (ping_sequence_number_ != ping_sequence_number) 268 return; 269 270 // Increment sequence number for the next ping message to indicate watched 271 // thread is responsive. 272 ++ping_sequence_number_; 273 274 // If we have stopped watching or if the user is idle, then stop sending 275 // ping messages. 276 if (!active_ || --ping_count_ <= 0) 277 return; 278 279 base::MessageLoop::current()->PostDelayedTask( 280 FROM_HERE, 281 base::Bind(&ThreadWatcher::PostPingMessage, 282 weak_ptr_factory_.GetWeakPtr()), 283 sleep_time_); 284} 285 286void ThreadWatcher::OnCheckResponsiveness(uint64 ping_sequence_number) { 287 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 288 // If we have stopped watching then consider thread as responding. 289 if (!active_) { 290 responsive_ = true; 291 return; 292 } 293 // If the latest ping_sequence_number_ is not same as the ping_sequence_number 294 // that is passed in, then we can assume OnPongMessage was called. 295 // OnPongMessage increments ping_sequence_number_. 296 if (ping_sequence_number_ != ping_sequence_number) { 297 // Reset unresponsive_count_ to zero because we got a response from the 298 // watched thread. 299 ResetHangCounters(); 300 301 responsive_ = true; 302 return; 303 } 304 // Record that we got no response from watched thread. 305 GotNoResponse(); 306 307 // Post a task to check the responsiveness of watched thread. 308 base::MessageLoop::current()->PostDelayedTask( 309 FROM_HERE, 310 base::Bind(&ThreadWatcher::OnCheckResponsiveness, 311 weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_), 312 unresponsive_time_); 313 responsive_ = false; 314} 315 316void ThreadWatcher::Initialize() { 317 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 318 ThreadWatcherList::Register(this); 319 320 const std::string response_time_histogram_name = 321 "ThreadWatcher.ResponseTime." + thread_name_; 322 response_time_histogram_ = base::Histogram::FactoryTimeGet( 323 response_time_histogram_name, 324 base::TimeDelta::FromMilliseconds(1), 325 base::TimeDelta::FromSeconds(100), 50, 326 base::Histogram::kUmaTargetedHistogramFlag); 327 328 const std::string unresponsive_time_histogram_name = 329 "ThreadWatcher.Unresponsive." + thread_name_; 330 unresponsive_time_histogram_ = base::Histogram::FactoryTimeGet( 331 unresponsive_time_histogram_name, 332 base::TimeDelta::FromMilliseconds(1), 333 base::TimeDelta::FromSeconds(100), 50, 334 base::Histogram::kUmaTargetedHistogramFlag); 335 336 const std::string responsive_count_histogram_name = 337 "ThreadWatcher.ResponsiveThreads." + thread_name_; 338 responsive_count_histogram_ = base::LinearHistogram::FactoryGet( 339 responsive_count_histogram_name, 1, 10, 11, 340 base::Histogram::kUmaTargetedHistogramFlag); 341 342 const std::string unresponsive_count_histogram_name = 343 "ThreadWatcher.UnresponsiveThreads." + thread_name_; 344 unresponsive_count_histogram_ = base::LinearHistogram::FactoryGet( 345 unresponsive_count_histogram_name, 1, 10, 11, 346 base::Histogram::kUmaTargetedHistogramFlag); 347} 348 349// static 350void ThreadWatcher::OnPingMessage(const BrowserThread::ID& thread_id, 351 const base::Closure& callback_task) { 352 // This method is called on watched thread. 353 DCHECK(BrowserThread::CurrentlyOn(thread_id)); 354 WatchDogThread::PostTask(FROM_HERE, callback_task); 355} 356 357void ThreadWatcher::ResetHangCounters() { 358 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 359 unresponsive_count_ = 0; 360 hung_processing_complete_ = false; 361} 362 363void ThreadWatcher::GotNoResponse() { 364 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 365 366 ++unresponsive_count_; 367 if (!IsVeryUnresponsive()) 368 return; 369 370 // Record total unresponsive_time since last pong message. 371 base::TimeDelta unresponse_time = base::TimeTicks::Now() - pong_time_; 372 unresponsive_time_histogram_->AddTime(unresponse_time); 373 374 // We have already collected stats for the non-responding watched thread. 375 if (hung_processing_complete_) 376 return; 377 378 // Record how other threads are responding. 379 uint32 responding_thread_count = 0; 380 uint32 unresponding_thread_count = 0; 381 ThreadWatcherList::GetStatusOfThreads(&responding_thread_count, 382 &unresponding_thread_count); 383 384 // Record how many watched threads are responding. 385 responsive_count_histogram_->Add(responding_thread_count); 386 387 // Record how many watched threads are not responding. 388 unresponsive_count_histogram_->Add(unresponding_thread_count); 389 390 // Crash the browser if the watched thread is to be crashed on hang and if the 391 // number of other threads responding is less than or equal to 392 // live_threads_threshold_ and at least one other thread is responding. 393 if (crash_on_hang_ && 394 responding_thread_count > 0 && 395 responding_thread_count <= live_threads_threshold_) { 396 static bool crashed_once = false; 397 if (!crashed_once) { 398 crashed_once = true; 399 CrashBecauseThreadWasUnresponsive(thread_id_); 400 } 401 } 402 403 hung_processing_complete_ = true; 404} 405 406bool ThreadWatcher::IsVeryUnresponsive() { 407 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 408 return unresponsive_count_ >= unresponsive_threshold_; 409} 410 411// ThreadWatcherList methods and members. 412// 413// static 414ThreadWatcherList* ThreadWatcherList::g_thread_watcher_list_ = NULL; 415// static 416bool ThreadWatcherList::g_stopped_ = false; 417// static 418const int ThreadWatcherList::kSleepSeconds = 1; 419// static 420const int ThreadWatcherList::kUnresponsiveSeconds = 2; 421// static 422const int ThreadWatcherList::kUnresponsiveCount = 9; 423// static 424const int ThreadWatcherList::kLiveThreadsThreshold = 2; 425// static, non-const for tests. 426int ThreadWatcherList::g_initialize_delay_seconds = 120; 427 428ThreadWatcherList::CrashDataThresholds::CrashDataThresholds( 429 uint32 live_threads_threshold, 430 uint32 unresponsive_threshold) 431 : live_threads_threshold(live_threads_threshold), 432 unresponsive_threshold(unresponsive_threshold) { 433} 434 435ThreadWatcherList::CrashDataThresholds::CrashDataThresholds() 436 : live_threads_threshold(kLiveThreadsThreshold), 437 unresponsive_threshold(kUnresponsiveCount) { 438} 439 440// static 441void ThreadWatcherList::StartWatchingAll(const CommandLine& command_line) { 442 // TODO(rtenneti): Enable ThreadWatcher. 443 uint32 unresponsive_threshold; 444 CrashOnHangThreadMap crash_on_hang_threads; 445 ParseCommandLine(command_line, 446 &unresponsive_threshold, 447 &crash_on_hang_threads); 448 449 ThreadWatcherObserver::SetupNotifications( 450 base::TimeDelta::FromSeconds(kSleepSeconds * unresponsive_threshold)); 451 452 WatchDogThread::PostTask( 453 FROM_HERE, 454 base::Bind(&ThreadWatcherList::SetStopped, false)); 455 456 WatchDogThread::PostDelayedTask( 457 FROM_HERE, 458 base::Bind(&ThreadWatcherList::InitializeAndStartWatching, 459 unresponsive_threshold, 460 crash_on_hang_threads), 461 base::TimeDelta::FromSeconds(g_initialize_delay_seconds)); 462} 463 464// static 465void ThreadWatcherList::StopWatchingAll() { 466 // TODO(rtenneti): Enable ThreadWatcher. 467 ThreadWatcherObserver::RemoveNotifications(); 468 DeleteAll(); 469} 470 471// static 472void ThreadWatcherList::Register(ThreadWatcher* watcher) { 473 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 474 if (!g_thread_watcher_list_) 475 return; 476 DCHECK(!g_thread_watcher_list_->Find(watcher->thread_id())); 477 g_thread_watcher_list_->registered_[watcher->thread_id()] = watcher; 478} 479 480// static 481bool ThreadWatcherList::IsRegistered(const BrowserThread::ID thread_id) { 482 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 483 return NULL != ThreadWatcherList::Find(thread_id); 484} 485 486// static 487void ThreadWatcherList::GetStatusOfThreads(uint32* responding_thread_count, 488 uint32* unresponding_thread_count) { 489 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 490 *responding_thread_count = 0; 491 *unresponding_thread_count = 0; 492 if (!g_thread_watcher_list_) 493 return; 494 495 for (RegistrationList::iterator it = 496 g_thread_watcher_list_->registered_.begin(); 497 g_thread_watcher_list_->registered_.end() != it; 498 ++it) { 499 if (it->second->IsVeryUnresponsive()) 500 ++(*unresponding_thread_count); 501 else 502 ++(*responding_thread_count); 503 } 504} 505 506// static 507void ThreadWatcherList::WakeUpAll() { 508 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 509 if (!g_thread_watcher_list_) 510 return; 511 512 for (RegistrationList::iterator it = 513 g_thread_watcher_list_->registered_.begin(); 514 g_thread_watcher_list_->registered_.end() != it; 515 ++it) 516 it->second->WakeUp(); 517} 518 519ThreadWatcherList::ThreadWatcherList() { 520 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 521 CHECK(!g_thread_watcher_list_); 522 g_thread_watcher_list_ = this; 523} 524 525ThreadWatcherList::~ThreadWatcherList() { 526 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 527 DCHECK(this == g_thread_watcher_list_); 528 g_thread_watcher_list_ = NULL; 529} 530 531// static 532void ThreadWatcherList::ParseCommandLine( 533 const CommandLine& command_line, 534 uint32* unresponsive_threshold, 535 CrashOnHangThreadMap* crash_on_hang_threads) { 536 // Initialize |unresponsive_threshold| to a default value. 537 // TODO(rtenneti): Changed the default value to 4 times, until we can triage 538 // hangs automatically (and to reduce the crash dumps). 539 *unresponsive_threshold = kUnresponsiveCount * 4; 540 541 // Increase the unresponsive_threshold on the Stable and Beta channels to 542 // reduce the number of crashes due to ThreadWatcher. 543 chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel(); 544 if (channel == chrome::VersionInfo::CHANNEL_STABLE) { 545 *unresponsive_threshold *= 4; 546 } else if (channel == chrome::VersionInfo::CHANNEL_BETA) { 547 *unresponsive_threshold *= 2; 548 } 549 550#if defined(OS_WIN) 551 // For Windows XP (old systems), double the unresponsive_threshold to give 552 // the OS a chance to schedule UI/IO threads a time slice to respond with a 553 // pong message (to get around limitations with the OS). 554 if (base::win::GetVersion() <= base::win::VERSION_XP) 555 *unresponsive_threshold *= 2; 556#endif 557 558 uint32 crash_seconds = *unresponsive_threshold * kUnresponsiveSeconds; 559 std::string crash_on_hang_thread_names; 560 bool has_command_line_overwrite = false; 561 if (command_line.HasSwitch(switches::kCrashOnHangThreads)) { 562 crash_on_hang_thread_names = 563 command_line.GetSwitchValueASCII(switches::kCrashOnHangThreads); 564 has_command_line_overwrite = true; 565 } else if (channel != chrome::VersionInfo::CHANNEL_STABLE) { 566 // Default to crashing the browser if UI or IO or FILE threads are not 567 // responsive except in stable channel. 568 crash_on_hang_thread_names = base::StringPrintf( 569 "UI:%d:%d,IO:%d:%d,FILE:%d:%d", 570 kLiveThreadsThreshold, crash_seconds, 571 kLiveThreadsThreshold, crash_seconds, 572 kLiveThreadsThreshold, crash_seconds * 5); 573 } 574 575 ParseCommandLineCrashOnHangThreads(crash_on_hang_thread_names, 576 kLiveThreadsThreshold, 577 crash_seconds, 578 crash_on_hang_threads); 579 580 if (channel != chrome::VersionInfo::CHANNEL_CANARY || 581 has_command_line_overwrite) { 582 return; 583 } 584 585 const char* kFieldTrialName = "ThreadWatcher"; 586 587 // Nothing else to be done if the trial has already been set (i.e., when 588 // StartWatchingAll() has been already called once). 589 if (base::FieldTrialList::TrialExists(kFieldTrialName)) 590 return; 591 592 // Set up a field trial for 100% of the users to crash if either UI or IO 593 // thread is not responsive for 30 seconds (or 15 pings). 594 scoped_refptr<base::FieldTrial> field_trial( 595 base::FieldTrialList::FactoryGetFieldTrial( 596 kFieldTrialName, 100, "default_hung_threads", 597 2014, 10, 30, base::FieldTrial::SESSION_RANDOMIZED, NULL)); 598 int hung_thread_group = field_trial->AppendGroup("hung_thread", 100); 599 if (field_trial->group() == hung_thread_group) { 600 for (CrashOnHangThreadMap::iterator it = crash_on_hang_threads->begin(); 601 crash_on_hang_threads->end() != it; 602 ++it) { 603 if (it->first == "FILE") 604 continue; 605 it->second.live_threads_threshold = INT_MAX; 606 if (it->first == "UI") { 607 // TODO(rtenneti): set unresponsive threshold to 120 seconds to catch 608 // the worst UI hangs and for fewer crashes due to ThreadWatcher. Reduce 609 // it to a more reasonable time ala IO thread. 610 it->second.unresponsive_threshold = 60; 611 } else { 612 it->second.unresponsive_threshold = 15; 613 } 614 } 615 } 616} 617 618// static 619void ThreadWatcherList::ParseCommandLineCrashOnHangThreads( 620 const std::string& crash_on_hang_thread_names, 621 uint32 default_live_threads_threshold, 622 uint32 default_crash_seconds, 623 CrashOnHangThreadMap* crash_on_hang_threads) { 624 base::StringTokenizer tokens(crash_on_hang_thread_names, ","); 625 std::vector<std::string> values; 626 while (tokens.GetNext()) { 627 const std::string& token = tokens.token(); 628 base::SplitString(token, ':', &values); 629 std::string thread_name = values[0]; 630 631 uint32 live_threads_threshold = default_live_threads_threshold; 632 uint32 crash_seconds = default_crash_seconds; 633 if (values.size() >= 2 && 634 (!base::StringToUint(values[1], &live_threads_threshold))) { 635 continue; 636 } 637 if (values.size() >= 3 && 638 (!base::StringToUint(values[2], &crash_seconds))) { 639 continue; 640 } 641 uint32 unresponsive_threshold = static_cast<uint32>( 642 ceil(static_cast<float>(crash_seconds) / kUnresponsiveSeconds)); 643 644 CrashDataThresholds crash_data(live_threads_threshold, 645 unresponsive_threshold); 646 // Use the last specifier. 647 (*crash_on_hang_threads)[thread_name] = crash_data; 648 } 649} 650 651// static 652void ThreadWatcherList::InitializeAndStartWatching( 653 uint32 unresponsive_threshold, 654 const CrashOnHangThreadMap& crash_on_hang_threads) { 655 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 656 657 // Disarm the startup timebomb, even if stop has been called. 658 BrowserThread::PostTask( 659 BrowserThread::UI, 660 FROM_HERE, 661 base::Bind(&StartupTimeBomb::DisarmStartupTimeBomb)); 662 663 // This method is deferred in relationship to its StopWatchingAll() 664 // counterpart. If a previous initialization has already happened, or if 665 // stop has been called, there's nothing left to do here. 666 if (g_thread_watcher_list_ || g_stopped_) 667 return; 668 669 ThreadWatcherList* thread_watcher_list = new ThreadWatcherList(); 670 CHECK(thread_watcher_list); 671 672 const base::TimeDelta kSleepTime = 673 base::TimeDelta::FromSeconds(kSleepSeconds); 674 const base::TimeDelta kUnresponsiveTime = 675 base::TimeDelta::FromSeconds(kUnresponsiveSeconds); 676 677 StartWatching(BrowserThread::UI, "UI", kSleepTime, kUnresponsiveTime, 678 unresponsive_threshold, crash_on_hang_threads); 679 StartWatching(BrowserThread::IO, "IO", kSleepTime, kUnresponsiveTime, 680 unresponsive_threshold, crash_on_hang_threads); 681 StartWatching(BrowserThread::DB, "DB", kSleepTime, kUnresponsiveTime, 682 unresponsive_threshold, crash_on_hang_threads); 683 StartWatching(BrowserThread::FILE, "FILE", kSleepTime, kUnresponsiveTime, 684 unresponsive_threshold, crash_on_hang_threads); 685 StartWatching(BrowserThread::CACHE, "CACHE", kSleepTime, kUnresponsiveTime, 686 unresponsive_threshold, crash_on_hang_threads); 687} 688 689// static 690void ThreadWatcherList::StartWatching( 691 const BrowserThread::ID& thread_id, 692 const std::string& thread_name, 693 const base::TimeDelta& sleep_time, 694 const base::TimeDelta& unresponsive_time, 695 uint32 unresponsive_threshold, 696 const CrashOnHangThreadMap& crash_on_hang_threads) { 697 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 698 699 CrashOnHangThreadMap::const_iterator it = 700 crash_on_hang_threads.find(thread_name); 701 bool crash_on_hang = false; 702 uint32 live_threads_threshold = 0; 703 if (it != crash_on_hang_threads.end()) { 704 crash_on_hang = true; 705 live_threads_threshold = it->second.live_threads_threshold; 706 unresponsive_threshold = it->second.unresponsive_threshold; 707 } 708 709 ThreadWatcher::StartWatching( 710 ThreadWatcher::WatchingParams(thread_id, 711 thread_name, 712 sleep_time, 713 unresponsive_time, 714 unresponsive_threshold, 715 crash_on_hang, 716 live_threads_threshold)); 717} 718 719// static 720void ThreadWatcherList::DeleteAll() { 721 if (!WatchDogThread::CurrentlyOnWatchDogThread()) { 722 WatchDogThread::PostTask( 723 FROM_HERE, 724 base::Bind(&ThreadWatcherList::DeleteAll)); 725 return; 726 } 727 728 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 729 730 SetStopped(true); 731 732 if (!g_thread_watcher_list_) 733 return; 734 735 // Delete all thread watcher objects. 736 while (!g_thread_watcher_list_->registered_.empty()) { 737 RegistrationList::iterator it = g_thread_watcher_list_->registered_.begin(); 738 delete it->second; 739 g_thread_watcher_list_->registered_.erase(it); 740 } 741 742 delete g_thread_watcher_list_; 743} 744 745// static 746ThreadWatcher* ThreadWatcherList::Find(const BrowserThread::ID& thread_id) { 747 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 748 if (!g_thread_watcher_list_) 749 return NULL; 750 RegistrationList::iterator it = 751 g_thread_watcher_list_->registered_.find(thread_id); 752 if (g_thread_watcher_list_->registered_.end() == it) 753 return NULL; 754 return it->second; 755} 756 757// static 758void ThreadWatcherList::SetStopped(bool stopped) { 759 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 760 g_stopped_ = stopped; 761} 762 763// ThreadWatcherObserver methods and members. 764// 765// static 766ThreadWatcherObserver* ThreadWatcherObserver::g_thread_watcher_observer_ = NULL; 767 768ThreadWatcherObserver::ThreadWatcherObserver( 769 const base::TimeDelta& wakeup_interval) 770 : last_wakeup_time_(base::TimeTicks::Now()), 771 wakeup_interval_(wakeup_interval) { 772 CHECK(!g_thread_watcher_observer_); 773 g_thread_watcher_observer_ = this; 774} 775 776ThreadWatcherObserver::~ThreadWatcherObserver() { 777 DCHECK(this == g_thread_watcher_observer_); 778 g_thread_watcher_observer_ = NULL; 779} 780 781// static 782void ThreadWatcherObserver::SetupNotifications( 783 const base::TimeDelta& wakeup_interval) { 784 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 785 ThreadWatcherObserver* observer = new ThreadWatcherObserver(wakeup_interval); 786 observer->registrar_.Add( 787 observer, 788 chrome::NOTIFICATION_BROWSER_OPENED, 789 content::NotificationService::AllBrowserContextsAndSources()); 790 observer->registrar_.Add(observer, 791 chrome::NOTIFICATION_BROWSER_CLOSED, 792 content::NotificationService::AllSources()); 793 observer->registrar_.Add(observer, 794 chrome::NOTIFICATION_TAB_PARENTED, 795 content::NotificationService::AllSources()); 796 observer->registrar_.Add(observer, 797 chrome::NOTIFICATION_TAB_CLOSING, 798 content::NotificationService::AllSources()); 799 observer->registrar_.Add(observer, 800 content::NOTIFICATION_LOAD_START, 801 content::NotificationService::AllSources()); 802 observer->registrar_.Add(observer, 803 content::NOTIFICATION_LOAD_STOP, 804 content::NotificationService::AllSources()); 805 observer->registrar_.Add(observer, 806 content::NOTIFICATION_RENDERER_PROCESS_CLOSED, 807 content::NotificationService::AllSources()); 808 observer->registrar_.Add(observer, 809 content::NOTIFICATION_RENDER_WIDGET_HOST_HANG, 810 content::NotificationService::AllSources()); 811 observer->registrar_.Add(observer, 812 chrome::NOTIFICATION_OMNIBOX_OPENED_URL, 813 content::NotificationService::AllSources()); 814} 815 816// static 817void ThreadWatcherObserver::RemoveNotifications() { 818 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 819 if (!g_thread_watcher_observer_) 820 return; 821 g_thread_watcher_observer_->registrar_.RemoveAll(); 822 delete g_thread_watcher_observer_; 823} 824 825void ThreadWatcherObserver::Observe( 826 int type, 827 const content::NotificationSource& source, 828 const content::NotificationDetails& details) { 829 // There is some user activity, see if thread watchers are to be awakened. 830 base::TimeTicks now = base::TimeTicks::Now(); 831 if ((now - last_wakeup_time_) < wakeup_interval_) 832 return; 833 last_wakeup_time_ = now; 834 WatchDogThread::PostTask( 835 FROM_HERE, 836 base::Bind(&ThreadWatcherList::WakeUpAll)); 837} 838 839// WatchDogThread methods and members. 840 841// This lock protects g_watchdog_thread. 842static base::LazyInstance<base::Lock>::Leaky 843 g_watchdog_lock = LAZY_INSTANCE_INITIALIZER; 844 845// The singleton of this class. 846static WatchDogThread* g_watchdog_thread = NULL; 847 848WatchDogThread::WatchDogThread() : Thread("BrowserWatchdog") { 849} 850 851WatchDogThread::~WatchDogThread() { 852 Stop(); 853} 854 855// static 856bool WatchDogThread::CurrentlyOnWatchDogThread() { 857 base::AutoLock lock(g_watchdog_lock.Get()); 858 return g_watchdog_thread && 859 g_watchdog_thread->message_loop() == base::MessageLoop::current(); 860} 861 862// static 863bool WatchDogThread::PostTask(const tracked_objects::Location& from_here, 864 const base::Closure& task) { 865 return PostTaskHelper(from_here, task, base::TimeDelta()); 866} 867 868// static 869bool WatchDogThread::PostDelayedTask(const tracked_objects::Location& from_here, 870 const base::Closure& task, 871 base::TimeDelta delay) { 872 return PostTaskHelper(from_here, task, delay); 873} 874 875// static 876bool WatchDogThread::PostTaskHelper( 877 const tracked_objects::Location& from_here, 878 const base::Closure& task, 879 base::TimeDelta delay) { 880 { 881 base::AutoLock lock(g_watchdog_lock.Get()); 882 883 base::MessageLoop* message_loop = g_watchdog_thread ? 884 g_watchdog_thread->message_loop() : NULL; 885 if (message_loop) { 886 message_loop->PostDelayedTask(from_here, task, delay); 887 return true; 888 } 889 } 890 891 return false; 892} 893 894void WatchDogThread::Init() { 895 // This thread shouldn't be allowed to perform any blocking disk I/O. 896 base::ThreadRestrictions::SetIOAllowed(false); 897 898 base::AutoLock lock(g_watchdog_lock.Get()); 899 CHECK(!g_watchdog_thread); 900 g_watchdog_thread = this; 901} 902 903void WatchDogThread::CleanUp() { 904 base::AutoLock lock(g_watchdog_lock.Get()); 905 g_watchdog_thread = NULL; 906} 907 908namespace { 909 910// StartupWatchDogThread methods and members. 911// 912// Class for detecting hangs during startup. 913class StartupWatchDogThread : public base::Watchdog { 914 public: 915 // Constructor specifies how long the StartupWatchDogThread will wait before 916 // alarming. 917 explicit StartupWatchDogThread(const base::TimeDelta& duration) 918 : base::Watchdog(duration, "Startup watchdog thread", true) { 919#if defined(OS_ANDROID) 920 // TODO(rtenneti): Delete this code, after getting data. 921 start_time_clock_= base::Time::Now(); 922 start_time_monotonic_ = base::TimeTicks::Now(); 923 start_time_thread_now_ = base::TimeTicks::IsThreadNowSupported() 924 ? base::TimeTicks::ThreadNow() : base::TimeTicks::Now(); 925#endif // OS_ANDROID 926 } 927 928 // Alarm is called if the time expires after an Arm() without someone calling 929 // Disarm(). When Alarm goes off, in release mode we get the crash dump 930 // without crashing and in debug mode we break into the debugger. 931 virtual void Alarm() OVERRIDE { 932#if !defined(NDEBUG) 933 StartupHang(); 934 return; 935#elif !defined(OS_ANDROID) 936 WatchDogThread::PostTask(FROM_HERE, base::Bind(&StartupHang)); 937 return; 938#else // Android release: gather stats to figure out when to crash. 939 // TODO(rtenneti): Delete this code, after getting data. 940 UMA_HISTOGRAM_TIMES("StartupTimeBomb.Alarm.TimeDuration", 941 base::Time::Now() - start_time_clock_); 942 UMA_HISTOGRAM_TIMES("StartupTimeBomb.Alarm.TimeTicksDuration", 943 base::TimeTicks::Now() - start_time_monotonic_); 944 if (base::TimeTicks::IsThreadNowSupported()) { 945 UMA_HISTOGRAM_TIMES( 946 "StartupTimeBomb.Alarm.ThreadNowDuration", 947 base::TimeTicks::ThreadNow() - start_time_thread_now_); 948 } 949 return; 950#endif // OS_ANDROID 951 } 952 953 private: 954#if defined(OS_ANDROID) 955 // TODO(rtenneti): Delete this code, after getting data. 956 base::Time start_time_clock_; 957 base::TimeTicks start_time_monotonic_; 958 base::TimeTicks start_time_thread_now_; 959#endif // OS_ANDROID 960 961 DISALLOW_COPY_AND_ASSIGN(StartupWatchDogThread); 962}; 963 964// ShutdownWatchDogThread methods and members. 965// 966// Class for detecting hangs during shutdown. 967class ShutdownWatchDogThread : public base::Watchdog { 968 public: 969 // Constructor specifies how long the ShutdownWatchDogThread will wait before 970 // alarming. 971 explicit ShutdownWatchDogThread(const base::TimeDelta& duration) 972 : base::Watchdog(duration, "Shutdown watchdog thread", true) { 973 } 974 975 // Alarm is called if the time expires after an Arm() without someone calling 976 // Disarm(). We crash the browser if this method is called. 977 virtual void Alarm() OVERRIDE { 978 ShutdownHang(); 979 } 980 981 private: 982 DISALLOW_COPY_AND_ASSIGN(ShutdownWatchDogThread); 983}; 984} // namespace 985 986// StartupTimeBomb methods and members. 987// 988// static 989StartupTimeBomb* StartupTimeBomb::g_startup_timebomb_ = NULL; 990 991StartupTimeBomb::StartupTimeBomb() 992 : startup_watchdog_(NULL), 993 thread_id_(base::PlatformThread::CurrentId()) { 994 CHECK(!g_startup_timebomb_); 995 g_startup_timebomb_ = this; 996} 997 998StartupTimeBomb::~StartupTimeBomb() { 999 DCHECK(this == g_startup_timebomb_); 1000 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 1001 if (startup_watchdog_) 1002 Disarm(); 1003 g_startup_timebomb_ = NULL; 1004} 1005 1006void StartupTimeBomb::Arm(const base::TimeDelta& duration) { 1007 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 1008 DCHECK(!startup_watchdog_); 1009 startup_watchdog_ = new StartupWatchDogThread(duration); 1010 startup_watchdog_->Arm(); 1011 return; 1012} 1013 1014void StartupTimeBomb::Disarm() { 1015 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 1016 if (startup_watchdog_) { 1017 startup_watchdog_->Disarm(); 1018 startup_watchdog_->Cleanup(); 1019 DeleteStartupWatchdog(); 1020 } 1021} 1022 1023void StartupTimeBomb::DeleteStartupWatchdog() { 1024 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 1025 if (startup_watchdog_->IsJoinable()) { 1026 // Allow the watchdog thread to shutdown on UI. Watchdog thread shutdowns 1027 // very fast. 1028 base::ThreadRestrictions::SetIOAllowed(true); 1029 delete startup_watchdog_; 1030 startup_watchdog_ = NULL; 1031 return; 1032 } 1033 base::MessageLoop::current()->PostDelayedTask( 1034 FROM_HERE, 1035 base::Bind(&StartupTimeBomb::DeleteStartupWatchdog, 1036 base::Unretained(this)), 1037 base::TimeDelta::FromSeconds(10)); 1038} 1039 1040// static 1041void StartupTimeBomb::DisarmStartupTimeBomb() { 1042 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 1043 if (g_startup_timebomb_) 1044 g_startup_timebomb_->Disarm(); 1045} 1046 1047// ShutdownWatcherHelper methods and members. 1048// 1049// ShutdownWatcherHelper is a wrapper class for detecting hangs during 1050// shutdown. 1051ShutdownWatcherHelper::ShutdownWatcherHelper() 1052 : shutdown_watchdog_(NULL), 1053 thread_id_(base::PlatformThread::CurrentId()) { 1054} 1055 1056ShutdownWatcherHelper::~ShutdownWatcherHelper() { 1057 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 1058 if (shutdown_watchdog_) { 1059 shutdown_watchdog_->Disarm(); 1060 delete shutdown_watchdog_; 1061 shutdown_watchdog_ = NULL; 1062 } 1063} 1064 1065void ShutdownWatcherHelper::Arm(const base::TimeDelta& duration) { 1066 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 1067 DCHECK(!shutdown_watchdog_); 1068 base::TimeDelta actual_duration = duration; 1069 1070 chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel(); 1071 if (channel == chrome::VersionInfo::CHANNEL_STABLE) { 1072 actual_duration *= 20; 1073 } else if (channel == chrome::VersionInfo::CHANNEL_BETA || 1074 channel == chrome::VersionInfo::CHANNEL_DEV) { 1075 actual_duration *= 10; 1076 } 1077 1078#if defined(OS_WIN) 1079 // On Windows XP, give twice the time for shutdown. 1080 if (base::win::GetVersion() <= base::win::VERSION_XP) 1081 actual_duration *= 2; 1082#endif 1083 1084 shutdown_watchdog_ = new ShutdownWatchDogThread(actual_duration); 1085 shutdown_watchdog_->Arm(); 1086} 1087