1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/metrics/thread_watcher.h" 6 7#include <math.h> // ceil 8 9#include "base/bind.h" 10#include "base/compiler_specific.h" 11#include "base/debug/alias.h" 12#include "base/debug/dump_without_crashing.h" 13#include "base/lazy_instance.h" 14#include "base/metrics/field_trial.h" 15#include "base/strings/string_number_conversions.h" 16#include "base/strings/string_split.h" 17#include "base/strings/string_tokenizer.h" 18#include "base/strings/stringprintf.h" 19#include "base/threading/thread_restrictions.h" 20#include "build/build_config.h" 21#include "chrome/browser/chrome_notification_types.h" 22#include "chrome/common/chrome_switches.h" 23#include "chrome/common/chrome_version_info.h" 24#include "chrome/common/logging_chrome.h" 25#include "content/public/browser/notification_service.h" 26 27#if defined(OS_WIN) 28#include "base/win/windows_version.h" 29#endif 30 31using content::BrowserThread; 32 33namespace { 34 35// The following are unique function names for forcing the crash when a thread 36// is unresponsive. This makes it possible to tell from the callstack alone what 37// thread was unresponsive. 38// 39// We disable optimizations for this block of functions so the compiler doesn't 40// merge them all together. 41MSVC_DISABLE_OPTIMIZE() 42MSVC_PUSH_DISABLE_WARNING(4748) 43 44#ifndef NDEBUG 45int* NullPointer() { 46 return reinterpret_cast<int*>(NULL); 47} 48#endif 49 50void NullPointerCrash(int line_number) { 51#ifndef NDEBUG 52 *NullPointer() = line_number; // Crash. 53#else 54 base::debug::DumpWithoutCrashing(); 55#endif 56} 57 58#if !defined(OS_ANDROID) || !defined(NDEBUG) 59// TODO(rtenneti): Enabled crashing, after getting data. 60NOINLINE void StartupCrash() { 61 NullPointerCrash(__LINE__); 62} 63#endif // OS_ANDROID 64 65NOINLINE void ShutdownCrash() { 66 NullPointerCrash(__LINE__); 67} 68 69NOINLINE void ThreadUnresponsive_UI() { 70 NullPointerCrash(__LINE__); 71} 72 73NOINLINE void ThreadUnresponsive_DB() { 74 NullPointerCrash(__LINE__); 75} 76 77NOINLINE void ThreadUnresponsive_FILE() { 78 NullPointerCrash(__LINE__); 79} 80 81NOINLINE void ThreadUnresponsive_FILE_USER_BLOCKING() { 82 NullPointerCrash(__LINE__); 83} 84 85NOINLINE void ThreadUnresponsive_PROCESS_LAUNCHER() { 86 NullPointerCrash(__LINE__); 87} 88 89NOINLINE void ThreadUnresponsive_CACHE() { 90 NullPointerCrash(__LINE__); 91} 92 93NOINLINE void ThreadUnresponsive_IO() { 94 NullPointerCrash(__LINE__); 95} 96 97MSVC_POP_WARNING() 98MSVC_ENABLE_OPTIMIZE(); 99 100void CrashBecauseThreadWasUnresponsive(BrowserThread::ID thread_id) { 101 base::debug::Alias(&thread_id); 102 103 switch (thread_id) { 104 case BrowserThread::UI: 105 return ThreadUnresponsive_UI(); 106 case BrowserThread::DB: 107 return ThreadUnresponsive_DB(); 108 case BrowserThread::FILE: 109 return ThreadUnresponsive_FILE(); 110 case BrowserThread::FILE_USER_BLOCKING: 111 return ThreadUnresponsive_FILE_USER_BLOCKING(); 112 case BrowserThread::PROCESS_LAUNCHER: 113 return ThreadUnresponsive_PROCESS_LAUNCHER(); 114 case BrowserThread::CACHE: 115 return ThreadUnresponsive_CACHE(); 116 case BrowserThread::IO: 117 return ThreadUnresponsive_IO(); 118 case BrowserThread::ID_COUNT: 119 CHECK(false); // This shouldn't actually be reached! 120 break; 121 122 // Omission of the default hander is intentional -- that way the compiler 123 // should warn if our switch becomes outdated. 124 } 125 126 CHECK(false) << "Unknown thread was unresponsive."; // Shouldn't be reached. 127} 128 129} // namespace 130 131// ThreadWatcher methods and members. 132ThreadWatcher::ThreadWatcher(const WatchingParams& params) 133 : thread_id_(params.thread_id), 134 thread_name_(params.thread_name), 135 watched_loop_( 136 BrowserThread::GetMessageLoopProxyForThread(params.thread_id)), 137 sleep_time_(params.sleep_time), 138 unresponsive_time_(params.unresponsive_time), 139 ping_time_(base::TimeTicks::Now()), 140 pong_time_(ping_time_), 141 ping_sequence_number_(0), 142 active_(false), 143 ping_count_(params.unresponsive_threshold), 144 response_time_histogram_(NULL), 145 unresponsive_time_histogram_(NULL), 146 unresponsive_count_(0), 147 hung_processing_complete_(false), 148 unresponsive_threshold_(params.unresponsive_threshold), 149 crash_on_hang_(params.crash_on_hang), 150 live_threads_threshold_(params.live_threads_threshold), 151 weak_ptr_factory_(this) { 152 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 153 Initialize(); 154} 155 156ThreadWatcher::~ThreadWatcher() {} 157 158// static 159void ThreadWatcher::StartWatching(const WatchingParams& params) { 160 DCHECK_GE(params.sleep_time.InMilliseconds(), 0); 161 DCHECK_GE(params.unresponsive_time.InMilliseconds(), 162 params.sleep_time.InMilliseconds()); 163 164 // If we are not on WatchDogThread, then post a task to call StartWatching on 165 // WatchDogThread. 166 if (!WatchDogThread::CurrentlyOnWatchDogThread()) { 167 WatchDogThread::PostTask( 168 FROM_HERE, 169 base::Bind(&ThreadWatcher::StartWatching, params)); 170 return; 171 } 172 173 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 174 175 // Create a new thread watcher object for the given thread and activate it. 176 ThreadWatcher* watcher = new ThreadWatcher(params); 177 178 DCHECK(watcher); 179 // If we couldn't register the thread watcher object, we are shutting down, 180 // then don't activate thread watching. 181 if (!ThreadWatcherList::IsRegistered(params.thread_id)) 182 return; 183 watcher->ActivateThreadWatching(); 184} 185 186void ThreadWatcher::ActivateThreadWatching() { 187 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 188 if (active_) return; 189 active_ = true; 190 ping_count_ = unresponsive_threshold_; 191 ResetHangCounters(); 192 base::MessageLoop::current()->PostTask( 193 FROM_HERE, 194 base::Bind(&ThreadWatcher::PostPingMessage, 195 weak_ptr_factory_.GetWeakPtr())); 196} 197 198void ThreadWatcher::DeActivateThreadWatching() { 199 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 200 active_ = false; 201 ping_count_ = 0; 202 weak_ptr_factory_.InvalidateWeakPtrs(); 203} 204 205void ThreadWatcher::WakeUp() { 206 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 207 // There is some user activity, PostPingMessage task of thread watcher if 208 // needed. 209 if (!active_) return; 210 211 // Throw away the previous |unresponsive_count_| and start over again. Just 212 // before going to sleep, |unresponsive_count_| could be very close to 213 // |unresponsive_threshold_| and when user becomes active, 214 // |unresponsive_count_| can go over |unresponsive_threshold_| if there was no 215 // response for ping messages. Reset |unresponsive_count_| to start measuring 216 // the unresponsiveness of the threads when system becomes active. 217 unresponsive_count_ = 0; 218 219 if (ping_count_ <= 0) { 220 ping_count_ = unresponsive_threshold_; 221 ResetHangCounters(); 222 PostPingMessage(); 223 } else { 224 ping_count_ = unresponsive_threshold_; 225 } 226} 227 228void ThreadWatcher::PostPingMessage() { 229 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 230 // If we have stopped watching or if the user is idle, then stop sending 231 // ping messages. 232 if (!active_ || ping_count_ <= 0) 233 return; 234 235 // Save the current time when we have sent ping message. 236 ping_time_ = base::TimeTicks::Now(); 237 238 // Send a ping message to the watched thread. Callback will be called on 239 // the WatchDogThread. 240 base::Closure callback( 241 base::Bind(&ThreadWatcher::OnPongMessage, weak_ptr_factory_.GetWeakPtr(), 242 ping_sequence_number_)); 243 if (watched_loop_->PostTask( 244 FROM_HERE, 245 base::Bind(&ThreadWatcher::OnPingMessage, thread_id_, 246 callback))) { 247 // Post a task to check the responsiveness of watched thread. 248 base::MessageLoop::current()->PostDelayedTask( 249 FROM_HERE, 250 base::Bind(&ThreadWatcher::OnCheckResponsiveness, 251 weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_), 252 unresponsive_time_); 253 } else { 254 // Watched thread might have gone away, stop watching it. 255 DeActivateThreadWatching(); 256 } 257} 258 259void ThreadWatcher::OnPongMessage(uint64 ping_sequence_number) { 260 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 261 262 // Record watched thread's response time. 263 base::TimeTicks now = base::TimeTicks::Now(); 264 base::TimeDelta response_time = now - ping_time_; 265 response_time_histogram_->AddTime(response_time); 266 267 // Save the current time when we have got pong message. 268 pong_time_ = now; 269 270 // Check if there are any extra pings in flight. 271 DCHECK_EQ(ping_sequence_number_, ping_sequence_number); 272 if (ping_sequence_number_ != ping_sequence_number) 273 return; 274 275 // Increment sequence number for the next ping message to indicate watched 276 // thread is responsive. 277 ++ping_sequence_number_; 278 279 // If we have stopped watching or if the user is idle, then stop sending 280 // ping messages. 281 if (!active_ || --ping_count_ <= 0) 282 return; 283 284 base::MessageLoop::current()->PostDelayedTask( 285 FROM_HERE, 286 base::Bind(&ThreadWatcher::PostPingMessage, 287 weak_ptr_factory_.GetWeakPtr()), 288 sleep_time_); 289} 290 291void ThreadWatcher::OnCheckResponsiveness(uint64 ping_sequence_number) { 292 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 293 // If we have stopped watching then consider thread as responding. 294 if (!active_) { 295 responsive_ = true; 296 return; 297 } 298 // If the latest ping_sequence_number_ is not same as the ping_sequence_number 299 // that is passed in, then we can assume OnPongMessage was called. 300 // OnPongMessage increments ping_sequence_number_. 301 if (ping_sequence_number_ != ping_sequence_number) { 302 // Reset unresponsive_count_ to zero because we got a response from the 303 // watched thread. 304 ResetHangCounters(); 305 306 responsive_ = true; 307 return; 308 } 309 // Record that we got no response from watched thread. 310 GotNoResponse(); 311 312 // Post a task to check the responsiveness of watched thread. 313 base::MessageLoop::current()->PostDelayedTask( 314 FROM_HERE, 315 base::Bind(&ThreadWatcher::OnCheckResponsiveness, 316 weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_), 317 unresponsive_time_); 318 responsive_ = false; 319} 320 321void ThreadWatcher::Initialize() { 322 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 323 ThreadWatcherList::Register(this); 324 325 const std::string response_time_histogram_name = 326 "ThreadWatcher.ResponseTime." + thread_name_; 327 response_time_histogram_ = base::Histogram::FactoryTimeGet( 328 response_time_histogram_name, 329 base::TimeDelta::FromMilliseconds(1), 330 base::TimeDelta::FromSeconds(100), 50, 331 base::Histogram::kUmaTargetedHistogramFlag); 332 333 const std::string unresponsive_time_histogram_name = 334 "ThreadWatcher.Unresponsive." + thread_name_; 335 unresponsive_time_histogram_ = base::Histogram::FactoryTimeGet( 336 unresponsive_time_histogram_name, 337 base::TimeDelta::FromMilliseconds(1), 338 base::TimeDelta::FromSeconds(100), 50, 339 base::Histogram::kUmaTargetedHistogramFlag); 340 341 const std::string responsive_count_histogram_name = 342 "ThreadWatcher.ResponsiveThreads." + thread_name_; 343 responsive_count_histogram_ = base::LinearHistogram::FactoryGet( 344 responsive_count_histogram_name, 1, 10, 11, 345 base::Histogram::kUmaTargetedHistogramFlag); 346 347 const std::string unresponsive_count_histogram_name = 348 "ThreadWatcher.UnresponsiveThreads." + thread_name_; 349 unresponsive_count_histogram_ = base::LinearHistogram::FactoryGet( 350 unresponsive_count_histogram_name, 1, 10, 11, 351 base::Histogram::kUmaTargetedHistogramFlag); 352} 353 354// static 355void ThreadWatcher::OnPingMessage(const BrowserThread::ID& thread_id, 356 const base::Closure& callback_task) { 357 // This method is called on watched thread. 358 DCHECK(BrowserThread::CurrentlyOn(thread_id)); 359 WatchDogThread::PostTask(FROM_HERE, callback_task); 360} 361 362void ThreadWatcher::ResetHangCounters() { 363 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 364 unresponsive_count_ = 0; 365 hung_processing_complete_ = false; 366} 367 368void ThreadWatcher::GotNoResponse() { 369 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 370 371 ++unresponsive_count_; 372 if (!IsVeryUnresponsive()) 373 return; 374 375 // Record total unresponsive_time since last pong message. 376 base::TimeDelta unresponse_time = base::TimeTicks::Now() - pong_time_; 377 unresponsive_time_histogram_->AddTime(unresponse_time); 378 379 // We have already collected stats for the non-responding watched thread. 380 if (hung_processing_complete_) 381 return; 382 383 // Record how other threads are responding. 384 uint32 responding_thread_count = 0; 385 uint32 unresponding_thread_count = 0; 386 ThreadWatcherList::GetStatusOfThreads(&responding_thread_count, 387 &unresponding_thread_count); 388 389 // Record how many watched threads are responding. 390 responsive_count_histogram_->Add(responding_thread_count); 391 392 // Record how many watched threads are not responding. 393 unresponsive_count_histogram_->Add(unresponding_thread_count); 394 395 // Crash the browser if the watched thread is to be crashed on hang and if the 396 // number of other threads responding is less than or equal to 397 // live_threads_threshold_ and at least one other thread is responding. 398 if (crash_on_hang_ && 399 responding_thread_count > 0 && 400 responding_thread_count <= live_threads_threshold_) { 401 static bool crashed_once = false; 402 if (!crashed_once) { 403 crashed_once = true; 404 CrashBecauseThreadWasUnresponsive(thread_id_); 405 } 406 } 407 408 hung_processing_complete_ = true; 409} 410 411bool ThreadWatcher::IsVeryUnresponsive() { 412 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 413 return unresponsive_count_ >= unresponsive_threshold_; 414} 415 416// ThreadWatcherList methods and members. 417// 418// static 419ThreadWatcherList* ThreadWatcherList::g_thread_watcher_list_ = NULL; 420// static 421bool ThreadWatcherList::g_stopped_ = false; 422// static 423const int ThreadWatcherList::kSleepSeconds = 1; 424// static 425const int ThreadWatcherList::kUnresponsiveSeconds = 2; 426// static 427const int ThreadWatcherList::kUnresponsiveCount = 9; 428// static 429const int ThreadWatcherList::kLiveThreadsThreshold = 2; 430// static, non-const for tests. 431int ThreadWatcherList::g_initialize_delay_seconds = 120; 432 433ThreadWatcherList::CrashDataThresholds::CrashDataThresholds( 434 uint32 live_threads_threshold, 435 uint32 unresponsive_threshold) 436 : live_threads_threshold(live_threads_threshold), 437 unresponsive_threshold(unresponsive_threshold) { 438} 439 440ThreadWatcherList::CrashDataThresholds::CrashDataThresholds() 441 : live_threads_threshold(kLiveThreadsThreshold), 442 unresponsive_threshold(kUnresponsiveCount) { 443} 444 445// static 446void ThreadWatcherList::StartWatchingAll(const CommandLine& command_line) { 447 // TODO(rtenneti): Enable ThreadWatcher. 448 uint32 unresponsive_threshold; 449 CrashOnHangThreadMap crash_on_hang_threads; 450 ParseCommandLine(command_line, 451 &unresponsive_threshold, 452 &crash_on_hang_threads); 453 454 ThreadWatcherObserver::SetupNotifications( 455 base::TimeDelta::FromSeconds(kSleepSeconds * unresponsive_threshold)); 456 457 WatchDogThread::PostTask( 458 FROM_HERE, 459 base::Bind(&ThreadWatcherList::SetStopped, false)); 460 461 WatchDogThread::PostDelayedTask( 462 FROM_HERE, 463 base::Bind(&ThreadWatcherList::InitializeAndStartWatching, 464 unresponsive_threshold, 465 crash_on_hang_threads), 466 base::TimeDelta::FromSeconds(g_initialize_delay_seconds)); 467} 468 469// static 470void ThreadWatcherList::StopWatchingAll() { 471 // TODO(rtenneti): Enable ThreadWatcher. 472 ThreadWatcherObserver::RemoveNotifications(); 473 DeleteAll(); 474} 475 476// static 477void ThreadWatcherList::Register(ThreadWatcher* watcher) { 478 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 479 if (!g_thread_watcher_list_) 480 return; 481 DCHECK(!g_thread_watcher_list_->Find(watcher->thread_id())); 482 g_thread_watcher_list_->registered_[watcher->thread_id()] = watcher; 483} 484 485// static 486bool ThreadWatcherList::IsRegistered(const BrowserThread::ID thread_id) { 487 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 488 return NULL != ThreadWatcherList::Find(thread_id); 489} 490 491// static 492void ThreadWatcherList::GetStatusOfThreads(uint32* responding_thread_count, 493 uint32* unresponding_thread_count) { 494 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 495 *responding_thread_count = 0; 496 *unresponding_thread_count = 0; 497 if (!g_thread_watcher_list_) 498 return; 499 500 for (RegistrationList::iterator it = 501 g_thread_watcher_list_->registered_.begin(); 502 g_thread_watcher_list_->registered_.end() != it; 503 ++it) { 504 if (it->second->IsVeryUnresponsive()) 505 ++(*unresponding_thread_count); 506 else 507 ++(*responding_thread_count); 508 } 509} 510 511// static 512void ThreadWatcherList::WakeUpAll() { 513 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 514 if (!g_thread_watcher_list_) 515 return; 516 517 for (RegistrationList::iterator it = 518 g_thread_watcher_list_->registered_.begin(); 519 g_thread_watcher_list_->registered_.end() != it; 520 ++it) 521 it->second->WakeUp(); 522} 523 524ThreadWatcherList::ThreadWatcherList() { 525 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 526 CHECK(!g_thread_watcher_list_); 527 g_thread_watcher_list_ = this; 528} 529 530ThreadWatcherList::~ThreadWatcherList() { 531 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 532 DCHECK(this == g_thread_watcher_list_); 533 g_thread_watcher_list_ = NULL; 534} 535 536// static 537void ThreadWatcherList::ParseCommandLine( 538 const CommandLine& command_line, 539 uint32* unresponsive_threshold, 540 CrashOnHangThreadMap* crash_on_hang_threads) { 541 // Initialize |unresponsive_threshold| to a default value. 542 *unresponsive_threshold = kUnresponsiveCount; 543 544 // Increase the unresponsive_threshold on the Stable and Beta channels to 545 // reduce the number of crashes due to ThreadWatcher. 546 chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel(); 547 if (channel == chrome::VersionInfo::CHANNEL_STABLE) { 548 *unresponsive_threshold *= 4; 549 } else if (channel == chrome::VersionInfo::CHANNEL_BETA) { 550 *unresponsive_threshold *= 2; 551 } 552 553#if defined(OS_WIN) 554 // For Windows XP (old systems), double the unresponsive_threshold to give 555 // the OS a chance to schedule UI/IO threads a time slice to respond with a 556 // pong message (to get around limitations with the OS). 557 if (base::win::GetVersion() <= base::win::VERSION_XP) 558 *unresponsive_threshold *= 2; 559#endif 560 561 uint32 crash_seconds = *unresponsive_threshold * kUnresponsiveSeconds; 562 std::string crash_on_hang_thread_names; 563 bool has_command_line_overwrite = false; 564 if (command_line.HasSwitch(switches::kCrashOnHangThreads)) { 565 crash_on_hang_thread_names = 566 command_line.GetSwitchValueASCII(switches::kCrashOnHangThreads); 567 has_command_line_overwrite = true; 568 } else if (channel != chrome::VersionInfo::CHANNEL_STABLE) { 569 // Default to crashing the browser if UI or IO or FILE threads are not 570 // responsive except in stable channel. 571 crash_on_hang_thread_names = base::StringPrintf( 572 "UI:%d:%d,IO:%d:%d,FILE:%d:%d", 573 kLiveThreadsThreshold, crash_seconds, 574 kLiveThreadsThreshold, crash_seconds, 575 kLiveThreadsThreshold, crash_seconds * 5); 576 } 577 578 ParseCommandLineCrashOnHangThreads(crash_on_hang_thread_names, 579 kLiveThreadsThreshold, 580 crash_seconds, 581 crash_on_hang_threads); 582 583 if (channel != chrome::VersionInfo::CHANNEL_CANARY || 584 has_command_line_overwrite) { 585 return; 586 } 587 588 const char* kFieldTrialName = "ThreadWatcher"; 589 590 // Nothing else to be done if the trial has already been set (i.e., when 591 // StartWatchingAll() has been already called once). 592 if (base::FieldTrialList::TrialExists(kFieldTrialName)) 593 return; 594 595 // Set up a field trial for 100% of the users to crash if either UI or IO 596 // thread is not responsive for 30 seconds (or 15 pings). 597 scoped_refptr<base::FieldTrial> field_trial( 598 base::FieldTrialList::FactoryGetFieldTrial( 599 kFieldTrialName, 100, "default_hung_threads", 600 2014, 10, 30, base::FieldTrial::SESSION_RANDOMIZED, NULL)); 601 int hung_thread_group = field_trial->AppendGroup("hung_thread", 100); 602 if (field_trial->group() == hung_thread_group) { 603 for (CrashOnHangThreadMap::iterator it = crash_on_hang_threads->begin(); 604 crash_on_hang_threads->end() != it; 605 ++it) { 606 if (it->first == "FILE") 607 continue; 608 it->second.live_threads_threshold = INT_MAX; 609 if (it->first == "UI") { 610 // TODO(rtenneti): set unresponsive threshold to 120 seconds to catch 611 // the worst UI hangs and for fewer crashes due to ThreadWatcher. Reduce 612 // it to a more reasonable time ala IO thread. 613 it->second.unresponsive_threshold = 60; 614 } else { 615 it->second.unresponsive_threshold = 15; 616 } 617 } 618 } 619} 620 621// static 622void ThreadWatcherList::ParseCommandLineCrashOnHangThreads( 623 const std::string& crash_on_hang_thread_names, 624 uint32 default_live_threads_threshold, 625 uint32 default_crash_seconds, 626 CrashOnHangThreadMap* crash_on_hang_threads) { 627 base::StringTokenizer tokens(crash_on_hang_thread_names, ","); 628 std::vector<std::string> values; 629 while (tokens.GetNext()) { 630 const std::string& token = tokens.token(); 631 base::SplitString(token, ':', &values); 632 std::string thread_name = values[0]; 633 634 uint32 live_threads_threshold = default_live_threads_threshold; 635 uint32 crash_seconds = default_crash_seconds; 636 if (values.size() >= 2 && 637 (!base::StringToUint(values[1], &live_threads_threshold))) { 638 continue; 639 } 640 if (values.size() >= 3 && 641 (!base::StringToUint(values[2], &crash_seconds))) { 642 continue; 643 } 644 uint32 unresponsive_threshold = static_cast<uint32>( 645 ceil(static_cast<float>(crash_seconds) / kUnresponsiveSeconds)); 646 647 CrashDataThresholds crash_data(live_threads_threshold, 648 unresponsive_threshold); 649 // Use the last specifier. 650 (*crash_on_hang_threads)[thread_name] = crash_data; 651 } 652} 653 654// static 655void ThreadWatcherList::InitializeAndStartWatching( 656 uint32 unresponsive_threshold, 657 const CrashOnHangThreadMap& crash_on_hang_threads) { 658 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 659 660 // Disarm the startup timebomb, even if stop has been called. 661 BrowserThread::PostTask( 662 BrowserThread::UI, 663 FROM_HERE, 664 base::Bind(&StartupTimeBomb::DisarmStartupTimeBomb)); 665 666 // This method is deferred in relationship to its StopWatchingAll() 667 // counterpart. If a previous initialization has already happened, or if 668 // stop has been called, there's nothing left to do here. 669 if (g_thread_watcher_list_ || g_stopped_) 670 return; 671 672 ThreadWatcherList* thread_watcher_list = new ThreadWatcherList(); 673 CHECK(thread_watcher_list); 674 675 const base::TimeDelta kSleepTime = 676 base::TimeDelta::FromSeconds(kSleepSeconds); 677 const base::TimeDelta kUnresponsiveTime = 678 base::TimeDelta::FromSeconds(kUnresponsiveSeconds); 679 680 StartWatching(BrowserThread::UI, "UI", kSleepTime, kUnresponsiveTime, 681 unresponsive_threshold, crash_on_hang_threads); 682 StartWatching(BrowserThread::IO, "IO", kSleepTime, kUnresponsiveTime, 683 unresponsive_threshold, crash_on_hang_threads); 684 StartWatching(BrowserThread::DB, "DB", kSleepTime, kUnresponsiveTime, 685 unresponsive_threshold, crash_on_hang_threads); 686 StartWatching(BrowserThread::FILE, "FILE", kSleepTime, kUnresponsiveTime, 687 unresponsive_threshold, crash_on_hang_threads); 688 StartWatching(BrowserThread::CACHE, "CACHE", kSleepTime, kUnresponsiveTime, 689 unresponsive_threshold, crash_on_hang_threads); 690} 691 692// static 693void ThreadWatcherList::StartWatching( 694 const BrowserThread::ID& thread_id, 695 const std::string& thread_name, 696 const base::TimeDelta& sleep_time, 697 const base::TimeDelta& unresponsive_time, 698 uint32 unresponsive_threshold, 699 const CrashOnHangThreadMap& crash_on_hang_threads) { 700 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 701 702 CrashOnHangThreadMap::const_iterator it = 703 crash_on_hang_threads.find(thread_name); 704 bool crash_on_hang = false; 705 uint32 live_threads_threshold = 0; 706 if (it != crash_on_hang_threads.end()) { 707 crash_on_hang = true; 708 live_threads_threshold = it->second.live_threads_threshold; 709 unresponsive_threshold = it->second.unresponsive_threshold; 710 } 711 712 ThreadWatcher::StartWatching( 713 ThreadWatcher::WatchingParams(thread_id, 714 thread_name, 715 sleep_time, 716 unresponsive_time, 717 unresponsive_threshold, 718 crash_on_hang, 719 live_threads_threshold)); 720} 721 722// static 723void ThreadWatcherList::DeleteAll() { 724 if (!WatchDogThread::CurrentlyOnWatchDogThread()) { 725 WatchDogThread::PostTask( 726 FROM_HERE, 727 base::Bind(&ThreadWatcherList::DeleteAll)); 728 return; 729 } 730 731 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 732 733 SetStopped(true); 734 735 if (!g_thread_watcher_list_) 736 return; 737 738 // Delete all thread watcher objects. 739 while (!g_thread_watcher_list_->registered_.empty()) { 740 RegistrationList::iterator it = g_thread_watcher_list_->registered_.begin(); 741 delete it->second; 742 g_thread_watcher_list_->registered_.erase(it); 743 } 744 745 delete g_thread_watcher_list_; 746} 747 748// static 749ThreadWatcher* ThreadWatcherList::Find(const BrowserThread::ID& thread_id) { 750 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 751 if (!g_thread_watcher_list_) 752 return NULL; 753 RegistrationList::iterator it = 754 g_thread_watcher_list_->registered_.find(thread_id); 755 if (g_thread_watcher_list_->registered_.end() == it) 756 return NULL; 757 return it->second; 758} 759 760// static 761void ThreadWatcherList::SetStopped(bool stopped) { 762 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 763 g_stopped_ = stopped; 764} 765 766// ThreadWatcherObserver methods and members. 767// 768// static 769ThreadWatcherObserver* ThreadWatcherObserver::g_thread_watcher_observer_ = NULL; 770 771ThreadWatcherObserver::ThreadWatcherObserver( 772 const base::TimeDelta& wakeup_interval) 773 : last_wakeup_time_(base::TimeTicks::Now()), 774 wakeup_interval_(wakeup_interval) { 775 CHECK(!g_thread_watcher_observer_); 776 g_thread_watcher_observer_ = this; 777} 778 779ThreadWatcherObserver::~ThreadWatcherObserver() { 780 DCHECK(this == g_thread_watcher_observer_); 781 g_thread_watcher_observer_ = NULL; 782} 783 784// static 785void ThreadWatcherObserver::SetupNotifications( 786 const base::TimeDelta& wakeup_interval) { 787 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 788 ThreadWatcherObserver* observer = new ThreadWatcherObserver(wakeup_interval); 789 observer->registrar_.Add( 790 observer, 791 chrome::NOTIFICATION_BROWSER_OPENED, 792 content::NotificationService::AllBrowserContextsAndSources()); 793 observer->registrar_.Add(observer, 794 chrome::NOTIFICATION_BROWSER_CLOSED, 795 content::NotificationService::AllSources()); 796 observer->registrar_.Add(observer, 797 chrome::NOTIFICATION_TAB_PARENTED, 798 content::NotificationService::AllSources()); 799 observer->registrar_.Add(observer, 800 chrome::NOTIFICATION_TAB_CLOSING, 801 content::NotificationService::AllSources()); 802 observer->registrar_.Add(observer, 803 content::NOTIFICATION_LOAD_START, 804 content::NotificationService::AllSources()); 805 observer->registrar_.Add(observer, 806 content::NOTIFICATION_LOAD_STOP, 807 content::NotificationService::AllSources()); 808 observer->registrar_.Add(observer, 809 content::NOTIFICATION_RENDERER_PROCESS_CLOSED, 810 content::NotificationService::AllSources()); 811 observer->registrar_.Add(observer, 812 content::NOTIFICATION_RENDER_WIDGET_HOST_HANG, 813 content::NotificationService::AllSources()); 814 observer->registrar_.Add(observer, 815 chrome::NOTIFICATION_OMNIBOX_OPENED_URL, 816 content::NotificationService::AllSources()); 817} 818 819// static 820void ThreadWatcherObserver::RemoveNotifications() { 821 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 822 if (!g_thread_watcher_observer_) 823 return; 824 g_thread_watcher_observer_->registrar_.RemoveAll(); 825 delete g_thread_watcher_observer_; 826} 827 828void ThreadWatcherObserver::Observe( 829 int type, 830 const content::NotificationSource& source, 831 const content::NotificationDetails& details) { 832 // There is some user activity, see if thread watchers are to be awakened. 833 base::TimeTicks now = base::TimeTicks::Now(); 834 if ((now - last_wakeup_time_) < wakeup_interval_) 835 return; 836 last_wakeup_time_ = now; 837 WatchDogThread::PostTask( 838 FROM_HERE, 839 base::Bind(&ThreadWatcherList::WakeUpAll)); 840} 841 842// WatchDogThread methods and members. 843 844// This lock protects g_watchdog_thread. 845static base::LazyInstance<base::Lock>::Leaky 846 g_watchdog_lock = LAZY_INSTANCE_INITIALIZER; 847 848// The singleton of this class. 849static WatchDogThread* g_watchdog_thread = NULL; 850 851WatchDogThread::WatchDogThread() : Thread("BrowserWatchdog") { 852} 853 854WatchDogThread::~WatchDogThread() { 855 Stop(); 856} 857 858// static 859bool WatchDogThread::CurrentlyOnWatchDogThread() { 860 base::AutoLock lock(g_watchdog_lock.Get()); 861 return g_watchdog_thread && 862 g_watchdog_thread->message_loop() == base::MessageLoop::current(); 863} 864 865// static 866bool WatchDogThread::PostTask(const tracked_objects::Location& from_here, 867 const base::Closure& task) { 868 return PostTaskHelper(from_here, task, base::TimeDelta()); 869} 870 871// static 872bool WatchDogThread::PostDelayedTask(const tracked_objects::Location& from_here, 873 const base::Closure& task, 874 base::TimeDelta delay) { 875 return PostTaskHelper(from_here, task, delay); 876} 877 878// static 879bool WatchDogThread::PostTaskHelper( 880 const tracked_objects::Location& from_here, 881 const base::Closure& task, 882 base::TimeDelta delay) { 883 { 884 base::AutoLock lock(g_watchdog_lock.Get()); 885 886 base::MessageLoop* message_loop = g_watchdog_thread ? 887 g_watchdog_thread->message_loop() : NULL; 888 if (message_loop) { 889 message_loop->PostDelayedTask(from_here, task, delay); 890 return true; 891 } 892 } 893 894 return false; 895} 896 897void WatchDogThread::Init() { 898 // This thread shouldn't be allowed to perform any blocking disk I/O. 899 base::ThreadRestrictions::SetIOAllowed(false); 900 901 base::AutoLock lock(g_watchdog_lock.Get()); 902 CHECK(!g_watchdog_thread); 903 g_watchdog_thread = this; 904} 905 906void WatchDogThread::CleanUp() { 907 base::AutoLock lock(g_watchdog_lock.Get()); 908 g_watchdog_thread = NULL; 909} 910 911namespace { 912 913// StartupWatchDogThread methods and members. 914// 915// Class for detecting hangs during startup. 916class StartupWatchDogThread : public base::Watchdog { 917 public: 918 // Constructor specifies how long the StartupWatchDogThread will wait before 919 // alarming. 920 explicit StartupWatchDogThread(const base::TimeDelta& duration) 921 : base::Watchdog(duration, "Startup watchdog thread", true) { 922#if defined(OS_ANDROID) 923 // TODO(rtenneti): Delete this code, after getting data. 924 start_time_clock_= base::Time::Now(); 925 start_time_monotonic_ = base::TimeTicks::Now(); 926 start_time_thread_now_ = base::TimeTicks::IsThreadNowSupported() 927 ? base::TimeTicks::ThreadNow() : base::TimeTicks::Now(); 928#endif // OS_ANDROID 929 } 930 931 // Alarm is called if the time expires after an Arm() without someone calling 932 // Disarm(). When Alarm goes off, in release mode we get the crash dump 933 // without crashing and in debug mode we break into the debugger. 934 virtual void Alarm() OVERRIDE { 935#if !defined(NDEBUG) 936 StartupCrash(); 937 return; 938#elif !defined(OS_ANDROID) 939 WatchDogThread::PostTask(FROM_HERE, base::Bind(&StartupCrash)); 940 return; 941#else // Android release: gather stats to figure out when to crash. 942 // TODO(rtenneti): Delete this code, after getting data. 943 UMA_HISTOGRAM_TIMES("StartupTimeBomb.Alarm.TimeDuration", 944 base::Time::Now() - start_time_clock_); 945 UMA_HISTOGRAM_TIMES("StartupTimeBomb.Alarm.TimeTicksDuration", 946 base::TimeTicks::Now() - start_time_monotonic_); 947 if (base::TimeTicks::IsThreadNowSupported()) { 948 UMA_HISTOGRAM_TIMES( 949 "StartupTimeBomb.Alarm.ThreadNowDuration", 950 base::TimeTicks::ThreadNow() - start_time_thread_now_); 951 } 952 return; 953#endif // OS_ANDROID 954 } 955 956 private: 957#if defined(OS_ANDROID) 958 // TODO(rtenneti): Delete this code, after getting data. 959 base::Time start_time_clock_; 960 base::TimeTicks start_time_monotonic_; 961 base::TimeTicks start_time_thread_now_; 962#endif // OS_ANDROID 963 964 DISALLOW_COPY_AND_ASSIGN(StartupWatchDogThread); 965}; 966 967// ShutdownWatchDogThread methods and members. 968// 969// Class for detecting hangs during shutdown. 970class ShutdownWatchDogThread : public base::Watchdog { 971 public: 972 // Constructor specifies how long the ShutdownWatchDogThread will wait before 973 // alarming. 974 explicit ShutdownWatchDogThread(const base::TimeDelta& duration) 975 : base::Watchdog(duration, "Shutdown watchdog thread", true) { 976 } 977 978 // Alarm is called if the time expires after an Arm() without someone calling 979 // Disarm(). We crash the browser if this method is called. 980 virtual void Alarm() OVERRIDE { 981 ShutdownCrash(); 982 } 983 984 private: 985 DISALLOW_COPY_AND_ASSIGN(ShutdownWatchDogThread); 986}; 987} // namespace 988 989// StartupTimeBomb methods and members. 990// 991// static 992StartupTimeBomb* StartupTimeBomb::g_startup_timebomb_ = NULL; 993 994StartupTimeBomb::StartupTimeBomb() 995 : startup_watchdog_(NULL), 996 thread_id_(base::PlatformThread::CurrentId()) { 997 CHECK(!g_startup_timebomb_); 998 g_startup_timebomb_ = this; 999} 1000 1001StartupTimeBomb::~StartupTimeBomb() { 1002 DCHECK(this == g_startup_timebomb_); 1003 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 1004 if (startup_watchdog_) 1005 Disarm(); 1006 g_startup_timebomb_ = NULL; 1007} 1008 1009void StartupTimeBomb::Arm(const base::TimeDelta& duration) { 1010 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 1011 DCHECK(!startup_watchdog_); 1012 startup_watchdog_ = new StartupWatchDogThread(duration); 1013 startup_watchdog_->Arm(); 1014 return; 1015} 1016 1017void StartupTimeBomb::Disarm() { 1018 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 1019 if (startup_watchdog_) { 1020 startup_watchdog_->Disarm(); 1021 startup_watchdog_->Cleanup(); 1022 DeleteStartupWatchdog(); 1023 } 1024} 1025 1026void StartupTimeBomb::DeleteStartupWatchdog() { 1027 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 1028 if (startup_watchdog_->IsJoinable()) { 1029 // Allow the watchdog thread to shutdown on UI. Watchdog thread shutdowns 1030 // very fast. 1031 base::ThreadRestrictions::SetIOAllowed(true); 1032 delete startup_watchdog_; 1033 startup_watchdog_ = NULL; 1034 return; 1035 } 1036 base::MessageLoop::current()->PostDelayedTask( 1037 FROM_HERE, 1038 base::Bind(&StartupTimeBomb::DeleteStartupWatchdog, 1039 base::Unretained(this)), 1040 base::TimeDelta::FromSeconds(10)); 1041} 1042 1043// static 1044void StartupTimeBomb::DisarmStartupTimeBomb() { 1045 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 1046 if (g_startup_timebomb_) 1047 g_startup_timebomb_->Disarm(); 1048} 1049 1050// ShutdownWatcherHelper methods and members. 1051// 1052// ShutdownWatcherHelper is a wrapper class for detecting hangs during 1053// shutdown. 1054ShutdownWatcherHelper::ShutdownWatcherHelper() 1055 : shutdown_watchdog_(NULL), 1056 thread_id_(base::PlatformThread::CurrentId()) { 1057} 1058 1059ShutdownWatcherHelper::~ShutdownWatcherHelper() { 1060 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 1061 if (shutdown_watchdog_) { 1062 shutdown_watchdog_->Disarm(); 1063 delete shutdown_watchdog_; 1064 shutdown_watchdog_ = NULL; 1065 } 1066} 1067 1068void ShutdownWatcherHelper::Arm(const base::TimeDelta& duration) { 1069 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 1070 DCHECK(!shutdown_watchdog_); 1071 base::TimeDelta actual_duration = duration; 1072 1073 chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel(); 1074 if (channel == chrome::VersionInfo::CHANNEL_STABLE) { 1075 actual_duration *= 20; 1076 } else if (channel == chrome::VersionInfo::CHANNEL_BETA || 1077 channel == chrome::VersionInfo::CHANNEL_DEV) { 1078 actual_duration *= 10; 1079 } 1080 1081#if defined(OS_WIN) 1082 // On Windows XP, give twice the time for shutdown. 1083 if (base::win::GetVersion() <= base::win::VERSION_XP) 1084 actual_duration *= 2; 1085#endif 1086 1087 shutdown_watchdog_ = new ShutdownWatchDogThread(actual_duration); 1088 shutdown_watchdog_->Arm(); 1089} 1090