1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
// This file defines a WatchDog thread that monitors the responsiveness of other
// browser threads like UI, IO, DB, FILE and CACHE threads. It also defines the
// ThreadWatcher class, which performs health checks on threads that would like
// to be watched, and the ThreadWatcherList class, which holds the list of all
// active ThreadWatcher objects.
10//
11// ThreadWatcher class sends ping message to the watched thread and the watched
12// thread responds back with a pong message. It uploads response time
13// (difference between ping and pong times) as a histogram.
14//
// TODO(raman): ThreadWatcher can detect hung threads. If a hung thread is
// detected, we should probably just crash, and allow the crash system to gather
// the stack trace.
18//
19// Example Usage:
20//
21//   The following is an example for watching responsiveness of watched (IO)
22//   thread. |sleep_time| specifies how often ping messages have to be sent to
23//   watched (IO) thread. |unresponsive_time| is the wait time after ping
24//   message is sent, to check if we have received pong message or not.
25//   |unresponsive_threshold| specifies the number of unanswered ping messages
26//   after which watched (IO) thread is considered as not responsive.
27//   |crash_on_hang| specifies if we want to crash the browser when the watched
28//   (IO) thread has become sufficiently unresponsive, while other threads are
29//   sufficiently responsive. |live_threads_threshold| specifies the number of
30//   browser threads that are to be responsive when we want to crash the browser
31//   because of hung watched (IO) thread.
32//
//   base::TimeDelta sleep_time = base::TimeDelta::FromSeconds(5);
//   base::TimeDelta unresponsive_time = base::TimeDelta::FromSeconds(10);
//   uint32 unresponsive_threshold = ThreadWatcherList::kUnresponsiveCount;
//   bool crash_on_hang = false;
//   uint32 live_threads_threshold = ThreadWatcherList::kLiveThreadsThreshold;
//   ThreadWatcher::StartWatching(
//       ThreadWatcher::WatchingParams(
//           BrowserThread::IO, "IO", sleep_time, unresponsive_time,
//           unresponsive_threshold, crash_on_hang, live_threads_threshold));
41
42#ifndef CHROME_BROWSER_METRICS_THREAD_WATCHER_H_
43#define CHROME_BROWSER_METRICS_THREAD_WATCHER_H_
44
45#include <map>
46#include <string>
47#include <vector>
48
49#include "base/basictypes.h"
50#include "base/command_line.h"
51#include "base/gtest_prod_util.h"
52#include "base/memory/ref_counted.h"
53#include "base/memory/weak_ptr.h"
54#include "base/message_loop/message_loop.h"
55#include "base/metrics/histogram.h"
56#include "base/synchronization/lock.h"
57#include "base/threading/platform_thread.h"
58#include "base/threading/thread.h"
59#include "base/threading/watchdog.h"
60#include "base/time/time.h"
61#include "content/public/browser/browser_thread.h"
62#include "content/public/browser/notification_observer.h"
63#include "content/public/browser/notification_registrar.h"
64
// Forward declarations for classes that are named in this header before (or
// without) being defined here.
class CustomThreadWatcher;
class StartupTimeBomb;
class ThreadWatcherList;
class ThreadWatcherObserver;
69
70// This class performs health check on threads that would like to be watched.
71class ThreadWatcher {
72 public:
73  // base::Bind supports methods with up to 6 parameters. WatchingParams is used
74  // as a workaround that limitation for invoking ThreadWatcher::StartWatching.
75  struct WatchingParams {
76    const content::BrowserThread::ID& thread_id;
77    const std::string& thread_name;
78    const base::TimeDelta& sleep_time;
79    const base::TimeDelta& unresponsive_time;
80    uint32 unresponsive_threshold;
81    bool crash_on_hang;
82    uint32 live_threads_threshold;
83
84    WatchingParams(const content::BrowserThread::ID& thread_id_in,
85                   const std::string& thread_name_in,
86                   const base::TimeDelta& sleep_time_in,
87                   const base::TimeDelta& unresponsive_time_in,
88                   uint32 unresponsive_threshold_in,
89                   bool crash_on_hang_in,
90                   uint32 live_threads_threshold_in)
91        : thread_id(thread_id_in),
92          thread_name(thread_name_in),
93          sleep_time(sleep_time_in),
94          unresponsive_time(unresponsive_time_in),
95          unresponsive_threshold(unresponsive_threshold_in),
96          crash_on_hang(crash_on_hang_in),
97          live_threads_threshold(live_threads_threshold_in) {
98    }
99  };
100
101  // This method starts performing health check on the given |thread_id|. It
102  // will create ThreadWatcher object for the given |thread_id|, |thread_name|.
103  // |sleep_time| is the wait time between ping messages. |unresponsive_time| is
104  // the wait time after ping message is sent, to check if we have received pong
105  // message or not. |unresponsive_threshold| is used to determine if the thread
106  // is responsive or not. The watched thread is considered unresponsive if it
107  // hasn't responded with a pong message for |unresponsive_threshold| number of
108  // ping messages. |crash_on_hang| specifies if browser should be crashed when
109  // the watched thread is unresponsive. |live_threads_threshold| specifies the
110  // number of browser threads that are to be responsive when we want to crash
111  // the browser and watched thread has become sufficiently unresponsive. It
112  // will register that ThreadWatcher object and activate the thread watching of
113  // the given thread_id.
114  static void StartWatching(const WatchingParams& params);
115
116  // Return the |thread_id_| of the thread being watched.
117  content::BrowserThread::ID thread_id() const { return thread_id_; }
118
119  // Return the name of the thread being watched.
120  std::string thread_name() const { return thread_name_; }
121
122  // Return the sleep time between ping messages to be sent to the thread.
123  base::TimeDelta sleep_time() const { return sleep_time_; }
124
125  // Return the the wait time to check the responsiveness of the thread.
126  base::TimeDelta unresponsive_time() const { return unresponsive_time_; }
127
128  // Returns true if we are montioring the thread.
129  bool active() const { return active_; }
130
131  // Returns |ping_time_| (used by unit tests).
132  base::TimeTicks ping_time() const { return ping_time_; }
133
134  // Returns |ping_sequence_number_| (used by unit tests).
135  uint64 ping_sequence_number() const { return ping_sequence_number_; }
136
137 protected:
138  // Construct a ThreadWatcher for the given |thread_id|. |sleep_time| is the
139  // wait time between ping messages. |unresponsive_time| is the wait time after
140  // ping message is sent, to check if we have received pong message or not.
141  explicit ThreadWatcher(const WatchingParams& params);
142
143  virtual ~ThreadWatcher();
144
145  // This method activates the thread watching which starts ping/pong messaging.
146  virtual void ActivateThreadWatching();
147
148  // This method de-activates the thread watching and revokes all tasks.
149  virtual void DeActivateThreadWatching();
150
151  // This will ensure that the watching is actively taking place, and awaken
152  // (i.e., post a PostPingMessage()) if the watcher has stopped pinging due to
153  // lack of user activity. It will also reset |ping_count_| to
154  // |unresponsive_threshold_|.
155  virtual void WakeUp();
156
157  // This method records when ping message was sent and it will Post a task
158  // (OnPingMessage()) to the watched thread that does nothing but respond with
159  // OnPongMessage(). It also posts a task (OnCheckResponsiveness()) to check
160  // responsiveness of monitored thread that would be called after waiting
161  // |unresponsive_time_|.
162  // This method is accessible on WatchDogThread.
163  virtual void PostPingMessage();
164
165  // This method handles a Pong Message from watched thread. It will track the
166  // response time (pong time minus ping time) via histograms. It posts a
167  // PostPingMessage() task that would be called after waiting |sleep_time_|. It
168  // increments |ping_sequence_number_| by 1.
169  // This method is accessible on WatchDogThread.
170  virtual void OnPongMessage(uint64 ping_sequence_number);
171
172  // This method will determine if the watched thread is responsive or not. If
173  // the latest |ping_sequence_number_| is not same as the
174  // |ping_sequence_number| that is passed in, then we can assume that watched
175  // thread has responded with a pong message.
176  // This method is accessible on WatchDogThread.
177  virtual void OnCheckResponsiveness(uint64 ping_sequence_number);
178
179  // Set by OnCheckResponsiveness when it determines if the watched thread is
180  // responsive or not.
181  bool responsive_;
182
183 private:
184  friend class ThreadWatcherList;
185  friend class CustomThreadWatcher;
186
187  // Allow tests to access our innards for testing purposes.
188  FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, Registration);
189  FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, ThreadResponding);
190  FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, ThreadNotResponding);
191  FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, MultipleThreadsResponding);
192  FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, MultipleThreadsNotResponding);
193
194  // Post constructor initialization.
195  void Initialize();
196
197  // Watched thread does nothing except post callback_task to the WATCHDOG
198  // Thread. This method is called on watched thread.
199  static void OnPingMessage(const content::BrowserThread::ID& thread_id,
200                            const base::Closure& callback_task);
201
202  // This method resets |unresponsive_count_| to zero because watched thread is
203  // responding to the ping message with a pong message.
204  void ResetHangCounters();
205
206  // This method records watched thread is not responding to the ping message.
207  // It increments |unresponsive_count_| by 1.
208  void GotNoResponse();
209
210  // This method returns true if the watched thread has not responded with a
211  // pong message for |unresponsive_threshold_| number of ping messages.
212  bool IsVeryUnresponsive();
213
214  // The |thread_id_| of the thread being watched. Only one instance can exist
215  // for the given |thread_id_| of the thread being watched.
216  const content::BrowserThread::ID thread_id_;
217
218  // The name of the thread being watched.
219  const std::string thread_name_;
220
221  // Used to post messages to watched thread.
222  scoped_refptr<base::MessageLoopProxy> watched_loop_;
223
224  // It is the sleep time between the receipt of a pong message back, and the
225  // sending of another ping message.
226  const base::TimeDelta sleep_time_;
227
228  // It is the duration from sending a ping message, until we check status to be
229  // sure a pong message has been returned.
230  const base::TimeDelta unresponsive_time_;
231
232  // This is the last time when ping message was sent.
233  base::TimeTicks ping_time_;
234
235  // This is the last time when we got pong message.
236  base::TimeTicks pong_time_;
237
238  // This is the sequence number of the next ping for which there is no pong. If
239  // the instance is sleeping, then it will be the sequence number for the next
240  // ping.
241  uint64 ping_sequence_number_;
242
243  // This is set to true if thread watcher is watching.
244  bool active_;
245
246  // The counter tracks least number of ping messages that will be sent to
247  // watched thread before the ping-pong mechanism will go into an extended
248  // sleep. If this value is zero, then the mechanism is in an extended sleep,
249  // and awaiting some observed user action before continuing.
250  int ping_count_;
251
252  // Histogram that keeps track of response times for the watched thread.
253  base::HistogramBase* response_time_histogram_;
254
255  // Histogram that keeps track of unresponsive time since the last pong message
256  // when we got no response (GotNoResponse()) from the watched thread.
257  base::HistogramBase* unresponsive_time_histogram_;
258
259  // Histogram that keeps track of how many threads are responding when we got
260  // no response (GotNoResponse()) from the watched thread.
261  base::HistogramBase* responsive_count_histogram_;
262
263  // Histogram that keeps track of how many threads are not responding when we
264  // got no response (GotNoResponse()) from the watched thread. Count includes
265  // the thread that got no response.
266  base::HistogramBase* unresponsive_count_histogram_;
267
268  // This counter tracks the unresponsiveness of watched thread. If this value
269  // is zero then watched thread has responded with a pong message. This is
270  // incremented by 1 when we got no response (GotNoResponse()) from the watched
271  // thread.
272  uint32 unresponsive_count_;
273
274  // This is set to true when we would have crashed the browser because the
275  // watched thread hasn't responded at least |unresponsive_threshold_| times.
276  // It is reset to false when watched thread responds with a pong message.
277  bool hung_processing_complete_;
278
279  // This is used to determine if the watched thread is responsive or not. If
280  // watched thread's |unresponsive_count_| is greater than or equal to
281  // |unresponsive_threshold_| then we would consider it as unresponsive.
282  uint32 unresponsive_threshold_;
283
284  // This is set to true if we want to crash the browser when the watched thread
285  // has become sufficiently unresponsive, while other threads are sufficiently
286  // responsive.
287  bool crash_on_hang_;
288
289  // This specifies the number of browser threads that are to be responsive when
290  // we want to crash the browser because watched thread has become sufficiently
291  // unresponsive.
292  uint32 live_threads_threshold_;
293
294  // We use this factory to create callback tasks for ThreadWatcher object. We
295  // use this during ping-pong messaging between WatchDog thread and watched
296  // thread.
297  base::WeakPtrFactory<ThreadWatcher> weak_ptr_factory_;
298
299  DISALLOW_COPY_AND_ASSIGN(ThreadWatcher);
300};
301
// Class with a list of all active thread watchers.  A thread watcher is active
// if it has been registered, which includes determining the histogram name.
// This class provides utility functions to start and stop watching all browser
// threads. Only one instance of this class exists.
306class ThreadWatcherList {
307 public:
308  // A map from BrowserThread to the actual instances.
309  typedef std::map<content::BrowserThread::ID, ThreadWatcher*> RegistrationList;
310
311  // A map from thread names (UI, IO, etc) to |CrashDataThresholds|.
312  // |live_threads_threshold| specifies the maximum number of browser threads
313  // that have to be responsive when we want to crash the browser because of
314  // hung watched thread. This threshold allows us to either look for a system
315  // deadlock, or look for a solo hung thread. A small live_threads_threshold
316  // looks for a broad deadlock (few browser threads left running), and a large
317  // threshold looks for a single hung thread (this in only appropriate for a
318  // thread that *should* never have much jank, such as the IO).
319  //
320  // |unresponsive_threshold| specifies the number of unanswered ping messages
321  // after which watched (UI, IO, etc) thread is considered as not responsive.
322  // We translate "time" (given in seconds) into a number of pings. As a result,
323  // we only declare a thread unresponsive when a lot of "time" has passed (many
324  // pings), and yet our pinging thread has continued to process messages (so we
325  // know the entire PC is not hung). Set this number higher to crash less
326  // often, and lower to crash more often.
327  //
328  // The map lists all threads (by name) that can induce a crash by hanging. It
329  // is populated from the command line, or given a default list.  See
330  // InitializeAndStartWatching() for the separate list of all threads that are
331  // watched, as they provide the system context of how hung *other* threads
332  // are.
333  //
334  // ThreadWatcher monitors five browser threads (i.e., UI, IO, DB, FILE,
335  // and CACHE). Out of the 5 threads, any subset may be watched, to potentially
336  // cause a crash. The following example's command line causes exactly 3
337  // threads to be watched.
338  //
339  // The example command line argument consists of "UI:3:18,IO:3:18,FILE:5:90".
340  // In that string, the first parameter specifies the thread_id: UI, IO or
341  // FILE. The second parameter specifies |live_threads_threshold|. For UI and
342  // IO threads, we would crash if the number of threads responding is less than
343  // or equal to 3. The third parameter specifies the unresponsive threshold
344  // seconds. This number is used to calculate |unresponsive_threshold|. In this
345  // example for UI and IO threads, we would crash if those threads don't
346  // respond for 18 seconds (or 9 unanswered ping messages) and for FILE thread,
347  // crash_seconds is set to 90 seconds (or 45 unanswered ping messages).
348  //
349  // The following examples explain how the data in |CrashDataThresholds|
350  // controls the crashes.
351  //
352  // Example 1: If the |live_threads_threshold| value for "IO" was 3 and
353  // unresponsive threshold seconds is 18 (or |unresponsive_threshold| is 9),
354  // then we would crash if the IO thread was hung (9 unanswered ping messages)
355  // and if at least one thread is responding and total responding threads is
356  // less than or equal to 3 (this thread, plus at least one other thread is
357  // unresponsive). We would not crash if none of the threads are responding, as
358  // we'd assume such large hang counts mean that the system is generally
359  // unresponsive.
360  // Example 2: If the |live_threads_threshold| value for "UI" was any number
361  // higher than 6 and unresponsive threshold seconds is 18 (or
362  // |unresponsive_threshold| is 9), then we would always crash if the UI thread
363  // was hung (9 unanswered ping messages), no matter what the other threads are
364  // doing.
365  // Example 3: If the |live_threads_threshold| value of "FILE" was 5 and
366  // unresponsive threshold seconds is 90 (or |unresponsive_threshold| is 45),
367  // then we would only crash if the FILE thread was the ONLY hung thread
368  // (because we watch 6 threads). If there was another unresponsive thread, we
369  // would not consider this a problem worth crashing for. FILE thread would be
370  // considered as hung if it didn't respond for 45 ping messages.
371  struct CrashDataThresholds {
372    CrashDataThresholds(uint32 live_threads_threshold,
373                        uint32 unresponsive_threshold);
374    CrashDataThresholds();
375
376    uint32 live_threads_threshold;
377    uint32 unresponsive_threshold;
378  };
379  typedef std::map<std::string, CrashDataThresholds> CrashOnHangThreadMap;
380
381  // This method posts a task on WatchDogThread to start watching all browser
382  // threads.
383  // This method is accessible on UI thread.
384  static void StartWatchingAll(const base::CommandLine& command_line);
385
386  // This method posts a task on WatchDogThread to RevokeAll tasks and to
387  // deactive thread watching of other threads and tell NotificationService to
388  // stop calling Observe.
389  // This method is accessible on UI thread.
390  static void StopWatchingAll();
391
392  // Register() stores a pointer to the given ThreadWatcher in a global map.
393  static void Register(ThreadWatcher* watcher);
394
395  // This method returns true if the ThreadWatcher object is registerd.
396  static bool IsRegistered(const content::BrowserThread::ID thread_id);
397
398  // This method returns number of responsive and unresponsive watched threads.
399  static void GetStatusOfThreads(uint32* responding_thread_count,
400                                 uint32* unresponding_thread_count);
401
402  // This will ensure that the watching is actively taking place, and awaken
403  // all thread watchers that are registered.
404  static void WakeUpAll();
405
406 private:
407  // Allow tests to access our innards for testing purposes.
408  friend class CustomThreadWatcher;
409  friend class ThreadWatcherListTest;
410  friend class ThreadWatcherTest;
411  FRIEND_TEST_ALL_PREFIXES(ThreadWatcherAndroidTest,
412                           ApplicationStatusNotification);
413  FRIEND_TEST_ALL_PREFIXES(ThreadWatcherListTest, Restart);
414  FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, ThreadNamesOnlyArgs);
415  FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, ThreadNamesAndLiveThresholdArgs);
416  FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, CrashOnHangThreadsAllArgs);
417
418  // This singleton holds the global list of registered ThreadWatchers.
419  ThreadWatcherList();
420
421  // Destructor deletes all registered ThreadWatcher instances.
422  virtual ~ThreadWatcherList();
423
424  // Parses the command line to get |crash_on_hang_threads| map from
425  // switches::kCrashOnHangThreads. |crash_on_hang_threads| is a map of
426  // |crash_on_hang| thread's names to |CrashDataThresholds|.
427  static void ParseCommandLine(
428      const base::CommandLine& command_line,
429      uint32* unresponsive_threshold,
430      CrashOnHangThreadMap* crash_on_hang_threads);
431
432  // Parses the argument |crash_on_hang_thread_names| and creates
433  // |crash_on_hang_threads| map of |crash_on_hang| thread's names to
434  // |CrashDataThresholds|. If |crash_on_hang_thread_names| doesn't specify
435  // |live_threads_threshold|, then it uses |default_live_threads_threshold| as
436  // the value. If |crash_on_hang_thread_names| doesn't specify |crash_seconds|,
437  // then it uses |default_crash_seconds| as the value.
438  static void ParseCommandLineCrashOnHangThreads(
439      const std::string& crash_on_hang_thread_names,
440      uint32 default_live_threads_threshold,
441      uint32 default_crash_seconds,
442      CrashOnHangThreadMap* crash_on_hang_threads);
443
444  // This constructs the |ThreadWatcherList| singleton and starts watching
445  // browser threads by calling StartWatching() on each browser thread that is
446  // watched. It disarms StartupTimeBomb.
447  static void InitializeAndStartWatching(
448      uint32 unresponsive_threshold,
449      const CrashOnHangThreadMap& crash_on_hang_threads);
450
451  // This method calls ThreadWatcher::StartWatching() to perform health check on
452  // the given |thread_id|.
453  static void StartWatching(
454      const content::BrowserThread::ID& thread_id,
455      const std::string& thread_name,
456      const base::TimeDelta& sleep_time,
457      const base::TimeDelta& unresponsive_time,
458      uint32 unresponsive_threshold,
459      const CrashOnHangThreadMap& crash_on_hang_threads);
460
461  // Delete all thread watcher objects and remove them from global map. It also
462  // deletes |g_thread_watcher_list_|.
463  static void DeleteAll();
464
465  // The Find() method can be used to test to see if a given ThreadWatcher was
466  // already registered, or to retrieve a pointer to it from the global map.
467  static ThreadWatcher* Find(const content::BrowserThread::ID& thread_id);
468
469  // Sets |g_stopped_| on the WatchDogThread. This is necessary to reflect the
470  // state between the delayed |StartWatchingAll| and the immediate
471  // |StopWatchingAll|.
472  static void SetStopped(bool stopped);
473
474  // The singleton of this class and is used to keep track of information about
475  // threads that are being watched.
476  static ThreadWatcherList* g_thread_watcher_list_;
477
478  // StartWatchingAll() is delayed in relation to StopWatchingAll(), so if
479  // a Stop comes first, prevent further initialization.
480  static bool g_stopped_;
481
482  // This is the wait time between ping messages.
483  static const int kSleepSeconds;
484
485  // This is the wait time after ping message is sent, to check if we have
486  // received pong message or not.
487  static const int kUnresponsiveSeconds;
488
489  // Default values for |unresponsive_threshold|.
490  static const int kUnresponsiveCount;
491
492  // Default values for |live_threads_threshold|.
493  static const int kLiveThreadsThreshold;
494
495  // Default value for the delay until |InitializeAndStartWatching| is called.
496  // Non-const for tests.
497  static int g_initialize_delay_seconds;
498
499  // Map of all registered watched threads, from thread_id to ThreadWatcher.
500  RegistrationList registered_;
501
502  DISALLOW_COPY_AND_ASSIGN(ThreadWatcherList);
503};
504
505// This class ensures that the thread watching is actively taking place. Only
506// one instance of this class exists.
507class ThreadWatcherObserver : public content::NotificationObserver {
508 public:
509  // Registers |g_thread_watcher_observer_| as the Notifications observer.
510  // |wakeup_interval| specifies how often to wake up thread watchers. This
511  // method is accessible on UI thread.
512  static void SetupNotifications(const base::TimeDelta& wakeup_interval);
513
514  // Removes all ints from |registrar_| and deletes
515  // |g_thread_watcher_observer_|. This method is accessible on UI thread.
516  static void RemoveNotifications();
517
518 private:
519  // Constructor of |g_thread_watcher_observer_| singleton.
520  explicit ThreadWatcherObserver(const base::TimeDelta& wakeup_interval);
521
522  // Destructor of |g_thread_watcher_observer_| singleton.
523  virtual ~ThreadWatcherObserver();
524
525  // This ensures all thread watchers are active because there is some user
526  // activity. It will wake up all thread watchers every |wakeup_interval_|
527  // seconds. This is the implementation of content::NotificationObserver. When
528  // a matching notification is posted to the notification service, this method
529  // is called.
530  virtual void Observe(int type,
531                       const content::NotificationSource& source,
532                       const content::NotificationDetails& details) OVERRIDE;
533
534  // The singleton of this class.
535  static ThreadWatcherObserver* g_thread_watcher_observer_;
536
537  // The registrar that holds ints to be observed.
538  content::NotificationRegistrar registrar_;
539
540  // This is the last time when woke all thread watchers up.
541  base::TimeTicks last_wakeup_time_;
542
543  // It is the time interval between wake up calls to thread watchers.
544  const base::TimeDelta wakeup_interval_;
545
546  DISALLOW_COPY_AND_ASSIGN(ThreadWatcherObserver);
547};
548
// Class for WatchDogThread. In its Init method, we start watching the UI, IO,
// DB, FILE and CACHE threads.
551class WatchDogThread : public base::Thread {
552 public:
553  // Constructor.
554  WatchDogThread();
555
556  // Destroys the thread and stops the thread.
557  virtual ~WatchDogThread();
558
559  // Callable on any thread.  Returns whether you're currently on a
560  // WatchDogThread.
561  static bool CurrentlyOnWatchDogThread();
562
563  // These are the same methods in message_loop.h, but are guaranteed to either
564  // get posted to the MessageLoop if it's still alive, or be deleted otherwise.
565  // They return true iff the watchdog thread existed and the task was posted.
566  // Note that even if the task is posted, there's no guarantee that it will
567  // run, since the target thread may already have a Quit message in its queue.
568  static bool PostTask(const tracked_objects::Location& from_here,
569                       const base::Closure& task);
570  static bool PostDelayedTask(const tracked_objects::Location& from_here,
571                              const base::Closure& task,
572                              base::TimeDelta delay);
573
574 protected:
575  virtual void Init() OVERRIDE;
576  virtual void CleanUp() OVERRIDE;
577
578 private:
579  static bool PostTaskHelper(
580      const tracked_objects::Location& from_here,
581      const base::Closure& task,
582      base::TimeDelta delay);
583
584  DISALLOW_COPY_AND_ASSIGN(WatchDogThread);
585};
586
587// This is a wrapper class for getting the crash dumps of the hangs during
588// startup.
589class StartupTimeBomb {
590 public:
591  // This singleton is instantiated when the browser process is launched.
592  StartupTimeBomb();
593
594  // Destructor disarm's startup_watchdog_ (if it is arm'ed) so that alarm
595  // doesn't go off.
596  ~StartupTimeBomb();
597
598  // Constructs |startup_watchdog_| which spawns a thread and starts timer.
599  // |duration| specifies how long |startup_watchdog_| will wait before it
600  // calls alarm.
601  void Arm(const base::TimeDelta& duration);
602
603  // Disarms |startup_watchdog_| thread and then deletes it which stops the
604  // Watchdog thread.
605  void Disarm();
606
607  // Disarms |g_startup_timebomb_|.
608  static void DisarmStartupTimeBomb();
609
610 private:
611  // Deletes |startup_watchdog_| if it is joinable. If |startup_watchdog_| is
612  // not joinable, then it will post a delayed task to try again.
613  void DeleteStartupWatchdog();
614
615  // The singleton of this class.
616  static StartupTimeBomb* g_startup_timebomb_;
617
618  // Watches for hangs during startup until it is disarm'ed.
619  base::Watchdog* startup_watchdog_;
620
621  // The |thread_id_| on which this object is constructed.
622  const base::PlatformThreadId thread_id_;
623
624  DISALLOW_COPY_AND_ASSIGN(StartupTimeBomb);
625};
626
627// This is a wrapper class for detecting hangs during shutdown.
628class ShutdownWatcherHelper {
629 public:
630  // Create an empty holder for |shutdown_watchdog_|.
631  ShutdownWatcherHelper();
632
633  // Destructor disarm's shutdown_watchdog_ so that alarm doesn't go off.
634  ~ShutdownWatcherHelper();
635
636  // Constructs ShutdownWatchDogThread which spawns a thread and starts timer.
637  // |duration| specifies how long it will wait before it calls alarm.
638  void Arm(const base::TimeDelta& duration);
639
640 private:
641  // shutdown_watchdog_ watches for hangs during shutdown.
642  base::Watchdog* shutdown_watchdog_;
643
644  // The |thread_id_| on which this object is constructed.
645  const base::PlatformThreadId thread_id_;
646
647  DISALLOW_COPY_AND_ASSIGN(ShutdownWatcherHelper);
648};
649
650#endif  // CHROME_BROWSER_METRICS_THREAD_WATCHER_H_
651