// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file defines a WatchDog thread that monitors the responsiveness of other
// browser threads like UI, IO, DB, FILE and CACHED threads. It also defines
// the ThreadWatcher class, which performs health checks on threads that would
// like to be watched, and the ThreadWatcherList class, which holds the list of
// all active ThreadWatcher objects.
//
// The ThreadWatcher class sends a ping message to the watched thread and the
// watched thread responds back with a pong message. It uploads the response
// time (difference between ping and pong times) as a histogram.
//
// TODO(raman): ThreadWatcher can detect hung threads. If a hung thread is
// detected, we should probably just crash, and allow the crash system to gather
// the stack trace.
//
// Example Usage:
//
//   The following is an example for watching responsiveness of IO thread.
//   sleep_time specifies how often ping messages have to be sent to IO thread.
//   unresponsive_time is the wait time after ping message is sent, to check if
//   we have received pong message or not.
//
//   base::TimeDelta sleep_time = base::TimeDelta::FromSeconds(5);
//   base::TimeDelta unresponsive_time = base::TimeDelta::FromSeconds(10);
//   ThreadWatcher::StartWatching(BrowserThread::IO, "IO", sleep_time,
//                                unresponsive_time);

31#ifndef CHROME_BROWSER_METRICS_THREAD_WATCHER_H_
32#define CHROME_BROWSER_METRICS_THREAD_WATCHER_H_
33
34#include <map>
35#include <string>
36#include <vector>
37
38#include "base/basictypes.h"
39#include "base/gtest_prod_util.h"
40#include "base/memory/ref_counted.h"
41#include "base/memory/scoped_ptr.h"
42#include "base/message_loop.h"
43#include "base/metrics/histogram.h"
44#include "base/synchronization/lock.h"
45#include "base/task.h"
46#include "base/threading/thread.h"
47#include "base/time.h"
48#include "content/browser/browser_thread.h"
49#include "content/common/notification_observer.h"
50#include "content/common/notification_registrar.h"
51
52class CustomThreadWatcher;
53class ThreadWatcherList;
54
55// This class performs health check on threads that would like to be watched.
56class ThreadWatcher {
57 public:
58  // This method starts performing health check on the given thread_id. It will
59  // create ThreadWatcher object for the given thread_id, thread_name,
60  // sleep_time and unresponsive_time. sleep_time_ is the wait time between ping
61  // messages. unresponsive_time_ is the wait time after ping message is sent,
62  // to check if we have received pong message or not. It will register that
63  // ThreadWatcher object and activate the thread watching of the given
64  // thread_id.
65  static void StartWatching(const BrowserThread::ID& thread_id,
66                            const std::string& thread_name,
67                            const base::TimeDelta& sleep_time,
68                            const base::TimeDelta& unresponsive_time);
69
70  // Return the thread_id of the thread being watched.
71  BrowserThread::ID thread_id() const { return thread_id_; }
72
73  // Return the name of the thread being watched.
74  std::string thread_name() const { return thread_name_; }
75
76  // Return the sleep time between ping messages to be sent to the thread.
77  base::TimeDelta sleep_time() const { return sleep_time_; }
78
79  // Return the the wait time to check the responsiveness of the thread.
80  base::TimeDelta unresponsive_time() const { return unresponsive_time_; }
81
82  // Returns true if we are montioring the thread.
83  bool active() const { return active_; }
84
85  // Returns ping_time_ (used by unit tests).
86  base::TimeTicks ping_time() const { return ping_time_; }
87
88  // Returns ping_sequence_number_ (used by unit tests).
89  uint64 ping_sequence_number() const { return ping_sequence_number_; }
90
91 protected:
92  // Construct a ThreadWatcher for the given thread_id. sleep_time_ is the
93  // wait time between ping messages. unresponsive_time_ is the wait time after
94  // ping message is sent, to check if we have received pong message or not.
95  ThreadWatcher(const BrowserThread::ID& thread_id,
96                const std::string& thread_name,
97                const base::TimeDelta& sleep_time,
98                const base::TimeDelta& unresponsive_time);
99  virtual ~ThreadWatcher();
100
101  // This method activates the thread watching which starts ping/pong messaging.
102  virtual void ActivateThreadWatching();
103
104  // This method de-activates the thread watching and revokes all tasks.
105  virtual void DeActivateThreadWatching();
106
107  // This will ensure that the watching is actively taking place, and awaken
108  // (i.e., post a PostPingMessage) if the watcher has stopped pinging due to
109  // lack of user activity. It will also reset ping_count_ to kPingCount.
110  virtual void WakeUp();
111
112  // This method records when ping message was sent and it will Post a task
113  // (OnPingMessage) to the watched thread that does nothing but respond with
114  // OnPongMessage. It also posts a task (OnCheckResponsiveness) to check
115  // responsiveness of monitored thread that would be called after waiting
116  // unresponsive_time_.
117  // This method is accessible on WatchDogThread.
118  virtual void PostPingMessage();
119
120  // This method handles a Pong Message from watched thread. It will track the
121  // response time (pong time minus ping time) via histograms. It posts a
122  // PostPingMessage task that would be called after waiting sleep_time_.  It
123  // increments ping_sequence_number_ by 1.
124  // This method is accessible on WatchDogThread.
125  virtual void OnPongMessage(uint64 ping_sequence_number);
126
127  // This method will determine if the watched thread is responsive or not. If
128  // the latest ping_sequence_number_ is not same as the ping_sequence_number
129  // that is passed in, then we can assume that watched thread has responded
130  // with a pong message.
131  // This method is accessible on WatchDogThread.
132  virtual bool OnCheckResponsiveness(uint64 ping_sequence_number);
133
134 private:
135  friend class ThreadWatcherList;
136
137  // Allow tests to access our innards for testing purposes.
138  FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, Registration);
139  FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, ThreadResponding);
140  FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, ThreadNotResponding);
141  FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, MultipleThreadsResponding);
142  FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, MultipleThreadsNotResponding);
143
144  // Post constructor initialization.
145  void Initialize();
146
147  // Watched thread does nothing except post callback_task to the WATCHDOG
148  // Thread. This method is called on watched thread.
149  static void OnPingMessage(const BrowserThread::ID& thread_id,
150                            Task* callback_task);
151
152  // This is the number of ping messages to be sent when the user is idle.
153  // ping_count_ will be initialized to kPingCount whenever user becomes active.
154  static const int kPingCount;
155
156  // The thread_id of the thread being watched. Only one instance can exist for
157  // the given thread_id of the thread being watched.
158  const BrowserThread::ID thread_id_;
159
160  // The name of the thread being watched.
161  const std::string thread_name_;
162
163  // It is the sleep time between between the receipt of a pong message back,
164  // and the sending of another ping message.
165  const base::TimeDelta sleep_time_;
166
167  // It is the duration from sending a ping message, until we check status to be
168  // sure a pong message has been returned.
169  const base::TimeDelta unresponsive_time_;
170
171  // This is the last time when ping message was sent.
172  base::TimeTicks ping_time_;
173
174  // This is the sequence number of the next ping for which there is no pong. If
175  // the instance is sleeping, then it will be the sequence number for the next
176  // ping.
177  uint64 ping_sequence_number_;
178
179  // This is set to true if thread watcher is watching.
180  bool active_;
181
182  // The counter tracks least number of ping messages that will be sent to
183  // watched thread before the ping-pong mechanism will go into an extended
184  // sleep. If this value is zero, then the mechanism is in an extended sleep,
185  // and awaiting some observed user action before continuing.
186  int ping_count_;
187
188  // Histogram that keeps track of response times for the watched thread.
189  base::Histogram* histogram_;
190
191  // We use this factory to create callback tasks for ThreadWatcher object. We
192  // use this during ping-pong messaging between WatchDog thread and watched
193  // thread.
194  ScopedRunnableMethodFactory<ThreadWatcher> method_factory_;
195
196  DISALLOW_COPY_AND_ASSIGN(ThreadWatcher);
197};
198
199// Class with a list of all active thread watchers.  A thread watcher is active
200// if it has been registered, which includes determing the histogram name. This
201// class provides utility functions to start and stop watching all browser
202// threads. Only one instance of this class exists.
203class ThreadWatcherList : public NotificationObserver {
204 public:
205  // A map from BrowserThread to the actual instances.
206  typedef std::map<BrowserThread::ID, ThreadWatcher*> RegistrationList;
207
208  // This singleton holds the global list of registered ThreadWatchers.
209  ThreadWatcherList();
210  // Destructor deletes all registered ThreadWatcher instances.
211  virtual ~ThreadWatcherList();
212
213  // Register() stores a pointer to the given ThreadWatcher in a global map.
214  static void Register(ThreadWatcher* watcher);
215
216  // This method returns true if the ThreadWatcher object is registerd.
217  static bool IsRegistered(const BrowserThread::ID thread_id);
218
219  // This method posts a task on WatchDogThread to start watching all browser
220  // threads.
221  // This method is accessible on UI thread.
222  static void StartWatchingAll();
223
224  // This method posts a task on WatchDogThread to RevokeAll tasks and to
225  // deactive thread watching of other threads and tell NotificationService to
226  // stop calling Observe.
227  // This method is accessible on UI thread.
228  static void StopWatchingAll();
229
230  // RemoveAll NotificationTypes that are being observed.
231  // This method is accessible on UI thread.
232  static void RemoveNotifications();
233
234 private:
235  // Allow tests to access our innards for testing purposes.
236  FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, Registration);
237
238  // Delete all thread watcher objects and remove them from global map.
239  // This method is accessible on WatchDogThread.
240  void DeleteAll();
241
242  // This will ensure that the watching is actively taking place. It will wakeup
243  // all thread watchers every 2 seconds. This is the implementation of
244  // NotificationObserver. When a matching notification is posted to the
245  // notification service, this method is called.
246  // This method is accessible on UI thread.
247  virtual void Observe(NotificationType type,
248                       const NotificationSource& source,
249                       const NotificationDetails& details);
250
251  // This will ensure that the watching is actively taking place, and awaken
252  // all thread watchers that are registered.
253  // This method is accessible on WatchDogThread.
254  virtual void WakeUpAll();
255
256  // The Find() method can be used to test to see if a given ThreadWatcher was
257  // already registered, or to retrieve a pointer to it from the global map.
258  static ThreadWatcher* Find(const BrowserThread::ID& thread_id);
259
260  // Helper function should be called only while holding lock_.
261  ThreadWatcher* PreLockedFind(const BrowserThread::ID& thread_id);
262
263  static ThreadWatcherList* global_;  // The singleton of this class.
264
265  // Lock for access to registered_.
266  base::Lock lock_;
267
268  // Map of all registered watched threads, from thread_id to ThreadWatcher.
269  RegistrationList registered_;
270
271  // The registrar that holds NotificationTypes to be observed.
272  NotificationRegistrar registrar_;
273
274  // This is the last time when woke all thread watchers up.
275  base::TimeTicks last_wakeup_time_;
276
277  DISALLOW_COPY_AND_ASSIGN(ThreadWatcherList);
278};
279
280// Class for WatchDogThread and in its Init method, we start watching UI, IO,
281// DB, FILE, CACHED threads.
282class WatchDogThread : public base::Thread {
283 public:
284  // Constructor.
285  WatchDogThread();
286
287  // Destroys the thread and stops the thread.
288  virtual ~WatchDogThread();
289
290  // Callable on any thread.  Returns whether you're currently on a
291  // watchdog_thread_.
292  static bool CurrentlyOnWatchDogThread();
293
294  // These are the same methods in message_loop.h, but are guaranteed to either
295  // get posted to the MessageLoop if it's still alive, or be deleted otherwise.
296  // They return true iff the watchdog thread existed and the task was posted.
297  // Note that even if the task is posted, there's no guarantee that it will
298  // run, since the target thread may already have a Quit message in its queue.
299  static bool PostTask(const tracked_objects::Location& from_here, Task* task);
300  static bool PostDelayedTask(const tracked_objects::Location& from_here,
301                              Task* task,
302                              int64 delay_ms);
303
304 protected:
305  virtual void Init();
306  virtual void CleanUp();
307  virtual void CleanUpAfterMessageLoopDestruction();
308
309 private:
310  static bool PostTaskHelper(
311      const tracked_objects::Location& from_here,
312      Task* task,
313      int64 delay_ms);
314
315  // This lock protects watchdog_thread_.
316  static base::Lock lock_;
317
318  static WatchDogThread* watchdog_thread_;  // The singleton of this class.
319
320  DISALLOW_COPY_AND_ASSIGN(WatchDogThread);
321};
322
323// DISABLE_RUNNABLE_METHOD_REFCOUNT is a convenience macro for disabling
324// refcounting of ThreadWatcher and ThreadWatcherList classes.
325DISABLE_RUNNABLE_METHOD_REFCOUNT(ThreadWatcher);
326DISABLE_RUNNABLE_METHOD_REFCOUNT(ThreadWatcherList);
327
328#endif  // CHROME_BROWSER_METRICS_THREAD_WATCHER_H_
329