metrics_service.h revision 3345a6884c488ff3a535c2c9acdd33d74b37e311
1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// This file defines a service that collects information about the user
6// experience in order to help improve future versions of the app.
7
8#ifndef CHROME_BROWSER_METRICS_METRICS_SERVICE_H_
9#define CHROME_BROWSER_METRICS_METRICS_SERVICE_H_
10#pragma once
11
12#include <map>
13#include <string>
14#include <vector>
15
16#include "base/basictypes.h"
17#include "base/gtest_prod_util.h"
18#include "base/scoped_ptr.h"
19#include "chrome/common/metrics_helpers.h"
20#include "chrome/common/net/url_fetcher.h"
21#include "chrome/common/notification_observer.h"
22#include "chrome/common/notification_registrar.h"
23
24#if defined(OS_CHROMEOS)
25#include "chrome/browser/chromeos/external_metrics.h"
26#endif
27
// Forward declarations of types named in this header; their full definitions
// are not visible here.
28class BookmarkModel;
29class BookmarkNode;
30class DictionaryValue;
31class ListValue;
32class HistogramSynchronizer;
33class MetricsLogBase;
34class PrefService;
35class TemplateURLModel;
36struct WebPluginInfo;
37
38// Forward declaration of the xmlNode to avoid having tons of gyp files
39// needing to depend on the libxml third party lib.
// xmlNodePtr is the parameter type taken by the XML response-parsing helpers
// declared in MetricsService below.
40struct _xmlNode;
41typedef struct _xmlNode xmlNode;
42typedef xmlNode* xmlNodePtr;
43
44
// MetricsService collects user-experience metrics into logs and, when
// permitted, transmits them to the metrics server.  It receives browser
// notifications through NotificationObserver and transmission-completion
// callbacks through URLFetcher::Delegate.
45class MetricsService : public NotificationObserver,
46                       public URLFetcher::Delegate,
47                       public MetricsServiceBase {
48 public:
49  // Used to produce a histogram that keeps track of the status of recalling
50  // persisted logs.
51  enum LogRecallStatus {
52    RECALL_SUCCESS,         // We were able to correctly recall a persisted log.
53    LIST_EMPTY,             // Attempting to recall from an empty list.
54    LIST_SIZE_MISSING,      // Failed to recover list size using GetAsInteger().
55    LIST_SIZE_TOO_SMALL,    // Too few elements in the list (less than 3).
56    LIST_SIZE_CORRUPTION,   // List size is not as expected.
57    LOG_STRING_CORRUPTION,  // Failed to recover log string using GetAsString().
58    CHECKSUM_CORRUPTION,    // Failed to verify checksum.
59    CHECKSUM_STRING_CORRUPTION,  // Failed to recover checksum string using
60                                 // GetAsString().
61    DECODE_FAIL,            // Failed to decode log.
62    END_RECALL_STATUS       // Number of bins to use to create the histogram.
63  };
64
65  // TODO(ziadh): This is here temporarily for a side experiment. Remove later
66  // on.
67  enum LogStoreStatus {
68    STORE_SUCCESS,    // Successfully persisted log.
69    ENCODE_FAIL,      // Failed to encode log.
70    COMPRESS_FAIL,    // Failed to compress log.
71    END_STORE_STATUS  // Number of bins to use to create the histogram.
72  };
73
74  MetricsService();
75  virtual ~MetricsService();
76
77  // Sets whether the user permits uploading.  The argument of this function
78  // should match the checkbox in Options.
79  void SetUserPermitsUpload(bool enabled);
80
81  // Start/stop the metrics recording and uploading machine.  These should be
82  // used on startup and when the user clicks the checkbox in the prefs.
83  // StartRecordingOnly starts the metrics recording but not reporting, for use
84  // in tests only.
85  void Start();
86  void StartRecordingOnly();
87  void Stop();
88
89  // At startup, prefs needs to be called with a list of all the pref names and
90  // types we'll be using.
91  static void RegisterPrefs(PrefService* local_state);
92
93  // Implementation of NotificationObserver
94  virtual void Observe(NotificationType type,
95                       const NotificationSource& source,
96                       const NotificationDetails& details);
97
98  // This should be called when the application is shutting down, to record
99  // the fact that this was a clean shutdown in the stability metrics.
100  void RecordCleanShutdown();
101
102  // Invoked when we get a WM_SESSIONEND. This places a value in prefs that is
103  // reset when RecordCompletedSessionEnd is invoked.
104  void RecordStartOfSessionEnd();
105
106  // This should be called when the application is shutting down. It records
107  // that session end was successful.
108  void RecordCompletedSessionEnd();
109
110  // Saves in the preferences if the crash report registration was successful.
111  // This count is eventually sent via UMA logs.
112  void RecordBreakpadRegistration(bool success);
113
114  // Saves in the preferences if the browser is running under a debugger.
115  // This count is eventually sent via UMA logs.
116  void RecordBreakpadHasDebugger(bool has_debugger);
117
118  // Save any unsent logs into a persistent store in a pref.  We always do this
119  // at shutdown, but we can do it as we reduce the list as well.
120  void StoreUnsentLogs();
121
122#if defined(OS_CHROMEOS)
123  // Returns the hardware class of the Chrome OS device (e.g.,
124  // hardware qualification ID), or "unknown" if the hardware class is
125  // not available.  The hardware class identifies the configured
126  // system components such as CPU, WiFi adapter, etc.  Note that this
127  // routine invokes an external utility to determine the hardware
128  // class.
129  static std::string GetHardwareClass();
130
131  // Start the external metrics service, which collects metrics from Chrome OS
132  // and passes them to UMA.
133  void StartExternalMetrics();
134#endif
135
     // Accessors for the recording/reporting state.  Defined out of line;
     // presumably they return recording_active_ and reporting_active_ -- confirm
     // in the .cc file.
136  bool recording_active() const;
137  bool reporting_active() const;
138
139 private:
140  // The MetricsService has a lifecycle that is stored as a state.
141  // See metrics_service.cc for description of this lifecycle.
142  enum State {
143    INITIALIZED,            // Constructor was called.
144    INIT_TASK_SCHEDULED,    // Waiting for deferred init tasks to complete.
145    INIT_TASK_DONE,         // Waiting for timer to send initial log.
146    INITIAL_LOG_READY,      // Initial log generated, and waiting for reply.
147    SEND_OLD_INITIAL_LOGS,  // Sending unsent initial logs from previous session.
148    SENDING_OLD_LOGS,       // Sending unsent ongoing logs from previous session.
149    SENDING_CURRENT_LOGS,   // Sending standard current logs as they accrue.
150  };
151
     // Helper classes for the deferred init task; only forward-declared in this
     // header.
152  class InitTask;
153  class InitTaskComplete;
154
155  // Callback to let us know that the init task is done.
156  void OnInitTaskComplete(
157      const std::string& hardware_class,
158      const std::vector<WebPluginInfo>& plugins);
159
160  // When we start a new version of Chromium (different from our last run), we
161  // need to discard the old crash stats so that we don't attribute crashes etc.
162  // in the old version to the current version (via current logs).
163  // Without this, a common reason to finally start a new version is to crash
164  // the old version (after an autoupdate has arrived), and so we'd bias
165  // initial results towards showing crashes :-(.
166  static void DiscardOldStabilityStats(PrefService* local_state);
167
168  // Sets whether metrics recording is active.
169  // SetRecording(false) also forces a persistent save of logging state (if
170  // anything has been recorded, or transmitted).
171  void SetRecording(bool enabled);
172
173  // Enable/disable transmission of accumulated logs and crash reports (dumps).
174  // Return value "true" indicates setting was definitively set as requested).
175  // Return value of "false" indicates that the enable state is effectively
176  // stuck in the other logical setting.
177  // Google Update maintains the authoritative preference in the registry, so
178  // the caller *might* not be able to actually change the setting.
179  // It is always possible to set this to at least one value, which matches the
180  // current value reported by querying Google Update.
     // NOTE(review): the return-value description above is stale; this method
     // is declared void.
181  void SetReporting(bool enabled);
182
183  // If in_idle is true, sets idle_since_last_transmission_ to true.
184  // If in_idle is false and idle_since_last_transmission_ is true, sets
185  // idle_since_last_transmission_ to false and starts the timer (provided
186  // starting the timer is permitted).
187  void HandleIdleSinceLastTransmission(bool in_idle);
188
189  // Set up client ID, session ID, etc.
190  void InitializeMetricsState();
191
192  // Generates a new client ID to use to identify self to metrics server.
193  static std::string GenerateClientID();
194
195#if defined(OS_POSIX)
196  // Generates a new client ID to use to identify self to metrics server,
197  // given 128 bits of randomness.
198  static std::string RandomBytesToGUIDString(const uint64 bytes[2]);
199#endif
200
201  // Schedule the next save of LocalState information.  This is called
202  // automatically by the task that performs each save to schedule the next one.
203  void ScheduleNextStateSave();
204
205  // Save the LocalState information immediately. This should not be called by
206  // anybody other than the scheduler to avoid doing too many writes. When you
207  // make a change, call ScheduleNextStateSave() instead.
208  void SaveLocalState();
209
210  // Called to start recording user experience metrics.
211  // Constructs a new, empty current_log_.
212  void StartRecording();
213
214  // Called to stop recording user experience metrics.  The caller takes
215  // ownership of the resulting MetricsLog object via the log parameter,
216  // or passes in NULL to indicate that the log should simply be deleted.
217  void StopRecording(MetricsLogBase** log);
218
219  // Deletes pending_log_ and current_log_, and pushes their text into the
220  // appropriate unsent_log vectors.  Called when Chrome shuts down.
221  void PushPendingLogsToUnsentLists();
222
223  // Save the pending_log_text_ persistently in a pref for transmission when we
224  // next run.  Note that IF this text is "too large," we just discard it.
225  void PushPendingLogTextToUnsentOngoingLogs();
226
227  // Start timer for next log transmission.
228  void StartLogTransmissionTimer();
229
230  // Internal function to collect process memory information.
231  void LogTransmissionTimerDone();
232
233  // Do not call OnMemoryDetailCollectionDone() or
234  // OnHistogramSynchronizationDone() directly.
235  // Use StartLogTransmissionTimer() to schedule a call.
236  void OnMemoryDetailCollectionDone();
237  void OnHistogramSynchronizationDone();
238
239  // Takes whatever log should be uploaded next (according to the state_)
240  // and makes it the pending log.  If pending_log_ is not NULL,
241  // MakePendingLog does nothing and returns.
242  void MakePendingLog();
243
244  // Determines from state_ and permissions set out by the server and by
245  // the user whether the pending_log_ should be sent or discarded.  Called by
246  // TryToStartTransmission.
247  bool TransmissionPermitted() const;
248
249  // Check to see if there are any unsent logs from previous sessions.
     // Returns true if either unsent_initial_logs_ or unsent_ongoing_logs_ is
     // non-empty.
250  bool unsent_logs() const {
251    return !unsent_initial_logs_.empty() || !unsent_ongoing_logs_.empty();
252  }
253  // Record stats, client ID, Session ID, etc. in a special "first" log.
254  void PrepareInitialLog();
255  // Pull copies of unsent logs from prefs into instance variables.
256  void RecallUnsentLogs();
257  // Decode and verify written pref log data.
258  static MetricsService::LogRecallStatus RecallUnsentLogsHelper(
259      const ListValue& list,
260      std::vector<std::string>* local_list);
261  // Encode and write list size and checksum for pref log data.
262  static void StoreUnsentLogsHelper(const std::vector<std::string>& local_list,
263                                    const size_t kMaxLocalListSize,
264                                    ListValue* list);
265  // Convert |pending_log_| to XML in |compressed_log_|, and compress it for
266  // transmission.
267  void PreparePendingLogText();
268
269  // Convert pending_log_ to XML, compress it, and prepare to pass to server.
270  // Upon return, current_fetch_ should be reset with its upload data set to
271  // a compressed copy of the pending log.
272  void PrepareFetchWithPendingLog();
273
274  // Implementation of URLFetcher::Delegate. Called after transmission
275  // completes (either successfully or with failure).
276  virtual void OnURLFetchComplete(const URLFetcher* source,
277                                  const GURL& url,
278                                  const URLRequestStatus& status,
279                                  int response_code,
280                                  const ResponseCookies& cookies,
281                                  const std::string& data);
282
283  // Called by OnURLFetchComplete to handle the case when the server returned
284  // a response code not equal to 200.
285  void HandleBadResponseCode();
286
287  // Class to hold all attributes that get inherited by children in the UMA
288  // response data xml tree.  This is to make it convenient in the
289  // recursive function that does the tree traversal to pass all such
290  // data in the recursive call.  If you want to add more such attributes,
291  // add them to this class.
292  class InheritedProperties {
293    public:
       // Initializes salt to 123123 and denominator to 1000000.
294    InheritedProperties() : salt(123123), denominator(1000000) {}
295    int salt, denominator;
296    // Notice salt and denominator are inherited from parent nodes, but
297    // not probability; the default value of probability is 1.
298
299    // When a new node is reached it might have fields which overwrite inherited
300    // properties for that node (and its children).  Call this method to
301    // overwrite those settings.
302    void OverwriteWhereNeeded(xmlNodePtr node);
303  };
304
305  // Called by OnURLFetchComplete with the response data as the argument.
306  // Parses the xml returned by the server in the call to OnURLFetchComplete
307  // and extracts settings for subsequent frequency and content of log posts.
308  void GetSettingsFromResponseData(const std::string& data);
309
310  // This is a helper function for GetSettingsFromResponseData which iterates
311  // through the xml tree at the level of the <chrome_config> node.
312  void GetSettingsFromChromeConfigNode(xmlNodePtr chrome_config_node);
313
314  // GetSettingsFromUploadNode handles iteration over the children of the
315  // <upload> child of the <chrome_config> node.  It calls the recursive
316  // function GetSettingsFromUploadNodeRecursive which does the actual
317  // tree traversal.
318  void GetSettingsFromUploadNode(xmlNodePtr upload_node);
319  void GetSettingsFromUploadNodeRecursive(xmlNodePtr node,
320      InheritedProperties props,
321      std::string path_prefix,
322      bool uploadOn);
323
324  // NodeProbabilityTest gets called at every node in the tree traversal
325  // performed by GetSettingsFromUploadNodeRecursive.  It determines from
326  // the inherited attributes (salt, denominator) and the probability
327  // associated with the node whether that node and its contents should
328  // contribute to the upload.
329  bool NodeProbabilityTest(xmlNodePtr node, InheritedProperties props) const;
330  bool ProbabilityTest(double probability, int salt, int denominator) const;
331
332  // Records a window-related notification.
333  void LogWindowChange(NotificationType type,
334                       const NotificationSource& source,
335                       const NotificationDetails& details);
336
337  // Reads, increments and then sets the specified integer preference.
338  void IncrementPrefValue(const char* path);
339
340  // Reads, increments and then sets the specified long preference that is
341  // stored as a string.
342  void IncrementLongPrefsValue(const char* path);
343
344  // Records a renderer process crash.
345  void LogRendererCrash();
346
347  // Records an extension renderer process crash.
348  void LogExtensionRendererCrash();
349
350  // Records a renderer process hang.
351  void LogRendererHang();
352
353  // Set the value in preferences for the number of bookmarks and folders
354  // in node. The pref key for the number of bookmarks is num_bookmarks_key and
355  // the pref key for the number of folders is num_folders_key.
356  void LogBookmarks(const BookmarkNode* node,
357                    const char* num_bookmarks_key,
358                    const char* num_folders_key);
359
360  // Sets preferences for the number of bookmarks in model.
361  void LogBookmarks(BookmarkModel* model);
362
363  // Records a child process related notification.  These are recorded to an
364  // in-object buffer because these notifications are sent on page load, and we
365  // don't want to slow that down.
366  void LogChildProcessChange(NotificationType type,
367                             const NotificationSource& source,
368                             const NotificationDetails& details);
369
370  // Logs keyword-specific metrics. Keyword metrics are recorded in the
371  // profile-specific metrics.
372  void LogKeywords(const TemplateURLModel* url_model);
373
374  // Saves plugin-related updates from the in-object buffer to Local State
375  // for retrieval next time we send a Profile log (generally next launch).
376  void RecordPluginChanges(PrefService* pref);
377
378  // Records state that should be periodically saved, like uptime and
379  // buffered plugin stability statistics.
380  void RecordCurrentState(PrefService* pref);
381
382  // Logs the initiation of a page load.
383  void LogLoadStarted();
384
385  // Records a page load notification.
386  void LogLoadComplete(NotificationType type,
387                       const NotificationSource& source,
388                       const NotificationDetails& details);
389
390  // Checks whether a notification can be logged.
391  bool CanLogNotification(NotificationType type,
392                          const NotificationSource& source,
393                          const NotificationDetails& details);
394
395  // Sets the value of the specified path in prefs and schedules a save.
396  void RecordBooleanPrefValue(const char* path, bool value);
397
398  NotificationRegistrar registrar_;
399
400  // Indicate whether recording and reporting are currently happening.
401  // These should not be set directly, but by calling SetRecording and
402  // SetReporting.
403  bool recording_active_;
404  bool reporting_active_;
405
406  // Coincides with the check box in options window that lets the user control
407  // whether to upload.
408  bool user_permits_upload_;
409
410  // The variable server_permits_upload_ is set true when the response
411  // data forbids uploading.  This should coincide with the "die roll"
412  // with probability in the upload tag of the response data came out
413  // affirmative.
     // NOTE(review): "forbids" contradicts the variable name; presumably true
     // means the server permits uploading -- confirm against the .cc file.
414  bool server_permits_upload_;
415
416  // The progression of states made by the browser are recorded in the following
417  // state.
418  State state_;
419
420  // Chrome OS hardware class (e.g., hardware qualification ID). This
421  // class identifies the configured system components such as CPU,
422  // WiFi adapter, etc.  For non Chrome OS hosts, this will be an
423  // empty string.
424  std::string hardware_class_;
425
426  // The list of plugins which was retrieved on the file thread.
427  std::vector<WebPluginInfo> plugins_;
428
429  // The outstanding transmission appears as a URL Fetch operation.
430  scoped_ptr<URLFetcher> current_fetch_;
431
432  // The URL for the metrics server.
433  std::wstring server_url_;
434
435  // The identifier that's sent to the server with the log reports.
436  std::string client_id_;
437
438  // Whether the MetricsService object has received any notifications since
439  // the last time a transmission was sent.
440  bool idle_since_last_transmission_;
441
442  // A number that identifies how many times the app has been launched.
443  int session_id_;
444
445  // When logs were not sent during a previous session they are queued to be
446  // sent instead of currently accumulating logs.  We give preference to sending
447  // our initial log first, then unsent initial logs, then unsent ongoing logs.
448  // Unsent logs are gathered at shutdown, and saved in a persistent pref, one
449  // log in each string in the following arrays.
450  // Note that the vector has the oldest logs listed first (early in the
451  // vector), and we'll discard old logs if we have gathered too many logs.
452  std::vector<std::string> unsent_initial_logs_;
453  std::vector<std::string> unsent_ongoing_logs_;
454
455  // Maps NavigationControllers (corresponding to tabs) or Browser
456  // (corresponding to Windows) to a unique integer that we will use to identify
457  // it. |next_window_id_| is used to track which IDs we have used so far.
458  typedef std::map<uintptr_t, int> WindowMap;
459  WindowMap window_map_;
460  int next_window_id_;
461
462  // Buffer of child process notifications for quick access.
463  // ChildProcessStats is only forward-declared here; see its definition.
464  struct ChildProcessStats;
465  std::map<std::wstring, ChildProcessStats> child_process_stats_buffer_;
466
     // Factories for tasks bound to this object; judging by their names they
     // back the log-transmission timer and the periodic state save -- confirm
     // in the .cc file.
467  ScopedRunnableMethodFactory<MetricsService> log_sender_factory_;
468  ScopedRunnableMethodFactory<MetricsService> state_saver_factory_;
469
470  // Dictionary containing all the profile specific metrics. This is set
471  // at creation time from the prefs.
472  scoped_ptr<DictionaryValue> profile_dictionary_;
473
474  // The interval between consecutive log transmissions (to avoid hogging the
475  // outbound network link).  This is usually also the duration for which we
476  // build up a log, but if other unsent-logs from previous sessions exist, we
477  // quickly transmit those unsent logs while we continue to build a log.
478  base::TimeDelta interlog_duration_;
479
480  // The maximum number of events which get transmitted in a log.  This defaults
481  // to a constant and otherwise is provided by the UMA server in the server
482  // response data.
483  int log_event_limit_;
484
485  // Indicate that a timer for sending the next log has already been queued.
486  bool timer_pending_;
487
488#if defined(OS_CHROMEOS)
489  // The external metric service is used to log ChromeOS UMA events.
490  scoped_refptr<chromeos::ExternalMetrics> external_metrics_;
491#endif
492
     // Unit tests in MetricsServiceTest that are granted access to the private
     // members declared above.
493  FRIEND_TEST_ALL_PREFIXES(MetricsServiceTest, EmptyLogList);
494  FRIEND_TEST_ALL_PREFIXES(MetricsServiceTest, SingleElementLogList);
495  FRIEND_TEST_ALL_PREFIXES(MetricsServiceTest, OverLimitLogList);
496  FRIEND_TEST_ALL_PREFIXES(MetricsServiceTest, SmallRecoveredListSize);
497  FRIEND_TEST_ALL_PREFIXES(MetricsServiceTest, RemoveSizeFromLogList);
498  FRIEND_TEST_ALL_PREFIXES(MetricsServiceTest, CorruptSizeOfLogList);
499  FRIEND_TEST_ALL_PREFIXES(MetricsServiceTest, CorruptChecksumOfLogList);
500  FRIEND_TEST_ALL_PREFIXES(MetricsServiceTest, ClientIdGeneratesAllZeroes);
501  FRIEND_TEST_ALL_PREFIXES(MetricsServiceTest, ClientIdGeneratesCorrectly);
502  FRIEND_TEST_ALL_PREFIXES(MetricsServiceTest, ClientIdCorrectlyFormatted);
503
504  DISALLOW_COPY_AND_ASSIGN(MetricsService);
505};
506
507#endif  // CHROME_BROWSER_METRICS_METRICS_SERVICE_H_
508