1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//------------------------------------------------------------------------------
6// Description of the life cycle of a instance of MetricsService.
7//
8//  OVERVIEW
9//
10// A MetricsService instance is typically created at application startup.  It
11// is the central controller for the acquisition of log data, and the automatic
12// transmission of that log data to an external server.  Its major job is to
13// manage logs, grouping them for transmission, and transmitting them.  As part
14// of its grouping, MS finalizes logs by including some just-in-time gathered
15// memory statistics, snapshotting the current stats of numerous histograms,
16// closing the logs, translating to XML text, and compressing the results for
17// transmission.  Transmission includes submitting a compressed log as data in a
18// URL-post, and retransmitting (or retaining at process termination) if the
19// attempted transmission failed.  Retention across process terminations is done
20// using the the PrefServices facilities. The retained logs (the ones that never
21// got transmitted) are compressed and base64-encoded before being persisted.
22//
23// Logs fall into one of two categories: "initial logs," and "ongoing logs."
24// There is at most one initial log sent for each complete run of the chromium
25// product (from startup, to browser shutdown).  An initial log is generally
26// transmitted some short time (1 minute?) after startup, and includes stats
27// such as recent crash info, the number and types of plugins, etc.  The
28// external server's response to the initial log conceptually tells this MS if
29// it should continue transmitting logs (during this session). The server
30// response can actually be much more detailed, and always includes (at a
31// minimum) how often additional ongoing logs should be sent.
32//
33// After the above initial log, a series of ongoing logs will be transmitted.
34// The first ongoing log actually begins to accumulate information stating when
35// the MS was first constructed.  Note that even though the initial log is
36// commonly sent a full minute after startup, the initial log does not include
37// much in the way of user stats.   The most common interlog period (delay)
38// is 20 minutes. That time period starts when the first user action causes a
39// logging event.  This means that if there is no user action, there may be long
40// periods without any (ongoing) log transmissions.  Ongoing logs typically
41// contain very detailed records of user activities (ex: opened tab, closed
42// tab, fetched URL, maximized window, etc.)  In addition, just before an
43// ongoing log is closed out, a call is made to gather memory statistics.  Those
44// memory statistics are deposited into a histogram, and the log finalization
45// code is then called.  In the finalization, a call to a Histogram server
46// acquires a list of all local histograms that have been flagged for upload
47// to the UMA server.  The finalization also acquires a the most recent number
48// of page loads, along with any counts of renderer or plugin crashes.
49//
50// When the browser shuts down, there will typically be a fragment of an ongoing
51// log that has not yet been transmitted.  At shutdown time, that fragment
52// is closed (including snapshotting histograms), and converted to text.  Note
53// that memory stats are not gathered during shutdown, as gathering *might* be
54// too time consuming.  The textual representation of the fragment of the
55// ongoing log is then stored persistently as a string in the PrefServices, for
56// potential transmission during a future run of the product.
57//
58// There are two slightly abnormal shutdown conditions.  There is a
59// "disconnected scenario," and a "really fast startup and shutdown" scenario.
60// In the "never connected" situation, the user has (during the running of the
61// process) never established an internet connection.  As a result, attempts to
62// transmit the initial log have failed, and a lot(?) of data has accumulated in
63// the ongoing log (which didn't yet get closed, because there was never even a
64// contemplation of sending it).  There is also a kindred "lost connection"
65// situation, where a loss of connection prevented an ongoing log from being
66// transmitted, and a (still open) log was stuck accumulating a lot(?) of data,
67// while the earlier log retried its transmission.  In both of these
68// disconnected situations, two logs need to be, and are, persistently stored
69// for future transmission.
70//
71// The other unusual shutdown condition, termed "really fast startup and
72// shutdown," involves the deliberate user termination of the process before
73// the initial log is even formed or transmitted. In that situation, no logging
74// is done, but the historical crash statistics remain (unlogged) for inclusion
75// in a future run's initial log.  (i.e., we don't lose crash stats).
76//
77// With the above overview, we can now describe the state machine's various
78// stats, based on the State enum specified in the state_ member.  Those states
79// are:
80//
81//    INITIALIZED,            // Constructor was called.
82//    INIT_TASK_SCHEDULED,    // Waiting for deferred init tasks to complete.
83//    INIT_TASK_DONE,         // Waiting for timer to send initial log.
84//    INITIAL_LOG_READY,      // Initial log generated, and waiting for reply.
85//    SEND_OLD_INITIAL_LOGS,  // Sending unsent logs from previous session.
86//    SENDING_OLD_LOGS,       // Sending unsent logs from previous session.
87//    SENDING_CURRENT_LOGS,   // Sending standard current logs as they accrue.
88//
89// In more detail, we have:
90//
91//    INITIALIZED,            // Constructor was called.
92// The MS has been constructed, but has taken no actions to compose the
93// initial log.
94//
95//    INIT_TASK_SCHEDULED,    // Waiting for deferred init tasks to complete.
96// Typically about 30 seconds after startup, a task is sent to a second thread
97// (the file thread) to perform deferred (lower priority and slower)
98// initialization steps such as getting the list of plugins.  That task will
99// (when complete) make an async callback (via a Task) to indicate the
100// completion.
101//
102//    INIT_TASK_DONE,         // Waiting for timer to send initial log.
103// The callback has arrived, and it is now possible for an initial log to be
104// created.  This callback typically arrives back less than one second after
105// the deferred init task is dispatched.
106//
107//    INITIAL_LOG_READY,      // Initial log generated, and waiting for reply.
108// This state is entered only after an initial log has been composed, and
109// prepared for transmission.  It is also the case that any previously unsent
110// logs have been loaded into instance variables for possible transmission.
111//
112//    SEND_OLD_INITIAL_LOGS,  // Sending unsent logs from previous session.
113// This state indicates that the initial log for this session has been
114// successfully sent and it is now time to send any "initial logs" that were
115// saved from previous sessions.  Most commonly, there are none, but all old
116// logs that were "initial logs" must be sent before this state is exited.
117//
118//    SENDING_OLD_LOGS,       // Sending unsent logs from previous session.
119// This state indicates that there are no more unsent initial logs, and now any
120// ongoing logs from previous sessions should be transmitted.  All such logs
121// will be transmitted before exiting this state, and proceeding with ongoing
122// logs from the current session (see next state).
123//
124//    SENDING_CURRENT_LOGS,   // Sending standard current logs as they accrue.
125// Current logs are being accumulated.  Typically every 20 minutes a log is
126// closed and finalized for transmission, at the same time as a new log is
127// started.
128//
129// The progression through the above states is simple, and sequential, in the
130// most common use cases.  States proceed from INITIAL to SENDING_CURRENT_LOGS,
131// and remain in the latter until shutdown.
132//
133// The one unusual case is when the user asks that we stop logging.  When that
134// happens, any pending (transmission in progress) log is pushed into the list
135// of old unsent logs (the appropriate list, depending on whether it is an
136// initial log, or an ongoing log).  An addition, any log that is currently
137// accumulating is also finalized, and pushed into the unsent log list.  With
138// those pushes performed, we regress back to the SEND_OLD_INITIAL_LOGS state in
139// case the user enables log recording again during this session.  This way
140// anything we have "pushed back" will be sent automatically if/when we progress
141// back to SENDING_CURRENT_LOG state.
142//
143// Also note that whenever the member variables containing unsent logs are
144// modified (i.e., when we send an old log), we mirror the list of logs into
145// the PrefServices.  This ensures that IF we crash, we won't start up and
146// retransmit our old logs again.
147//
148// Due to race conditions, it is always possible that a log file could be sent
149// twice.  For example, if a log file is sent, but not yet acknowledged by
150// the external server, and the user shuts down, then a copy of the log may be
151// saved for re-transmission.  These duplicates could be filtered out server
152// side, but are not expected to be a significant problem.
153//
154//
155//------------------------------------------------------------------------------
156
157#include "chrome/browser/metrics/metrics_service.h"
158
159#include "base/base64.h"
160#include "base/command_line.h"
161#include "base/md5.h"
162#include "base/metrics/histogram.h"
163#include "base/string_number_conversions.h"
164#include "base/threading/platform_thread.h"
165#include "base/threading/thread.h"
166#include "base/utf_string_conversions.h"
167#include "base/values.h"
168#include "chrome/browser/bookmarks/bookmark_model.h"
169#include "chrome/browser/browser_process.h"
170#include "chrome/browser/load_notification_details.h"
171#include "chrome/browser/memory_details.h"
172#include "chrome/browser/metrics/histogram_synchronizer.h"
173#include "chrome/browser/metrics/metrics_log.h"
174#include "chrome/browser/prefs/pref_service.h"
175#include "chrome/browser/prefs/scoped_user_pref_update.h"
176#include "chrome/browser/profiles/profile.h"
177#include "chrome/browser/search_engines/template_url_model.h"
178#include "chrome/browser/ui/browser_list.h"
179#include "chrome/common/child_process_logging.h"
180#include "chrome/common/chrome_switches.h"
181#include "chrome/common/guid.h"
182#include "chrome/common/pref_names.h"
183#include "chrome/common/render_messages.h"
184#include "content/browser/renderer_host/render_process_host.h"
185#include "content/common/child_process_info.h"
186#include "content/common/notification_service.h"
187#include "libxml/xmlwriter.h"
188#include "webkit/plugins/npapi/plugin_list.h"
189#include "webkit/plugins/npapi/webplugininfo.h"
190
191// TODO(port): port browser_distribution.h.
192#if !defined(OS_POSIX)
193#include "chrome/installer/util/browser_distribution.h"
194#endif
195
196#if defined(OS_CHROMEOS)
197#include "chrome/browser/chromeos/cros/cros_library.h"
198#include "chrome/browser/chromeos/external_metrics.h"
199#include "chrome/browser/chromeos/system_access.h"
200#endif
201
202namespace {
203MetricsService::LogRecallStatus MakeRecallStatusHistogram(
204    MetricsService::LogRecallStatus status) {
205  UMA_HISTOGRAM_ENUMERATION("PrefService.PersistentLogRecall", status,
206                            MetricsService::END_RECALL_STATUS);
207  return status;
208}
209
210// TODO(ziadh): Remove this when done with experiment.
211void MakeStoreStatusHistogram(MetricsService::LogStoreStatus status) {
212  UMA_HISTOGRAM_ENUMERATION("PrefService.PersistentLogStore2", status,
213                            MetricsService::END_STORE_STATUS);
214}
215}  // namespace
216
217using base::Time;
218using base::TimeDelta;
219
220// Check to see that we're being called on only one thread.
221static bool IsSingleThreaded();
222
223static const char kMetricsType[] = "application/vnd.mozilla.metrics.bz2";
224
225// The delay, in seconds, after startup before sending the first log message.
226static const int kInitialInterlogDuration = 60;  // one minute
227
228// This specifies the amount of time to wait for all renderers to send their
229// data.
230static const int kMaxHistogramGatheringWaitDuration = 60000;  // 60 seconds.
231
232// The maximum number of events in a log uploaded to the UMA server.
233static const int kEventLimit = 2400;
234
235// If an upload fails, and the transmission was over this byte count, then we
236// will discard the log, and not try to retransmit it.  We also don't persist
237// the log to the prefs for transmission during the next chrome session if this
238// limit is exceeded.
239static const int kUploadLogAvoidRetransmitSize = 50000;
240
241// When we have logs from previous Chrome sessions to send, how long should we
242// delay (in seconds) between each log transmission.
243static const int kUnsentLogDelay = 15;  // 15 seconds
244
245// Minimum time a log typically exists before sending, in seconds.
246// This number is supplied by the server, but until we parse it out of a server
247// response, we use this duration to specify how long we should wait before
248// sending the next log.  If the channel is busy, such as when there is a
249// failure during an attempt to transmit a previous log, then a log may wait
250// (and continue to accrue new log entries) for a much greater period of time.
251static const int kMinSecondsPerLog = 30 * 60;  // Thirty minutes.
252
253// When we don't succeed at transmitting a log to a server, we progressively
254// wait longer and longer before sending the next log.  This backoff process
255// help reduce load on the server, and makes the amount of backoff vary between
256// clients so that a collision (server overload?) on retransmit is less likely.
257// The following is the constant we use to expand that inter-log duration.
258static const double kBackoff = 1.1;
259// We limit the maximum backoff to be no greater than some multiple of the
260// default kMinSecondsPerLog.  The following is that maximum ratio.
261static const int kMaxBackoff = 10;
262
263// Interval, in seconds, between state saves.
264static const int kSaveStateInterval = 5 * 60;  // five minutes
265
266// The number of "initial" logs we're willing to save, and hope to send during
267// a future Chrome session.  Initial logs contain crash stats, and are pretty
268// small.
269static const size_t kMaxInitialLogsPersisted = 20;
270
271// The number of ongoing logs we're willing to save persistently, and hope to
272// send during a this or future sessions.  Note that each log may be pretty
273// large, as presumably the related "initial" log wasn't sent (probably nothing
274// was, as the user was probably off-line).  As a result, the log probably kept
275// accumulating while the "initial" log was stalled (pending_), and couldn't be
276// sent.  As a result, we don't want to save too many of these mega-logs.
277// A "standard shutdown" will create a small log, including just the data that
278// was not yet been transmitted, and that is normal (to have exactly one
279// ongoing_log_ at startup).
280static const size_t kMaxOngoingLogsPersisted = 8;
281
282// We append (2) more elements to persisted lists: the size of the list and a
283// checksum of the elements.
284static const size_t kChecksumEntryCount = 2;
285
286// This is used to quickly log stats from child process related notifications in
287// MetricsService::child_stats_buffer_.  The buffer's contents are transferred
288// out when Local State is periodically saved.  The information is then
289// reported to the UMA server on next launch.
290struct MetricsService::ChildProcessStats {
291 public:
292  explicit ChildProcessStats(ChildProcessInfo::ProcessType type)
293      : process_launches(0),
294        process_crashes(0),
295        instances(0),
296        process_type(type) {}
297
298  // This constructor is only used by the map to return some default value for
299  // an index for which no value has been assigned.
300  ChildProcessStats()
301      : process_launches(0),
302      process_crashes(0),
303      instances(0),
304      process_type(ChildProcessInfo::UNKNOWN_PROCESS) {}
305
306  // The number of times that the given child process has been launched
307  int process_launches;
308
309  // The number of times that the given child process has crashed
310  int process_crashes;
311
312  // The number of instances of this child process that have been created.
313  // An instance is a DOM object rendered by this child process during a page
314  // load.
315  int instances;
316
317  ChildProcessInfo::ProcessType process_type;
318};
319
320// Handles asynchronous fetching of memory details.
321// Will run the provided task after finished.
322class MetricsMemoryDetails : public MemoryDetails {
323 public:
324  explicit MetricsMemoryDetails(Task* completion) : completion_(completion) {}
325
326  virtual void OnDetailsAvailable() {
327    MessageLoop::current()->PostTask(FROM_HERE, completion_);
328  }
329
330 private:
331  ~MetricsMemoryDetails() {}
332
333  Task* completion_;
334  DISALLOW_COPY_AND_ASSIGN(MetricsMemoryDetails);
335};
336
337class MetricsService::InitTaskComplete : public Task {
338 public:
339  explicit InitTaskComplete(
340      const std::string& hardware_class,
341      const std::vector<webkit::npapi::WebPluginInfo>& plugins)
342      : hardware_class_(hardware_class), plugins_(plugins) {}
343
344  virtual void Run() {
345    g_browser_process->metrics_service()->OnInitTaskComplete(
346        hardware_class_, plugins_);
347  }
348
349 private:
350  std::string hardware_class_;
351  std::vector<webkit::npapi::WebPluginInfo> plugins_;
352};
353
354class MetricsService::InitTask : public Task {
355 public:
356  explicit InitTask(MessageLoop* callback_loop)
357      : callback_loop_(callback_loop) {}
358
359  virtual void Run() {
360    std::vector<webkit::npapi::WebPluginInfo> plugins;
361    webkit::npapi::PluginList::Singleton()->GetPlugins(false, &plugins);
362    std::string hardware_class;  // Empty string by default.
363#if defined(OS_CHROMEOS)
364    chromeos::SystemAccess::GetInstance()->GetMachineStatistic(
365        "hardware_class", &hardware_class);
366#endif  // OS_CHROMEOS
367    callback_loop_->PostTask(FROM_HERE, new InitTaskComplete(
368        hardware_class, plugins));
369  }
370
371 private:
372  MessageLoop* callback_loop_;
373};
374
375// static
376void MetricsService::RegisterPrefs(PrefService* local_state) {
377  DCHECK(IsSingleThreaded());
378  local_state->RegisterStringPref(prefs::kMetricsClientID, "");
379  local_state->RegisterInt64Pref(prefs::kMetricsClientIDTimestamp, 0);
380  local_state->RegisterInt64Pref(prefs::kStabilityLaunchTimeSec, 0);
381  local_state->RegisterInt64Pref(prefs::kStabilityLastTimestampSec, 0);
382  local_state->RegisterStringPref(prefs::kStabilityStatsVersion, "");
383  local_state->RegisterInt64Pref(prefs::kStabilityStatsBuildTime, 0);
384  local_state->RegisterBooleanPref(prefs::kStabilityExitedCleanly, true);
385  local_state->RegisterBooleanPref(prefs::kStabilitySessionEndCompleted, true);
386  local_state->RegisterIntegerPref(prefs::kMetricsSessionID, -1);
387  local_state->RegisterIntegerPref(prefs::kStabilityLaunchCount, 0);
388  local_state->RegisterIntegerPref(prefs::kStabilityCrashCount, 0);
389  local_state->RegisterIntegerPref(prefs::kStabilityIncompleteSessionEndCount,
390                                   0);
391  local_state->RegisterIntegerPref(prefs::kStabilityPageLoadCount, 0);
392  local_state->RegisterIntegerPref(prefs::kStabilityRendererCrashCount, 0);
393  local_state->RegisterIntegerPref(prefs::kStabilityExtensionRendererCrashCount,
394                                   0);
395  local_state->RegisterIntegerPref(prefs::kStabilityRendererHangCount, 0);
396  local_state->RegisterIntegerPref(prefs::kStabilityChildProcessCrashCount, 0);
397  local_state->RegisterIntegerPref(prefs::kStabilityBreakpadRegistrationFail,
398                                   0);
399  local_state->RegisterIntegerPref(prefs::kStabilityBreakpadRegistrationSuccess,
400                                   0);
401  local_state->RegisterIntegerPref(prefs::kStabilityDebuggerPresent, 0);
402  local_state->RegisterIntegerPref(prefs::kStabilityDebuggerNotPresent, 0);
403#if defined(OS_CHROMEOS)
404  local_state->RegisterIntegerPref(prefs::kStabilityOtherUserCrashCount, 0);
405  local_state->RegisterIntegerPref(prefs::kStabilityKernelCrashCount, 0);
406  local_state->RegisterIntegerPref(prefs::kStabilitySystemUncleanShutdownCount,
407                                   0);
408#endif  // OS_CHROMEOS
409
410  local_state->RegisterDictionaryPref(prefs::kProfileMetrics);
411  local_state->RegisterIntegerPref(prefs::kNumBookmarksOnBookmarkBar, 0);
412  local_state->RegisterIntegerPref(prefs::kNumFoldersOnBookmarkBar, 0);
413  local_state->RegisterIntegerPref(prefs::kNumBookmarksInOtherBookmarkFolder,
414                                   0);
415  local_state->RegisterIntegerPref(prefs::kNumFoldersInOtherBookmarkFolder, 0);
416  local_state->RegisterIntegerPref(prefs::kNumKeywords, 0);
417  local_state->RegisterListPref(prefs::kMetricsInitialLogs);
418  local_state->RegisterListPref(prefs::kMetricsOngoingLogs);
419
420  local_state->RegisterInt64Pref(prefs::kUninstallMetricsPageLoadCount, 0);
421  local_state->RegisterInt64Pref(prefs::kUninstallLaunchCount, 0);
422  local_state->RegisterInt64Pref(prefs::kUninstallMetricsInstallDate, 0);
423  local_state->RegisterInt64Pref(prefs::kUninstallMetricsUptimeSec, 0);
424  local_state->RegisterInt64Pref(prefs::kUninstallLastLaunchTimeSec, 0);
425  local_state->RegisterInt64Pref(prefs::kUninstallLastObservedRunTimeSec, 0);
426}
427
428// static
429void MetricsService::DiscardOldStabilityStats(PrefService* local_state) {
430  local_state->SetBoolean(prefs::kStabilityExitedCleanly, true);
431  local_state->SetBoolean(prefs::kStabilitySessionEndCompleted, true);
432
433  local_state->SetInteger(prefs::kStabilityIncompleteSessionEndCount, 0);
434  local_state->SetInteger(prefs::kStabilityBreakpadRegistrationSuccess, 0);
435  local_state->SetInteger(prefs::kStabilityBreakpadRegistrationFail, 0);
436  local_state->SetInteger(prefs::kStabilityDebuggerPresent, 0);
437  local_state->SetInteger(prefs::kStabilityDebuggerNotPresent, 0);
438
439  local_state->SetInteger(prefs::kStabilityLaunchCount, 0);
440  local_state->SetInteger(prefs::kStabilityCrashCount, 0);
441
442  local_state->SetInteger(prefs::kStabilityPageLoadCount, 0);
443  local_state->SetInteger(prefs::kStabilityRendererCrashCount, 0);
444  local_state->SetInteger(prefs::kStabilityRendererHangCount, 0);
445
446  local_state->SetInt64(prefs::kStabilityLaunchTimeSec, 0);
447  local_state->SetInt64(prefs::kStabilityLastTimestampSec, 0);
448
449  local_state->ClearPref(prefs::kStabilityPluginStats);
450
451  local_state->ClearPref(prefs::kMetricsInitialLogs);
452  local_state->ClearPref(prefs::kMetricsOngoingLogs);
453}
454
455MetricsService::MetricsService()
456    : recording_active_(false),
457      reporting_active_(false),
458      server_permits_upload_(true),
459      state_(INITIALIZED),
460      current_fetch_(NULL),
461      idle_since_last_transmission_(false),
462      next_window_id_(0),
463      ALLOW_THIS_IN_INITIALIZER_LIST(log_sender_factory_(this)),
464      ALLOW_THIS_IN_INITIALIZER_LIST(state_saver_factory_(this)),
465      interlog_duration_(TimeDelta::FromSeconds(kInitialInterlogDuration)),
466      timer_pending_(false) {
467  DCHECK(IsSingleThreaded());
468  InitializeMetricsState();
469}
470
471MetricsService::~MetricsService() {
472  SetRecording(false);
473}
474
475void MetricsService::Start() {
476  HandleIdleSinceLastTransmission(false);
477  SetRecording(true);
478  SetReporting(true);
479}
480
481void MetricsService::StartRecordingOnly() {
482  SetRecording(true);
483  SetReporting(false);
484}
485
486void MetricsService::Stop() {
487  HandleIdleSinceLastTransmission(false);
488  SetReporting(false);
489  SetRecording(false);
490}
491
492void MetricsService::SetRecording(bool enabled) {
493  DCHECK(IsSingleThreaded());
494
495  if (enabled == recording_active_)
496    return;
497
498  if (enabled) {
499    if (client_id_.empty()) {
500      PrefService* pref = g_browser_process->local_state();
501      DCHECK(pref);
502      client_id_ = pref->GetString(prefs::kMetricsClientID);
503      if (client_id_.empty()) {
504        client_id_ = GenerateClientID();
505        pref->SetString(prefs::kMetricsClientID, client_id_);
506
507        // Might as well make a note of how long this ID has existed
508        pref->SetString(prefs::kMetricsClientIDTimestamp,
509                        base::Int64ToString(Time::Now().ToTimeT()));
510      }
511    }
512    child_process_logging::SetClientId(client_id_);
513    StartRecording();
514
515    SetUpNotifications(&registrar_, this);
516  } else {
517    registrar_.RemoveAll();
518    PushPendingLogsToUnsentLists();
519    DCHECK(!pending_log());
520    if (state_ > INITIAL_LOG_READY && unsent_logs())
521      state_ = SEND_OLD_INITIAL_LOGS;
522  }
523  recording_active_ = enabled;
524}
525
526bool MetricsService::recording_active() const {
527  DCHECK(IsSingleThreaded());
528  return recording_active_;
529}
530
531void MetricsService::SetReporting(bool enable) {
532  if (reporting_active_ != enable) {
533    reporting_active_ = enable;
534    if (reporting_active_)
535      StartLogTransmissionTimer();
536  }
537}
538
539bool MetricsService::reporting_active() const {
540  DCHECK(IsSingleThreaded());
541  return reporting_active_;
542}
543
544// static
545void MetricsService::SetUpNotifications(NotificationRegistrar* registrar,
546                                        NotificationObserver* observer) {
547    registrar->Add(observer, NotificationType::BROWSER_OPENED,
548                   NotificationService::AllSources());
549    registrar->Add(observer, NotificationType::BROWSER_CLOSED,
550                   NotificationService::AllSources());
551    registrar->Add(observer, NotificationType::USER_ACTION,
552                   NotificationService::AllSources());
553    registrar->Add(observer, NotificationType::TAB_PARENTED,
554                   NotificationService::AllSources());
555    registrar->Add(observer, NotificationType::TAB_CLOSING,
556                   NotificationService::AllSources());
557    registrar->Add(observer, NotificationType::LOAD_START,
558                   NotificationService::AllSources());
559    registrar->Add(observer, NotificationType::LOAD_STOP,
560                   NotificationService::AllSources());
561    registrar->Add(observer, NotificationType::RENDERER_PROCESS_CLOSED,
562                   NotificationService::AllSources());
563    registrar->Add(observer, NotificationType::RENDERER_PROCESS_HANG,
564                   NotificationService::AllSources());
565    registrar->Add(observer, NotificationType::CHILD_PROCESS_HOST_CONNECTED,
566                   NotificationService::AllSources());
567    registrar->Add(observer, NotificationType::CHILD_INSTANCE_CREATED,
568                   NotificationService::AllSources());
569    registrar->Add(observer, NotificationType::CHILD_PROCESS_CRASHED,
570                   NotificationService::AllSources());
571    registrar->Add(observer, NotificationType::TEMPLATE_URL_MODEL_LOADED,
572                   NotificationService::AllSources());
573    registrar->Add(observer, NotificationType::OMNIBOX_OPENED_URL,
574                   NotificationService::AllSources());
575    registrar->Add(observer, NotificationType::BOOKMARK_MODEL_LOADED,
576                   NotificationService::AllSources());
577}
578
579void MetricsService::Observe(NotificationType type,
580                             const NotificationSource& source,
581                             const NotificationDetails& details) {
582  DCHECK(current_log_);
583  DCHECK(IsSingleThreaded());
584
585  if (!CanLogNotification(type, source, details))
586    return;
587
588  switch (type.value) {
589    case NotificationType::USER_ACTION:
590        current_log_->RecordUserAction(*Details<const char*>(details).ptr());
591      break;
592
593    case NotificationType::BROWSER_OPENED:
594    case NotificationType::BROWSER_CLOSED:
595      LogWindowChange(type, source, details);
596      break;
597
598    case NotificationType::TAB_PARENTED:
599    case NotificationType::TAB_CLOSING:
600      LogWindowChange(type, source, details);
601      break;
602
603    case NotificationType::LOAD_STOP:
604      LogLoadComplete(type, source, details);
605      break;
606
607    case NotificationType::LOAD_START:
608      LogLoadStarted();
609      break;
610
611    case NotificationType::RENDERER_PROCESS_CLOSED: {
612        RenderProcessHost::RendererClosedDetails* process_details =
613            Details<RenderProcessHost::RendererClosedDetails>(details).ptr();
614        if (process_details->status ==
615            base::TERMINATION_STATUS_PROCESS_CRASHED ||
616            process_details->status ==
617            base::TERMINATION_STATUS_ABNORMAL_TERMINATION) {
618          if (process_details->was_extension_renderer) {
619            LogExtensionRendererCrash();
620          } else {
621            LogRendererCrash();
622          }
623        }
624      }
625      break;
626
627    case NotificationType::RENDERER_PROCESS_HANG:
628      LogRendererHang();
629      break;
630
631    case NotificationType::CHILD_PROCESS_HOST_CONNECTED:
632    case NotificationType::CHILD_PROCESS_CRASHED:
633    case NotificationType::CHILD_INSTANCE_CREATED:
634      LogChildProcessChange(type, source, details);
635      break;
636
637    case NotificationType::TEMPLATE_URL_MODEL_LOADED:
638      LogKeywords(Source<TemplateURLModel>(source).ptr());
639      break;
640
641    case NotificationType::OMNIBOX_OPENED_URL: {
642      MetricsLog* current_log = current_log_->AsMetricsLog();
643      DCHECK(current_log);
644      current_log->RecordOmniboxOpenedURL(
645          *Details<AutocompleteLog>(details).ptr());
646      break;
647    }
648
649    case NotificationType::BOOKMARK_MODEL_LOADED: {
650      Profile* p = Source<Profile>(source).ptr();
651      if (p)
652        LogBookmarks(p->GetBookmarkModel());
653      break;
654    }
655    default:
656      NOTREACHED();
657      break;
658  }
659
660  HandleIdleSinceLastTransmission(false);
661
662  if (current_log_)
663    DVLOG(1) << "METRICS: NUMBER OF EVENTS = " << current_log_->num_events();
664}
665
666void MetricsService::HandleIdleSinceLastTransmission(bool in_idle) {
667  // If there wasn't a lot of action, maybe the computer was asleep, in which
668  // case, the log transmissions should have stopped.  Here we start them up
669  // again.
670  if (!in_idle && idle_since_last_transmission_)
671    StartLogTransmissionTimer();
672  idle_since_last_transmission_ = in_idle;
673}
674
675void MetricsService::RecordStartOfSessionEnd() {
676  LogCleanShutdown();
677  RecordBooleanPrefValue(prefs::kStabilitySessionEndCompleted, false);
678}
679
680void MetricsService::RecordCompletedSessionEnd() {
681  LogCleanShutdown();
682  RecordBooleanPrefValue(prefs::kStabilitySessionEndCompleted, true);
683}
684
685void MetricsService:: RecordBreakpadRegistration(bool success) {
686  if (!success)
687    IncrementPrefValue(prefs::kStabilityBreakpadRegistrationFail);
688  else
689    IncrementPrefValue(prefs::kStabilityBreakpadRegistrationSuccess);
690}
691
692void MetricsService::RecordBreakpadHasDebugger(bool has_debugger) {
693  if (!has_debugger)
694    IncrementPrefValue(prefs::kStabilityDebuggerNotPresent);
695  else
696    IncrementPrefValue(prefs::kStabilityDebuggerPresent);
697}
698
699//------------------------------------------------------------------------------
700// private methods
701//------------------------------------------------------------------------------
702
703
704//------------------------------------------------------------------------------
705// Initialization methods
706
707void MetricsService::InitializeMetricsState() {
708#if defined(OS_POSIX)
709  server_url_ = L"https://clients4.google.com/firefox/metrics/collect";
710#else
711  BrowserDistribution* dist = BrowserDistribution::GetDistribution();
712  server_url_ = dist->GetStatsServerURL();
713#endif
714
715  PrefService* pref = g_browser_process->local_state();
716  DCHECK(pref);
717
718  if ((pref->GetInt64(prefs::kStabilityStatsBuildTime)
719       != MetricsLog::GetBuildTime()) ||
720      (pref->GetString(prefs::kStabilityStatsVersion)
721       != MetricsLog::GetVersionString())) {
722    // This is a new version, so we don't want to confuse the stats about the
723    // old version with info that we upload.
724    DiscardOldStabilityStats(pref);
725    pref->SetString(prefs::kStabilityStatsVersion,
726                    MetricsLog::GetVersionString());
727    pref->SetInt64(prefs::kStabilityStatsBuildTime,
728                   MetricsLog::GetBuildTime());
729  }
730
731  // Update session ID
732  session_id_ = pref->GetInteger(prefs::kMetricsSessionID);
733  ++session_id_;
734  pref->SetInteger(prefs::kMetricsSessionID, session_id_);
735
736  // Stability bookkeeping
737  IncrementPrefValue(prefs::kStabilityLaunchCount);
738
739  if (!pref->GetBoolean(prefs::kStabilityExitedCleanly)) {
740    IncrementPrefValue(prefs::kStabilityCrashCount);
741  }
742
743  // This will be set to 'true' if we exit cleanly.
744  pref->SetBoolean(prefs::kStabilityExitedCleanly, false);
745
746  if (!pref->GetBoolean(prefs::kStabilitySessionEndCompleted)) {
747    IncrementPrefValue(prefs::kStabilityIncompleteSessionEndCount);
748    // This is marked false when we get a WM_ENDSESSION.
749    pref->SetBoolean(prefs::kStabilitySessionEndCompleted, true);
750  }
751
752  // Initialize uptime counters.
753  int64 startup_uptime = MetricsLog::GetIncrementalUptime(pref);
754  DCHECK_EQ(0, startup_uptime);
755  // For backwards compatibility, leave this intact in case Omaha is checking
756  // them.  prefs::kStabilityLastTimestampSec may also be useless now.
757  // TODO(jar): Delete these if they have no uses.
758  pref->SetInt64(prefs::kStabilityLaunchTimeSec, Time::Now().ToTimeT());
759
760  // Bookkeeping for the uninstall metrics.
761  IncrementLongPrefsValue(prefs::kUninstallLaunchCount);
762
763  // Save profile metrics.
764  PrefService* prefs = g_browser_process->local_state();
765  if (prefs) {
766    // Remove the current dictionary and store it for use when sending data to
767    // server. By removing the value we prune potentially dead profiles
768    // (and keys). All valid values are added back once services startup.
769    const DictionaryValue* profile_dictionary =
770        prefs->GetDictionary(prefs::kProfileMetrics);
771    if (profile_dictionary) {
772      // Do a deep copy of profile_dictionary since ClearPref will delete it.
773      profile_dictionary_.reset(static_cast<DictionaryValue*>(
774          profile_dictionary->DeepCopy()));
775      prefs->ClearPref(prefs::kProfileMetrics);
776    }
777  }
778
779  // Get stats on use of command line.
780  const CommandLine* command_line(CommandLine::ForCurrentProcess());
781  size_t common_commands = 0;
782  if (command_line->HasSwitch(switches::kUserDataDir)) {
783    ++common_commands;
784    UMA_HISTOGRAM_COUNTS_100("Chrome.CommandLineDatDirCount", 1);
785  }
786
787  if (command_line->HasSwitch(switches::kApp)) {
788    ++common_commands;
789    UMA_HISTOGRAM_COUNTS_100("Chrome.CommandLineAppModeCount", 1);
790  }
791
792  UMA_HISTOGRAM_COUNTS_100("Chrome.CommandLineFlagCount",
793                           command_line->GetSwitchCount());
794  UMA_HISTOGRAM_COUNTS_100("Chrome.CommandLineUncommonFlagCount",
795                           command_line->GetSwitchCount() - common_commands);
796
797  // Kick off the process of saving the state (so the uptime numbers keep
798  // getting updated) every n minutes.
799  ScheduleNextStateSave();
800}
801
802void MetricsService::OnInitTaskComplete(
803    const std::string& hardware_class,
804    const std::vector<webkit::npapi::WebPluginInfo>& plugins) {
805  DCHECK(state_ == INIT_TASK_SCHEDULED);
806  hardware_class_ = hardware_class;
807  plugins_ = plugins;
808  if (state_ == INIT_TASK_SCHEDULED)
809    state_ = INIT_TASK_DONE;
810}
811
812std::string MetricsService::GenerateClientID() {
813  return guid::GenerateGUID();
814}
815
816//------------------------------------------------------------------------------
817// State save methods
818
819void MetricsService::ScheduleNextStateSave() {
820  state_saver_factory_.RevokeAll();
821
822  MessageLoop::current()->PostDelayedTask(FROM_HERE,
823      state_saver_factory_.NewRunnableMethod(&MetricsService::SaveLocalState),
824      kSaveStateInterval * 1000);
825}
826
827void MetricsService::SaveLocalState() {
828  PrefService* pref = g_browser_process->local_state();
829  if (!pref) {
830    NOTREACHED();
831    return;
832  }
833
834  RecordCurrentState(pref);
835  pref->ScheduleSavePersistentPrefs();
836
837  // TODO(jar): Does this run down the batteries????
838  ScheduleNextStateSave();
839}
840
841
842//------------------------------------------------------------------------------
843// Recording control methods
844
845void MetricsService::StartRecording() {
846  if (current_log_)
847    return;
848
849  current_log_ = new MetricsLog(client_id_, session_id_);
850  if (state_ == INITIALIZED) {
851    // We only need to schedule that run once.
852    state_ = INIT_TASK_SCHEDULED;
853
854    // Schedules a task on the file thread for execution of slower
855    // initialization steps (such as plugin list generation) necessary
856    // for sending the initial log.  This avoids blocking the main UI
857    // thread.
858    g_browser_process->file_thread()->message_loop()->PostDelayedTask(FROM_HERE,
859        new InitTask(MessageLoop::current()),
860        kInitialInterlogDuration * 1000 / 2);
861  }
862}
863
864void MetricsService::StopRecording(MetricsLogBase** log) {
865  if (!current_log_)
866    return;
867
868  current_log_->set_hardware_class(hardware_class_);  // Adds to ongoing logs.
869
870  // TODO(jar): Integrate bounds on log recording more consistently, so that we
871  // can stop recording logs that are too big much sooner.
872  if (current_log_->num_events() > kEventLimit) {
873    UMA_HISTOGRAM_COUNTS("UMA.Discarded Log Events",
874                         current_log_->num_events());
875    current_log_->CloseLog();
876    delete current_log_;
877    current_log_ = NULL;
878    StartRecording();  // Start trivial log to hold our histograms.
879  }
880
881  // Put incremental data (histogram deltas, and realtime stats deltas) at the
882  // end of all log transmissions (initial log handles this separately).
883  // Don't bother if we're going to discard current_log_.
884  if (log) {
885    // RecordIncrementalStabilityElements only exists on the derived
886    // MetricsLog class.
887    MetricsLog* current_log = current_log_->AsMetricsLog();
888    DCHECK(current_log);
889    current_log->RecordIncrementalStabilityElements();
890    RecordCurrentHistograms();
891  }
892
893  current_log_->CloseLog();
894  if (log)
895    *log = current_log_;
896  else
897    delete current_log_;
898  current_log_ = NULL;
899}
900
901void MetricsService::PushPendingLogsToUnsentLists() {
902  if (state_ < INITIAL_LOG_READY)
903    return;  // We didn't and still don't have time to get plugin list etc.
904
905  if (pending_log()) {
906    PreparePendingLogText();
907    if (state_ == INITIAL_LOG_READY) {
908      // We may race here, and send second copy of initial log later.
909      unsent_initial_logs_.push_back(compressed_log_);
910      state_ = SEND_OLD_INITIAL_LOGS;
911    } else {
912      // TODO(jar): Verify correctness in other states, including sending unsent
913      // initial logs.
914      PushPendingLogTextToUnsentOngoingLogs();
915    }
916    DiscardPendingLog();
917  }
918  DCHECK(!pending_log());
919  StopRecording(&pending_log_);
920  PreparePendingLogText();
921  PushPendingLogTextToUnsentOngoingLogs();
922  DiscardPendingLog();
923  StoreUnsentLogs();
924}
925
926void MetricsService::PushPendingLogTextToUnsentOngoingLogs() {
927  // If UMA response told us not to upload, there's no need to save the pending
928  // log.  It wasn't supposed to be uploaded anyway.
929  if (!server_permits_upload_)
930    return;
931  if (compressed_log_.length() >
932      static_cast<size_t>(kUploadLogAvoidRetransmitSize)) {
933    UMA_HISTOGRAM_COUNTS("UMA.Large Accumulated Log Not Persisted",
934                         static_cast<int>(compressed_log_.length()));
935    return;
936  }
937  unsent_ongoing_logs_.push_back(compressed_log_);
938}
939
940//------------------------------------------------------------------------------
941// Transmission of logs methods
942
943void MetricsService::StartLogTransmissionTimer() {
944  // If we're not reporting, there's no point in starting a log transmission
945  // timer.
946  if (!reporting_active())
947    return;
948
949  if (!current_log_)
950    return;  // Recorder is shutdown.
951
952  // If there is already a timer running, we leave it running.
953  // If timer_pending is true because the fetch is waiting for a response,
954  // we return for now and let the response handler start the timer.
955  if (timer_pending_)
956    return;
957
958  // Before starting the timer, set timer_pending_ to true.
959  timer_pending_ = true;
960
961  // Right before the UMA transmission gets started, there's one more thing we'd
962  // like to record: the histogram of memory usage, so we spawn a task to
963  // collect the memory details and when that task is finished, it will call
964  // OnMemoryDetailCollectionDone, which will call HistogramSynchronization to
965  // collect histograms from all renderers and then we will call
966  // OnHistogramSynchronizationDone to continue processing.
967  MessageLoop::current()->PostDelayedTask(FROM_HERE,
968      log_sender_factory_.
969          NewRunnableMethod(&MetricsService::LogTransmissionTimerDone),
970      interlog_duration_.InMilliseconds());
971}
972
973void MetricsService::LogTransmissionTimerDone() {
974  Task* task = log_sender_factory_.
975      NewRunnableMethod(&MetricsService::OnMemoryDetailCollectionDone);
976
977  scoped_refptr<MetricsMemoryDetails> details(new MetricsMemoryDetails(task));
978  details->StartFetch();
979
980  // Collect WebCore cache information to put into a histogram.
981  for (RenderProcessHost::iterator i(RenderProcessHost::AllHostsIterator());
982       !i.IsAtEnd(); i.Advance())
983    i.GetCurrentValue()->Send(new ViewMsg_GetCacheResourceStats());
984}
985
986void MetricsService::OnMemoryDetailCollectionDone() {
987  DCHECK(IsSingleThreaded());
988
989  // HistogramSynchronizer will Collect histograms from all renderers and it
990  // will call OnHistogramSynchronizationDone (if wait time elapses before it
991  // heard from all renderers, then also it will call
992  // OnHistogramSynchronizationDone).
993
994  // Create a callback_task for OnHistogramSynchronizationDone.
995  Task* callback_task = log_sender_factory_.NewRunnableMethod(
996      &MetricsService::OnHistogramSynchronizationDone);
997
998  // Set up the callback to task to call after we receive histograms from all
999  // renderer processes. Wait time specifies how long to wait before absolutely
1000  // calling us back on the task.
1001  HistogramSynchronizer::FetchRendererHistogramsAsynchronously(
1002      MessageLoop::current(), callback_task,
1003      kMaxHistogramGatheringWaitDuration);
1004}
1005
1006void MetricsService::OnHistogramSynchronizationDone() {
1007  DCHECK(IsSingleThreaded());
1008
1009  // This function should only be called via timer, so timer_pending_
1010  // should be true.
1011  DCHECK(timer_pending_);
1012  timer_pending_ = false;
1013
1014  DCHECK(!current_fetch_.get());
1015
1016  // If we're getting no notifications, then the log won't have much in it, and
1017  // it's possible the computer is about to go to sleep, so don't upload and
1018  // don't restart the transmission timer.
1019  if (idle_since_last_transmission_)
1020    return;
1021
1022  // If somehow there is a fetch in progress, we return setting timer_pending_
1023  // to true and hope things work out.
1024  if (current_fetch_.get()) {
1025    timer_pending_ = true;
1026    return;
1027  }
1028
1029  // If uploads are forbidden by UMA response, there's no point in keeping
1030  // the current_log_, and the more often we delete it, the less likely it is
1031  // to expand forever.
1032  if (!server_permits_upload_ && current_log_) {
1033    StopRecording(NULL);
1034    StartRecording();
1035  }
1036
1037  if (!current_log_)
1038    return;  // Logging was disabled.
1039  if (!reporting_active())
1040    return;  // Don't do work if we're not going to send anything now.
1041
1042  MakePendingLog();
1043
1044  // MakePendingLog should have put something in the pending log, if it didn't,
1045  // we start the timer again, return and hope things work out.
1046  if (!pending_log()) {
1047    StartLogTransmissionTimer();
1048    return;
1049  }
1050
1051  // If we're not supposed to upload any UMA data because the response said so,
1052  // cancel the upload at this point, but start the timer.
1053  if (!ServerPermitsTransmission()) {
1054    DiscardPendingLog();
1055    StartLogTransmissionTimer();
1056    return;
1057  }
1058
1059  PrepareFetchWithPendingLog();
1060
1061  if (!current_fetch_.get()) {
1062    // Compression failed, and log discarded :-/.
1063    DiscardPendingLog();
1064    StartLogTransmissionTimer();  // Maybe we'll do better next time
1065    // TODO(jar): If compression failed, we should have created a tiny log and
1066    // compressed that, so that we can signal that we're losing logs.
1067    return;
1068  }
1069
1070  DCHECK(!timer_pending_);
1071
1072  // The URL fetch is a like timer in that after a while we get called back
1073  // so we set timer_pending_ true just as we start the url fetch.
1074  timer_pending_ = true;
1075  current_fetch_->Start();
1076
1077  HandleIdleSinceLastTransmission(true);
1078}
1079
1080
1081void MetricsService::MakePendingLog() {
1082  if (pending_log())
1083    return;
1084
1085  switch (state_) {
1086    case INITIALIZED:
1087    case INIT_TASK_SCHEDULED:  // We should be further along by now.
1088      DCHECK(false);
1089      return;
1090
1091    case INIT_TASK_DONE:
1092      // We need to wait for the initial log to be ready before sending
1093      // anything, because the server will tell us whether it wants to hear
1094      // from us.
1095      PrepareInitialLog();
1096      DCHECK(state_ == INIT_TASK_DONE);
1097      RecallUnsentLogs();
1098      state_ = INITIAL_LOG_READY;
1099      break;
1100
1101    case SEND_OLD_INITIAL_LOGS:
1102      if (!unsent_initial_logs_.empty()) {
1103        compressed_log_ = unsent_initial_logs_.back();
1104        break;
1105      }
1106      state_ = SENDING_OLD_LOGS;
1107      // Fall through.
1108
1109    case SENDING_OLD_LOGS:
1110      if (!unsent_ongoing_logs_.empty()) {
1111        compressed_log_ = unsent_ongoing_logs_.back();
1112        break;
1113      }
1114      state_ = SENDING_CURRENT_LOGS;
1115      // Fall through.
1116
1117    case SENDING_CURRENT_LOGS:
1118      StopRecording(&pending_log_);
1119      StartRecording();
1120      break;
1121
1122    default:
1123      NOTREACHED();
1124      return;
1125  }
1126
1127  DCHECK(pending_log());
1128}
1129
1130bool MetricsService::ServerPermitsTransmission() const {
1131  // If the server forbids uploading, we take that to mean it forbids current
1132  // logs, but we still send up the inital logs and any old logs.
1133  if (server_permits_upload_)
1134    return true;
1135
1136  switch (state_) {
1137    case INITIAL_LOG_READY:
1138    case SEND_OLD_INITIAL_LOGS:
1139    case SENDING_OLD_LOGS:
1140      return true;
1141
1142    case SENDING_CURRENT_LOGS:
1143    default:
1144      return false;
1145  }
1146}
1147
1148void MetricsService::PrepareInitialLog() {
1149  DCHECK(state_ == INIT_TASK_DONE);
1150
1151  MetricsLog* log = new MetricsLog(client_id_, session_id_);
1152  log->set_hardware_class(hardware_class_);  // Adds to initial log.
1153  log->RecordEnvironment(plugins_, profile_dictionary_.get());
1154
1155  // Histograms only get written to current_log_, so setup for the write.
1156  MetricsLogBase* save_log = current_log_;
1157  current_log_ = log;
1158  RecordCurrentHistograms();  // Into current_log_... which is really log.
1159  current_log_ = save_log;
1160
1161  log->CloseLog();
1162  DCHECK(!pending_log());
1163  pending_log_ = log;
1164}
1165
1166// static
1167MetricsService::LogRecallStatus MetricsService::RecallUnsentLogsHelper(
1168    const ListValue& list,
1169    std::vector<std::string>* local_list) {
1170  DCHECK(local_list->empty());
1171  if (list.GetSize() == 0)
1172    return MakeRecallStatusHistogram(LIST_EMPTY);
1173  if (list.GetSize() < 3)
1174    return MakeRecallStatusHistogram(LIST_SIZE_TOO_SMALL);
1175
1176  // The size is stored at the beginning of the list.
1177  int size;
1178  bool valid = (*list.begin())->GetAsInteger(&size);
1179  if (!valid)
1180    return MakeRecallStatusHistogram(LIST_SIZE_MISSING);
1181
1182  // Account for checksum and size included in the list.
1183  if (static_cast<unsigned int>(size) !=
1184      list.GetSize() - kChecksumEntryCount)
1185    return MakeRecallStatusHistogram(LIST_SIZE_CORRUPTION);
1186
1187  MD5Context ctx;
1188  MD5Init(&ctx);
1189  std::string encoded_log;
1190  std::string decoded_log;
1191  for (ListValue::const_iterator it = list.begin() + 1;
1192       it != list.end() - 1; ++it) {  // Last element is the checksum.
1193    valid = (*it)->GetAsString(&encoded_log);
1194    if (!valid) {
1195      local_list->clear();
1196      return MakeRecallStatusHistogram(LOG_STRING_CORRUPTION);
1197    }
1198
1199    MD5Update(&ctx, encoded_log.data(), encoded_log.length());
1200
1201    if (!base::Base64Decode(encoded_log, &decoded_log)) {
1202      local_list->clear();
1203      return MakeRecallStatusHistogram(DECODE_FAIL);
1204    }
1205    local_list->push_back(decoded_log);
1206  }
1207
1208  // Verify checksum.
1209  MD5Digest digest;
1210  MD5Final(&digest, &ctx);
1211  std::string recovered_md5;
1212  // We store the hash at the end of the list.
1213  valid = (*(list.end() - 1))->GetAsString(&recovered_md5);
1214  if (!valid) {
1215    local_list->clear();
1216    return MakeRecallStatusHistogram(CHECKSUM_STRING_CORRUPTION);
1217  }
1218  if (recovered_md5 != MD5DigestToBase16(digest)) {
1219    local_list->clear();
1220    return MakeRecallStatusHistogram(CHECKSUM_CORRUPTION);
1221  }
1222  return MakeRecallStatusHistogram(RECALL_SUCCESS);
1223}
1224void MetricsService::RecallUnsentLogs() {
1225  PrefService* local_state = g_browser_process->local_state();
1226  DCHECK(local_state);
1227
1228  const ListValue* unsent_initial_logs = local_state->GetList(
1229      prefs::kMetricsInitialLogs);
1230  RecallUnsentLogsHelper(*unsent_initial_logs, &unsent_initial_logs_);
1231
1232  const ListValue* unsent_ongoing_logs = local_state->GetList(
1233      prefs::kMetricsOngoingLogs);
1234  RecallUnsentLogsHelper(*unsent_ongoing_logs, &unsent_ongoing_logs_);
1235}
1236
1237// static
1238void MetricsService::StoreUnsentLogsHelper(
1239    const std::vector<std::string>& local_list,
1240    const size_t kMaxLocalListSize,
1241    ListValue* list) {
1242  list->Clear();
1243  size_t start = 0;
1244  if (local_list.size() > kMaxLocalListSize)
1245    start = local_list.size() - kMaxLocalListSize;
1246  DCHECK(start <= local_list.size());
1247  if (local_list.size() == start)
1248    return;
1249
1250  // Store size at the beginning of the list.
1251  list->Append(Value::CreateIntegerValue(local_list.size() - start));
1252
1253  MD5Context ctx;
1254  MD5Init(&ctx);
1255  std::string encoded_log;
1256  for (std::vector<std::string>::const_iterator it = local_list.begin() + start;
1257       it != local_list.end(); ++it) {
1258    // We encode the compressed log as Value::CreateStringValue() expects to
1259    // take a valid UTF8 string.
1260    if (!base::Base64Encode(*it, &encoded_log)) {
1261      MakeStoreStatusHistogram(ENCODE_FAIL);
1262      list->Clear();
1263      return;
1264    }
1265    MD5Update(&ctx, encoded_log.data(), encoded_log.length());
1266    list->Append(Value::CreateStringValue(encoded_log));
1267  }
1268
1269  // Append hash to the end of the list.
1270  MD5Digest digest;
1271  MD5Final(&digest, &ctx);
1272  list->Append(Value::CreateStringValue(MD5DigestToBase16(digest)));
1273  DCHECK(list->GetSize() >= 3);  // Minimum of 3 elements (size, data, hash).
1274  MakeStoreStatusHistogram(STORE_SUCCESS);
1275}
1276
1277void MetricsService::StoreUnsentLogs() {
1278  if (state_ < INITIAL_LOG_READY)
1279    return;  // We never Recalled the prior unsent logs.
1280
1281  PrefService* local_state = g_browser_process->local_state();
1282  DCHECK(local_state);
1283
1284  {
1285    ListPrefUpdate update(local_state, prefs::kMetricsInitialLogs);
1286    ListValue* unsent_initial_logs = update.Get();
1287    StoreUnsentLogsHelper(unsent_initial_logs_, kMaxInitialLogsPersisted,
1288                          unsent_initial_logs);
1289  }
1290
1291  {
1292    ListPrefUpdate update(local_state, prefs::kMetricsOngoingLogs);
1293    ListValue* unsent_ongoing_logs = update.Get();
1294    StoreUnsentLogsHelper(unsent_ongoing_logs_, kMaxOngoingLogsPersisted,
1295                          unsent_ongoing_logs);
1296  }
1297}
1298
1299void MetricsService::PreparePendingLogText() {
1300  DCHECK(pending_log());
1301  if (!compressed_log_.empty())
1302    return;
1303  int text_size = pending_log_->GetEncodedLogSize();
1304
1305  std::string pending_log_text;
1306  // Leave room for the NULL terminator.
1307  pending_log_->GetEncodedLog(WriteInto(&pending_log_text, text_size + 1),
1308                              text_size);
1309
1310  if (Bzip2Compress(pending_log_text, &compressed_log_)) {
1311    // Allow security conscious users to see all metrics logs that we send.
1312    VLOG(1) << "COMPRESSED FOLLOWING METRICS LOG: " << pending_log_text;
1313  } else {
1314    LOG(DFATAL) << "Failed to compress log for transmission.";
1315    // We can't discard the logs as other caller functions expect that
1316    // |compressed_log_| not be empty. We can detect this failure at the server
1317    // after we transmit.
1318    compressed_log_ = "Unable to compress!";
1319    MakeStoreStatusHistogram(COMPRESS_FAIL);
1320    return;
1321  }
1322}
1323
1324void MetricsService::PrepareFetchWithPendingLog() {
1325  DCHECK(pending_log());
1326  DCHECK(!current_fetch_.get());
1327  PreparePendingLogText();
1328  DCHECK(!compressed_log_.empty());
1329
1330  current_fetch_.reset(new URLFetcher(GURL(WideToUTF16(server_url_)),
1331                                      URLFetcher::POST,
1332                                      this));
1333  current_fetch_->set_request_context(Profile::GetDefaultRequestContext());
1334  current_fetch_->set_upload_data(kMetricsType, compressed_log_);
1335}
1336
1337static const char* StatusToString(const net::URLRequestStatus& status) {
1338  switch (status.status()) {
1339    case net::URLRequestStatus::SUCCESS:
1340      return "SUCCESS";
1341
1342    case net::URLRequestStatus::IO_PENDING:
1343      return "IO_PENDING";
1344
1345    case net::URLRequestStatus::HANDLED_EXTERNALLY:
1346      return "HANDLED_EXTERNALLY";
1347
1348    case net::URLRequestStatus::CANCELED:
1349      return "CANCELED";
1350
1351    case net::URLRequestStatus::FAILED:
1352      return "FAILED";
1353
1354    default:
1355      NOTREACHED();
1356      return "Unknown";
1357  }
1358}
1359
1360void MetricsService::OnURLFetchComplete(const URLFetcher* source,
1361                                        const GURL& url,
1362                                        const net::URLRequestStatus& status,
1363                                        int response_code,
1364                                        const ResponseCookies& cookies,
1365                                        const std::string& data) {
1366  DCHECK(timer_pending_);
1367  timer_pending_ = false;
1368  DCHECK(current_fetch_.get());
1369  current_fetch_.reset(NULL);  // We're not allowed to re-use it.
1370
1371  // Confirm send so that we can move on.
1372  VLOG(1) << "METRICS RESPONSE CODE: " << response_code
1373          << " status=" << StatusToString(status);
1374
1375  // Provide boolean for error recovery (allow us to ignore response_code).
1376  bool discard_log = false;
1377
1378  if (response_code != 200 &&
1379      (compressed_log_.length() >
1380          static_cast<size_t>(kUploadLogAvoidRetransmitSize))) {
1381    UMA_HISTOGRAM_COUNTS("UMA.Large Rejected Log was Discarded",
1382                         static_cast<int>(compressed_log_.length()));
1383    discard_log = true;
1384  } else if (response_code == 400) {
1385    // Bad syntax.  Retransmission won't work.
1386    UMA_HISTOGRAM_COUNTS("UMA.Unacceptable_Log_Discarded", state_);
1387    discard_log = true;
1388  }
1389
1390  if (response_code != 200 && !discard_log) {
1391    VLOG(1) << "METRICS: transmission attempt returned a failure code: "
1392            << response_code << ". Verify network connectivity";
1393    HandleBadResponseCode();
1394  } else {  // Successful receipt (or we are discarding log).
1395    VLOG(1) << "METRICS RESPONSE DATA: " << data;
1396    switch (state_) {
1397      case INITIAL_LOG_READY:
1398        state_ = SEND_OLD_INITIAL_LOGS;
1399        break;
1400
1401      case SEND_OLD_INITIAL_LOGS:
1402        DCHECK(!unsent_initial_logs_.empty());
1403        unsent_initial_logs_.pop_back();
1404        StoreUnsentLogs();
1405        break;
1406
1407      case SENDING_OLD_LOGS:
1408        DCHECK(!unsent_ongoing_logs_.empty());
1409        unsent_ongoing_logs_.pop_back();
1410        StoreUnsentLogs();
1411        break;
1412
1413      case SENDING_CURRENT_LOGS:
1414        break;
1415
1416      default:
1417        NOTREACHED();
1418        break;
1419    }
1420
1421    DiscardPendingLog();
1422    // Since we sent a log, make sure our in-memory state is recorded to disk.
1423    PrefService* local_state = g_browser_process->local_state();
1424    DCHECK(local_state);
1425    if (local_state)
1426      local_state->ScheduleSavePersistentPrefs();
1427
1428    // Override usual interlog delay if there are unsent logs to transmit,
1429    // otherwise reset back to default.
1430    if (unsent_logs()) {
1431      DCHECK(state_ < SENDING_CURRENT_LOGS);
1432      interlog_duration_ = TimeDelta::FromSeconds(kUnsentLogDelay);
1433    } else {
1434      interlog_duration_ = TimeDelta::FromSeconds(kMinSecondsPerLog);
1435    }
1436  }
1437
1438  StartLogTransmissionTimer();
1439}
1440
1441void MetricsService::HandleBadResponseCode() {
1442  VLOG(1) << "Verify your metrics logs are formatted correctly.  Verify server "
1443             "is active at " << server_url_;
1444  if (!pending_log()) {
1445    VLOG(1) << "METRICS: Recorder shutdown during log transmission.";
1446  } else {
1447    // Send progressively less frequently.
1448    DCHECK(kBackoff > 1.0);
1449    interlog_duration_ = TimeDelta::FromMicroseconds(
1450        static_cast<int64>(kBackoff * interlog_duration_.InMicroseconds()));
1451
1452    if (kMaxBackoff * TimeDelta::FromSeconds(kMinSecondsPerLog) <
1453        interlog_duration_) {
1454      interlog_duration_ = kMaxBackoff *
1455          TimeDelta::FromSeconds(kMinSecondsPerLog);
1456    }
1457
1458    VLOG(1) << "METRICS: transmission retry being scheduled in "
1459            << interlog_duration_.InSeconds() << " seconds for "
1460            << compressed_log_;
1461  }
1462}
1463
1464void MetricsService::LogWindowChange(NotificationType type,
1465                                     const NotificationSource& source,
1466                                     const NotificationDetails& details) {
1467  int controller_id = -1;
1468  uintptr_t window_or_tab = source.map_key();
1469  MetricsLog::WindowEventType window_type;
1470
1471  // Note: since we stop all logging when a single OTR session is active, it is
1472  // possible that we start getting notifications about a window that we don't
1473  // know about.
1474  if (window_map_.find(window_or_tab) == window_map_.end()) {
1475    controller_id = next_window_id_++;
1476    window_map_[window_or_tab] = controller_id;
1477  } else {
1478    controller_id = window_map_[window_or_tab];
1479  }
1480  DCHECK_NE(controller_id, -1);
1481
1482  switch (type.value) {
1483    case NotificationType::TAB_PARENTED:
1484    case NotificationType::BROWSER_OPENED:
1485      window_type = MetricsLog::WINDOW_CREATE;
1486      break;
1487
1488    case NotificationType::TAB_CLOSING:
1489    case NotificationType::BROWSER_CLOSED:
1490      window_map_.erase(window_map_.find(window_or_tab));
1491      window_type = MetricsLog::WINDOW_DESTROY;
1492      break;
1493
1494    default:
1495      NOTREACHED();
1496      return;
1497  }
1498
1499  // TODO(brettw) we should have some kind of ID for the parent.
1500  current_log_->RecordWindowEvent(window_type, controller_id, 0);
1501}
1502
1503void MetricsService::LogLoadComplete(NotificationType type,
1504                                     const NotificationSource& source,
1505                                     const NotificationDetails& details) {
1506  if (details == NotificationService::NoDetails())
1507    return;
1508
1509  // TODO(jar): There is a bug causing this to be called too many times, and
1510  // the log overflows.  For now, we won't record these events.
1511  UMA_HISTOGRAM_COUNTS("UMA.LogLoadComplete called", 1);
1512  return;
1513
1514  const Details<LoadNotificationDetails> load_details(details);
1515  int controller_id = window_map_[details.map_key()];
1516  current_log_->RecordLoadEvent(controller_id,
1517                                load_details->url(),
1518                                load_details->origin(),
1519                                load_details->session_index(),
1520                                load_details->load_time());
1521}
1522
1523void MetricsService::IncrementPrefValue(const char* path) {
1524  PrefService* pref = g_browser_process->local_state();
1525  DCHECK(pref);
1526  int value = pref->GetInteger(path);
1527  pref->SetInteger(path, value + 1);
1528}
1529
1530void MetricsService::IncrementLongPrefsValue(const char* path) {
1531  PrefService* pref = g_browser_process->local_state();
1532  DCHECK(pref);
1533  int64 value = pref->GetInt64(path);
1534  pref->SetInt64(path, value + 1);
1535}
1536
1537void MetricsService::LogLoadStarted() {
1538  IncrementPrefValue(prefs::kStabilityPageLoadCount);
1539  IncrementLongPrefsValue(prefs::kUninstallMetricsPageLoadCount);
1540  // We need to save the prefs, as page load count is a critical stat, and it
1541  // might be lost due to a crash :-(.
1542}
1543
1544void MetricsService::LogRendererCrash() {
1545  IncrementPrefValue(prefs::kStabilityRendererCrashCount);
1546}
1547
1548void MetricsService::LogExtensionRendererCrash() {
1549  IncrementPrefValue(prefs::kStabilityExtensionRendererCrashCount);
1550}
1551
1552void MetricsService::LogRendererHang() {
1553  IncrementPrefValue(prefs::kStabilityRendererHangCount);
1554}
1555
1556void MetricsService::LogCleanShutdown() {
1557  RecordBooleanPrefValue(prefs::kStabilityExitedCleanly, true);
1558}
1559
1560#if defined(OS_CHROMEOS)
1561void MetricsService::LogChromeOSCrash(const std::string &crash_type) {
1562  if (crash_type == "user")
1563    IncrementPrefValue(prefs::kStabilityOtherUserCrashCount);
1564  else if (crash_type == "kernel")
1565    IncrementPrefValue(prefs::kStabilityKernelCrashCount);
1566  else if (crash_type == "uncleanshutdown")
1567    IncrementPrefValue(prefs::kStabilitySystemUncleanShutdownCount);
1568  else
1569    NOTREACHED() << "Unexpected Chrome OS crash type " << crash_type;
1570  // Wake up metrics logs sending if necessary now that new
1571  // log data is available.
1572  HandleIdleSinceLastTransmission(false);
1573}
1574#endif  // OS_CHROMEOS
1575
1576void MetricsService::LogChildProcessChange(
1577    NotificationType type,
1578    const NotificationSource& source,
1579    const NotificationDetails& details) {
1580  Details<ChildProcessInfo> child_details(details);
1581  const std::wstring& child_name = child_details->name();
1582
1583  if (child_process_stats_buffer_.find(child_name) ==
1584      child_process_stats_buffer_.end()) {
1585    child_process_stats_buffer_[child_name] =
1586        ChildProcessStats(child_details->type());
1587  }
1588
1589  ChildProcessStats& stats = child_process_stats_buffer_[child_name];
1590  switch (type.value) {
1591    case NotificationType::CHILD_PROCESS_HOST_CONNECTED:
1592      stats.process_launches++;
1593      break;
1594
1595    case NotificationType::CHILD_INSTANCE_CREATED:
1596      stats.instances++;
1597      break;
1598
1599    case NotificationType::CHILD_PROCESS_CRASHED:
1600      stats.process_crashes++;
1601      // Exclude plugin crashes from the count below because we report them via
1602      // a separate UMA metric.
1603      if (child_details->type() != ChildProcessInfo::PLUGIN_PROCESS) {
1604        IncrementPrefValue(prefs::kStabilityChildProcessCrashCount);
1605      }
1606      break;
1607
1608    default:
1609      NOTREACHED() << "Unexpected notification type " << type.value;
1610      return;
1611  }
1612}
1613
1614// Recursively counts the number of bookmarks and folders in node.
1615static void CountBookmarks(const BookmarkNode* node,
1616                           int* bookmarks,
1617                           int* folders) {
1618  if (node->type() == BookmarkNode::URL)
1619    (*bookmarks)++;
1620  else
1621    (*folders)++;
1622  for (int i = 0; i < node->child_count(); ++i)
1623    CountBookmarks(node->GetChild(i), bookmarks, folders);
1624}
1625
1626void MetricsService::LogBookmarks(const BookmarkNode* node,
1627                                  const char* num_bookmarks_key,
1628                                  const char* num_folders_key) {
1629  DCHECK(node);
1630  int num_bookmarks = 0;
1631  int num_folders = 0;
1632  CountBookmarks(node, &num_bookmarks, &num_folders);
1633  num_folders--;  // Don't include the root folder in the count.
1634
1635  PrefService* pref = g_browser_process->local_state();
1636  DCHECK(pref);
1637  pref->SetInteger(num_bookmarks_key, num_bookmarks);
1638  pref->SetInteger(num_folders_key, num_folders);
1639}
1640
1641void MetricsService::LogBookmarks(BookmarkModel* model) {
1642  DCHECK(model);
1643  LogBookmarks(model->GetBookmarkBarNode(),
1644               prefs::kNumBookmarksOnBookmarkBar,
1645               prefs::kNumFoldersOnBookmarkBar);
1646  LogBookmarks(model->other_node(),
1647               prefs::kNumBookmarksInOtherBookmarkFolder,
1648               prefs::kNumFoldersInOtherBookmarkFolder);
1649  ScheduleNextStateSave();
1650}
1651
1652void MetricsService::LogKeywords(const TemplateURLModel* url_model) {
1653  DCHECK(url_model);
1654
1655  PrefService* pref = g_browser_process->local_state();
1656  DCHECK(pref);
1657  pref->SetInteger(prefs::kNumKeywords,
1658                   static_cast<int>(url_model->GetTemplateURLs().size()));
1659  ScheduleNextStateSave();
1660}
1661
1662void MetricsService::RecordPluginChanges(PrefService* pref) {
1663  ListPrefUpdate update(pref, prefs::kStabilityPluginStats);
1664  ListValue* plugins = update.Get();
1665  DCHECK(plugins);
1666
1667  for (ListValue::iterator value_iter = plugins->begin();
1668       value_iter != plugins->end(); ++value_iter) {
1669    if (!(*value_iter)->IsType(Value::TYPE_DICTIONARY)) {
1670      NOTREACHED();
1671      continue;
1672    }
1673
1674    DictionaryValue* plugin_dict = static_cast<DictionaryValue*>(*value_iter);
1675    std::string plugin_name;
1676    plugin_dict->GetString(prefs::kStabilityPluginName, &plugin_name);
1677    if (plugin_name.empty()) {
1678      NOTREACHED();
1679      continue;
1680    }
1681
1682    // TODO(viettrungluu): remove conversions
1683    if (child_process_stats_buffer_.find(UTF8ToWide(plugin_name)) ==
1684        child_process_stats_buffer_.end())
1685      continue;
1686
1687    ChildProcessStats stats =
1688        child_process_stats_buffer_[UTF8ToWide(plugin_name)];
1689    if (stats.process_launches) {
1690      int launches = 0;
1691      plugin_dict->GetInteger(prefs::kStabilityPluginLaunches, &launches);
1692      launches += stats.process_launches;
1693      plugin_dict->SetInteger(prefs::kStabilityPluginLaunches, launches);
1694    }
1695    if (stats.process_crashes) {
1696      int crashes = 0;
1697      plugin_dict->GetInteger(prefs::kStabilityPluginCrashes, &crashes);
1698      crashes += stats.process_crashes;
1699      plugin_dict->SetInteger(prefs::kStabilityPluginCrashes, crashes);
1700    }
1701    if (stats.instances) {
1702      int instances = 0;
1703      plugin_dict->GetInteger(prefs::kStabilityPluginInstances, &instances);
1704      instances += stats.instances;
1705      plugin_dict->SetInteger(prefs::kStabilityPluginInstances, instances);
1706    }
1707
1708    child_process_stats_buffer_.erase(UTF8ToWide(plugin_name));
1709  }
1710
1711  // Now go through and add dictionaries for plugins that didn't already have
1712  // reports in Local State.
1713  for (std::map<std::wstring, ChildProcessStats>::iterator cache_iter =
1714           child_process_stats_buffer_.begin();
1715       cache_iter != child_process_stats_buffer_.end(); ++cache_iter) {
1716    ChildProcessStats stats = cache_iter->second;
1717
1718    // Insert only plugins information into the plugins list.
1719    if (ChildProcessInfo::PLUGIN_PROCESS != stats.process_type)
1720      continue;
1721
1722    // TODO(viettrungluu): remove conversion
1723    std::string plugin_name = WideToUTF8(cache_iter->first);
1724
1725    DictionaryValue* plugin_dict = new DictionaryValue;
1726
1727    plugin_dict->SetString(prefs::kStabilityPluginName, plugin_name);
1728    plugin_dict->SetInteger(prefs::kStabilityPluginLaunches,
1729                            stats.process_launches);
1730    plugin_dict->SetInteger(prefs::kStabilityPluginCrashes,
1731                            stats.process_crashes);
1732    plugin_dict->SetInteger(prefs::kStabilityPluginInstances,
1733                            stats.instances);
1734    plugins->Append(plugin_dict);
1735  }
1736  child_process_stats_buffer_.clear();
1737}
1738
1739bool MetricsService::CanLogNotification(NotificationType type,
1740                                        const NotificationSource& source,
1741                                        const NotificationDetails& details) {
1742  // We simply don't log anything to UMA if there is a single incognito
1743  // session visible. The problem is that we always notify using the orginal
1744  // profile in order to simplify notification processing.
1745  return !BrowserList::IsOffTheRecordSessionActive();
1746}
1747
1748void MetricsService::RecordBooleanPrefValue(const char* path, bool value) {
1749  DCHECK(IsSingleThreaded());
1750
1751  PrefService* pref = g_browser_process->local_state();
1752  DCHECK(pref);
1753
1754  pref->SetBoolean(path, value);
1755  RecordCurrentState(pref);
1756}
1757
1758void MetricsService::RecordCurrentState(PrefService* pref) {
1759  pref->SetInt64(prefs::kStabilityLastTimestampSec, Time::Now().ToTimeT());
1760
1761  RecordPluginChanges(pref);
1762}
1763
1764static bool IsSingleThreaded() {
1765  static base::PlatformThreadId thread_id = 0;
1766  if (!thread_id)
1767    thread_id = base::PlatformThread::CurrentId();
1768  return base::PlatformThread::CurrentId() == thread_id;
1769}
1770
1771#if defined(OS_CHROMEOS)
1772void MetricsService::StartExternalMetrics() {
1773  external_metrics_ = new chromeos::ExternalMetrics;
1774  external_metrics_->Start();
1775}
1776#endif
1777