metrics_collector.h revision 53ca76f2f31b90a9767a45f0cd076017db436cc0
/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#ifndef METRICS_METRICS_COLLECTOR_H_
18#define METRICS_METRICS_COLLECTOR_H_
19
20#include <stdint.h>
21
22#include <map>
23#include <memory>
24#include <string>
25#include <vector>
26
27#include <base/files/file_path.h>
28#include <base/memory/weak_ptr.h>
29#include <base/time/time.h>
30#include <brillo/binder_watcher.h>
31#include <brillo/daemons/dbus_daemon.h>
32#include <libweaved/command.h>
33#include <libweaved/service.h>
34#include <gtest/gtest_prod.h>  // for FRIEND_TEST
35
36#include "collectors/averaged_statistics_collector.h"
37#include "collectors/cpu_usage_collector.h"
38#include "collectors/disk_usage_collector.h"
39#include "metrics/metrics_library.h"
40#include "persistent_integer.h"
41
42using chromeos_metrics::PersistentInteger;
43using std::unique_ptr;
44
45class MetricsCollector : public brillo::DBusDaemon {
46 public:
47  MetricsCollector();
48  ~MetricsCollector();
49
50  // Initializes metrics class variables.
51  void Init(bool testing,
52            MetricsLibraryInterface* metrics_lib,
53            const std::string& diskstats_path,
54            const base::FilePath& private_metrics_directory,
55            const base::FilePath& shared_metrics_directory);
56
57  // Initializes DBus and MessageLoop variables before running the MessageLoop.
58  int OnInit() override;
59
60  // Clean up data set up in OnInit before shutting down message loop.
61  void OnShutdown(int* return_code) override;
62
63  // Does all the work.
64  int Run() override;
65
66  // Returns the active time since boot (uptime minus sleep time) in seconds.
67  static double GetActiveTime();
68
69  // Updates the active use time and logs time between user-space
70  // process crashes.  Called via MetricsCollectorServiceTrampoline.
71  void ProcessUserCrash();
72
73 protected:
74  // Used also by the unit tests.
75  static const char kComprDataSizeName[];
76  static const char kOrigDataSizeName[];
77  static const char kZeroPagesName[];
78
79 private:
80  friend class MetricsCollectorTest;
81  FRIEND_TEST(MetricsCollectorTest, CheckSystemCrash);
82  FRIEND_TEST(MetricsCollectorTest, ComputeEpochNoCurrent);
83  FRIEND_TEST(MetricsCollectorTest, ComputeEpochNoLast);
84  FRIEND_TEST(MetricsCollectorTest, GetHistogramPath);
85  FRIEND_TEST(MetricsCollectorTest, IsNewEpoch);
86  FRIEND_TEST(MetricsCollectorTest, MessageFilter);
87  FRIEND_TEST(MetricsCollectorTest, ProcessKernelCrash);
88  FRIEND_TEST(MetricsCollectorTest, ProcessMeminfo);
89  FRIEND_TEST(MetricsCollectorTest, ProcessMeminfo2);
90  FRIEND_TEST(MetricsCollectorTest, ProcessUncleanShutdown);
91  FRIEND_TEST(MetricsCollectorTest, ProcessUserCrash);
92  FRIEND_TEST(MetricsCollectorTest, ReportCrashesDailyFrequency);
93  FRIEND_TEST(MetricsCollectorTest, ReportKernelCrashInterval);
94  FRIEND_TEST(MetricsCollectorTest, ReportUncleanShutdownInterval);
95  FRIEND_TEST(MetricsCollectorTest, ReportUserCrashInterval);
96  FRIEND_TEST(MetricsCollectorTest, SendSample);
97  FRIEND_TEST(MetricsCollectorTest, SendZramMetrics);
98
99  // Type of scale to use for meminfo histograms.  For most of them we use
100  // percent of total RAM, but for some we use absolute numbers, usually in
101  // megabytes, on a log scale from 0 to 4000, and 0 to 8000 for compressed
102  // swap (since it can be larger than total RAM).
103  enum MeminfoOp {
104    kMeminfoOp_HistPercent = 0,
105    kMeminfoOp_HistLog,
106    kMeminfoOp_SwapTotal,
107    kMeminfoOp_SwapFree,
108  };
109
110  // Record for retrieving and reporting values from /proc/meminfo.
111  struct MeminfoRecord {
112    const char* name;        // print name
113    const char* match;       // string to match in output of /proc/meminfo
114    MeminfoOp op;            // histogram scale selector, or other operator
115    int value;               // value from /proc/meminfo
116  };
117
118  // Enables metrics reporting.
119  void OnEnableMetrics(std::unique_ptr<weaved::Command> command);
120
121  // Disables metrics reporting.
122  void OnDisableMetrics(std::unique_ptr<weaved::Command> command);
123
124  // Updates the weave device state.
125  void UpdateWeaveState();
126
127  // Updates the active use time and logs time between kernel crashes.
128  void ProcessKernelCrash();
129
130  // Updates the active use time and logs time between unclean shutdowns.
131  void ProcessUncleanShutdown();
132
133  // Checks if a kernel crash has been detected and returns true if
134  // so.  The method assumes that a kernel crash has happened if
135  // |crash_file| exists.  It removes the file immediately if it
136  // exists, so it must not be called more than once.
137  bool CheckSystemCrash(const std::string& crash_file);
138
139  // Sends a regular (exponential) histogram sample to Chrome for
140  // transport to UMA. See MetricsLibrary::SendToUMA in
141  // metrics_library.h for a description of the arguments.
142  void SendSample(const std::string& name, int sample,
143                  int min, int max, int nbuckets);
144
145  // Sends a linear histogram sample to Chrome for transport to UMA. See
146  // MetricsLibrary::SendToUMA in metrics_library.h for a description of the
147  // arguments.
148  void SendLinearSample(const std::string& name, int sample,
149                        int max, int nbuckets);
150
151  // Sends various cumulative kernel crash-related stats, for instance the
152  // total number of kernel crashes since the last version update.
153  void SendKernelCrashesCumulativeCountStats();
154
155  // Sends a sample representing the number of seconds of active use
156  // for a 24-hour period and reset |use|.
157  void SendAndResetDailyUseSample(const unique_ptr<PersistentInteger>& use);
158
159  // Sends a sample representing a time interval between two crashes of the
160  // same type and reset |interval|.
161  void SendAndResetCrashIntervalSample(
162      const unique_ptr<PersistentInteger>& interval);
163
164  // Sends a sample representing a frequency of crashes of some type and reset
165  // |frequency|.
166  void SendAndResetCrashFrequencySample(
167      const unique_ptr<PersistentInteger>& frequency);
168
169  // Initializes vm and disk stats reporting.
170  void StatsReporterInit();
171
172  // Schedules meminfo collection callback.
173  void ScheduleMeminfoCallback(int wait);
174
175  // Reports memory statistics.  Reschedules callback on success.
176  void MeminfoCallback(base::TimeDelta wait);
177
178  // Parses content of /proc/meminfo and sends fields of interest to UMA.
179  // Returns false on errors.  |meminfo_raw| contains the content of
180  // /proc/meminfo.
181  bool ProcessMeminfo(const std::string& meminfo_raw);
182
183  // Parses meminfo data from |meminfo_raw|.  |fields| is a vector containing
184  // the fields of interest.  The order of the fields must be the same in which
185  // /proc/meminfo prints them.  The result of parsing fields[i] is placed in
186  // fields[i].value.
187  bool FillMeminfo(const std::string& meminfo_raw,
188                   std::vector<MeminfoRecord>* fields);
189
190  // Schedule a memory use callback in |interval| seconds.
191  void ScheduleMemuseCallback(double interval);
192
193  // Calls MemuseCallbackWork, and possibly schedules next callback, if enough
194  // active time has passed.  Otherwise reschedules itself to simulate active
195  // time callbacks (i.e. wall clock time minus sleep time).
196  void MemuseCallback();
197
198  // Reads /proc/meminfo and sends total anonymous memory usage to UMA.
199  bool MemuseCallbackWork();
200
201  // Parses meminfo data and sends it to UMA.
202  bool ProcessMemuse(const std::string& meminfo_raw);
203
204  // Reads the current OS version from /etc/lsb-release and hashes it
205  // to a unsigned 32-bit int.
206  uint32_t GetOsVersionHash();
207
208  // Updates stats, additionally sending them to UMA if enough time has elapsed
209  // since the last report.
210  void UpdateStats(base::TimeTicks now_ticks, base::Time now_wall_time);
211
212  // Invoked periodically by |update_stats_timeout_id_| to call UpdateStats().
213  void HandleUpdateStatsTimeout();
214
215  // Reports zram statistics.
216  bool ReportZram(const base::FilePath& zram_dir);
217
218  // Reads a string from a file and converts it to uint64_t.
219  static bool ReadFileToUint64(const base::FilePath& path, uint64_t* value);
220
221  // Callback invoked when a connection to weaved's service is established
222  // over Binder interface.
223  void OnWeaveServiceConnected(const std::weak_ptr<weaved::Service>& service);
224
225  // VARIABLES
226
227  // Test mode.
228  bool testing_;
229
230  // Publicly readable metrics directory.
231  base::FilePath shared_metrics_directory_;
232
233  // The metrics library handle.
234  MetricsLibraryInterface* metrics_lib_;
235
236  // The last time that UpdateStats() was called.
237  base::TimeTicks last_update_stats_time_;
238
239  // End time of current memuse stat collection interval.
240  double memuse_final_time_;
241
242  // Selects the wait time for the next memory use callback.
243  unsigned int memuse_interval_index_;
244
245  // Used internally by GetIncrementalCpuUse() to return the CPU utilization
246  // between calls.
247  base::TimeDelta latest_cpu_use_microseconds_;
248
249  // Persistent values and accumulators for crash statistics.
250  unique_ptr<PersistentInteger> daily_cycle_;
251  unique_ptr<PersistentInteger> weekly_cycle_;
252  unique_ptr<PersistentInteger> version_cycle_;
253
254  // Active use accumulated in a day.
255  unique_ptr<PersistentInteger> daily_active_use_;
256  // Active use accumulated since the latest version update.
257  unique_ptr<PersistentInteger> version_cumulative_active_use_;
258
259  // The CPU time accumulator.  This contains the CPU time, in milliseconds,
260  // used by the system since the most recent OS version update.
261  unique_ptr<PersistentInteger> version_cumulative_cpu_use_;
262
263  unique_ptr<PersistentInteger> user_crash_interval_;
264  unique_ptr<PersistentInteger> kernel_crash_interval_;
265  unique_ptr<PersistentInteger> unclean_shutdown_interval_;
266
267  unique_ptr<PersistentInteger> any_crashes_daily_count_;
268  unique_ptr<PersistentInteger> any_crashes_weekly_count_;
269  unique_ptr<PersistentInteger> user_crashes_daily_count_;
270  unique_ptr<PersistentInteger> user_crashes_weekly_count_;
271  unique_ptr<PersistentInteger> kernel_crashes_daily_count_;
272  unique_ptr<PersistentInteger> kernel_crashes_weekly_count_;
273  unique_ptr<PersistentInteger> kernel_crashes_version_count_;
274  unique_ptr<PersistentInteger> unclean_shutdowns_daily_count_;
275  unique_ptr<PersistentInteger> unclean_shutdowns_weekly_count_;
276
277  unique_ptr<CpuUsageCollector> cpu_usage_collector_;
278  unique_ptr<DiskUsageCollector> disk_usage_collector_;
279  unique_ptr<AveragedStatisticsCollector> averaged_stats_collector_;
280
281  unique_ptr<weaved::Service::Subscription> weave_service_subscription_;
282  std::weak_ptr<weaved::Service> service_;
283
284  base::WeakPtrFactory<MetricsCollector> weak_ptr_factory_{this};
285};
286
287#endif  // METRICS_METRICS_COLLECTOR_H_
288