1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef METRICS_METRICS_COLLECTOR_H_
18#define METRICS_METRICS_COLLECTOR_H_
19
20#include <stdint.h>
21
22#include <map>
23#include <memory>
24#include <string>
25#include <vector>
26
27#include <base/files/file_path.h>
28#include <base/memory/weak_ptr.h>
29#include <base/time/time.h>
30#include <brillo/binder_watcher.h>
31#include <brillo/daemons/daemon.h>
32#include <libweaved/command.h>
33#include <libweaved/service.h>
34#include <gtest/gtest_prod.h>  // for FRIEND_TEST
35
36#include "collectors/averaged_statistics_collector.h"
37#include "collectors/cpu_usage_collector.h"
38#include "collectors/disk_usage_collector.h"
39#include "metrics/metrics_library.h"
40#include "persistent_integer.h"
41
42using chromeos_metrics::PersistentInteger;
43using std::unique_ptr;
44
45class MetricsCollector : public brillo::Daemon {
46 public:
47  MetricsCollector();
48  ~MetricsCollector();
49
50  // Initializes metrics class variables.
51  void Init(bool testing,
52            MetricsLibraryInterface* metrics_lib,
53            const std::string& diskstats_path,
54            const base::FilePath& private_metrics_directory,
55            const base::FilePath& shared_metrics_directory);
56
57  // Initializes the daemon.
58  int OnInit() override;
59
60  // Does all the work.
61  int Run() override;
62
63  // Returns the active time since boot (uptime minus sleep time) in seconds.
64  static double GetActiveTime();
65
66  // Updates the active use time and logs time between user-space
67  // process crashes.  Called via MetricsCollectorServiceTrampoline.
68  void ProcessUserCrash();
69
70 protected:
71  // Used also by the unit tests.
72  static const char kComprDataSizeName[];
73  static const char kOrigDataSizeName[];
74  static const char kZeroPagesName[];
75
76 private:
77  friend class MetricsCollectorTest;
78  FRIEND_TEST(MetricsCollectorTest, CheckSystemCrash);
79  FRIEND_TEST(MetricsCollectorTest, ComputeEpochNoCurrent);
80  FRIEND_TEST(MetricsCollectorTest, ComputeEpochNoLast);
81  FRIEND_TEST(MetricsCollectorTest, GetHistogramPath);
82  FRIEND_TEST(MetricsCollectorTest, IsNewEpoch);
83  FRIEND_TEST(MetricsCollectorTest, MessageFilter);
84  FRIEND_TEST(MetricsCollectorTest, ProcessKernelCrash);
85  FRIEND_TEST(MetricsCollectorTest, ProcessMeminfo);
86  FRIEND_TEST(MetricsCollectorTest, ProcessMeminfo2);
87  FRIEND_TEST(MetricsCollectorTest, ProcessUncleanShutdown);
88  FRIEND_TEST(MetricsCollectorTest, ProcessUserCrash);
89  FRIEND_TEST(MetricsCollectorTest, ReportCrashesDailyFrequency);
90  FRIEND_TEST(MetricsCollectorTest, ReportKernelCrashInterval);
91  FRIEND_TEST(MetricsCollectorTest, ReportUncleanShutdownInterval);
92  FRIEND_TEST(MetricsCollectorTest, ReportUserCrashInterval);
93  FRIEND_TEST(MetricsCollectorTest, SendSample);
94  FRIEND_TEST(MetricsCollectorTest, SendZramMetrics);
95
96  // Type of scale to use for meminfo histograms.  For most of them we use
97  // percent of total RAM, but for some we use absolute numbers, usually in
98  // megabytes, on a log scale from 0 to 4000, and 0 to 8000 for compressed
99  // swap (since it can be larger than total RAM).
100  enum MeminfoOp {
101    kMeminfoOp_HistPercent = 0,
102    kMeminfoOp_HistLog,
103    kMeminfoOp_SwapTotal,
104    kMeminfoOp_SwapFree,
105  };
106
107  // Record for retrieving and reporting values from /proc/meminfo.
108  struct MeminfoRecord {
109    const char* name;        // print name
110    const char* match;       // string to match in output of /proc/meminfo
111    MeminfoOp op;            // histogram scale selector, or other operator
112    int value;               // value from /proc/meminfo
113  };
114
115  // Enables metrics reporting.
116  void OnEnableMetrics(std::unique_ptr<weaved::Command> command);
117
118  // Disables metrics reporting.
119  void OnDisableMetrics(std::unique_ptr<weaved::Command> command);
120
121  // Updates the weave device state.
122  void UpdateWeaveState();
123
124  // Updates the active use time and logs time between kernel crashes.
125  void ProcessKernelCrash();
126
127  // Updates the active use time and logs time between unclean shutdowns.
128  void ProcessUncleanShutdown();
129
130  // Checks if a kernel crash has been detected and returns true if
131  // so.  The method assumes that a kernel crash has happened if
132  // |crash_file| exists.  It removes the file immediately if it
133  // exists, so it must not be called more than once.
134  bool CheckSystemCrash(const std::string& crash_file);
135
136  // Sends a regular (exponential) histogram sample to Chrome for
137  // transport to UMA. See MetricsLibrary::SendToUMA in
138  // metrics_library.h for a description of the arguments.
139  void SendSample(const std::string& name, int sample,
140                  int min, int max, int nbuckets);
141
142  // Sends a linear histogram sample to Chrome for transport to UMA. See
143  // MetricsLibrary::SendToUMA in metrics_library.h for a description of the
144  // arguments.
145  void SendLinearSample(const std::string& name, int sample,
146                        int max, int nbuckets);
147
148  // Sends various cumulative kernel crash-related stats, for instance the
149  // total number of kernel crashes since the last version update.
150  void SendKernelCrashesCumulativeCountStats();
151
152  // Sends a sample representing the number of seconds of active use
153  // for a 24-hour period and reset |use|.
154  void SendAndResetDailyUseSample(const unique_ptr<PersistentInteger>& use);
155
156  // Sends a sample representing a time interval between two crashes of the
157  // same type and reset |interval|.
158  void SendAndResetCrashIntervalSample(
159      const unique_ptr<PersistentInteger>& interval);
160
161  // Sends a sample representing a frequency of crashes of some type and reset
162  // |frequency|.
163  void SendAndResetCrashFrequencySample(
164      const unique_ptr<PersistentInteger>& frequency);
165
166  // Initializes vm and disk stats reporting.
167  void StatsReporterInit();
168
169  // Schedules meminfo collection callback.
170  void ScheduleMeminfoCallback(int wait);
171
172  // Reports memory statistics.  Reschedules callback on success.
173  void MeminfoCallback(base::TimeDelta wait);
174
175  // Parses content of /proc/meminfo and sends fields of interest to UMA.
176  // Returns false on errors.  |meminfo_raw| contains the content of
177  // /proc/meminfo.
178  bool ProcessMeminfo(const std::string& meminfo_raw);
179
180  // Parses meminfo data from |meminfo_raw|.  |fields| is a vector containing
181  // the fields of interest.  The order of the fields must be the same in which
182  // /proc/meminfo prints them.  The result of parsing fields[i] is placed in
183  // fields[i].value.
184  bool FillMeminfo(const std::string& meminfo_raw,
185                   std::vector<MeminfoRecord>* fields);
186
187  // Schedule a memory use callback in |interval| seconds.
188  void ScheduleMemuseCallback(double interval);
189
190  // Calls MemuseCallbackWork, and possibly schedules next callback, if enough
191  // active time has passed.  Otherwise reschedules itself to simulate active
192  // time callbacks (i.e. wall clock time minus sleep time).
193  void MemuseCallback();
194
195  // Reads /proc/meminfo and sends total anonymous memory usage to UMA.
196  bool MemuseCallbackWork();
197
198  // Parses meminfo data and sends it to UMA.
199  bool ProcessMemuse(const std::string& meminfo_raw);
200
201  // Reads the current OS version from /etc/lsb-release and hashes it
202  // to a unsigned 32-bit int.
203  uint32_t GetOsVersionHash();
204
205  // Updates stats, additionally sending them to UMA if enough time has elapsed
206  // since the last report.
207  void UpdateStats(base::TimeTicks now_ticks, base::Time now_wall_time);
208
209  // Invoked periodically by |update_stats_timeout_id_| to call UpdateStats().
210  void HandleUpdateStatsTimeout();
211
212  // Reports zram statistics.
213  bool ReportZram(const base::FilePath& zram_dir);
214
215  // Reads a string from a file and converts it to uint64_t.
216  static bool ReadFileToUint64(const base::FilePath& path, uint64_t* value);
217
218  // Callback invoked when a connection to weaved's service is established
219  // over Binder interface.
220  void OnWeaveServiceConnected(const std::weak_ptr<weaved::Service>& service);
221
222  // VARIABLES
223
224  // Test mode.
225  bool testing_;
226
227  // Publicly readable metrics directory.
228  base::FilePath shared_metrics_directory_;
229
230  // The metrics library handle.
231  MetricsLibraryInterface* metrics_lib_;
232
233  // The last time that UpdateStats() was called.
234  base::TimeTicks last_update_stats_time_;
235
236  // End time of current memuse stat collection interval.
237  double memuse_final_time_;
238
239  // Selects the wait time for the next memory use callback.
240  unsigned int memuse_interval_index_;
241
242  // Used internally by GetIncrementalCpuUse() to return the CPU utilization
243  // between calls.
244  base::TimeDelta latest_cpu_use_microseconds_;
245
246  // Persistent values and accumulators for crash statistics.
247  unique_ptr<PersistentInteger> daily_cycle_;
248  unique_ptr<PersistentInteger> weekly_cycle_;
249  unique_ptr<PersistentInteger> version_cycle_;
250
251  // Active use accumulated in a day.
252  unique_ptr<PersistentInteger> daily_active_use_;
253  // Active use accumulated since the latest version update.
254  unique_ptr<PersistentInteger> version_cumulative_active_use_;
255
256  // The CPU time accumulator.  This contains the CPU time, in milliseconds,
257  // used by the system since the most recent OS version update.
258  unique_ptr<PersistentInteger> version_cumulative_cpu_use_;
259
260  unique_ptr<PersistentInteger> user_crash_interval_;
261  unique_ptr<PersistentInteger> kernel_crash_interval_;
262  unique_ptr<PersistentInteger> unclean_shutdown_interval_;
263
264  unique_ptr<PersistentInteger> any_crashes_daily_count_;
265  unique_ptr<PersistentInteger> any_crashes_weekly_count_;
266  unique_ptr<PersistentInteger> user_crashes_daily_count_;
267  unique_ptr<PersistentInteger> user_crashes_weekly_count_;
268  unique_ptr<PersistentInteger> kernel_crashes_daily_count_;
269  unique_ptr<PersistentInteger> kernel_crashes_weekly_count_;
270  unique_ptr<PersistentInteger> kernel_crashes_version_count_;
271  unique_ptr<PersistentInteger> unclean_shutdowns_daily_count_;
272  unique_ptr<PersistentInteger> unclean_shutdowns_weekly_count_;
273
274  unique_ptr<CpuUsageCollector> cpu_usage_collector_;
275  unique_ptr<DiskUsageCollector> disk_usage_collector_;
276  unique_ptr<AveragedStatisticsCollector> averaged_stats_collector_;
277
278  unique_ptr<weaved::Service::Subscription> weave_service_subscription_;
279  std::weak_ptr<weaved::Service> service_;
280
281  base::WeakPtrFactory<MetricsCollector> weak_ptr_factory_{this};
282};
283
284#endif  // METRICS_METRICS_COLLECTOR_H_
285