// metrics_collector.h revision 53ca76f2f31b90a9767a45f0cd076017db436cc0
1/* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#ifndef METRICS_METRICS_COLLECTOR_H_ 18#define METRICS_METRICS_COLLECTOR_H_ 19 20#include <stdint.h> 21 22#include <map> 23#include <memory> 24#include <string> 25#include <vector> 26 27#include <base/files/file_path.h> 28#include <base/memory/weak_ptr.h> 29#include <base/time/time.h> 30#include <brillo/binder_watcher.h> 31#include <brillo/daemons/dbus_daemon.h> 32#include <libweaved/command.h> 33#include <libweaved/service.h> 34#include <gtest/gtest_prod.h> // for FRIEND_TEST 35 36#include "collectors/averaged_statistics_collector.h" 37#include "collectors/cpu_usage_collector.h" 38#include "collectors/disk_usage_collector.h" 39#include "metrics/metrics_library.h" 40#include "persistent_integer.h" 41 42using chromeos_metrics::PersistentInteger; 43using std::unique_ptr; 44 45class MetricsCollector : public brillo::DBusDaemon { 46 public: 47 MetricsCollector(); 48 ~MetricsCollector(); 49 50 // Initializes metrics class variables. 51 void Init(bool testing, 52 MetricsLibraryInterface* metrics_lib, 53 const std::string& diskstats_path, 54 const base::FilePath& private_metrics_directory, 55 const base::FilePath& shared_metrics_directory); 56 57 // Initializes DBus and MessageLoop variables before running the MessageLoop. 58 int OnInit() override; 59 60 // Clean up data set up in OnInit before shutting down message loop. 
61 void OnShutdown(int* return_code) override; 62 63 // Does all the work. 64 int Run() override; 65 66 // Returns the active time since boot (uptime minus sleep time) in seconds. 67 static double GetActiveTime(); 68 69 // Updates the active use time and logs time between user-space 70 // process crashes. Called via MetricsCollectorServiceTrampoline. 71 void ProcessUserCrash(); 72 73 protected: 74 // Used also by the unit tests. 75 static const char kComprDataSizeName[]; 76 static const char kOrigDataSizeName[]; 77 static const char kZeroPagesName[]; 78 79 private: 80 friend class MetricsCollectorTest; 81 FRIEND_TEST(MetricsCollectorTest, CheckSystemCrash); 82 FRIEND_TEST(MetricsCollectorTest, ComputeEpochNoCurrent); 83 FRIEND_TEST(MetricsCollectorTest, ComputeEpochNoLast); 84 FRIEND_TEST(MetricsCollectorTest, GetHistogramPath); 85 FRIEND_TEST(MetricsCollectorTest, IsNewEpoch); 86 FRIEND_TEST(MetricsCollectorTest, MessageFilter); 87 FRIEND_TEST(MetricsCollectorTest, ProcessKernelCrash); 88 FRIEND_TEST(MetricsCollectorTest, ProcessMeminfo); 89 FRIEND_TEST(MetricsCollectorTest, ProcessMeminfo2); 90 FRIEND_TEST(MetricsCollectorTest, ProcessUncleanShutdown); 91 FRIEND_TEST(MetricsCollectorTest, ProcessUserCrash); 92 FRIEND_TEST(MetricsCollectorTest, ReportCrashesDailyFrequency); 93 FRIEND_TEST(MetricsCollectorTest, ReportKernelCrashInterval); 94 FRIEND_TEST(MetricsCollectorTest, ReportUncleanShutdownInterval); 95 FRIEND_TEST(MetricsCollectorTest, ReportUserCrashInterval); 96 FRIEND_TEST(MetricsCollectorTest, SendSample); 97 FRIEND_TEST(MetricsCollectorTest, SendZramMetrics); 98 99 // Type of scale to use for meminfo histograms. For most of them we use 100 // percent of total RAM, but for some we use absolute numbers, usually in 101 // megabytes, on a log scale from 0 to 4000, and 0 to 8000 for compressed 102 // swap (since it can be larger than total RAM). 
103 enum MeminfoOp { 104 kMeminfoOp_HistPercent = 0, 105 kMeminfoOp_HistLog, 106 kMeminfoOp_SwapTotal, 107 kMeminfoOp_SwapFree, 108 }; 109 110 // Record for retrieving and reporting values from /proc/meminfo. 111 struct MeminfoRecord { 112 const char* name; // print name 113 const char* match; // string to match in output of /proc/meminfo 114 MeminfoOp op; // histogram scale selector, or other operator 115 int value; // value from /proc/meminfo 116 }; 117 118 // Enables metrics reporting. 119 void OnEnableMetrics(std::unique_ptr<weaved::Command> command); 120 121 // Disables metrics reporting. 122 void OnDisableMetrics(std::unique_ptr<weaved::Command> command); 123 124 // Updates the weave device state. 125 void UpdateWeaveState(); 126 127 // Updates the active use time and logs time between kernel crashes. 128 void ProcessKernelCrash(); 129 130 // Updates the active use time and logs time between unclean shutdowns. 131 void ProcessUncleanShutdown(); 132 133 // Checks if a kernel crash has been detected and returns true if 134 // so. The method assumes that a kernel crash has happened if 135 // |crash_file| exists. It removes the file immediately if it 136 // exists, so it must not be called more than once. 137 bool CheckSystemCrash(const std::string& crash_file); 138 139 // Sends a regular (exponential) histogram sample to Chrome for 140 // transport to UMA. See MetricsLibrary::SendToUMA in 141 // metrics_library.h for a description of the arguments. 142 void SendSample(const std::string& name, int sample, 143 int min, int max, int nbuckets); 144 145 // Sends a linear histogram sample to Chrome for transport to UMA. See 146 // MetricsLibrary::SendToUMA in metrics_library.h for a description of the 147 // arguments. 148 void SendLinearSample(const std::string& name, int sample, 149 int max, int nbuckets); 150 151 // Sends various cumulative kernel crash-related stats, for instance the 152 // total number of kernel crashes since the last version update. 
153 void SendKernelCrashesCumulativeCountStats(); 154 155 // Sends a sample representing the number of seconds of active use 156 // for a 24-hour period and reset |use|. 157 void SendAndResetDailyUseSample(const unique_ptr<PersistentInteger>& use); 158 159 // Sends a sample representing a time interval between two crashes of the 160 // same type and reset |interval|. 161 void SendAndResetCrashIntervalSample( 162 const unique_ptr<PersistentInteger>& interval); 163 164 // Sends a sample representing a frequency of crashes of some type and reset 165 // |frequency|. 166 void SendAndResetCrashFrequencySample( 167 const unique_ptr<PersistentInteger>& frequency); 168 169 // Initializes vm and disk stats reporting. 170 void StatsReporterInit(); 171 172 // Schedules meminfo collection callback. 173 void ScheduleMeminfoCallback(int wait); 174 175 // Reports memory statistics. Reschedules callback on success. 176 void MeminfoCallback(base::TimeDelta wait); 177 178 // Parses content of /proc/meminfo and sends fields of interest to UMA. 179 // Returns false on errors. |meminfo_raw| contains the content of 180 // /proc/meminfo. 181 bool ProcessMeminfo(const std::string& meminfo_raw); 182 183 // Parses meminfo data from |meminfo_raw|. |fields| is a vector containing 184 // the fields of interest. The order of the fields must be the same in which 185 // /proc/meminfo prints them. The result of parsing fields[i] is placed in 186 // fields[i].value. 187 bool FillMeminfo(const std::string& meminfo_raw, 188 std::vector<MeminfoRecord>* fields); 189 190 // Schedule a memory use callback in |interval| seconds. 191 void ScheduleMemuseCallback(double interval); 192 193 // Calls MemuseCallbackWork, and possibly schedules next callback, if enough 194 // active time has passed. Otherwise reschedules itself to simulate active 195 // time callbacks (i.e. wall clock time minus sleep time). 196 void MemuseCallback(); 197 198 // Reads /proc/meminfo and sends total anonymous memory usage to UMA. 
199 bool MemuseCallbackWork(); 200 201 // Parses meminfo data and sends it to UMA. 202 bool ProcessMemuse(const std::string& meminfo_raw); 203 204 // Reads the current OS version from /etc/lsb-release and hashes it 205 // to a unsigned 32-bit int. 206 uint32_t GetOsVersionHash(); 207 208 // Updates stats, additionally sending them to UMA if enough time has elapsed 209 // since the last report. 210 void UpdateStats(base::TimeTicks now_ticks, base::Time now_wall_time); 211 212 // Invoked periodically by |update_stats_timeout_id_| to call UpdateStats(). 213 void HandleUpdateStatsTimeout(); 214 215 // Reports zram statistics. 216 bool ReportZram(const base::FilePath& zram_dir); 217 218 // Reads a string from a file and converts it to uint64_t. 219 static bool ReadFileToUint64(const base::FilePath& path, uint64_t* value); 220 221 // Callback invoked when a connection to weaved's service is established 222 // over Binder interface. 223 void OnWeaveServiceConnected(const std::weak_ptr<weaved::Service>& service); 224 225 // VARIABLES 226 227 // Test mode. 228 bool testing_; 229 230 // Publicly readable metrics directory. 231 base::FilePath shared_metrics_directory_; 232 233 // The metrics library handle. 234 MetricsLibraryInterface* metrics_lib_; 235 236 // The last time that UpdateStats() was called. 237 base::TimeTicks last_update_stats_time_; 238 239 // End time of current memuse stat collection interval. 240 double memuse_final_time_; 241 242 // Selects the wait time for the next memory use callback. 243 unsigned int memuse_interval_index_; 244 245 // Used internally by GetIncrementalCpuUse() to return the CPU utilization 246 // between calls. 247 base::TimeDelta latest_cpu_use_microseconds_; 248 249 // Persistent values and accumulators for crash statistics. 250 unique_ptr<PersistentInteger> daily_cycle_; 251 unique_ptr<PersistentInteger> weekly_cycle_; 252 unique_ptr<PersistentInteger> version_cycle_; 253 254 // Active use accumulated in a day. 
255 unique_ptr<PersistentInteger> daily_active_use_; 256 // Active use accumulated since the latest version update. 257 unique_ptr<PersistentInteger> version_cumulative_active_use_; 258 259 // The CPU time accumulator. This contains the CPU time, in milliseconds, 260 // used by the system since the most recent OS version update. 261 unique_ptr<PersistentInteger> version_cumulative_cpu_use_; 262 263 unique_ptr<PersistentInteger> user_crash_interval_; 264 unique_ptr<PersistentInteger> kernel_crash_interval_; 265 unique_ptr<PersistentInteger> unclean_shutdown_interval_; 266 267 unique_ptr<PersistentInteger> any_crashes_daily_count_; 268 unique_ptr<PersistentInteger> any_crashes_weekly_count_; 269 unique_ptr<PersistentInteger> user_crashes_daily_count_; 270 unique_ptr<PersistentInteger> user_crashes_weekly_count_; 271 unique_ptr<PersistentInteger> kernel_crashes_daily_count_; 272 unique_ptr<PersistentInteger> kernel_crashes_weekly_count_; 273 unique_ptr<PersistentInteger> kernel_crashes_version_count_; 274 unique_ptr<PersistentInteger> unclean_shutdowns_daily_count_; 275 unique_ptr<PersistentInteger> unclean_shutdowns_weekly_count_; 276 277 unique_ptr<CpuUsageCollector> cpu_usage_collector_; 278 unique_ptr<DiskUsageCollector> disk_usage_collector_; 279 unique_ptr<AveragedStatisticsCollector> averaged_stats_collector_; 280 281 unique_ptr<weaved::Service::Subscription> weave_service_subscription_; 282 std::weak_ptr<weaved::Service> service_; 283 284 base::WeakPtrFactory<MetricsCollector> weak_ptr_factory_{this}; 285}; 286 287#endif // METRICS_METRICS_COLLECTOR_H_ 288