StatsLogProcessor.cpp revision 9e6dbbdadf8de3bcc58a6c26784219217cd35b53
1/* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#define DEBUG false // STOPSHIP if true 18#include "Log.h" 19#include "statslog.h" 20 21#include <android-base/file.h> 22#include <dirent.h> 23#include "StatsLogProcessor.h" 24#include "stats_log_util.h" 25#include "android-base/stringprintf.h" 26#include "guardrail/StatsdStats.h" 27#include "metrics/CountMetricProducer.h" 28#include "external/StatsPullerManager.h" 29#include "stats_util.h" 30#include "storage/StorageManager.h" 31 32#include <log/log_event_list.h> 33#include <utils/Errors.h> 34#include <utils/SystemClock.h> 35 36using namespace android; 37using android::base::StringPrintf; 38using android::util::FIELD_COUNT_REPEATED; 39using android::util::FIELD_TYPE_BOOL; 40using android::util::FIELD_TYPE_FLOAT; 41using android::util::FIELD_TYPE_INT32; 42using android::util::FIELD_TYPE_INT64; 43using android::util::FIELD_TYPE_MESSAGE; 44using android::util::FIELD_TYPE_STRING; 45using android::util::ProtoOutputStream; 46using std::make_unique; 47using std::unique_ptr; 48using std::vector; 49 50namespace android { 51namespace os { 52namespace statsd { 53 54// for ConfigMetricsReportList 55const int FIELD_ID_CONFIG_KEY = 1; 56const int FIELD_ID_REPORTS = 2; 57// for ConfigKey 58const int FIELD_ID_UID = 1; 59const int FIELD_ID_ID = 2; 60// for ConfigMetricsReport 61// const int FIELD_ID_METRICS = 1; // written in MetricsManager.cpp 62const int FIELD_ID_UID_MAP = 2; 63const int FIELD_ID_LAST_REPORT_ELAPSED_NANOS = 3; 64const int FIELD_ID_CURRENT_REPORT_ELAPSED_NANOS = 4; 65const int FIELD_ID_LAST_REPORT_WALL_CLOCK_NANOS = 5; 66const int FIELD_ID_CURRENT_REPORT_WALL_CLOCK_NANOS = 6; 67const int FIELD_ID_DUMP_REPORT_REASON = 8; 68const int FIELD_ID_STRINGS = 9; 69 70#define NS_PER_HOUR 3600 * NS_PER_SEC 71 72#define STATS_DATA_DIR "/data/misc/stats-data" 73 74StatsLogProcessor::StatsLogProcessor(const sp<UidMap>& uidMap, 75 const sp<AlarmMonitor>& anomalyAlarmMonitor, 76 const sp<AlarmMonitor>& periodicAlarmMonitor, 77 const int64_t timeBaseNs, 78 const std::function<bool(const ConfigKey&)>& sendBroadcast) 79 : mUidMap(uidMap), 80 mAnomalyAlarmMonitor(anomalyAlarmMonitor), 81 mPeriodicAlarmMonitor(periodicAlarmMonitor), 82 mSendBroadcast(sendBroadcast), 83 mTimeBaseNs(timeBaseNs), 84 mLargestTimestampSeen(0), 85 mLastTimestampSeen(0) { 86} 87 88StatsLogProcessor::~StatsLogProcessor() { 89} 90 91void StatsLogProcessor::onAnomalyAlarmFired( 92 const int64_t& timestampNs, 93 unordered_set<sp<const InternalAlarm>, SpHash<InternalAlarm>> alarmSet) { 94 std::lock_guard<std::mutex> lock(mMetricsMutex); 95 for (const auto& itr : mMetricsManagers) { 96 itr.second->onAnomalyAlarmFired(timestampNs, alarmSet); 97 } 98} 99void StatsLogProcessor::onPeriodicAlarmFired( 100 const int64_t& timestampNs, 101 unordered_set<sp<const InternalAlarm>, SpHash<InternalAlarm>> alarmSet) { 102 103 std::lock_guard<std::mutex> lock(mMetricsMutex); 104 for (const auto& itr : mMetricsManagers) { 105 itr.second->onPeriodicAlarmFired(timestampNs, alarmSet); 106 } 107} 108 109void updateUid(Value* value, int hostUid) { 110 int uid = value->int_value; 111 if (uid != hostUid) { 112 value->setInt(hostUid); 113 } 114} 115 116void StatsLogProcessor::mapIsolatedUidToHostUidIfNecessaryLocked(LogEvent* event) const { 117 if (android::util::AtomsInfo::kAtomsWithAttributionChain.find(event->GetTagId()) != 118 android::util::AtomsInfo::kAtomsWithAttributionChain.end()) { 119 for (auto& value : *(event->getMutableValues())) { 120 if (value.mField.getPosAtDepth(0) > kAttributionField) { 121 break; 122 } 123 if (isAttributionUidField(value)) { 124 const int hostUid = mUidMap->getHostUidOrSelf(value.mValue.int_value); 125 updateUid(&value.mValue, hostUid); 126 } 127 } 128 } else { 129 auto it = android::util::AtomsInfo::kAtomsWithUidField.find(event->GetTagId()); 130 if (it != android::util::AtomsInfo::kAtomsWithUidField.end()) { 131 int uidField = it->second; // uidField is the field number in proto, 132 // starting from 1 133 if (uidField > 0 && (int)event->getValues().size() >= uidField && 134 (event->getValues())[uidField - 1].mValue.getType() == INT) { 135 Value& value = (*event->getMutableValues())[uidField - 1].mValue; 136 const int hostUid = mUidMap->getHostUidOrSelf(value.int_value); 137 updateUid(&value, hostUid); 138 } else { 139 ALOGE("Malformed log, uid not found. %s", event->ToString().c_str()); 140 } 141 } 142 } 143} 144 145void StatsLogProcessor::onIsolatedUidChangedEventLocked(const LogEvent& event) { 146 status_t err = NO_ERROR, err2 = NO_ERROR, err3 = NO_ERROR; 147 bool is_create = event.GetBool(3, &err); 148 auto parent_uid = int(event.GetLong(1, &err2)); 149 auto isolated_uid = int(event.GetLong(2, &err3)); 150 if (err == NO_ERROR && err2 == NO_ERROR && err3 == NO_ERROR) { 151 if (is_create) { 152 mUidMap->assignIsolatedUid(isolated_uid, parent_uid); 153 } else { 154 mUidMap->removeIsolatedUid(isolated_uid, parent_uid); 155 } 156 } else { 157 ALOGE("Failed to parse uid in the isolated uid change event."); 158 } 159} 160 161void StatsLogProcessor::OnLogEvent(LogEvent* event) { 162 OnLogEvent(event, false); 163} 164 165void StatsLogProcessor::resetConfigs() { 166 std::lock_guard<std::mutex> lock(mMetricsMutex); 167 resetConfigsLocked(getElapsedRealtimeNs()); 168} 169 170void StatsLogProcessor::resetConfigsLocked(const int64_t timestampNs) { 171 std::vector<ConfigKey> configKeys; 172 for (auto it = mMetricsManagers.begin(); it != mMetricsManagers.end(); it++) { 173 configKeys.push_back(it->first); 174 } 175 resetConfigsLocked(timestampNs, configKeys); 176} 177 178void StatsLogProcessor::OnLogEvent(LogEvent* event, bool reconnected) { 179 std::lock_guard<std::mutex> lock(mMetricsMutex); 180 181#ifdef VERY_VERBOSE_PRINTING 182 if (mPrintAllLogs) { 183 ALOGI("%s", event->ToString().c_str()); 184 } 185#endif 186 const int64_t currentTimestampNs = event->GetElapsedTimestampNs(); 187 188 if (reconnected && mLastTimestampSeen != 0) { 189 // LogReader tells us the connection has just been reset. Now we need 190 // to enter reconnection state to find the last CP. 191 mInReconnection = true; 192 } 193 194 if (mInReconnection) { 195 // We see the checkpoint 196 if (currentTimestampNs == mLastTimestampSeen) { 197 mInReconnection = false; 198 // Found the CP. ignore this event, and we will start to read from next event. 199 return; 200 } 201 if (currentTimestampNs > mLargestTimestampSeen) { 202 // We see a new log but CP has not been found yet. Give up now. 203 mLogLossCount++; 204 mInReconnection = false; 205 StatsdStats::getInstance().noteLogLost(currentTimestampNs); 206 // Persist the data before we reset. Do we want this? 207 WriteDataToDiskLocked(CONFIG_RESET); 208 // We see fresher event before we see the checkpoint. We might have lost data. 209 // The best we can do is to reset. 210 resetConfigsLocked(currentTimestampNs); 211 } else { 212 // Still in search of the CP. Keep going. 213 return; 214 } 215 } 216 217 mLogCount++; 218 mLastTimestampSeen = currentTimestampNs; 219 if (mLargestTimestampSeen < currentTimestampNs) { 220 mLargestTimestampSeen = currentTimestampNs; 221 } 222 223 resetIfConfigTtlExpiredLocked(currentTimestampNs); 224 225 StatsdStats::getInstance().noteAtomLogged( 226 event->GetTagId(), event->GetElapsedTimestampNs() / NS_PER_SEC); 227 228 // Hard-coded logic to update the isolated uid's in the uid-map. 229 // The field numbers need to be currently updated by hand with atoms.proto 230 if (event->GetTagId() == android::util::ISOLATED_UID_CHANGED) { 231 onIsolatedUidChangedEventLocked(*event); 232 } 233 234 if (mMetricsManagers.empty()) { 235 return; 236 } 237 238 int64_t curTimeSec = getElapsedRealtimeSec(); 239 if (curTimeSec - mLastPullerCacheClearTimeSec > StatsdStats::kPullerCacheClearIntervalSec) { 240 mStatsPullerManager.ClearPullerCacheIfNecessary(curTimeSec * NS_PER_SEC); 241 mLastPullerCacheClearTimeSec = curTimeSec; 242 } 243 244 245 if (event->GetTagId() != android::util::ISOLATED_UID_CHANGED) { 246 // Map the isolated uid to host uid if necessary. 247 mapIsolatedUidToHostUidIfNecessaryLocked(event); 248 } 249 250 // pass the event to metrics managers. 251 for (auto& pair : mMetricsManagers) { 252 pair.second->onLogEvent(*event); 253 flushIfNecessaryLocked(event->GetElapsedTimestampNs(), pair.first, *(pair.second)); 254 } 255} 256 257void StatsLogProcessor::OnConfigUpdated(const int64_t timestampNs, const ConfigKey& key, 258 const StatsdConfig& config) { 259 std::lock_guard<std::mutex> lock(mMetricsMutex); 260 WriteDataToDiskLocked(key, timestampNs, CONFIG_UPDATED); 261 OnConfigUpdatedLocked(timestampNs, key, config); 262} 263 264void StatsLogProcessor::OnConfigUpdatedLocked( 265 const int64_t timestampNs, const ConfigKey& key, const StatsdConfig& config) { 266 VLOG("Updated configuration for key %s", key.ToString().c_str()); 267 sp<MetricsManager> newMetricsManager = 268 new MetricsManager(key, config, mTimeBaseNs, timestampNs, mUidMap, 269 mAnomalyAlarmMonitor, mPeriodicAlarmMonitor); 270 if (newMetricsManager->isConfigValid()) { 271 mUidMap->OnConfigUpdated(key); 272 if (newMetricsManager->shouldAddUidMapListener()) { 273 // We have to add listener after the MetricsManager is constructed because it's 274 // not safe to create wp or sp from this pointer inside its constructor. 275 mUidMap->addListener(newMetricsManager.get()); 276 } 277 newMetricsManager->refreshTtl(timestampNs); 278 mMetricsManagers[key] = newMetricsManager; 279 VLOG("StatsdConfig valid"); 280 } else { 281 // If there is any error in the config, don't use it. 282 ALOGE("StatsdConfig NOT valid"); 283 } 284} 285 286size_t StatsLogProcessor::GetMetricsSize(const ConfigKey& key) const { 287 std::lock_guard<std::mutex> lock(mMetricsMutex); 288 auto it = mMetricsManagers.find(key); 289 if (it == mMetricsManagers.end()) { 290 ALOGW("Config source %s does not exist", key.ToString().c_str()); 291 return 0; 292 } 293 return it->second->byteSize(); 294} 295 296void StatsLogProcessor::dumpStates(FILE* out, bool verbose) { 297 std::lock_guard<std::mutex> lock(mMetricsMutex); 298 fprintf(out, "MetricsManager count: %lu\n", (unsigned long)mMetricsManagers.size()); 299 for (auto metricsManager : mMetricsManagers) { 300 metricsManager.second->dumpStates(out, verbose); 301 } 302} 303 304/* 305 * onDumpReport dumps serialized ConfigMetricsReportList into outData. 306 */ 307void StatsLogProcessor::onDumpReport(const ConfigKey& key, const int64_t dumpTimeStampNs, 308 const bool include_current_partial_bucket, 309 const bool include_string, 310 const DumpReportReason dumpReportReason, 311 vector<uint8_t>* outData) { 312 std::lock_guard<std::mutex> lock(mMetricsMutex); 313 314 ProtoOutputStream proto; 315 316 // Start of ConfigKey. 317 uint64_t configKeyToken = proto.start(FIELD_TYPE_MESSAGE | FIELD_ID_CONFIG_KEY); 318 proto.write(FIELD_TYPE_INT32 | FIELD_ID_UID, key.GetUid()); 319 proto.write(FIELD_TYPE_INT64 | FIELD_ID_ID, (long long)key.GetId()); 320 proto.end(configKeyToken); 321 // End of ConfigKey. 322 323 // Then, check stats-data directory to see there's any file containing 324 // ConfigMetricsReport from previous shutdowns to concatenate to reports. 325 StorageManager::appendConfigMetricsReport(key, &proto); 326 327 auto it = mMetricsManagers.find(key); 328 if (it != mMetricsManagers.end()) { 329 // This allows another broadcast to be sent within the rate-limit period if we get close to 330 // filling the buffer again soon. 331 mLastBroadcastTimes.erase(key); 332 333 // Start of ConfigMetricsReport (reports). 334 uint64_t reportsToken = 335 proto.start(FIELD_TYPE_MESSAGE | FIELD_COUNT_REPEATED | FIELD_ID_REPORTS); 336 onConfigMetricsReportLocked(key, dumpTimeStampNs, include_current_partial_bucket, 337 include_string, dumpReportReason, &proto); 338 proto.end(reportsToken); 339 // End of ConfigMetricsReport (reports). 340 } else { 341 ALOGW("Config source %s does not exist", key.ToString().c_str()); 342 } 343 344 if (outData != nullptr) { 345 outData->clear(); 346 outData->resize(proto.size()); 347 size_t pos = 0; 348 auto iter = proto.data(); 349 while (iter.readBuffer() != NULL) { 350 size_t toRead = iter.currentToRead(); 351 std::memcpy(&((*outData)[pos]), iter.readBuffer(), toRead); 352 pos += toRead; 353 iter.rp()->move(toRead); 354 } 355 } 356 357 StatsdStats::getInstance().noteMetricsReportSent(key, proto.size()); 358} 359 360/* 361 * onConfigMetricsReportLocked dumps serialized ConfigMetricsReport into outData. 362 */ 363void StatsLogProcessor::onConfigMetricsReportLocked(const ConfigKey& key, 364 const int64_t dumpTimeStampNs, 365 const bool include_current_partial_bucket, 366 const bool include_string, 367 const DumpReportReason dumpReportReason, 368 ProtoOutputStream* proto) { 369 // We already checked whether key exists in mMetricsManagers in 370 // WriteDataToDisk. 371 auto it = mMetricsManagers.find(key); 372 if (it == mMetricsManagers.end()) { 373 return; 374 } 375 int64_t lastReportTimeNs = it->second->getLastReportTimeNs(); 376 int64_t lastReportWallClockNs = it->second->getLastReportWallClockNs(); 377 378 std::set<string> str_set; 379 380 // First, fill in ConfigMetricsReport using current data on memory, which 381 // starts from filling in StatsLogReport's. 382 it->second->onDumpReport(dumpTimeStampNs, include_current_partial_bucket, 383 &str_set, proto); 384 385 // Fill in UidMap if there is at least one metric to report. 386 // This skips the uid map if it's an empty config. 387 if (it->second->getNumMetrics() > 0) { 388 uint64_t uidMapToken = proto->start(FIELD_TYPE_MESSAGE | FIELD_ID_UID_MAP); 389 mUidMap->appendUidMap(dumpTimeStampNs, key, &str_set, proto); 390 proto->end(uidMapToken); 391 } 392 393 // Fill in the timestamps. 394 proto->write(FIELD_TYPE_INT64 | FIELD_ID_LAST_REPORT_ELAPSED_NANOS, 395 (long long)lastReportTimeNs); 396 proto->write(FIELD_TYPE_INT64 | FIELD_ID_CURRENT_REPORT_ELAPSED_NANOS, 397 (long long)dumpTimeStampNs); 398 proto->write(FIELD_TYPE_INT64 | FIELD_ID_LAST_REPORT_WALL_CLOCK_NANOS, 399 (long long)lastReportWallClockNs); 400 proto->write(FIELD_TYPE_INT64 | FIELD_ID_CURRENT_REPORT_WALL_CLOCK_NANOS, 401 (long long)getWallClockNs()); 402 // Dump report reason 403 proto->write(FIELD_TYPE_INT32 | FIELD_ID_DUMP_REPORT_REASON, dumpReportReason); 404 405 if (include_string) { 406 for (const auto& str : str_set) { 407 proto->write(FIELD_TYPE_STRING | FIELD_COUNT_REPEATED | FIELD_ID_STRINGS, str); 408 } 409 } 410} 411 412void StatsLogProcessor::resetConfigsLocked(const int64_t timestampNs, 413 const std::vector<ConfigKey>& configs) { 414 for (const auto& key : configs) { 415 StatsdConfig config; 416 if (StorageManager::readConfigFromDisk(key, &config)) { 417 OnConfigUpdatedLocked(timestampNs, key, config); 418 StatsdStats::getInstance().noteConfigReset(key); 419 } else { 420 ALOGE("Failed to read backup config from disk for : %s", key.ToString().c_str()); 421 auto it = mMetricsManagers.find(key); 422 if (it != mMetricsManagers.end()) { 423 it->second->refreshTtl(timestampNs); 424 } 425 } 426 } 427} 428 429void StatsLogProcessor::resetIfConfigTtlExpiredLocked(const int64_t timestampNs) { 430 std::vector<ConfigKey> configKeysTtlExpired; 431 for (auto it = mMetricsManagers.begin(); it != mMetricsManagers.end(); it++) { 432 if (it->second != nullptr && !it->second->isInTtl(timestampNs)) { 433 configKeysTtlExpired.push_back(it->first); 434 } 435 } 436 if (configKeysTtlExpired.size() > 0) { 437 WriteDataToDiskLocked(CONFIG_RESET); 438 resetConfigsLocked(timestampNs, configKeysTtlExpired); 439 } 440} 441 442void StatsLogProcessor::OnConfigRemoved(const ConfigKey& key) { 443 std::lock_guard<std::mutex> lock(mMetricsMutex); 444 auto it = mMetricsManagers.find(key); 445 if (it != mMetricsManagers.end()) { 446 WriteDataToDiskLocked(key, getElapsedRealtimeNs(), CONFIG_REMOVED); 447 mMetricsManagers.erase(it); 448 mUidMap->OnConfigRemoved(key); 449 } 450 StatsdStats::getInstance().noteConfigRemoved(key); 451 452 mLastBroadcastTimes.erase(key); 453 454 if (mMetricsManagers.empty()) { 455 mStatsPullerManager.ForceClearPullerCache(); 456 } 457} 458 459void StatsLogProcessor::flushIfNecessaryLocked( 460 int64_t timestampNs, const ConfigKey& key, MetricsManager& metricsManager) { 461 auto lastCheckTime = mLastByteSizeTimes.find(key); 462 if (lastCheckTime != mLastByteSizeTimes.end()) { 463 if (timestampNs - lastCheckTime->second < StatsdStats::kMinByteSizeCheckPeriodNs) { 464 return; 465 } 466 } 467 468 // We suspect that the byteSize() computation is expensive, so we set a rate limit. 469 size_t totalBytes = metricsManager.byteSize(); 470 mLastByteSizeTimes[key] = timestampNs; 471 bool requestDump = false; 472 if (totalBytes > 473 StatsdStats::kMaxMetricsBytesPerConfig) { // Too late. We need to start clearing data. 474 metricsManager.dropData(timestampNs); 475 StatsdStats::getInstance().noteDataDropped(key); 476 VLOG("StatsD had to toss out metrics for %s", key.ToString().c_str()); 477 } else if ((totalBytes > StatsdStats::kBytesPerConfigTriggerGetData) || 478 (mOnDiskDataConfigs.find(key) != mOnDiskDataConfigs.end())) { 479 // Request to send a broadcast if: 480 // 1. in memory data > threshold OR 481 // 2. config has old data report on disk. 482 requestDump = true; 483 } 484 485 if (requestDump) { 486 // Send broadcast so that receivers can pull data. 487 auto lastBroadcastTime = mLastBroadcastTimes.find(key); 488 if (lastBroadcastTime != mLastBroadcastTimes.end()) { 489 if (timestampNs - lastBroadcastTime->second < StatsdStats::kMinBroadcastPeriodNs) { 490 VLOG("StatsD would've sent a broadcast but the rate limit stopped us."); 491 return; 492 } 493 } 494 if (mSendBroadcast(key)) { 495 mOnDiskDataConfigs.erase(key); 496 VLOG("StatsD triggered data fetch for %s", key.ToString().c_str()); 497 mLastBroadcastTimes[key] = timestampNs; 498 StatsdStats::getInstance().noteBroadcastSent(key); 499 } 500 } 501} 502 503void StatsLogProcessor::WriteDataToDiskLocked(const ConfigKey& key, 504 const int64_t timestampNs, 505 const DumpReportReason dumpReportReason) { 506 if (mMetricsManagers.find(key) == mMetricsManagers.end()) { 507 return; 508 } 509 ProtoOutputStream proto; 510 onConfigMetricsReportLocked(key, timestampNs, 511 true /* include_current_partial_bucket*/, 512 false /* include strings */, dumpReportReason, &proto); 513 string file_name = StringPrintf("%s/%ld_%d_%lld", STATS_DATA_DIR, 514 (long)getWallClockSec(), key.GetUid(), (long long)key.GetId()); 515 android::base::unique_fd fd(open(file_name.c_str(), 516 O_WRONLY | O_CREAT | O_CLOEXEC, S_IRUSR | S_IWUSR)); 517 if (fd == -1) { 518 ALOGE("Attempt to write %s but failed", file_name.c_str()); 519 return; 520 } 521 proto.flush(fd.get()); 522 // We were able to write the ConfigMetricsReport to disk, so we should trigger collection ASAP. 523 mOnDiskDataConfigs.insert(key); 524} 525 526void StatsLogProcessor::WriteDataToDiskLocked(const DumpReportReason dumpReportReason) { 527 const int64_t timeNs = getElapsedRealtimeNs(); 528 for (auto& pair : mMetricsManagers) { 529 WriteDataToDiskLocked(pair.first, timeNs, dumpReportReason); 530 } 531} 532 533void StatsLogProcessor::WriteDataToDisk(const DumpReportReason dumpReportReason) { 534 std::lock_guard<std::mutex> lock(mMetricsMutex); 535 WriteDataToDiskLocked(dumpReportReason); 536} 537 538void StatsLogProcessor::informPullAlarmFired(const int64_t timestampNs) { 539 std::lock_guard<std::mutex> lock(mMetricsMutex); 540 mStatsPullerManager.OnAlarmFired(timestampNs); 541} 542 543int64_t StatsLogProcessor::getLastReportTimeNs(const ConfigKey& key) { 544 auto it = mMetricsManagers.find(key); 545 if (it == mMetricsManagers.end()) { 546 return 0; 547 } else { 548 return it->second->getLastReportTimeNs(); 549 } 550} 551 552void StatsLogProcessor::noteOnDiskData(const ConfigKey& key) { 553 std::lock_guard<std::mutex> lock(mMetricsMutex); 554 mOnDiskDataConfigs.insert(key); 555} 556 557} // namespace statsd 558} // namespace os 559} // namespace android 560