StatsLogProcessor.cpp revision 56ae0d9a48212c6e886e5887a6f9191f3020af40
1/* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#define DEBUG false // STOPSHIP if true 18#include "Log.h" 19#include "statslog.h" 20 21#include <android-base/file.h> 22#include <dirent.h> 23#include "StatsLogProcessor.h" 24#include "stats_log_util.h" 25#include "android-base/stringprintf.h" 26#include "guardrail/StatsdStats.h" 27#include "metrics/CountMetricProducer.h" 28#include "external/StatsPullerManager.h" 29#include "stats_util.h" 30#include "storage/StorageManager.h" 31 32#include <log/log_event_list.h> 33#include <utils/Errors.h> 34#include <utils/SystemClock.h> 35 36using namespace android; 37using android::base::StringPrintf; 38using android::util::FIELD_COUNT_REPEATED; 39using android::util::FIELD_TYPE_BOOL; 40using android::util::FIELD_TYPE_FLOAT; 41using android::util::FIELD_TYPE_INT32; 42using android::util::FIELD_TYPE_INT64; 43using android::util::FIELD_TYPE_MESSAGE; 44using android::util::FIELD_TYPE_STRING; 45using android::util::ProtoOutputStream; 46using std::make_unique; 47using std::unique_ptr; 48using std::vector; 49 50namespace android { 51namespace os { 52namespace statsd { 53 54// for ConfigMetricsReportList 55const int FIELD_ID_CONFIG_KEY = 1; 56const int FIELD_ID_REPORTS = 2; 57// for ConfigKey 58const int FIELD_ID_UID = 1; 59const int FIELD_ID_ID = 2; 60// for ConfigMetricsReport 61// const int FIELD_ID_METRICS = 1; // written in MetricsManager.cpp 62const int FIELD_ID_UID_MAP = 2; 63const int FIELD_ID_LAST_REPORT_ELAPSED_NANOS = 3; 64const int FIELD_ID_CURRENT_REPORT_ELAPSED_NANOS = 4; 65const int FIELD_ID_LAST_REPORT_WALL_CLOCK_NANOS = 5; 66const int FIELD_ID_CURRENT_REPORT_WALL_CLOCK_NANOS = 6; 67const int FIELD_ID_DUMP_REPORT_REASON = 8; 68const int FIELD_ID_STRINGS = 9; 69 70#define NS_PER_HOUR 3600 * NS_PER_SEC 71 72#define STATS_DATA_DIR "/data/misc/stats-data" 73 74StatsLogProcessor::StatsLogProcessor(const sp<UidMap>& uidMap, 75 const sp<AlarmMonitor>& anomalyAlarmMonitor, 76 const sp<AlarmMonitor>& periodicAlarmMonitor, 77 const int64_t timeBaseNs, 78 const std::function<bool(const ConfigKey&)>& sendBroadcast) 79 : mUidMap(uidMap), 80 mAnomalyAlarmMonitor(anomalyAlarmMonitor), 81 mPeriodicAlarmMonitor(periodicAlarmMonitor), 82 mSendBroadcast(sendBroadcast), 83 mTimeBaseNs(timeBaseNs), 84 mLargestTimestampSeen(0), 85 mLastTimestampSeen(0) { 86} 87 88StatsLogProcessor::~StatsLogProcessor() { 89} 90 91void StatsLogProcessor::onAnomalyAlarmFired( 92 const int64_t& timestampNs, 93 unordered_set<sp<const InternalAlarm>, SpHash<InternalAlarm>> alarmSet) { 94 std::lock_guard<std::mutex> lock(mMetricsMutex); 95 for (const auto& itr : mMetricsManagers) { 96 itr.second->onAnomalyAlarmFired(timestampNs, alarmSet); 97 } 98} 99void StatsLogProcessor::onPeriodicAlarmFired( 100 const int64_t& timestampNs, 101 unordered_set<sp<const InternalAlarm>, SpHash<InternalAlarm>> alarmSet) { 102 103 std::lock_guard<std::mutex> lock(mMetricsMutex); 104 for (const auto& itr : mMetricsManagers) { 105 itr.second->onPeriodicAlarmFired(timestampNs, alarmSet); 106 } 107} 108 109void updateUid(Value* value, int hostUid) { 110 int uid = value->int_value; 111 if (uid != hostUid) { 112 value->setInt(hostUid); 113 } 114} 115 116void StatsLogProcessor::mapIsolatedUidToHostUidIfNecessaryLocked(LogEvent* event) const { 117 if (android::util::AtomsInfo::kAtomsWithAttributionChain.find(event->GetTagId()) != 118 android::util::AtomsInfo::kAtomsWithAttributionChain.end()) { 119 for (auto& value : *(event->getMutableValues())) { 120 if (value.mField.getPosAtDepth(0) > kAttributionField) { 121 break; 122 } 123 if (isAttributionUidField(value)) { 124 const int hostUid = mUidMap->getHostUidOrSelf(value.mValue.int_value); 125 updateUid(&value.mValue, hostUid); 126 } 127 } 128 } else { 129 auto it = android::util::AtomsInfo::kAtomsWithUidField.find(event->GetTagId()); 130 if (it != android::util::AtomsInfo::kAtomsWithUidField.end()) { 131 int uidField = it->second; // uidField is the field number in proto, 132 // starting from 1 133 if (uidField > 0 && (int)event->getValues().size() >= uidField && 134 (event->getValues())[uidField - 1].mValue.getType() == INT) { 135 Value& value = (*event->getMutableValues())[uidField - 1].mValue; 136 const int hostUid = mUidMap->getHostUidOrSelf(value.int_value); 137 updateUid(&value, hostUid); 138 } else { 139 ALOGE("Malformed log, uid not found. %s", event->ToString().c_str()); 140 } 141 } 142 } 143} 144 145void StatsLogProcessor::onIsolatedUidChangedEventLocked(const LogEvent& event) { 146 status_t err = NO_ERROR, err2 = NO_ERROR, err3 = NO_ERROR; 147 bool is_create = event.GetBool(3, &err); 148 auto parent_uid = int(event.GetLong(1, &err2)); 149 auto isolated_uid = int(event.GetLong(2, &err3)); 150 if (err == NO_ERROR && err2 == NO_ERROR && err3 == NO_ERROR) { 151 if (is_create) { 152 mUidMap->assignIsolatedUid(isolated_uid, parent_uid); 153 } else { 154 mUidMap->removeIsolatedUid(isolated_uid, parent_uid); 155 } 156 } else { 157 ALOGE("Failed to parse uid in the isolated uid change event."); 158 } 159} 160 161void StatsLogProcessor::OnLogEvent(LogEvent* event) { 162 OnLogEvent(event, false); 163} 164 165void StatsLogProcessor::resetConfigs() { 166 std::lock_guard<std::mutex> lock(mMetricsMutex); 167 resetConfigsLocked(getElapsedRealtimeNs()); 168} 169 170void StatsLogProcessor::resetConfigsLocked(const int64_t timestampNs) { 171 std::vector<ConfigKey> configKeys; 172 for (auto it = mMetricsManagers.begin(); it != mMetricsManagers.end(); it++) { 173 configKeys.push_back(it->first); 174 } 175 resetConfigsLocked(timestampNs, configKeys); 176} 177 178void StatsLogProcessor::OnLogEvent(LogEvent* event, bool reconnected) { 179 std::lock_guard<std::mutex> lock(mMetricsMutex); 180 181#ifdef VERY_VERBOSE_PRINTING 182 if (mPrintAllLogs) { 183 ALOGI("%s", event->ToString().c_str()); 184 } 185#endif 186 const int64_t currentTimestampNs = event->GetElapsedTimestampNs(); 187 188 if (reconnected && mLastTimestampSeen != 0) { 189 // LogReader tells us the connection has just been reset. Now we need 190 // to enter reconnection state to find the last CP. 191 mInReconnection = true; 192 } 193 194 if (mInReconnection) { 195 // We see the checkpoint 196 if (currentTimestampNs == mLastTimestampSeen) { 197 mInReconnection = false; 198 // Found the CP. ignore this event, and we will start to read from next event. 199 return; 200 } 201 if (currentTimestampNs > mLargestTimestampSeen) { 202 // We see a new log but CP has not been found yet. Give up now. 203 mLogLossCount++; 204 mInReconnection = false; 205 StatsdStats::getInstance().noteLogLost(currentTimestampNs); 206 // Persist the data before we reset. Do we want this? 207 WriteDataToDiskLocked(CONFIG_RESET); 208 // We see fresher event before we see the checkpoint. We might have lost data. 209 // The best we can do is to reset. 210 resetConfigsLocked(currentTimestampNs); 211 } else { 212 // Still in search of the CP. Keep going. 213 return; 214 } 215 } 216 217 mLogCount++; 218 mLastTimestampSeen = currentTimestampNs; 219 if (mLargestTimestampSeen < currentTimestampNs) { 220 mLargestTimestampSeen = currentTimestampNs; 221 } 222 223 resetIfConfigTtlExpiredLocked(currentTimestampNs); 224 225 StatsdStats::getInstance().noteAtomLogged( 226 event->GetTagId(), event->GetElapsedTimestampNs() / NS_PER_SEC); 227 228 // Hard-coded logic to update the isolated uid's in the uid-map. 229 // The field numbers need to be currently updated by hand with atoms.proto 230 if (event->GetTagId() == android::util::ISOLATED_UID_CHANGED) { 231 onIsolatedUidChangedEventLocked(*event); 232 } 233 234 if (mMetricsManagers.empty()) { 235 return; 236 } 237 238 int64_t curTimeSec = getElapsedRealtimeSec(); 239 if (curTimeSec - mLastPullerCacheClearTimeSec > StatsdStats::kPullerCacheClearIntervalSec) { 240 mStatsPullerManager.ClearPullerCacheIfNecessary(curTimeSec * NS_PER_SEC); 241 mLastPullerCacheClearTimeSec = curTimeSec; 242 } 243 244 245 if (event->GetTagId() != android::util::ISOLATED_UID_CHANGED) { 246 // Map the isolated uid to host uid if necessary. 247 mapIsolatedUidToHostUidIfNecessaryLocked(event); 248 } 249 250 // pass the event to metrics managers. 251 for (auto& pair : mMetricsManagers) { 252 pair.second->onLogEvent(*event); 253 flushIfNecessaryLocked(event->GetElapsedTimestampNs(), pair.first, *(pair.second)); 254 } 255} 256 257void StatsLogProcessor::OnConfigUpdated(const int64_t timestampNs, const ConfigKey& key, 258 const StatsdConfig& config) { 259 std::lock_guard<std::mutex> lock(mMetricsMutex); 260 WriteDataToDiskLocked(key, timestampNs, CONFIG_UPDATED); 261 OnConfigUpdatedLocked(timestampNs, key, config); 262} 263 264void StatsLogProcessor::OnConfigUpdatedLocked( 265 const int64_t timestampNs, const ConfigKey& key, const StatsdConfig& config) { 266 VLOG("Updated configuration for key %s", key.ToString().c_str()); 267 sp<MetricsManager> newMetricsManager = 268 new MetricsManager(key, config, mTimeBaseNs, timestampNs, mUidMap, 269 mAnomalyAlarmMonitor, mPeriodicAlarmMonitor); 270 if (newMetricsManager->isConfigValid()) { 271 mUidMap->OnConfigUpdated(key); 272 if (newMetricsManager->shouldAddUidMapListener()) { 273 // We have to add listener after the MetricsManager is constructed because it's 274 // not safe to create wp or sp from this pointer inside its constructor. 275 mUidMap->addListener(newMetricsManager.get()); 276 } 277 newMetricsManager->refreshTtl(timestampNs); 278 mMetricsManagers[key] = newMetricsManager; 279 VLOG("StatsdConfig valid"); 280 } else { 281 // If there is any error in the config, don't use it. 282 ALOGE("StatsdConfig NOT valid"); 283 } 284} 285 286size_t StatsLogProcessor::GetMetricsSize(const ConfigKey& key) const { 287 std::lock_guard<std::mutex> lock(mMetricsMutex); 288 auto it = mMetricsManagers.find(key); 289 if (it == mMetricsManagers.end()) { 290 ALOGW("Config source %s does not exist", key.ToString().c_str()); 291 return 0; 292 } 293 return it->second->byteSize(); 294} 295 296void StatsLogProcessor::dumpStates(FILE* out, bool verbose) { 297 std::lock_guard<std::mutex> lock(mMetricsMutex); 298 fprintf(out, "MetricsManager count: %lu\n", (unsigned long)mMetricsManagers.size()); 299 for (auto metricsManager : mMetricsManagers) { 300 metricsManager.second->dumpStates(out, verbose); 301 } 302} 303 304/* 305 * onDumpReport dumps serialized ConfigMetricsReportList into outData. 306 */ 307void StatsLogProcessor::onDumpReport(const ConfigKey& key, const int64_t dumpTimeStampNs, 308 const bool include_current_partial_bucket, 309 const DumpReportReason dumpReportReason, 310 vector<uint8_t>* outData) { 311 std::lock_guard<std::mutex> lock(mMetricsMutex); 312 313 ProtoOutputStream proto; 314 315 // Start of ConfigKey. 316 uint64_t configKeyToken = proto.start(FIELD_TYPE_MESSAGE | FIELD_ID_CONFIG_KEY); 317 proto.write(FIELD_TYPE_INT32 | FIELD_ID_UID, key.GetUid()); 318 proto.write(FIELD_TYPE_INT64 | FIELD_ID_ID, (long long)key.GetId()); 319 proto.end(configKeyToken); 320 // End of ConfigKey. 321 322 // Then, check stats-data directory to see there's any file containing 323 // ConfigMetricsReport from previous shutdowns to concatenate to reports. 324 StorageManager::appendConfigMetricsReport(key, &proto); 325 326 auto it = mMetricsManagers.find(key); 327 if (it != mMetricsManagers.end()) { 328 // This allows another broadcast to be sent within the rate-limit period if we get close to 329 // filling the buffer again soon. 330 mLastBroadcastTimes.erase(key); 331 332 // Start of ConfigMetricsReport (reports). 333 uint64_t reportsToken = 334 proto.start(FIELD_TYPE_MESSAGE | FIELD_COUNT_REPEATED | FIELD_ID_REPORTS); 335 onConfigMetricsReportLocked(key, dumpTimeStampNs, include_current_partial_bucket, 336 dumpReportReason, &proto); 337 proto.end(reportsToken); 338 // End of ConfigMetricsReport (reports). 339 } else { 340 ALOGW("Config source %s does not exist", key.ToString().c_str()); 341 } 342 343 if (outData != nullptr) { 344 outData->clear(); 345 outData->resize(proto.size()); 346 size_t pos = 0; 347 auto iter = proto.data(); 348 while (iter.readBuffer() != NULL) { 349 size_t toRead = iter.currentToRead(); 350 std::memcpy(&((*outData)[pos]), iter.readBuffer(), toRead); 351 pos += toRead; 352 iter.rp()->move(toRead); 353 } 354 } 355 356 StatsdStats::getInstance().noteMetricsReportSent(key, proto.size()); 357} 358 359/* 360 * onConfigMetricsReportLocked dumps serialized ConfigMetricsReport into outData. 361 */ 362void StatsLogProcessor::onConfigMetricsReportLocked(const ConfigKey& key, 363 const int64_t dumpTimeStampNs, 364 const bool include_current_partial_bucket, 365 const DumpReportReason dumpReportReason, 366 ProtoOutputStream* proto) { 367 // We already checked whether key exists in mMetricsManagers in 368 // WriteDataToDisk. 369 auto it = mMetricsManagers.find(key); 370 if (it == mMetricsManagers.end()) { 371 return; 372 } 373 int64_t lastReportTimeNs = it->second->getLastReportTimeNs(); 374 int64_t lastReportWallClockNs = it->second->getLastReportWallClockNs(); 375 376 std::set<string> str_set; 377 378 // First, fill in ConfigMetricsReport using current data on memory, which 379 // starts from filling in StatsLogReport's. 380 it->second->onDumpReport(dumpTimeStampNs, include_current_partial_bucket, 381 &str_set, proto); 382 383 // Fill in UidMap if there is at least one metric to report. 384 // This skips the uid map if it's an empty config. 385 if (it->second->getNumMetrics() > 0) { 386 uint64_t uidMapToken = proto->start(FIELD_TYPE_MESSAGE | FIELD_ID_UID_MAP); 387 mUidMap->appendUidMap(dumpTimeStampNs, key, &str_set, proto); 388 proto->end(uidMapToken); 389 } 390 391 // Fill in the timestamps. 392 proto->write(FIELD_TYPE_INT64 | FIELD_ID_LAST_REPORT_ELAPSED_NANOS, 393 (long long)lastReportTimeNs); 394 proto->write(FIELD_TYPE_INT64 | FIELD_ID_CURRENT_REPORT_ELAPSED_NANOS, 395 (long long)dumpTimeStampNs); 396 proto->write(FIELD_TYPE_INT64 | FIELD_ID_LAST_REPORT_WALL_CLOCK_NANOS, 397 (long long)lastReportWallClockNs); 398 proto->write(FIELD_TYPE_INT64 | FIELD_ID_CURRENT_REPORT_WALL_CLOCK_NANOS, 399 (long long)getWallClockNs()); 400 // Dump report reason 401 proto->write(FIELD_TYPE_INT32 | FIELD_ID_DUMP_REPORT_REASON, dumpReportReason); 402 403 for (const auto& str : str_set) { 404 proto->write(FIELD_TYPE_STRING | FIELD_COUNT_REPEATED | FIELD_ID_STRINGS, str); 405 } 406} 407 408void StatsLogProcessor::resetConfigsLocked(const int64_t timestampNs, 409 const std::vector<ConfigKey>& configs) { 410 for (const auto& key : configs) { 411 StatsdConfig config; 412 if (StorageManager::readConfigFromDisk(key, &config)) { 413 OnConfigUpdatedLocked(timestampNs, key, config); 414 StatsdStats::getInstance().noteConfigReset(key); 415 } else { 416 ALOGE("Failed to read backup config from disk for : %s", key.ToString().c_str()); 417 auto it = mMetricsManagers.find(key); 418 if (it != mMetricsManagers.end()) { 419 it->second->refreshTtl(timestampNs); 420 } 421 } 422 } 423} 424 425void StatsLogProcessor::resetIfConfigTtlExpiredLocked(const int64_t timestampNs) { 426 std::vector<ConfigKey> configKeysTtlExpired; 427 for (auto it = mMetricsManagers.begin(); it != mMetricsManagers.end(); it++) { 428 if (it->second != nullptr && !it->second->isInTtl(timestampNs)) { 429 configKeysTtlExpired.push_back(it->first); 430 } 431 } 432 if (configKeysTtlExpired.size() > 0) { 433 WriteDataToDiskLocked(CONFIG_RESET); 434 resetConfigsLocked(timestampNs, configKeysTtlExpired); 435 } 436} 437 438void StatsLogProcessor::OnConfigRemoved(const ConfigKey& key) { 439 std::lock_guard<std::mutex> lock(mMetricsMutex); 440 auto it = mMetricsManagers.find(key); 441 if (it != mMetricsManagers.end()) { 442 WriteDataToDiskLocked(key, getElapsedRealtimeNs(), CONFIG_REMOVED); 443 mMetricsManagers.erase(it); 444 mUidMap->OnConfigRemoved(key); 445 } 446 StatsdStats::getInstance().noteConfigRemoved(key); 447 448 mLastBroadcastTimes.erase(key); 449 450 if (mMetricsManagers.empty()) { 451 mStatsPullerManager.ForceClearPullerCache(); 452 } 453} 454 455void StatsLogProcessor::flushIfNecessaryLocked( 456 int64_t timestampNs, const ConfigKey& key, MetricsManager& metricsManager) { 457 auto lastCheckTime = mLastByteSizeTimes.find(key); 458 if (lastCheckTime != mLastByteSizeTimes.end()) { 459 if (timestampNs - lastCheckTime->second < StatsdStats::kMinByteSizeCheckPeriodNs) { 460 return; 461 } 462 } 463 464 // We suspect that the byteSize() computation is expensive, so we set a rate limit. 465 size_t totalBytes = metricsManager.byteSize(); 466 mLastByteSizeTimes[key] = timestampNs; 467 bool requestDump = false; 468 if (totalBytes > 469 StatsdStats::kMaxMetricsBytesPerConfig) { // Too late. We need to start clearing data. 470 metricsManager.dropData(timestampNs); 471 StatsdStats::getInstance().noteDataDropped(key); 472 VLOG("StatsD had to toss out metrics for %s", key.ToString().c_str()); 473 } else if ((totalBytes > StatsdStats::kBytesPerConfigTriggerGetData) || 474 (mOnDiskDataConfigs.find(key) != mOnDiskDataConfigs.end())) { 475 // Request to send a broadcast if: 476 // 1. in memory data > threshold OR 477 // 2. config has old data report on disk. 478 requestDump = true; 479 } 480 481 if (requestDump) { 482 // Send broadcast so that receivers can pull data. 483 auto lastBroadcastTime = mLastBroadcastTimes.find(key); 484 if (lastBroadcastTime != mLastBroadcastTimes.end()) { 485 if (timestampNs - lastBroadcastTime->second < StatsdStats::kMinBroadcastPeriodNs) { 486 VLOG("StatsD would've sent a broadcast but the rate limit stopped us."); 487 return; 488 } 489 } 490 if (mSendBroadcast(key)) { 491 mOnDiskDataConfigs.erase(key); 492 VLOG("StatsD triggered data fetch for %s", key.ToString().c_str()); 493 mLastBroadcastTimes[key] = timestampNs; 494 StatsdStats::getInstance().noteBroadcastSent(key); 495 } 496 } 497} 498 499void StatsLogProcessor::WriteDataToDiskLocked(const ConfigKey& key, 500 const int64_t timestampNs, 501 const DumpReportReason dumpReportReason) { 502 if (mMetricsManagers.find(key) == mMetricsManagers.end() || 503 !mMetricsManagers.find(key)->second->shouldWriteToDisk()) { 504 return; 505 } 506 ProtoOutputStream proto; 507 onConfigMetricsReportLocked(key, timestampNs, true /* include_current_partial_bucket*/, 508 dumpReportReason, &proto); 509 string file_name = StringPrintf("%s/%ld_%d_%lld", STATS_DATA_DIR, 510 (long)getWallClockSec(), key.GetUid(), (long long)key.GetId()); 511 android::base::unique_fd fd(open(file_name.c_str(), 512 O_WRONLY | O_CREAT | O_CLOEXEC, S_IRUSR | S_IWUSR)); 513 if (fd == -1) { 514 ALOGE("Attempt to write %s but failed", file_name.c_str()); 515 return; 516 } 517 proto.flush(fd.get()); 518 // We were able to write the ConfigMetricsReport to disk, so we should trigger collection ASAP. 519 mOnDiskDataConfigs.insert(key); 520} 521 522void StatsLogProcessor::WriteDataToDiskLocked(const DumpReportReason dumpReportReason) { 523 const int64_t timeNs = getElapsedRealtimeNs(); 524 for (auto& pair : mMetricsManagers) { 525 WriteDataToDiskLocked(pair.first, timeNs, dumpReportReason); 526 } 527} 528 529void StatsLogProcessor::WriteDataToDisk(const DumpReportReason dumpReportReason) { 530 std::lock_guard<std::mutex> lock(mMetricsMutex); 531 WriteDataToDiskLocked(dumpReportReason); 532} 533 534void StatsLogProcessor::informPullAlarmFired(const int64_t timestampNs) { 535 std::lock_guard<std::mutex> lock(mMetricsMutex); 536 mStatsPullerManager.OnAlarmFired(timestampNs); 537} 538 539int64_t StatsLogProcessor::getLastReportTimeNs(const ConfigKey& key) { 540 auto it = mMetricsManagers.find(key); 541 if (it == mMetricsManagers.end()) { 542 return 0; 543 } else { 544 return it->second->getLastReportTimeNs(); 545 } 546} 547 548void StatsLogProcessor::noteOnDiskData(const ConfigKey& key) { 549 std::lock_guard<std::mutex> lock(mMetricsMutex); 550 mOnDiskDataConfigs.insert(key); 551} 552 553} // namespace statsd 554} // namespace os 555} // namespace android 556