StatsLogProcessor.cpp revision 892f3d32293b8358c72da4a94633827abbda640b
1/* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#define DEBUG true // STOPSHIP if true 18#include "Log.h" 19#include "statslog.h" 20 21#include <android-base/file.h> 22#include <dirent.h> 23#include "StatsLogProcessor.h" 24#include "stats_log_util.h" 25#include "android-base/stringprintf.h" 26#include "guardrail/StatsdStats.h" 27#include "metrics/CountMetricProducer.h" 28#include "external/StatsPullerManager.h" 29#include "stats_util.h" 30#include "storage/StorageManager.h" 31 32#include <log/log_event_list.h> 33#include <utils/Errors.h> 34#include <utils/SystemClock.h> 35 36using namespace android; 37using android::base::StringPrintf; 38using android::util::FIELD_COUNT_REPEATED; 39using android::util::FIELD_TYPE_BOOL; 40using android::util::FIELD_TYPE_FLOAT; 41using android::util::FIELD_TYPE_INT32; 42using android::util::FIELD_TYPE_INT64; 43using android::util::FIELD_TYPE_MESSAGE; 44using android::util::FIELD_TYPE_STRING; 45using android::util::ProtoOutputStream; 46using std::make_unique; 47using std::unique_ptr; 48using std::vector; 49 50namespace android { 51namespace os { 52namespace statsd { 53 54// for ConfigMetricsReportList 55const int FIELD_ID_CONFIG_KEY = 1; 56const int FIELD_ID_REPORTS = 2; 57// for ConfigKey 58const int FIELD_ID_UID = 1; 59const int FIELD_ID_ID = 2; 60// for ConfigMetricsReport 61// const int FIELD_ID_METRICS = 1; // written in MetricsManager.cpp 62const int FIELD_ID_UID_MAP = 2; 63const int FIELD_ID_LAST_REPORT_ELAPSED_NANOS = 3; 64const int FIELD_ID_CURRENT_REPORT_ELAPSED_NANOS = 4; 65const int FIELD_ID_LAST_REPORT_WALL_CLOCK_NANOS = 5; 66const int FIELD_ID_CURRENT_REPORT_WALL_CLOCK_NANOS = 6; 67const int FIELD_ID_DUMP_REPORT_REASON = 8; 68const int FIELD_ID_STRINGS = 9; 69 70#define NS_PER_HOUR 3600 * NS_PER_SEC 71 72#define STATS_DATA_DIR "/data/misc/stats-data" 73 74StatsLogProcessor::StatsLogProcessor(const sp<UidMap>& uidMap, 75 const sp<AlarmMonitor>& anomalyAlarmMonitor, 76 const sp<AlarmMonitor>& periodicAlarmMonitor, 77 const int64_t timeBaseNs, 78 const std::function<void(const ConfigKey&)>& sendBroadcast) 79 : mUidMap(uidMap), 80 mAnomalyAlarmMonitor(anomalyAlarmMonitor), 81 mPeriodicAlarmMonitor(periodicAlarmMonitor), 82 mSendBroadcast(sendBroadcast), 83 mTimeBaseNs(timeBaseNs), 84 mLargestTimestampSeen(0), 85 mLastTimestampSeen(0) { 86} 87 88StatsLogProcessor::~StatsLogProcessor() { 89} 90 91void StatsLogProcessor::onAnomalyAlarmFired( 92 const int64_t& timestampNs, 93 unordered_set<sp<const InternalAlarm>, SpHash<InternalAlarm>> alarmSet) { 94 std::lock_guard<std::mutex> lock(mMetricsMutex); 95 for (const auto& itr : mMetricsManagers) { 96 itr.second->onAnomalyAlarmFired(timestampNs, alarmSet); 97 } 98} 99void StatsLogProcessor::onPeriodicAlarmFired( 100 const int64_t& timestampNs, 101 unordered_set<sp<const InternalAlarm>, SpHash<InternalAlarm>> alarmSet) { 102 103 std::lock_guard<std::mutex> lock(mMetricsMutex); 104 for (const auto& itr : mMetricsManagers) { 105 itr.second->onPeriodicAlarmFired(timestampNs, alarmSet); 106 } 107} 108 109void updateUid(Value* value, int hostUid) { 110 int uid = value->int_value; 111 if (uid != hostUid) { 112 value->setInt(hostUid); 113 } 114} 115 116void StatsLogProcessor::mapIsolatedUidToHostUidIfNecessaryLocked(LogEvent* event) const { 117 if (android::util::AtomsInfo::kAtomsWithAttributionChain.find(event->GetTagId()) != 118 android::util::AtomsInfo::kAtomsWithAttributionChain.end()) { 119 for (auto& value : *(event->getMutableValues())) { 120 if (value.mField.getPosAtDepth(0) > kAttributionField) { 121 break; 122 } 123 if (isAttributionUidField(value)) { 124 const int hostUid = mUidMap->getHostUidOrSelf(value.mValue.int_value); 125 updateUid(&value.mValue, hostUid); 126 } 127 } 128 } else { 129 auto it = android::util::AtomsInfo::kAtomsWithUidField.find(event->GetTagId()); 130 if (it != android::util::AtomsInfo::kAtomsWithUidField.end()) { 131 int uidField = it->second; // uidField is the field number in proto, 132 // starting from 1 133 if (uidField > 0 && (int)event->getValues().size() >= uidField && 134 (event->getValues())[uidField - 1].mValue.getType() == INT) { 135 Value& value = (*event->getMutableValues())[uidField - 1].mValue; 136 const int hostUid = mUidMap->getHostUidOrSelf(value.int_value); 137 updateUid(&value, hostUid); 138 } else { 139 ALOGE("Malformed log, uid not found. %s", event->ToString().c_str()); 140 } 141 } 142 } 143} 144 145void StatsLogProcessor::onIsolatedUidChangedEventLocked(const LogEvent& event) { 146 status_t err = NO_ERROR, err2 = NO_ERROR, err3 = NO_ERROR; 147 bool is_create = event.GetBool(3, &err); 148 auto parent_uid = int(event.GetLong(1, &err2)); 149 auto isolated_uid = int(event.GetLong(2, &err3)); 150 if (err == NO_ERROR && err2 == NO_ERROR && err3 == NO_ERROR) { 151 if (is_create) { 152 mUidMap->assignIsolatedUid(isolated_uid, parent_uid); 153 } else { 154 mUidMap->removeIsolatedUid(isolated_uid, parent_uid); 155 } 156 } else { 157 ALOGE("Failed to parse uid in the isolated uid change event."); 158 } 159} 160 161void StatsLogProcessor::OnLogEvent(LogEvent* event) { 162 OnLogEvent(event, false); 163} 164 165void StatsLogProcessor::resetConfigs() { 166 std::lock_guard<std::mutex> lock(mMetricsMutex); 167 resetConfigsLocked(getElapsedRealtimeNs()); 168} 169 170void StatsLogProcessor::resetConfigsLocked(const int64_t timestampNs) { 171 std::vector<ConfigKey> configKeys; 172 for (auto it = mMetricsManagers.begin(); it != mMetricsManagers.end(); it++) { 173 configKeys.push_back(it->first); 174 } 175 resetConfigsLocked(timestampNs, configKeys); 176} 177 178void StatsLogProcessor::OnLogEvent(LogEvent* event, bool reconnected) { 179 std::lock_guard<std::mutex> lock(mMetricsMutex); 180 const int64_t currentTimestampNs = event->GetElapsedTimestampNs(); 181 182 if (reconnected && mLastTimestampSeen != 0) { 183 // LogReader tells us the connection has just been reset. Now we need 184 // to enter reconnection state to find the last CP. 185 mInReconnection = true; 186 } 187 188 if (mInReconnection) { 189 // We see the checkpoint 190 if (currentTimestampNs == mLastTimestampSeen) { 191 mInReconnection = false; 192 // Found the CP. ignore this event, and we will start to read from next event. 193 return; 194 } 195 if (currentTimestampNs > mLargestTimestampSeen) { 196 // We see a new log but CP has not been found yet. Give up now. 197 mLogLossCount++; 198 mInReconnection = false; 199 StatsdStats::getInstance().noteLogLost(currentTimestampNs); 200 // Persist the data before we reset. Do we want this? 201 WriteDataToDiskLocked(CONFIG_RESET); 202 // We see fresher event before we see the checkpoint. We might have lost data. 203 // The best we can do is to reset. 204 resetConfigsLocked(currentTimestampNs); 205 } else { 206 // Still in search of the CP. Keep going. 207 return; 208 } 209 } 210 211 mLogCount++; 212 mLastTimestampSeen = currentTimestampNs; 213 if (mLargestTimestampSeen < currentTimestampNs) { 214 mLargestTimestampSeen = currentTimestampNs; 215 } 216 217 resetIfConfigTtlExpiredLocked(currentTimestampNs); 218 219 StatsdStats::getInstance().noteAtomLogged( 220 event->GetTagId(), event->GetElapsedTimestampNs() / NS_PER_SEC); 221 222 // Hard-coded logic to update the isolated uid's in the uid-map. 223 // The field numbers need to be currently updated by hand with atoms.proto 224 if (event->GetTagId() == android::util::ISOLATED_UID_CHANGED) { 225 onIsolatedUidChangedEventLocked(*event); 226 } 227 228 if (mMetricsManagers.empty()) { 229 return; 230 } 231 232 int64_t curTimeSec = getElapsedRealtimeSec(); 233 if (curTimeSec - mLastPullerCacheClearTimeSec > StatsdStats::kPullerCacheClearIntervalSec) { 234 mStatsPullerManager.ClearPullerCacheIfNecessary(curTimeSec * NS_PER_SEC); 235 mLastPullerCacheClearTimeSec = curTimeSec; 236 } 237 238 239 if (event->GetTagId() != android::util::ISOLATED_UID_CHANGED) { 240 // Map the isolated uid to host uid if necessary. 241 mapIsolatedUidToHostUidIfNecessaryLocked(event); 242 } 243 244 // pass the event to metrics managers. 245 for (auto& pair : mMetricsManagers) { 246 pair.second->onLogEvent(*event); 247 flushIfNecessaryLocked(event->GetElapsedTimestampNs(), pair.first, *(pair.second)); 248 } 249} 250 251void StatsLogProcessor::OnConfigUpdated(const int64_t timestampNs, const ConfigKey& key, 252 const StatsdConfig& config) { 253 std::lock_guard<std::mutex> lock(mMetricsMutex); 254 WriteDataToDiskLocked(key, timestampNs, CONFIG_UPDATED); 255 OnConfigUpdatedLocked(timestampNs, key, config); 256} 257 258void StatsLogProcessor::OnConfigUpdatedLocked( 259 const int64_t timestampNs, const ConfigKey& key, const StatsdConfig& config) { 260 VLOG("Updated configuration for key %s", key.ToString().c_str()); 261 sp<MetricsManager> newMetricsManager = 262 new MetricsManager(key, config, mTimeBaseNs, timestampNs, mUidMap, 263 mAnomalyAlarmMonitor, mPeriodicAlarmMonitor); 264 if (newMetricsManager->isConfigValid()) { 265 mUidMap->OnConfigUpdated(key); 266 if (newMetricsManager->shouldAddUidMapListener()) { 267 // We have to add listener after the MetricsManager is constructed because it's 268 // not safe to create wp or sp from this pointer inside its constructor. 269 mUidMap->addListener(newMetricsManager.get()); 270 } 271 newMetricsManager->refreshTtl(timestampNs); 272 mMetricsManagers[key] = newMetricsManager; 273 VLOG("StatsdConfig valid"); 274 } else { 275 // If there is any error in the config, don't use it. 276 ALOGE("StatsdConfig NOT valid"); 277 } 278} 279 280size_t StatsLogProcessor::GetMetricsSize(const ConfigKey& key) const { 281 std::lock_guard<std::mutex> lock(mMetricsMutex); 282 auto it = mMetricsManagers.find(key); 283 if (it == mMetricsManagers.end()) { 284 ALOGW("Config source %s does not exist", key.ToString().c_str()); 285 return 0; 286 } 287 return it->second->byteSize(); 288} 289 290void StatsLogProcessor::dumpStates(FILE* out, bool verbose) { 291 std::lock_guard<std::mutex> lock(mMetricsMutex); 292 fprintf(out, "MetricsManager count: %lu\n", (unsigned long)mMetricsManagers.size()); 293 for (auto metricsManager : mMetricsManagers) { 294 metricsManager.second->dumpStates(out, verbose); 295 } 296} 297 298/* 299 * onDumpReport dumps serialized ConfigMetricsReportList into outData. 300 */ 301void StatsLogProcessor::onDumpReport(const ConfigKey& key, const int64_t dumpTimeStampNs, 302 const bool include_current_partial_bucket, 303 const bool include_string, 304 const DumpReportReason dumpReportReason, 305 vector<uint8_t>* outData) { 306 std::lock_guard<std::mutex> lock(mMetricsMutex); 307 308 ProtoOutputStream proto; 309 310 // Start of ConfigKey. 311 uint64_t configKeyToken = proto.start(FIELD_TYPE_MESSAGE | FIELD_ID_CONFIG_KEY); 312 proto.write(FIELD_TYPE_INT32 | FIELD_ID_UID, key.GetUid()); 313 proto.write(FIELD_TYPE_INT64 | FIELD_ID_ID, (long long)key.GetId()); 314 proto.end(configKeyToken); 315 // End of ConfigKey. 316 317 // Then, check stats-data directory to see there's any file containing 318 // ConfigMetricsReport from previous shutdowns to concatenate to reports. 319 StorageManager::appendConfigMetricsReport(key, &proto); 320 321 auto it = mMetricsManagers.find(key); 322 if (it != mMetricsManagers.end()) { 323 // This allows another broadcast to be sent within the rate-limit period if we get close to 324 // filling the buffer again soon. 325 mLastBroadcastTimes.erase(key); 326 327 // Start of ConfigMetricsReport (reports). 328 uint64_t reportsToken = 329 proto.start(FIELD_TYPE_MESSAGE | FIELD_COUNT_REPEATED | FIELD_ID_REPORTS); 330 onConfigMetricsReportLocked(key, dumpTimeStampNs, include_current_partial_bucket, 331 include_string, dumpReportReason, &proto); 332 proto.end(reportsToken); 333 // End of ConfigMetricsReport (reports). 334 } else { 335 ALOGW("Config source %s does not exist", key.ToString().c_str()); 336 } 337 338 if (outData != nullptr) { 339 outData->clear(); 340 outData->resize(proto.size()); 341 size_t pos = 0; 342 auto iter = proto.data(); 343 while (iter.readBuffer() != NULL) { 344 size_t toRead = iter.currentToRead(); 345 std::memcpy(&((*outData)[pos]), iter.readBuffer(), toRead); 346 pos += toRead; 347 iter.rp()->move(toRead); 348 } 349 } 350 351 StatsdStats::getInstance().noteMetricsReportSent(key, proto.size()); 352} 353 354/* 355 * onConfigMetricsReportLocked dumps serialized ConfigMetricsReport into outData. 356 */ 357void StatsLogProcessor::onConfigMetricsReportLocked(const ConfigKey& key, 358 const int64_t dumpTimeStampNs, 359 const bool include_current_partial_bucket, 360 const bool include_string, 361 const DumpReportReason dumpReportReason, 362 ProtoOutputStream* proto) { 363 // We already checked whether key exists in mMetricsManagers in 364 // WriteDataToDisk. 365 auto it = mMetricsManagers.find(key); 366 if (it == mMetricsManagers.end()) { 367 return; 368 } 369 int64_t lastReportTimeNs = it->second->getLastReportTimeNs(); 370 int64_t lastReportWallClockNs = it->second->getLastReportWallClockNs(); 371 372 std::set<string> str_set; 373 374 // First, fill in ConfigMetricsReport using current data on memory, which 375 // starts from filling in StatsLogReport's. 376 it->second->onDumpReport(dumpTimeStampNs, include_current_partial_bucket, 377 &str_set, proto); 378 379 // Fill in UidMap. 380 uint64_t uidMapToken = proto->start(FIELD_TYPE_MESSAGE | FIELD_ID_UID_MAP); 381 mUidMap->appendUidMap(dumpTimeStampNs, key, &str_set, proto); 382 proto->end(uidMapToken); 383 384 // Fill in the timestamps. 385 proto->write(FIELD_TYPE_INT64 | FIELD_ID_LAST_REPORT_ELAPSED_NANOS, 386 (long long)lastReportTimeNs); 387 proto->write(FIELD_TYPE_INT64 | FIELD_ID_CURRENT_REPORT_ELAPSED_NANOS, 388 (long long)dumpTimeStampNs); 389 proto->write(FIELD_TYPE_INT64 | FIELD_ID_LAST_REPORT_WALL_CLOCK_NANOS, 390 (long long)lastReportWallClockNs); 391 proto->write(FIELD_TYPE_INT64 | FIELD_ID_CURRENT_REPORT_WALL_CLOCK_NANOS, 392 (long long)getWallClockNs()); 393 // Dump report reason 394 proto->write(FIELD_TYPE_INT32 | FIELD_ID_DUMP_REPORT_REASON, dumpReportReason); 395 396 if (include_string) { 397 for (const auto& str : str_set) { 398 proto->write(FIELD_TYPE_STRING | FIELD_COUNT_REPEATED | FIELD_ID_STRINGS, str); 399 } 400 } 401} 402 403void StatsLogProcessor::resetConfigsLocked(const int64_t timestampNs, 404 const std::vector<ConfigKey>& configs) { 405 for (const auto& key : configs) { 406 StatsdConfig config; 407 if (StorageManager::readConfigFromDisk(key, &config)) { 408 OnConfigUpdatedLocked(timestampNs, key, config); 409 StatsdStats::getInstance().noteConfigReset(key); 410 } else { 411 ALOGE("Failed to read backup config from disk for : %s", key.ToString().c_str()); 412 auto it = mMetricsManagers.find(key); 413 if (it != mMetricsManagers.end()) { 414 it->second->refreshTtl(timestampNs); 415 } 416 } 417 } 418} 419 420void StatsLogProcessor::resetIfConfigTtlExpiredLocked(const int64_t timestampNs) { 421 std::vector<ConfigKey> configKeysTtlExpired; 422 for (auto it = mMetricsManagers.begin(); it != mMetricsManagers.end(); it++) { 423 if (it->second != nullptr && !it->second->isInTtl(timestampNs)) { 424 configKeysTtlExpired.push_back(it->first); 425 } 426 } 427 if (configKeysTtlExpired.size() > 0) { 428 WriteDataToDiskLocked(CONFIG_RESET); 429 resetConfigsLocked(timestampNs, configKeysTtlExpired); 430 } 431} 432 433void StatsLogProcessor::OnConfigRemoved(const ConfigKey& key) { 434 std::lock_guard<std::mutex> lock(mMetricsMutex); 435 auto it = mMetricsManagers.find(key); 436 if (it != mMetricsManagers.end()) { 437 WriteDataToDiskLocked(key, getElapsedRealtimeNs(), CONFIG_REMOVED); 438 mMetricsManagers.erase(it); 439 mUidMap->OnConfigRemoved(key); 440 } 441 StatsdStats::getInstance().noteConfigRemoved(key); 442 443 mLastBroadcastTimes.erase(key); 444 445 if (mMetricsManagers.empty()) { 446 mStatsPullerManager.ForceClearPullerCache(); 447 } 448} 449 450void StatsLogProcessor::flushIfNecessaryLocked( 451 int64_t timestampNs, const ConfigKey& key, MetricsManager& metricsManager) { 452 auto lastCheckTime = mLastByteSizeTimes.find(key); 453 if (lastCheckTime != mLastByteSizeTimes.end()) { 454 if (timestampNs - lastCheckTime->second < StatsdStats::kMinByteSizeCheckPeriodNs) { 455 return; 456 } 457 } 458 459 // We suspect that the byteSize() computation is expensive, so we set a rate limit. 460 size_t totalBytes = metricsManager.byteSize(); 461 mLastByteSizeTimes[key] = timestampNs; 462 if (totalBytes > 463 StatsdStats::kMaxMetricsBytesPerConfig) { // Too late. We need to start clearing data. 464 metricsManager.dropData(timestampNs); 465 StatsdStats::getInstance().noteDataDropped(key); 466 VLOG("StatsD had to toss out metrics for %s", key.ToString().c_str()); 467 } else if (totalBytes > StatsdStats::kBytesPerConfigTriggerGetData) { 468 // Send broadcast so that receivers can pull data. 469 auto lastBroadcastTime = mLastBroadcastTimes.find(key); 470 if (lastBroadcastTime != mLastBroadcastTimes.end()) { 471 if (timestampNs - lastBroadcastTime->second < StatsdStats::kMinBroadcastPeriodNs) { 472 VLOG("StatsD would've sent a broadcast but the rate limit stopped us."); 473 return; 474 } 475 } 476 mLastBroadcastTimes[key] = timestampNs; 477 VLOG("StatsD requesting broadcast for %s", key.ToString().c_str()); 478 mSendBroadcast(key); 479 StatsdStats::getInstance().noteBroadcastSent(key); 480 } 481} 482 483void StatsLogProcessor::WriteDataToDiskLocked(const ConfigKey& key, 484 const int64_t timestampNs, 485 const DumpReportReason dumpReportReason) { 486 if (mMetricsManagers.find(key) == mMetricsManagers.end()) { 487 return; 488 } 489 ProtoOutputStream proto; 490 onConfigMetricsReportLocked(key, timestampNs, 491 true /* include_current_partial_bucket*/, 492 false /* include strings */, dumpReportReason, &proto); 493 string file_name = StringPrintf("%s/%ld_%d_%lld", STATS_DATA_DIR, 494 (long)getWallClockSec(), key.GetUid(), (long long)key.GetId()); 495 android::base::unique_fd fd(open(file_name.c_str(), 496 O_WRONLY | O_CREAT | O_CLOEXEC, S_IRUSR | S_IWUSR)); 497 if (fd == -1) { 498 ALOGE("Attempt to write %s but failed", file_name.c_str()); 499 return; 500 } 501 proto.flush(fd.get()); 502} 503 504void StatsLogProcessor::WriteDataToDiskLocked(const DumpReportReason dumpReportReason) { 505 const int64_t timeNs = getElapsedRealtimeNs(); 506 for (auto& pair : mMetricsManagers) { 507 WriteDataToDiskLocked(pair.first, timeNs, dumpReportReason); 508 } 509} 510 511void StatsLogProcessor::WriteDataToDisk(const DumpReportReason dumpReportReason) { 512 std::lock_guard<std::mutex> lock(mMetricsMutex); 513 WriteDataToDiskLocked(dumpReportReason); 514} 515 516void StatsLogProcessor::informPullAlarmFired(const int64_t timestampNs) { 517 std::lock_guard<std::mutex> lock(mMetricsMutex); 518 mStatsPullerManager.OnAlarmFired(timestampNs); 519} 520 521int64_t StatsLogProcessor::getLastReportTimeNs(const ConfigKey& key) { 522 auto it = mMetricsManagers.find(key); 523 if (it == mMetricsManagers.end()) { 524 return 0; 525 } else { 526 return it->second->getLastReportTimeNs(); 527 } 528} 529 530} // namespace statsd 531} // namespace os 532} // namespace android 533