StatsLogProcessor.cpp revision 56ae0d9a48212c6e886e5887a6f9191f3020af40
1/*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define DEBUG false // STOPSHIP if true
18#include "Log.h"
19#include "statslog.h"
20
21#include <android-base/file.h>
22#include <dirent.h>
23#include "StatsLogProcessor.h"
24#include "stats_log_util.h"
25#include "android-base/stringprintf.h"
26#include "guardrail/StatsdStats.h"
27#include "metrics/CountMetricProducer.h"
28#include "external/StatsPullerManager.h"
29#include "stats_util.h"
30#include "storage/StorageManager.h"
31
32#include <log/log_event_list.h>
33#include <utils/Errors.h>
34#include <utils/SystemClock.h>
35
36using namespace android;
37using android::base::StringPrintf;
38using android::util::FIELD_COUNT_REPEATED;
39using android::util::FIELD_TYPE_BOOL;
40using android::util::FIELD_TYPE_FLOAT;
41using android::util::FIELD_TYPE_INT32;
42using android::util::FIELD_TYPE_INT64;
43using android::util::FIELD_TYPE_MESSAGE;
44using android::util::FIELD_TYPE_STRING;
45using android::util::ProtoOutputStream;
46using std::make_unique;
47using std::unique_ptr;
48using std::vector;
49
50namespace android {
51namespace os {
52namespace statsd {
53
54// for ConfigMetricsReportList
55const int FIELD_ID_CONFIG_KEY = 1;
56const int FIELD_ID_REPORTS = 2;
57// for ConfigKey
58const int FIELD_ID_UID = 1;
59const int FIELD_ID_ID = 2;
60// for ConfigMetricsReport
61// const int FIELD_ID_METRICS = 1; // written in MetricsManager.cpp
62const int FIELD_ID_UID_MAP = 2;
63const int FIELD_ID_LAST_REPORT_ELAPSED_NANOS = 3;
64const int FIELD_ID_CURRENT_REPORT_ELAPSED_NANOS = 4;
65const int FIELD_ID_LAST_REPORT_WALL_CLOCK_NANOS = 5;
66const int FIELD_ID_CURRENT_REPORT_WALL_CLOCK_NANOS = 6;
67const int FIELD_ID_DUMP_REPORT_REASON = 8;
68const int FIELD_ID_STRINGS = 9;
69
70#define NS_PER_HOUR 3600 * NS_PER_SEC
71
72#define STATS_DATA_DIR "/data/misc/stats-data"
73
74StatsLogProcessor::StatsLogProcessor(const sp<UidMap>& uidMap,
75                                     const sp<AlarmMonitor>& anomalyAlarmMonitor,
76                                     const sp<AlarmMonitor>& periodicAlarmMonitor,
77                                     const int64_t timeBaseNs,
78                                     const std::function<bool(const ConfigKey&)>& sendBroadcast)
79    : mUidMap(uidMap),
80      mAnomalyAlarmMonitor(anomalyAlarmMonitor),
81      mPeriodicAlarmMonitor(periodicAlarmMonitor),
82      mSendBroadcast(sendBroadcast),
83      mTimeBaseNs(timeBaseNs),
84      mLargestTimestampSeen(0),
85      mLastTimestampSeen(0) {
86}
87
88StatsLogProcessor::~StatsLogProcessor() {
89}
90
91void StatsLogProcessor::onAnomalyAlarmFired(
92        const int64_t& timestampNs,
93        unordered_set<sp<const InternalAlarm>, SpHash<InternalAlarm>> alarmSet) {
94    std::lock_guard<std::mutex> lock(mMetricsMutex);
95    for (const auto& itr : mMetricsManagers) {
96        itr.second->onAnomalyAlarmFired(timestampNs, alarmSet);
97    }
98}
99void StatsLogProcessor::onPeriodicAlarmFired(
100        const int64_t& timestampNs,
101        unordered_set<sp<const InternalAlarm>, SpHash<InternalAlarm>> alarmSet) {
102
103    std::lock_guard<std::mutex> lock(mMetricsMutex);
104    for (const auto& itr : mMetricsManagers) {
105        itr.second->onPeriodicAlarmFired(timestampNs, alarmSet);
106    }
107}
108
109void updateUid(Value* value, int hostUid) {
110    int uid = value->int_value;
111    if (uid != hostUid) {
112        value->setInt(hostUid);
113    }
114}
115
116void StatsLogProcessor::mapIsolatedUidToHostUidIfNecessaryLocked(LogEvent* event) const {
117    if (android::util::AtomsInfo::kAtomsWithAttributionChain.find(event->GetTagId()) !=
118        android::util::AtomsInfo::kAtomsWithAttributionChain.end()) {
119        for (auto& value : *(event->getMutableValues())) {
120            if (value.mField.getPosAtDepth(0) > kAttributionField) {
121                break;
122            }
123            if (isAttributionUidField(value)) {
124                const int hostUid = mUidMap->getHostUidOrSelf(value.mValue.int_value);
125                updateUid(&value.mValue, hostUid);
126            }
127        }
128    } else {
129        auto it = android::util::AtomsInfo::kAtomsWithUidField.find(event->GetTagId());
130        if (it != android::util::AtomsInfo::kAtomsWithUidField.end()) {
131            int uidField = it->second;  // uidField is the field number in proto,
132                                        // starting from 1
133            if (uidField > 0 && (int)event->getValues().size() >= uidField &&
134                (event->getValues())[uidField - 1].mValue.getType() == INT) {
135                Value& value = (*event->getMutableValues())[uidField - 1].mValue;
136                const int hostUid = mUidMap->getHostUidOrSelf(value.int_value);
137                updateUid(&value, hostUid);
138            } else {
139                ALOGE("Malformed log, uid not found. %s", event->ToString().c_str());
140            }
141        }
142    }
143}
144
145void StatsLogProcessor::onIsolatedUidChangedEventLocked(const LogEvent& event) {
146    status_t err = NO_ERROR, err2 = NO_ERROR, err3 = NO_ERROR;
147    bool is_create = event.GetBool(3, &err);
148    auto parent_uid = int(event.GetLong(1, &err2));
149    auto isolated_uid = int(event.GetLong(2, &err3));
150    if (err == NO_ERROR && err2 == NO_ERROR && err3 == NO_ERROR) {
151        if (is_create) {
152            mUidMap->assignIsolatedUid(isolated_uid, parent_uid);
153        } else {
154            mUidMap->removeIsolatedUid(isolated_uid, parent_uid);
155        }
156    } else {
157        ALOGE("Failed to parse uid in the isolated uid change event.");
158    }
159}
160
161void StatsLogProcessor::OnLogEvent(LogEvent* event) {
162    OnLogEvent(event, false);
163}
164
165void StatsLogProcessor::resetConfigs() {
166    std::lock_guard<std::mutex> lock(mMetricsMutex);
167    resetConfigsLocked(getElapsedRealtimeNs());
168}
169
170void StatsLogProcessor::resetConfigsLocked(const int64_t timestampNs) {
171    std::vector<ConfigKey> configKeys;
172    for (auto it = mMetricsManagers.begin(); it != mMetricsManagers.end(); it++) {
173        configKeys.push_back(it->first);
174    }
175    resetConfigsLocked(timestampNs, configKeys);
176}
177
178void StatsLogProcessor::OnLogEvent(LogEvent* event, bool reconnected) {
179    std::lock_guard<std::mutex> lock(mMetricsMutex);
180
181#ifdef VERY_VERBOSE_PRINTING
182    if (mPrintAllLogs) {
183        ALOGI("%s", event->ToString().c_str());
184    }
185#endif
186    const int64_t currentTimestampNs = event->GetElapsedTimestampNs();
187
188    if (reconnected && mLastTimestampSeen != 0) {
189        // LogReader tells us the connection has just been reset. Now we need
190        // to enter reconnection state to find the last CP.
191        mInReconnection = true;
192    }
193
194    if (mInReconnection) {
195        // We see the checkpoint
196        if (currentTimestampNs == mLastTimestampSeen) {
197            mInReconnection = false;
198            // Found the CP. ignore this event, and we will start to read from next event.
199            return;
200        }
201        if (currentTimestampNs > mLargestTimestampSeen) {
202            // We see a new log but CP has not been found yet. Give up now.
203            mLogLossCount++;
204            mInReconnection = false;
205            StatsdStats::getInstance().noteLogLost(currentTimestampNs);
206            // Persist the data before we reset. Do we want this?
207            WriteDataToDiskLocked(CONFIG_RESET);
208            // We see fresher event before we see the checkpoint. We might have lost data.
209            // The best we can do is to reset.
210            resetConfigsLocked(currentTimestampNs);
211        } else {
212            // Still in search of the CP. Keep going.
213            return;
214        }
215    }
216
217    mLogCount++;
218    mLastTimestampSeen = currentTimestampNs;
219    if (mLargestTimestampSeen < currentTimestampNs) {
220        mLargestTimestampSeen = currentTimestampNs;
221    }
222
223    resetIfConfigTtlExpiredLocked(currentTimestampNs);
224
225    StatsdStats::getInstance().noteAtomLogged(
226        event->GetTagId(), event->GetElapsedTimestampNs() / NS_PER_SEC);
227
228    // Hard-coded logic to update the isolated uid's in the uid-map.
229    // The field numbers need to be currently updated by hand with atoms.proto
230    if (event->GetTagId() == android::util::ISOLATED_UID_CHANGED) {
231        onIsolatedUidChangedEventLocked(*event);
232    }
233
234    if (mMetricsManagers.empty()) {
235        return;
236    }
237
238    int64_t curTimeSec = getElapsedRealtimeSec();
239    if (curTimeSec - mLastPullerCacheClearTimeSec > StatsdStats::kPullerCacheClearIntervalSec) {
240        mStatsPullerManager.ClearPullerCacheIfNecessary(curTimeSec * NS_PER_SEC);
241        mLastPullerCacheClearTimeSec = curTimeSec;
242    }
243
244
245    if (event->GetTagId() != android::util::ISOLATED_UID_CHANGED) {
246        // Map the isolated uid to host uid if necessary.
247        mapIsolatedUidToHostUidIfNecessaryLocked(event);
248    }
249
250    // pass the event to metrics managers.
251    for (auto& pair : mMetricsManagers) {
252        pair.second->onLogEvent(*event);
253        flushIfNecessaryLocked(event->GetElapsedTimestampNs(), pair.first, *(pair.second));
254    }
255}
256
257void StatsLogProcessor::OnConfigUpdated(const int64_t timestampNs, const ConfigKey& key,
258                                        const StatsdConfig& config) {
259    std::lock_guard<std::mutex> lock(mMetricsMutex);
260    WriteDataToDiskLocked(key, timestampNs, CONFIG_UPDATED);
261    OnConfigUpdatedLocked(timestampNs, key, config);
262}
263
264void StatsLogProcessor::OnConfigUpdatedLocked(
265        const int64_t timestampNs, const ConfigKey& key, const StatsdConfig& config) {
266    VLOG("Updated configuration for key %s", key.ToString().c_str());
267    sp<MetricsManager> newMetricsManager =
268        new MetricsManager(key, config, mTimeBaseNs, timestampNs, mUidMap,
269                           mAnomalyAlarmMonitor, mPeriodicAlarmMonitor);
270    if (newMetricsManager->isConfigValid()) {
271        mUidMap->OnConfigUpdated(key);
272        if (newMetricsManager->shouldAddUidMapListener()) {
273            // We have to add listener after the MetricsManager is constructed because it's
274            // not safe to create wp or sp from this pointer inside its constructor.
275            mUidMap->addListener(newMetricsManager.get());
276        }
277        newMetricsManager->refreshTtl(timestampNs);
278        mMetricsManagers[key] = newMetricsManager;
279        VLOG("StatsdConfig valid");
280    } else {
281        // If there is any error in the config, don't use it.
282        ALOGE("StatsdConfig NOT valid");
283    }
284}
285
286size_t StatsLogProcessor::GetMetricsSize(const ConfigKey& key) const {
287    std::lock_guard<std::mutex> lock(mMetricsMutex);
288    auto it = mMetricsManagers.find(key);
289    if (it == mMetricsManagers.end()) {
290        ALOGW("Config source %s does not exist", key.ToString().c_str());
291        return 0;
292    }
293    return it->second->byteSize();
294}
295
296void StatsLogProcessor::dumpStates(FILE* out, bool verbose) {
297    std::lock_guard<std::mutex> lock(mMetricsMutex);
298    fprintf(out, "MetricsManager count: %lu\n", (unsigned long)mMetricsManagers.size());
299    for (auto metricsManager : mMetricsManagers) {
300        metricsManager.second->dumpStates(out, verbose);
301    }
302}
303
304/*
305 * onDumpReport dumps serialized ConfigMetricsReportList into outData.
306 */
307void StatsLogProcessor::onDumpReport(const ConfigKey& key, const int64_t dumpTimeStampNs,
308                                     const bool include_current_partial_bucket,
309                                     const DumpReportReason dumpReportReason,
310                                     vector<uint8_t>* outData) {
311    std::lock_guard<std::mutex> lock(mMetricsMutex);
312
313    ProtoOutputStream proto;
314
315    // Start of ConfigKey.
316    uint64_t configKeyToken = proto.start(FIELD_TYPE_MESSAGE | FIELD_ID_CONFIG_KEY);
317    proto.write(FIELD_TYPE_INT32 | FIELD_ID_UID, key.GetUid());
318    proto.write(FIELD_TYPE_INT64 | FIELD_ID_ID, (long long)key.GetId());
319    proto.end(configKeyToken);
320    // End of ConfigKey.
321
322    // Then, check stats-data directory to see there's any file containing
323    // ConfigMetricsReport from previous shutdowns to concatenate to reports.
324    StorageManager::appendConfigMetricsReport(key, &proto);
325
326    auto it = mMetricsManagers.find(key);
327    if (it != mMetricsManagers.end()) {
328        // This allows another broadcast to be sent within the rate-limit period if we get close to
329        // filling the buffer again soon.
330        mLastBroadcastTimes.erase(key);
331
332        // Start of ConfigMetricsReport (reports).
333        uint64_t reportsToken =
334                proto.start(FIELD_TYPE_MESSAGE | FIELD_COUNT_REPEATED | FIELD_ID_REPORTS);
335        onConfigMetricsReportLocked(key, dumpTimeStampNs, include_current_partial_bucket,
336                                    dumpReportReason, &proto);
337        proto.end(reportsToken);
338        // End of ConfigMetricsReport (reports).
339    } else {
340        ALOGW("Config source %s does not exist", key.ToString().c_str());
341    }
342
343    if (outData != nullptr) {
344        outData->clear();
345        outData->resize(proto.size());
346        size_t pos = 0;
347        auto iter = proto.data();
348        while (iter.readBuffer() != NULL) {
349            size_t toRead = iter.currentToRead();
350            std::memcpy(&((*outData)[pos]), iter.readBuffer(), toRead);
351            pos += toRead;
352            iter.rp()->move(toRead);
353        }
354    }
355
356    StatsdStats::getInstance().noteMetricsReportSent(key, proto.size());
357}
358
359/*
360 * onConfigMetricsReportLocked dumps serialized ConfigMetricsReport into outData.
361 */
362void StatsLogProcessor::onConfigMetricsReportLocked(const ConfigKey& key,
363                                                    const int64_t dumpTimeStampNs,
364                                                    const bool include_current_partial_bucket,
365                                                    const DumpReportReason dumpReportReason,
366                                                    ProtoOutputStream* proto) {
367    // We already checked whether key exists in mMetricsManagers in
368    // WriteDataToDisk.
369    auto it = mMetricsManagers.find(key);
370    if (it == mMetricsManagers.end()) {
371        return;
372    }
373    int64_t lastReportTimeNs = it->second->getLastReportTimeNs();
374    int64_t lastReportWallClockNs = it->second->getLastReportWallClockNs();
375
376    std::set<string> str_set;
377
378    // First, fill in ConfigMetricsReport using current data on memory, which
379    // starts from filling in StatsLogReport's.
380    it->second->onDumpReport(dumpTimeStampNs, include_current_partial_bucket,
381                             &str_set, proto);
382
383    // Fill in UidMap if there is at least one metric to report.
384    // This skips the uid map if it's an empty config.
385    if (it->second->getNumMetrics() > 0) {
386        uint64_t uidMapToken = proto->start(FIELD_TYPE_MESSAGE | FIELD_ID_UID_MAP);
387        mUidMap->appendUidMap(dumpTimeStampNs, key, &str_set, proto);
388        proto->end(uidMapToken);
389    }
390
391    // Fill in the timestamps.
392    proto->write(FIELD_TYPE_INT64 | FIELD_ID_LAST_REPORT_ELAPSED_NANOS,
393                (long long)lastReportTimeNs);
394    proto->write(FIELD_TYPE_INT64 | FIELD_ID_CURRENT_REPORT_ELAPSED_NANOS,
395                (long long)dumpTimeStampNs);
396    proto->write(FIELD_TYPE_INT64 | FIELD_ID_LAST_REPORT_WALL_CLOCK_NANOS,
397                (long long)lastReportWallClockNs);
398    proto->write(FIELD_TYPE_INT64 | FIELD_ID_CURRENT_REPORT_WALL_CLOCK_NANOS,
399                (long long)getWallClockNs());
400    // Dump report reason
401    proto->write(FIELD_TYPE_INT32 | FIELD_ID_DUMP_REPORT_REASON, dumpReportReason);
402
403    for (const auto& str : str_set) {
404        proto->write(FIELD_TYPE_STRING | FIELD_COUNT_REPEATED | FIELD_ID_STRINGS, str);
405    }
406}
407
408void StatsLogProcessor::resetConfigsLocked(const int64_t timestampNs,
409                                           const std::vector<ConfigKey>& configs) {
410    for (const auto& key : configs) {
411        StatsdConfig config;
412        if (StorageManager::readConfigFromDisk(key, &config)) {
413            OnConfigUpdatedLocked(timestampNs, key, config);
414            StatsdStats::getInstance().noteConfigReset(key);
415        } else {
416            ALOGE("Failed to read backup config from disk for : %s", key.ToString().c_str());
417            auto it = mMetricsManagers.find(key);
418            if (it != mMetricsManagers.end()) {
419                it->second->refreshTtl(timestampNs);
420            }
421        }
422    }
423}
424
425void StatsLogProcessor::resetIfConfigTtlExpiredLocked(const int64_t timestampNs) {
426    std::vector<ConfigKey> configKeysTtlExpired;
427    for (auto it = mMetricsManagers.begin(); it != mMetricsManagers.end(); it++) {
428        if (it->second != nullptr && !it->second->isInTtl(timestampNs)) {
429            configKeysTtlExpired.push_back(it->first);
430        }
431    }
432    if (configKeysTtlExpired.size() > 0) {
433        WriteDataToDiskLocked(CONFIG_RESET);
434        resetConfigsLocked(timestampNs, configKeysTtlExpired);
435    }
436}
437
438void StatsLogProcessor::OnConfigRemoved(const ConfigKey& key) {
439    std::lock_guard<std::mutex> lock(mMetricsMutex);
440    auto it = mMetricsManagers.find(key);
441    if (it != mMetricsManagers.end()) {
442        WriteDataToDiskLocked(key, getElapsedRealtimeNs(), CONFIG_REMOVED);
443        mMetricsManagers.erase(it);
444        mUidMap->OnConfigRemoved(key);
445    }
446    StatsdStats::getInstance().noteConfigRemoved(key);
447
448    mLastBroadcastTimes.erase(key);
449
450    if (mMetricsManagers.empty()) {
451        mStatsPullerManager.ForceClearPullerCache();
452    }
453}
454
455void StatsLogProcessor::flushIfNecessaryLocked(
456    int64_t timestampNs, const ConfigKey& key, MetricsManager& metricsManager) {
457    auto lastCheckTime = mLastByteSizeTimes.find(key);
458    if (lastCheckTime != mLastByteSizeTimes.end()) {
459        if (timestampNs - lastCheckTime->second < StatsdStats::kMinByteSizeCheckPeriodNs) {
460            return;
461        }
462    }
463
464    // We suspect that the byteSize() computation is expensive, so we set a rate limit.
465    size_t totalBytes = metricsManager.byteSize();
466    mLastByteSizeTimes[key] = timestampNs;
467    bool requestDump = false;
468    if (totalBytes >
469        StatsdStats::kMaxMetricsBytesPerConfig) {  // Too late. We need to start clearing data.
470        metricsManager.dropData(timestampNs);
471        StatsdStats::getInstance().noteDataDropped(key);
472        VLOG("StatsD had to toss out metrics for %s", key.ToString().c_str());
473    } else if ((totalBytes > StatsdStats::kBytesPerConfigTriggerGetData) ||
474               (mOnDiskDataConfigs.find(key) != mOnDiskDataConfigs.end())) {
475        // Request to send a broadcast if:
476        // 1. in memory data > threshold   OR
477        // 2. config has old data report on disk.
478        requestDump = true;
479    }
480
481    if (requestDump) {
482        // Send broadcast so that receivers can pull data.
483        auto lastBroadcastTime = mLastBroadcastTimes.find(key);
484        if (lastBroadcastTime != mLastBroadcastTimes.end()) {
485            if (timestampNs - lastBroadcastTime->second < StatsdStats::kMinBroadcastPeriodNs) {
486                VLOG("StatsD would've sent a broadcast but the rate limit stopped us.");
487                return;
488            }
489        }
490        if (mSendBroadcast(key)) {
491            mOnDiskDataConfigs.erase(key);
492            VLOG("StatsD triggered data fetch for %s", key.ToString().c_str());
493            mLastBroadcastTimes[key] = timestampNs;
494            StatsdStats::getInstance().noteBroadcastSent(key);
495        }
496    }
497}
498
499void StatsLogProcessor::WriteDataToDiskLocked(const ConfigKey& key,
500                                              const int64_t timestampNs,
501                                              const DumpReportReason dumpReportReason) {
502    if (mMetricsManagers.find(key) == mMetricsManagers.end() ||
503        !mMetricsManagers.find(key)->second->shouldWriteToDisk()) {
504        return;
505    }
506    ProtoOutputStream proto;
507    onConfigMetricsReportLocked(key, timestampNs, true /* include_current_partial_bucket*/,
508                                dumpReportReason, &proto);
509    string file_name = StringPrintf("%s/%ld_%d_%lld", STATS_DATA_DIR,
510         (long)getWallClockSec(), key.GetUid(), (long long)key.GetId());
511    android::base::unique_fd fd(open(file_name.c_str(),
512                                O_WRONLY | O_CREAT | O_CLOEXEC, S_IRUSR | S_IWUSR));
513    if (fd == -1) {
514        ALOGE("Attempt to write %s but failed", file_name.c_str());
515        return;
516    }
517    proto.flush(fd.get());
518    // We were able to write the ConfigMetricsReport to disk, so we should trigger collection ASAP.
519    mOnDiskDataConfigs.insert(key);
520}
521
522void StatsLogProcessor::WriteDataToDiskLocked(const DumpReportReason dumpReportReason) {
523    const int64_t timeNs = getElapsedRealtimeNs();
524    for (auto& pair : mMetricsManagers) {
525        WriteDataToDiskLocked(pair.first, timeNs, dumpReportReason);
526    }
527}
528
529void StatsLogProcessor::WriteDataToDisk(const DumpReportReason dumpReportReason) {
530    std::lock_guard<std::mutex> lock(mMetricsMutex);
531    WriteDataToDiskLocked(dumpReportReason);
532}
533
534void StatsLogProcessor::informPullAlarmFired(const int64_t timestampNs) {
535    std::lock_guard<std::mutex> lock(mMetricsMutex);
536    mStatsPullerManager.OnAlarmFired(timestampNs);
537}
538
539int64_t StatsLogProcessor::getLastReportTimeNs(const ConfigKey& key) {
540    auto it = mMetricsManagers.find(key);
541    if (it == mMetricsManagers.end()) {
542        return 0;
543    } else {
544        return it->second->getLastReportTimeNs();
545    }
546}
547
548void StatsLogProcessor::noteOnDiskData(const ConfigKey& key) {
549    std::lock_guard<std::mutex> lock(mMetricsMutex);
550    mOnDiskDataConfigs.insert(key);
551}
552
553}  // namespace statsd
554}  // namespace os
555}  // namespace android
556