StatsLogProcessor.cpp revision 892f3d32293b8358c72da4a94633827abbda640b
1/*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define DEBUG true  // STOPSHIP if true
18#include "Log.h"
19#include "statslog.h"
20
21#include <android-base/file.h>
22#include <dirent.h>
23#include "StatsLogProcessor.h"
24#include "stats_log_util.h"
25#include "android-base/stringprintf.h"
26#include "guardrail/StatsdStats.h"
27#include "metrics/CountMetricProducer.h"
28#include "external/StatsPullerManager.h"
29#include "stats_util.h"
30#include "storage/StorageManager.h"
31
32#include <log/log_event_list.h>
33#include <utils/Errors.h>
34#include <utils/SystemClock.h>
35
36using namespace android;
37using android::base::StringPrintf;
38using android::util::FIELD_COUNT_REPEATED;
39using android::util::FIELD_TYPE_BOOL;
40using android::util::FIELD_TYPE_FLOAT;
41using android::util::FIELD_TYPE_INT32;
42using android::util::FIELD_TYPE_INT64;
43using android::util::FIELD_TYPE_MESSAGE;
44using android::util::FIELD_TYPE_STRING;
45using android::util::ProtoOutputStream;
46using std::make_unique;
47using std::unique_ptr;
48using std::vector;
49
50namespace android {
51namespace os {
52namespace statsd {
53
54// for ConfigMetricsReportList
55const int FIELD_ID_CONFIG_KEY = 1;
56const int FIELD_ID_REPORTS = 2;
57// for ConfigKey
58const int FIELD_ID_UID = 1;
59const int FIELD_ID_ID = 2;
60// for ConfigMetricsReport
61// const int FIELD_ID_METRICS = 1; // written in MetricsManager.cpp
62const int FIELD_ID_UID_MAP = 2;
63const int FIELD_ID_LAST_REPORT_ELAPSED_NANOS = 3;
64const int FIELD_ID_CURRENT_REPORT_ELAPSED_NANOS = 4;
65const int FIELD_ID_LAST_REPORT_WALL_CLOCK_NANOS = 5;
66const int FIELD_ID_CURRENT_REPORT_WALL_CLOCK_NANOS = 6;
67const int FIELD_ID_DUMP_REPORT_REASON = 8;
68const int FIELD_ID_STRINGS = 9;
69
70#define NS_PER_HOUR 3600 * NS_PER_SEC
71
72#define STATS_DATA_DIR "/data/misc/stats-data"
73
74StatsLogProcessor::StatsLogProcessor(const sp<UidMap>& uidMap,
75                                     const sp<AlarmMonitor>& anomalyAlarmMonitor,
76                                     const sp<AlarmMonitor>& periodicAlarmMonitor,
77                                     const int64_t timeBaseNs,
78                                     const std::function<void(const ConfigKey&)>& sendBroadcast)
79    : mUidMap(uidMap),
80      mAnomalyAlarmMonitor(anomalyAlarmMonitor),
81      mPeriodicAlarmMonitor(periodicAlarmMonitor),
82      mSendBroadcast(sendBroadcast),
83      mTimeBaseNs(timeBaseNs),
84      mLargestTimestampSeen(0),
85      mLastTimestampSeen(0) {
86}
87
88StatsLogProcessor::~StatsLogProcessor() {
89}
90
91void StatsLogProcessor::onAnomalyAlarmFired(
92        const int64_t& timestampNs,
93        unordered_set<sp<const InternalAlarm>, SpHash<InternalAlarm>> alarmSet) {
94    std::lock_guard<std::mutex> lock(mMetricsMutex);
95    for (const auto& itr : mMetricsManagers) {
96        itr.second->onAnomalyAlarmFired(timestampNs, alarmSet);
97    }
98}
99void StatsLogProcessor::onPeriodicAlarmFired(
100        const int64_t& timestampNs,
101        unordered_set<sp<const InternalAlarm>, SpHash<InternalAlarm>> alarmSet) {
102
103    std::lock_guard<std::mutex> lock(mMetricsMutex);
104    for (const auto& itr : mMetricsManagers) {
105        itr.second->onPeriodicAlarmFired(timestampNs, alarmSet);
106    }
107}
108
109void updateUid(Value* value, int hostUid) {
110    int uid = value->int_value;
111    if (uid != hostUid) {
112        value->setInt(hostUid);
113    }
114}
115
116void StatsLogProcessor::mapIsolatedUidToHostUidIfNecessaryLocked(LogEvent* event) const {
117    if (android::util::AtomsInfo::kAtomsWithAttributionChain.find(event->GetTagId()) !=
118        android::util::AtomsInfo::kAtomsWithAttributionChain.end()) {
119        for (auto& value : *(event->getMutableValues())) {
120            if (value.mField.getPosAtDepth(0) > kAttributionField) {
121                break;
122            }
123            if (isAttributionUidField(value)) {
124                const int hostUid = mUidMap->getHostUidOrSelf(value.mValue.int_value);
125                updateUid(&value.mValue, hostUid);
126            }
127        }
128    } else {
129        auto it = android::util::AtomsInfo::kAtomsWithUidField.find(event->GetTagId());
130        if (it != android::util::AtomsInfo::kAtomsWithUidField.end()) {
131            int uidField = it->second;  // uidField is the field number in proto,
132                                        // starting from 1
133            if (uidField > 0 && (int)event->getValues().size() >= uidField &&
134                (event->getValues())[uidField - 1].mValue.getType() == INT) {
135                Value& value = (*event->getMutableValues())[uidField - 1].mValue;
136                const int hostUid = mUidMap->getHostUidOrSelf(value.int_value);
137                updateUid(&value, hostUid);
138            } else {
139                ALOGE("Malformed log, uid not found. %s", event->ToString().c_str());
140            }
141        }
142    }
143}
144
145void StatsLogProcessor::onIsolatedUidChangedEventLocked(const LogEvent& event) {
146    status_t err = NO_ERROR, err2 = NO_ERROR, err3 = NO_ERROR;
147    bool is_create = event.GetBool(3, &err);
148    auto parent_uid = int(event.GetLong(1, &err2));
149    auto isolated_uid = int(event.GetLong(2, &err3));
150    if (err == NO_ERROR && err2 == NO_ERROR && err3 == NO_ERROR) {
151        if (is_create) {
152            mUidMap->assignIsolatedUid(isolated_uid, parent_uid);
153        } else {
154            mUidMap->removeIsolatedUid(isolated_uid, parent_uid);
155        }
156    } else {
157        ALOGE("Failed to parse uid in the isolated uid change event.");
158    }
159}
160
161void StatsLogProcessor::OnLogEvent(LogEvent* event) {
162    OnLogEvent(event, false);
163}
164
165void StatsLogProcessor::resetConfigs() {
166    std::lock_guard<std::mutex> lock(mMetricsMutex);
167    resetConfigsLocked(getElapsedRealtimeNs());
168}
169
170void StatsLogProcessor::resetConfigsLocked(const int64_t timestampNs) {
171    std::vector<ConfigKey> configKeys;
172    for (auto it = mMetricsManagers.begin(); it != mMetricsManagers.end(); it++) {
173        configKeys.push_back(it->first);
174    }
175    resetConfigsLocked(timestampNs, configKeys);
176}
177
178void StatsLogProcessor::OnLogEvent(LogEvent* event, bool reconnected) {
179    std::lock_guard<std::mutex> lock(mMetricsMutex);
180    const int64_t currentTimestampNs = event->GetElapsedTimestampNs();
181
182    if (reconnected && mLastTimestampSeen != 0) {
183        // LogReader tells us the connection has just been reset. Now we need
184        // to enter reconnection state to find the last CP.
185        mInReconnection = true;
186    }
187
188    if (mInReconnection) {
189        // We see the checkpoint
190        if (currentTimestampNs == mLastTimestampSeen) {
191            mInReconnection = false;
192            // Found the CP. ignore this event, and we will start to read from next event.
193            return;
194        }
195        if (currentTimestampNs > mLargestTimestampSeen) {
196            // We see a new log but CP has not been found yet. Give up now.
197            mLogLossCount++;
198            mInReconnection = false;
199            StatsdStats::getInstance().noteLogLost(currentTimestampNs);
200            // Persist the data before we reset. Do we want this?
201            WriteDataToDiskLocked(CONFIG_RESET);
202            // We see fresher event before we see the checkpoint. We might have lost data.
203            // The best we can do is to reset.
204            resetConfigsLocked(currentTimestampNs);
205        } else {
206            // Still in search of the CP. Keep going.
207            return;
208        }
209    }
210
211    mLogCount++;
212    mLastTimestampSeen = currentTimestampNs;
213    if (mLargestTimestampSeen < currentTimestampNs) {
214        mLargestTimestampSeen = currentTimestampNs;
215    }
216
217    resetIfConfigTtlExpiredLocked(currentTimestampNs);
218
219    StatsdStats::getInstance().noteAtomLogged(
220        event->GetTagId(), event->GetElapsedTimestampNs() / NS_PER_SEC);
221
222    // Hard-coded logic to update the isolated uid's in the uid-map.
223    // The field numbers need to be currently updated by hand with atoms.proto
224    if (event->GetTagId() == android::util::ISOLATED_UID_CHANGED) {
225        onIsolatedUidChangedEventLocked(*event);
226    }
227
228    if (mMetricsManagers.empty()) {
229        return;
230    }
231
232    int64_t curTimeSec = getElapsedRealtimeSec();
233    if (curTimeSec - mLastPullerCacheClearTimeSec > StatsdStats::kPullerCacheClearIntervalSec) {
234        mStatsPullerManager.ClearPullerCacheIfNecessary(curTimeSec * NS_PER_SEC);
235        mLastPullerCacheClearTimeSec = curTimeSec;
236    }
237
238
239    if (event->GetTagId() != android::util::ISOLATED_UID_CHANGED) {
240        // Map the isolated uid to host uid if necessary.
241        mapIsolatedUidToHostUidIfNecessaryLocked(event);
242    }
243
244    // pass the event to metrics managers.
245    for (auto& pair : mMetricsManagers) {
246        pair.second->onLogEvent(*event);
247        flushIfNecessaryLocked(event->GetElapsedTimestampNs(), pair.first, *(pair.second));
248    }
249}
250
251void StatsLogProcessor::OnConfigUpdated(const int64_t timestampNs, const ConfigKey& key,
252                                        const StatsdConfig& config) {
253    std::lock_guard<std::mutex> lock(mMetricsMutex);
254    WriteDataToDiskLocked(key, timestampNs, CONFIG_UPDATED);
255    OnConfigUpdatedLocked(timestampNs, key, config);
256}
257
258void StatsLogProcessor::OnConfigUpdatedLocked(
259        const int64_t timestampNs, const ConfigKey& key, const StatsdConfig& config) {
260    VLOG("Updated configuration for key %s", key.ToString().c_str());
261    sp<MetricsManager> newMetricsManager =
262        new MetricsManager(key, config, mTimeBaseNs, timestampNs, mUidMap,
263                           mAnomalyAlarmMonitor, mPeriodicAlarmMonitor);
264    if (newMetricsManager->isConfigValid()) {
265        mUidMap->OnConfigUpdated(key);
266        if (newMetricsManager->shouldAddUidMapListener()) {
267            // We have to add listener after the MetricsManager is constructed because it's
268            // not safe to create wp or sp from this pointer inside its constructor.
269            mUidMap->addListener(newMetricsManager.get());
270        }
271        newMetricsManager->refreshTtl(timestampNs);
272        mMetricsManagers[key] = newMetricsManager;
273        VLOG("StatsdConfig valid");
274    } else {
275        // If there is any error in the config, don't use it.
276        ALOGE("StatsdConfig NOT valid");
277    }
278}
279
280size_t StatsLogProcessor::GetMetricsSize(const ConfigKey& key) const {
281    std::lock_guard<std::mutex> lock(mMetricsMutex);
282    auto it = mMetricsManagers.find(key);
283    if (it == mMetricsManagers.end()) {
284        ALOGW("Config source %s does not exist", key.ToString().c_str());
285        return 0;
286    }
287    return it->second->byteSize();
288}
289
290void StatsLogProcessor::dumpStates(FILE* out, bool verbose) {
291    std::lock_guard<std::mutex> lock(mMetricsMutex);
292    fprintf(out, "MetricsManager count: %lu\n", (unsigned long)mMetricsManagers.size());
293    for (auto metricsManager : mMetricsManagers) {
294        metricsManager.second->dumpStates(out, verbose);
295    }
296}
297
298/*
299 * onDumpReport dumps serialized ConfigMetricsReportList into outData.
300 */
301void StatsLogProcessor::onDumpReport(const ConfigKey& key, const int64_t dumpTimeStampNs,
302                                     const bool include_current_partial_bucket,
303                                     const bool include_string,
304                                     const DumpReportReason dumpReportReason,
305                                     vector<uint8_t>* outData) {
306    std::lock_guard<std::mutex> lock(mMetricsMutex);
307
308    ProtoOutputStream proto;
309
310    // Start of ConfigKey.
311    uint64_t configKeyToken = proto.start(FIELD_TYPE_MESSAGE | FIELD_ID_CONFIG_KEY);
312    proto.write(FIELD_TYPE_INT32 | FIELD_ID_UID, key.GetUid());
313    proto.write(FIELD_TYPE_INT64 | FIELD_ID_ID, (long long)key.GetId());
314    proto.end(configKeyToken);
315    // End of ConfigKey.
316
317    // Then, check stats-data directory to see there's any file containing
318    // ConfigMetricsReport from previous shutdowns to concatenate to reports.
319    StorageManager::appendConfigMetricsReport(key, &proto);
320
321    auto it = mMetricsManagers.find(key);
322    if (it != mMetricsManagers.end()) {
323        // This allows another broadcast to be sent within the rate-limit period if we get close to
324        // filling the buffer again soon.
325        mLastBroadcastTimes.erase(key);
326
327        // Start of ConfigMetricsReport (reports).
328        uint64_t reportsToken =
329                proto.start(FIELD_TYPE_MESSAGE | FIELD_COUNT_REPEATED | FIELD_ID_REPORTS);
330        onConfigMetricsReportLocked(key, dumpTimeStampNs, include_current_partial_bucket,
331                                    include_string, dumpReportReason, &proto);
332        proto.end(reportsToken);
333        // End of ConfigMetricsReport (reports).
334    } else {
335        ALOGW("Config source %s does not exist", key.ToString().c_str());
336    }
337
338    if (outData != nullptr) {
339        outData->clear();
340        outData->resize(proto.size());
341        size_t pos = 0;
342        auto iter = proto.data();
343        while (iter.readBuffer() != NULL) {
344            size_t toRead = iter.currentToRead();
345            std::memcpy(&((*outData)[pos]), iter.readBuffer(), toRead);
346            pos += toRead;
347            iter.rp()->move(toRead);
348        }
349    }
350
351    StatsdStats::getInstance().noteMetricsReportSent(key, proto.size());
352}
353
354/*
355 * onConfigMetricsReportLocked dumps serialized ConfigMetricsReport into outData.
356 */
357void StatsLogProcessor::onConfigMetricsReportLocked(const ConfigKey& key,
358                                                    const int64_t dumpTimeStampNs,
359                                                    const bool include_current_partial_bucket,
360                                                    const bool include_string,
361                                                    const DumpReportReason dumpReportReason,
362                                                    ProtoOutputStream* proto) {
363    // We already checked whether key exists in mMetricsManagers in
364    // WriteDataToDisk.
365    auto it = mMetricsManagers.find(key);
366    if (it == mMetricsManagers.end()) {
367        return;
368    }
369    int64_t lastReportTimeNs = it->second->getLastReportTimeNs();
370    int64_t lastReportWallClockNs = it->second->getLastReportWallClockNs();
371
372    std::set<string> str_set;
373
374    // First, fill in ConfigMetricsReport using current data on memory, which
375    // starts from filling in StatsLogReport's.
376    it->second->onDumpReport(dumpTimeStampNs, include_current_partial_bucket,
377                             &str_set, proto);
378
379    // Fill in UidMap.
380    uint64_t uidMapToken = proto->start(FIELD_TYPE_MESSAGE | FIELD_ID_UID_MAP);
381    mUidMap->appendUidMap(dumpTimeStampNs, key, &str_set, proto);
382    proto->end(uidMapToken);
383
384    // Fill in the timestamps.
385    proto->write(FIELD_TYPE_INT64 | FIELD_ID_LAST_REPORT_ELAPSED_NANOS,
386                (long long)lastReportTimeNs);
387    proto->write(FIELD_TYPE_INT64 | FIELD_ID_CURRENT_REPORT_ELAPSED_NANOS,
388                (long long)dumpTimeStampNs);
389    proto->write(FIELD_TYPE_INT64 | FIELD_ID_LAST_REPORT_WALL_CLOCK_NANOS,
390                (long long)lastReportWallClockNs);
391    proto->write(FIELD_TYPE_INT64 | FIELD_ID_CURRENT_REPORT_WALL_CLOCK_NANOS,
392                (long long)getWallClockNs());
393    // Dump report reason
394    proto->write(FIELD_TYPE_INT32 | FIELD_ID_DUMP_REPORT_REASON, dumpReportReason);
395
396    if (include_string) {
397        for (const auto& str : str_set) {
398            proto->write(FIELD_TYPE_STRING | FIELD_COUNT_REPEATED | FIELD_ID_STRINGS, str);
399        }
400    }
401}
402
403void StatsLogProcessor::resetConfigsLocked(const int64_t timestampNs,
404                                           const std::vector<ConfigKey>& configs) {
405    for (const auto& key : configs) {
406        StatsdConfig config;
407        if (StorageManager::readConfigFromDisk(key, &config)) {
408            OnConfigUpdatedLocked(timestampNs, key, config);
409            StatsdStats::getInstance().noteConfigReset(key);
410        } else {
411            ALOGE("Failed to read backup config from disk for : %s", key.ToString().c_str());
412            auto it = mMetricsManagers.find(key);
413            if (it != mMetricsManagers.end()) {
414                it->second->refreshTtl(timestampNs);
415            }
416        }
417    }
418}
419
420void StatsLogProcessor::resetIfConfigTtlExpiredLocked(const int64_t timestampNs) {
421    std::vector<ConfigKey> configKeysTtlExpired;
422    for (auto it = mMetricsManagers.begin(); it != mMetricsManagers.end(); it++) {
423        if (it->second != nullptr && !it->second->isInTtl(timestampNs)) {
424            configKeysTtlExpired.push_back(it->first);
425        }
426    }
427    if (configKeysTtlExpired.size() > 0) {
428        WriteDataToDiskLocked(CONFIG_RESET);
429        resetConfigsLocked(timestampNs, configKeysTtlExpired);
430    }
431}
432
433void StatsLogProcessor::OnConfigRemoved(const ConfigKey& key) {
434    std::lock_guard<std::mutex> lock(mMetricsMutex);
435    auto it = mMetricsManagers.find(key);
436    if (it != mMetricsManagers.end()) {
437        WriteDataToDiskLocked(key, getElapsedRealtimeNs(), CONFIG_REMOVED);
438        mMetricsManagers.erase(it);
439        mUidMap->OnConfigRemoved(key);
440    }
441    StatsdStats::getInstance().noteConfigRemoved(key);
442
443    mLastBroadcastTimes.erase(key);
444
445    if (mMetricsManagers.empty()) {
446        mStatsPullerManager.ForceClearPullerCache();
447    }
448}
449
450void StatsLogProcessor::flushIfNecessaryLocked(
451    int64_t timestampNs, const ConfigKey& key, MetricsManager& metricsManager) {
452    auto lastCheckTime = mLastByteSizeTimes.find(key);
453    if (lastCheckTime != mLastByteSizeTimes.end()) {
454        if (timestampNs - lastCheckTime->second < StatsdStats::kMinByteSizeCheckPeriodNs) {
455            return;
456        }
457    }
458
459    // We suspect that the byteSize() computation is expensive, so we set a rate limit.
460    size_t totalBytes = metricsManager.byteSize();
461    mLastByteSizeTimes[key] = timestampNs;
462    if (totalBytes >
463        StatsdStats::kMaxMetricsBytesPerConfig) {  // Too late. We need to start clearing data.
464        metricsManager.dropData(timestampNs);
465        StatsdStats::getInstance().noteDataDropped(key);
466        VLOG("StatsD had to toss out metrics for %s", key.ToString().c_str());
467    } else if (totalBytes > StatsdStats::kBytesPerConfigTriggerGetData) {
468        // Send broadcast so that receivers can pull data.
469        auto lastBroadcastTime = mLastBroadcastTimes.find(key);
470        if (lastBroadcastTime != mLastBroadcastTimes.end()) {
471            if (timestampNs - lastBroadcastTime->second < StatsdStats::kMinBroadcastPeriodNs) {
472                VLOG("StatsD would've sent a broadcast but the rate limit stopped us.");
473                return;
474            }
475        }
476        mLastBroadcastTimes[key] = timestampNs;
477        VLOG("StatsD requesting broadcast for %s", key.ToString().c_str());
478        mSendBroadcast(key);
479        StatsdStats::getInstance().noteBroadcastSent(key);
480    }
481}
482
483void StatsLogProcessor::WriteDataToDiskLocked(const ConfigKey& key,
484                                              const int64_t timestampNs,
485                                              const DumpReportReason dumpReportReason) {
486    if (mMetricsManagers.find(key) == mMetricsManagers.end()) {
487        return;
488    }
489    ProtoOutputStream proto;
490    onConfigMetricsReportLocked(key, timestampNs,
491                                true /* include_current_partial_bucket*/,
492                                false /* include strings */, dumpReportReason, &proto);
493    string file_name = StringPrintf("%s/%ld_%d_%lld", STATS_DATA_DIR,
494         (long)getWallClockSec(), key.GetUid(), (long long)key.GetId());
495    android::base::unique_fd fd(open(file_name.c_str(),
496                                O_WRONLY | O_CREAT | O_CLOEXEC, S_IRUSR | S_IWUSR));
497    if (fd == -1) {
498        ALOGE("Attempt to write %s but failed", file_name.c_str());
499        return;
500    }
501    proto.flush(fd.get());
502}
503
504void StatsLogProcessor::WriteDataToDiskLocked(const DumpReportReason dumpReportReason) {
505    const int64_t timeNs = getElapsedRealtimeNs();
506    for (auto& pair : mMetricsManagers) {
507        WriteDataToDiskLocked(pair.first, timeNs, dumpReportReason);
508    }
509}
510
511void StatsLogProcessor::WriteDataToDisk(const DumpReportReason dumpReportReason) {
512    std::lock_guard<std::mutex> lock(mMetricsMutex);
513    WriteDataToDiskLocked(dumpReportReason);
514}
515
516void StatsLogProcessor::informPullAlarmFired(const int64_t timestampNs) {
517    std::lock_guard<std::mutex> lock(mMetricsMutex);
518    mStatsPullerManager.OnAlarmFired(timestampNs);
519}
520
521int64_t StatsLogProcessor::getLastReportTimeNs(const ConfigKey& key) {
522    auto it = mMetricsManagers.find(key);
523    if (it == mMetricsManagers.end()) {
524        return 0;
525    } else {
526        return it->second->getLastReportTimeNs();
527    }
528}
529
530}  // namespace statsd
531}  // namespace os
532}  // namespace android
533