GaugeMetricProducer.cpp revision 6189807c12e9cf32a36d32557725561c44b8aa5a
1/*
2* Copyright (C) 2017 The Android Open Source Project
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8*      http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*/
16
17#define DEBUG false  // STOPSHIP if true
18#include "Log.h"
19
20#include "GaugeMetricProducer.h"
21#include "guardrail/StatsdStats.h"
22#include "stats_log_util.h"
23
24#include <cutils/log.h>
25
26using android::util::FIELD_COUNT_REPEATED;
27using android::util::FIELD_TYPE_BOOL;
28using android::util::FIELD_TYPE_FLOAT;
29using android::util::FIELD_TYPE_INT32;
30using android::util::FIELD_TYPE_INT64;
31using android::util::FIELD_TYPE_MESSAGE;
32using android::util::FIELD_TYPE_STRING;
33using android::util::ProtoOutputStream;
34using std::map;
35using std::string;
36using std::unordered_map;
37using std::vector;
38using std::make_shared;
39using std::shared_ptr;
40
41namespace android {
42namespace os {
43namespace statsd {
44
// Field ids used by onDumpReportLocked() when writing the report. Each group is
// labelled with the message it belongs to.

// StatsLogReport
constexpr int FIELD_ID_ID = 1;
constexpr int FIELD_ID_GAUGE_METRICS = 8;

// GaugeMetricDataWrapper
constexpr int FIELD_ID_DATA = 1;

// GaugeMetricData
constexpr int FIELD_ID_DIMENSION_IN_WHAT = 1;
constexpr int FIELD_ID_DIMENSION_IN_CONDITION = 2;
constexpr int FIELD_ID_BUCKET_INFO = 3;

// GaugeBucketInfo
constexpr int FIELD_ID_START_BUCKET_ELAPSED_NANOS = 1;
constexpr int FIELD_ID_END_BUCKET_ELAPSED_NANOS = 2;
constexpr int FIELD_ID_ATOM = 3;
constexpr int FIELD_ID_ELAPSED_ATOM_TIMESTAMP = 4;
59
60GaugeMetricProducer::GaugeMetricProducer(const ConfigKey& key, const GaugeMetric& metric,
61                                         const int conditionIndex,
62                                         const sp<ConditionWizard>& wizard,
63                                         const int pullTagId, const uint64_t startTimeNs,
64                                         shared_ptr<StatsPullerManager> statsPullerManager)
65    : MetricProducer(metric.id(), key, startTimeNs, conditionIndex, wizard),
66      mStatsPullerManager(statsPullerManager),
67      mPullTagId(pullTagId) {
68    mCurrentSlicedBucket = std::make_shared<DimToGaugeAtomsMap>();
69    mCurrentSlicedBucketForAnomaly = std::make_shared<DimToValMap>();
70    int64_t bucketSizeMills = 0;
71    if (metric.has_bucket()) {
72        bucketSizeMills = TimeUnitToBucketSizeInMillis(metric.bucket());
73    } else {
74        bucketSizeMills = TimeUnitToBucketSizeInMillis(ONE_HOUR);
75    }
76    mBucketSizeNs = bucketSizeMills * 1000000;
77
78    mSamplingType = metric.sampling_type();
79    if (!metric.gauge_fields_filter().include_all()) {
80        translateFieldMatcher(metric.gauge_fields_filter().fields(), &mFieldMatchers);
81    }
82
83    // TODO: use UidMap if uid->pkg_name is required
84    if (metric.has_dimensions_in_what()) {
85        translateFieldMatcher(metric.dimensions_in_what(), &mDimensionsInWhat);
86    }
87
88    if (metric.has_dimensions_in_condition()) {
89        translateFieldMatcher(metric.dimensions_in_condition(), &mDimensionsInCondition);
90    }
91
92    if (metric.links().size() > 0) {
93        for (const auto& link : metric.links()) {
94            Metric2Condition mc;
95            mc.conditionId = link.condition();
96            translateFieldMatcher(link.fields_in_what(), &mc.metricFields);
97            translateFieldMatcher(link.fields_in_condition(), &mc.conditionFields);
98            mMetric2ConditionLinks.push_back(mc);
99        }
100    }
101    mConditionSliced = (metric.links().size() > 0) || (mDimensionsInCondition.size() > 0);
102
103    // Kicks off the puller immediately.
104    if (mPullTagId != -1 && mSamplingType == GaugeMetric::RANDOM_ONE_SAMPLE) {
105        mStatsPullerManager->RegisterReceiver(mPullTagId, this, bucketSizeMills);
106    }
107
108    VLOG("metric %lld created. bucket size %lld start_time: %lld", (long long)metric.id(),
109         (long long)mBucketSizeNs, (long long)mStartTimeNs);
110}
111
112// for testing
113GaugeMetricProducer::GaugeMetricProducer(const ConfigKey& key, const GaugeMetric& metric,
114                                         const int conditionIndex,
115                                         const sp<ConditionWizard>& wizard, const int pullTagId,
116                                         const int64_t startTimeNs)
117    : GaugeMetricProducer(key, metric, conditionIndex, wizard, pullTagId, startTimeNs,
118                          make_shared<StatsPullerManager>()) {
119}
120
121GaugeMetricProducer::~GaugeMetricProducer() {
122    VLOG("~GaugeMetricProducer() called");
123    if (mPullTagId != -1) {
124        mStatsPullerManager->UnRegisterReceiver(mPullTagId, this);
125    }
126}
127
128void GaugeMetricProducer::onDumpReportLocked(const uint64_t dumpTimeNs,
129                                             ProtoOutputStream* protoOutput) {
130    VLOG("gauge metric %lld report now...", (long long)mMetricId);
131
132    flushIfNeededLocked(dumpTimeNs);
133    if (mPastBuckets.empty()) {
134        return;
135    }
136
137    protoOutput->write(FIELD_TYPE_INT64 | FIELD_ID_ID, (long long)mMetricId);
138    long long protoToken = protoOutput->start(FIELD_TYPE_MESSAGE | FIELD_ID_GAUGE_METRICS);
139
140    for (const auto& pair : mPastBuckets) {
141        const MetricDimensionKey& dimensionKey = pair.first;
142
143        VLOG("  dimension key %s", dimensionKey.c_str());
144        long long wrapperToken =
145                protoOutput->start(FIELD_TYPE_MESSAGE | FIELD_COUNT_REPEATED | FIELD_ID_DATA);
146
147        // First fill dimension.
148        long long dimensionToken = protoOutput->start(
149                FIELD_TYPE_MESSAGE | FIELD_ID_DIMENSION_IN_WHAT);
150        writeDimensionToProto(dimensionKey.getDimensionKeyInWhat(), protoOutput);
151        protoOutput->end(dimensionToken);
152
153        if (dimensionKey.hasDimensionKeyInCondition()) {
154            long long dimensionInConditionToken = protoOutput->start(
155                    FIELD_TYPE_MESSAGE | FIELD_ID_DIMENSION_IN_CONDITION);
156            writeDimensionToProto(dimensionKey.getDimensionKeyInCondition(), protoOutput);
157            protoOutput->end(dimensionInConditionToken);
158        }
159
160        // Then fill bucket_info (GaugeBucketInfo).
161        for (const auto& bucket : pair.second) {
162            long long bucketInfoToken = protoOutput->start(
163                    FIELD_TYPE_MESSAGE | FIELD_COUNT_REPEATED | FIELD_ID_BUCKET_INFO);
164            protoOutput->write(FIELD_TYPE_INT64 | FIELD_ID_START_BUCKET_ELAPSED_NANOS,
165                               (long long)bucket.mBucketStartNs);
166            protoOutput->write(FIELD_TYPE_INT64 | FIELD_ID_END_BUCKET_ELAPSED_NANOS,
167                               (long long)bucket.mBucketEndNs);
168
169            if (!bucket.mGaugeAtoms.empty()) {
170                long long atomsToken =
171                    protoOutput->start(FIELD_TYPE_MESSAGE | FIELD_COUNT_REPEATED | FIELD_ID_ATOM);
172                for (const auto& atom : bucket.mGaugeAtoms) {
173                    writeFieldValueTreeToStream(mTagId, *(atom.mFields), protoOutput);
174                }
175                protoOutput->end(atomsToken);
176
177                for (const auto& atom : bucket.mGaugeAtoms) {
178                    protoOutput->write(
179                        FIELD_TYPE_INT64 | FIELD_COUNT_REPEATED | FIELD_ID_ELAPSED_ATOM_TIMESTAMP,
180                        (long long)atom.mTimestamps);
181                }
182            }
183            protoOutput->end(bucketInfoToken);
184            VLOG("\t bucket [%lld - %lld] includes %d atoms.", (long long)bucket.mBucketStartNs,
185                 (long long)bucket.mBucketEndNs, (int)bucket.mGaugeAtoms.size());
186        }
187        protoOutput->end(wrapperToken);
188    }
189    protoOutput->end(protoToken);
190
191    mPastBuckets.clear();
192    // TODO: Clear mDimensionKeyMap once the report is dumped.
193}
194
195void GaugeMetricProducer::pullLocked() {
196    vector<std::shared_ptr<LogEvent>> allData;
197    if (!mStatsPullerManager->Pull(mPullTagId, &allData)) {
198        ALOGE("Stats puller failed for tag: %d", mPullTagId);
199        return;
200    }
201    for (const auto& data : allData) {
202        onMatchedLogEventLocked(0, *data);
203    }
204}
205
206void GaugeMetricProducer::onConditionChangedLocked(const bool conditionMet,
207                                                   const uint64_t eventTime) {
208    VLOG("Metric %lld onConditionChanged", (long long)mMetricId);
209    flushIfNeededLocked(eventTime);
210    mCondition = conditionMet;
211
212    // Push mode. No need to proactively pull the gauge data.
213    if (mPullTagId == -1) {
214        return;
215    }
216
217    bool triggerPuller = false;
218    switch(mSamplingType) {
219        // When the metric wants to do random sampling and there is already one gauge atom for the
220        // current bucket, do not do it again.
221        case GaugeMetric::RANDOM_ONE_SAMPLE: {
222            triggerPuller = mCondition && mCurrentSlicedBucket->empty();
223            break;
224        }
225        case GaugeMetric::ALL_CONDITION_CHANGES: {
226            triggerPuller = true;
227            break;
228        }
229        default:
230            break;
231    }
232    if (!triggerPuller) {
233        return;
234    }
235
236    vector<std::shared_ptr<LogEvent>> allData;
237    if (!mStatsPullerManager->Pull(mPullTagId, &allData)) {
238        ALOGE("Stats puller failed for tag: %d", mPullTagId);
239        return;
240    }
241    for (const auto& data : allData) {
242        onMatchedLogEventLocked(0, *data);
243    }
244    flushIfNeededLocked(eventTime);
245}
246
247void GaugeMetricProducer::onSlicedConditionMayChangeLocked(const uint64_t eventTime) {
248    VLOG("Metric %lld onSlicedConditionMayChange", (long long)mMetricId);
249}
250
251std::shared_ptr<vector<FieldValue>> GaugeMetricProducer::getGaugeFields(const LogEvent& event) {
252    if (mFieldMatchers.size() > 0) {
253        std::shared_ptr<vector<FieldValue>> gaugeFields = std::make_shared<vector<FieldValue>>();
254        filterGaugeValues(mFieldMatchers, event.getValues(), gaugeFields.get());
255        return gaugeFields;
256    } else {
257        return std::make_shared<vector<FieldValue>>(event.getValues());
258    }
259}
260
261void GaugeMetricProducer::onDataPulled(const std::vector<std::shared_ptr<LogEvent>>& allData) {
262    std::lock_guard<std::mutex> lock(mMutex);
263    if (allData.size() == 0) {
264        return;
265    }
266    for (const auto& data : allData) {
267        onMatchedLogEventLocked(0, *data);
268    }
269}
270
271bool GaugeMetricProducer::hitGuardRailLocked(const MetricDimensionKey& newKey) {
272    if (mCurrentSlicedBucket->find(newKey) != mCurrentSlicedBucket->end()) {
273        return false;
274    }
275    // 1. Report the tuple count if the tuple count > soft limit
276    if (mCurrentSlicedBucket->size() > StatsdStats::kDimensionKeySizeSoftLimit - 1) {
277        size_t newTupleCount = mCurrentSlicedBucket->size() + 1;
278        StatsdStats::getInstance().noteMetricDimensionSize(mConfigKey, mMetricId, newTupleCount);
279        // 2. Don't add more tuples, we are above the allowed threshold. Drop the data.
280        if (newTupleCount > StatsdStats::kDimensionKeySizeHardLimit) {
281            ALOGE("GaugeMetric %lld dropping data for dimension key %s",
282                (long long)mMetricId, newKey.c_str());
283            return true;
284        }
285    }
286
287    return false;
288}
289
290void GaugeMetricProducer::onMatchedLogEventInternalLocked(
291        const size_t matcherIndex, const MetricDimensionKey& eventKey,
292        const ConditionKey& conditionKey, bool condition,
293        const LogEvent& event) {
294    if (condition == false) {
295        return;
296    }
297    uint64_t eventTimeNs = event.GetElapsedTimestampNs();
298    mTagId = event.GetTagId();
299    if (eventTimeNs < mCurrentBucketStartTimeNs) {
300        VLOG("Skip event due to late arrival: %lld vs %lld", (long long)eventTimeNs,
301             (long long)mCurrentBucketStartTimeNs);
302        return;
303    }
304    flushIfNeededLocked(eventTimeNs);
305
306    // When gauge metric wants to randomly sample the output atom, we just simply use the first
307    // gauge in the given bucket.
308    if (mCurrentSlicedBucket->find(eventKey) != mCurrentSlicedBucket->end() &&
309        mSamplingType == GaugeMetric::RANDOM_ONE_SAMPLE) {
310        return;
311    }
312    if (hitGuardRailLocked(eventKey)) {
313        return;
314    }
315    GaugeAtom gaugeAtom(getGaugeFields(event), eventTimeNs);
316    (*mCurrentSlicedBucket)[eventKey].push_back(gaugeAtom);
317    // Anomaly detection on gauge metric only works when there is one numeric
318    // field specified.
319    if (mAnomalyTrackers.size() > 0) {
320        if (gaugeAtom.mFields->size() == 1) {
321            const Value& value = gaugeAtom.mFields->begin()->mValue;
322            long gaugeVal = 0;
323            if (value.getType() == INT) {
324                gaugeVal = (long)value.int_value;
325            } else if (value.getType() == LONG) {
326                gaugeVal = value.long_value;
327            }
328            for (auto& tracker : mAnomalyTrackers) {
329                tracker->detectAndDeclareAnomaly(eventTimeNs, mCurrentBucketNum, eventKey,
330                                                 gaugeVal);
331            }
332        }
333    }
334}
335
336void GaugeMetricProducer::updateCurrentSlicedBucketForAnomaly() {
337    for (const auto& slice : *mCurrentSlicedBucket) {
338        if (slice.second.empty()) {
339            continue;
340        }
341        const Value& value = slice.second.front().mFields->front().mValue;
342        long gaugeVal = 0;
343        if (value.getType() == INT) {
344            gaugeVal = (long)value.int_value;
345        } else if (value.getType() == LONG) {
346            gaugeVal = value.long_value;
347        }
348        (*mCurrentSlicedBucketForAnomaly)[slice.first] = gaugeVal;
349    }
350}
351
352// When a new matched event comes in, we check if event falls into the current
353// bucket. If not, flush the old counter to past buckets and initialize the new
354// bucket.
355// if data is pushed, onMatchedLogEvent will only be called through onConditionChanged() inside
356// the GaugeMetricProducer while holding the lock.
357void GaugeMetricProducer::flushIfNeededLocked(const uint64_t& eventTimeNs) {
358    uint64_t currentBucketEndTimeNs = getCurrentBucketEndTimeNs();
359
360    if (eventTimeNs < currentBucketEndTimeNs) {
361        VLOG("eventTime is %lld, less than next bucket start time %lld", (long long)eventTimeNs,
362             (long long)(mCurrentBucketStartTimeNs + mBucketSizeNs));
363        return;
364    }
365
366    flushCurrentBucketLocked(eventTimeNs);
367
368    // Adjusts the bucket start and end times.
369    int64_t numBucketsForward = 1 + (eventTimeNs - currentBucketEndTimeNs) / mBucketSizeNs;
370    mCurrentBucketStartTimeNs = currentBucketEndTimeNs + (numBucketsForward - 1) * mBucketSizeNs;
371    mCurrentBucketNum += numBucketsForward;
372    VLOG("metric %lld: new bucket start time: %lld", (long long)mMetricId,
373         (long long)mCurrentBucketStartTimeNs);
374}
375
376void GaugeMetricProducer::flushCurrentBucketLocked(const uint64_t& eventTimeNs) {
377    uint64_t fullBucketEndTimeNs = getCurrentBucketEndTimeNs();
378
379    GaugeBucket info;
380    info.mBucketStartNs = mCurrentBucketStartTimeNs;
381    if (eventTimeNs < fullBucketEndTimeNs) {
382        info.mBucketEndNs = eventTimeNs;
383    } else {
384        info.mBucketEndNs = fullBucketEndTimeNs;
385    }
386    info.mBucketNum = mCurrentBucketNum;
387
388    for (const auto& slice : *mCurrentSlicedBucket) {
389        info.mGaugeAtoms = slice.second;
390        auto& bucketList = mPastBuckets[slice.first];
391        bucketList.push_back(info);
392        VLOG("gauge metric %lld, dump key value: %s", (long long)mMetricId, slice.first.c_str());
393    }
394
395    // If we have anomaly trackers, we need to update the partial bucket values.
396    if (mAnomalyTrackers.size() > 0) {
397        updateCurrentSlicedBucketForAnomaly();
398
399        if (eventTimeNs > fullBucketEndTimeNs) {
400            // This is known to be a full bucket, so send this data to the anomaly tracker.
401            for (auto& tracker : mAnomalyTrackers) {
402                tracker->addPastBucket(mCurrentSlicedBucketForAnomaly, mCurrentBucketNum);
403            }
404            mCurrentSlicedBucketForAnomaly = std::make_shared<DimToValMap>();
405        }
406    }
407
408    mCurrentSlicedBucket = std::make_shared<DimToGaugeAtomsMap>();
409}
410
411size_t GaugeMetricProducer::byteSizeLocked() const {
412    size_t totalSize = 0;
413    for (const auto& pair : mPastBuckets) {
414        totalSize += pair.second.size() * kBucketSize;
415    }
416    return totalSize;
417}
418
419}  // namespace statsd
420}  // namespace os
421}  // namespace android
422