GaugeMetricProducer.cpp revision 4c959cb99eb7e71e5417a61f5429c5fa0073e826
1/*
2* Copyright (C) 2017 The Android Open Source Project
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8*      http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*/
16
17#define DEBUG false  // STOPSHIP if true
18#include "Log.h"
19
20#include "GaugeMetricProducer.h"
21#include "guardrail/StatsdStats.h"
22#include "stats_log_util.h"
23
24#include <cutils/log.h>
25
26using android::util::FIELD_COUNT_REPEATED;
27using android::util::FIELD_TYPE_BOOL;
28using android::util::FIELD_TYPE_FLOAT;
29using android::util::FIELD_TYPE_INT32;
30using android::util::FIELD_TYPE_INT64;
31using android::util::FIELD_TYPE_MESSAGE;
32using android::util::FIELD_TYPE_STRING;
33using android::util::ProtoOutputStream;
34using std::map;
35using std::string;
36using std::unordered_map;
37using std::vector;
38using std::make_shared;
39using std::shared_ptr;
40
41namespace android {
42namespace os {
43namespace statsd {
44
// Proto field numbers used by onDumpReportLocked() when serializing the gauge report.

// Fields of StatsLogReport.
constexpr int FIELD_ID_ID = 1;
constexpr int FIELD_ID_GAUGE_METRICS = 8;

// Fields of GaugeMetricDataWrapper.
constexpr int FIELD_ID_DATA = 1;

// Fields of GaugeMetricData.
constexpr int FIELD_ID_DIMENSION_IN_WHAT = 1;
constexpr int FIELD_ID_DIMENSION_IN_CONDITION = 2;
constexpr int FIELD_ID_BUCKET_INFO = 3;

// Fields of GaugeBucketInfo.
constexpr int FIELD_ID_START_BUCKET_NANOS = 1;
constexpr int FIELD_ID_END_BUCKET_NANOS = 2;
constexpr int FIELD_ID_ATOM = 3;
constexpr int FIELD_ID_TIMESTAMP = 4;
59
60GaugeMetricProducer::GaugeMetricProducer(const ConfigKey& key, const GaugeMetric& metric,
61                                         const int conditionIndex,
62                                         const sp<ConditionWizard>& wizard,
63                                         const int pullTagId, const uint64_t startTimeNs,
64                                         shared_ptr<StatsPullerManager> statsPullerManager)
65    : MetricProducer(metric.id(), key, startTimeNs, conditionIndex, wizard),
66      mStatsPullerManager(statsPullerManager),
67      mPullTagId(pullTagId) {
68    mCurrentSlicedBucket = std::make_shared<DimToGaugeAtomsMap>();
69    mCurrentSlicedBucketForAnomaly = std::make_shared<DimToValMap>();
70    int64_t bucketSizeMills = 0;
71    if (metric.has_bucket()) {
72        bucketSizeMills = TimeUnitToBucketSizeInMillis(metric.bucket());
73    } else {
74        bucketSizeMills = TimeUnitToBucketSizeInMillis(ONE_HOUR);
75    }
76    mBucketSizeNs = bucketSizeMills * 1000000;
77
78    mSamplingType = metric.sampling_type();
79    if (!metric.gauge_fields_filter().include_all()) {
80        translateFieldMatcher(metric.gauge_fields_filter().fields(), &mFieldMatchers);
81    }
82
83    // TODO: use UidMap if uid->pkg_name is required
84    if (metric.has_dimensions_in_what()) {
85        translateFieldMatcher(metric.dimensions_in_what(), &mDimensionsInWhat);
86    }
87
88    if (metric.has_dimensions_in_condition()) {
89        translateFieldMatcher(metric.dimensions_in_condition(), &mDimensionsInCondition);
90    }
91
92    if (metric.links().size() > 0) {
93        for (const auto& link : metric.links()) {
94            Metric2Condition mc;
95            mc.conditionId = link.condition();
96            translateFieldMatcher(link.fields_in_what(), &mc.metricFields);
97            translateFieldMatcher(link.fields_in_condition(), &mc.conditionFields);
98            mMetric2ConditionLinks.push_back(mc);
99        }
100    }
101    mConditionSliced = (metric.links().size() > 0) || (mDimensionsInCondition.size() > 0);
102
103    // Kicks off the puller immediately.
104    if (mPullTagId != -1 && mSamplingType == GaugeMetric::RANDOM_ONE_SAMPLE) {
105        mStatsPullerManager->RegisterReceiver(mPullTagId, this, bucketSizeMills);
106    }
107
108    VLOG("metric %lld created. bucket size %lld start_time: %lld", (long long)metric.id(),
109         (long long)mBucketSizeNs, (long long)mStartTimeNs);
110}
111
112// for testing
113GaugeMetricProducer::GaugeMetricProducer(const ConfigKey& key, const GaugeMetric& metric,
114                                         const int conditionIndex,
115                                         const sp<ConditionWizard>& wizard, const int pullTagId,
116                                         const int64_t startTimeNs)
117    : GaugeMetricProducer(key, metric, conditionIndex, wizard, pullTagId, startTimeNs,
118                          make_shared<StatsPullerManager>()) {
119}
120
121GaugeMetricProducer::~GaugeMetricProducer() {
122    VLOG("~GaugeMetricProducer() called");
123    if (mPullTagId != -1) {
124        mStatsPullerManager->UnRegisterReceiver(mPullTagId, this);
125    }
126}
127
128void GaugeMetricProducer::onDumpReportLocked(const uint64_t dumpTimeNs,
129                                             ProtoOutputStream* protoOutput) {
130    VLOG("gauge metric %lld report now...", (long long)mMetricId);
131
132    flushIfNeededLocked(dumpTimeNs);
133    if (mPastBuckets.empty()) {
134        return;
135    }
136
137    protoOutput->write(FIELD_TYPE_INT64 | FIELD_ID_ID, (long long)mMetricId);
138    long long protoToken = protoOutput->start(FIELD_TYPE_MESSAGE | FIELD_ID_GAUGE_METRICS);
139
140    for (const auto& pair : mPastBuckets) {
141        const MetricDimensionKey& dimensionKey = pair.first;
142
143        VLOG("  dimension key %s", dimensionKey.c_str());
144        long long wrapperToken =
145                protoOutput->start(FIELD_TYPE_MESSAGE | FIELD_COUNT_REPEATED | FIELD_ID_DATA);
146
147        // First fill dimension.
148        long long dimensionToken = protoOutput->start(
149                FIELD_TYPE_MESSAGE | FIELD_ID_DIMENSION_IN_WHAT);
150        writeDimensionToProto(dimensionKey.getDimensionKeyInWhat(), protoOutput);
151        protoOutput->end(dimensionToken);
152
153        if (dimensionKey.hasDimensionKeyInCondition()) {
154            long long dimensionInConditionToken = protoOutput->start(
155                    FIELD_TYPE_MESSAGE | FIELD_ID_DIMENSION_IN_CONDITION);
156            writeDimensionToProto(dimensionKey.getDimensionKeyInCondition(), protoOutput);
157            protoOutput->end(dimensionInConditionToken);
158        }
159
160        // Then fill bucket_info (GaugeBucketInfo).
161        for (const auto& bucket : pair.second) {
162            long long bucketInfoToken = protoOutput->start(
163                    FIELD_TYPE_MESSAGE | FIELD_COUNT_REPEATED | FIELD_ID_BUCKET_INFO);
164            protoOutput->write(FIELD_TYPE_INT64 | FIELD_ID_START_BUCKET_NANOS,
165                               (long long)bucket.mBucketStartNs);
166            protoOutput->write(FIELD_TYPE_INT64 | FIELD_ID_END_BUCKET_NANOS,
167                               (long long)bucket.mBucketEndNs);
168
169            if (!bucket.mGaugeAtoms.empty()) {
170                long long atomsToken =
171                    protoOutput->start(FIELD_TYPE_MESSAGE | FIELD_COUNT_REPEATED | FIELD_ID_ATOM);
172                for (const auto& atom : bucket.mGaugeAtoms) {
173                    writeFieldValueTreeToStream(mTagId, *(atom.mFields), protoOutput);
174                }
175                protoOutput->end(atomsToken);
176
177                for (const auto& atom : bucket.mGaugeAtoms) {
178                    protoOutput->write(FIELD_TYPE_INT64 | FIELD_COUNT_REPEATED | FIELD_ID_TIMESTAMP,
179                                       (long long)atom.mTimestamps);
180                }
181            }
182            protoOutput->end(bucketInfoToken);
183            VLOG("\t bucket [%lld - %lld] includes %d atoms.", (long long)bucket.mBucketStartNs,
184                 (long long)bucket.mBucketEndNs, (int)bucket.mGaugeAtoms.size());
185        }
186        protoOutput->end(wrapperToken);
187    }
188    protoOutput->end(protoToken);
189
190    mPastBuckets.clear();
191    // TODO: Clear mDimensionKeyMap once the report is dumped.
192}
193
194void GaugeMetricProducer::pullLocked() {
195    vector<std::shared_ptr<LogEvent>> allData;
196    if (!mStatsPullerManager->Pull(mPullTagId, &allData)) {
197        ALOGE("Stats puller failed for tag: %d", mPullTagId);
198        return;
199    }
200    for (const auto& data : allData) {
201        onMatchedLogEventLocked(0, *data);
202    }
203}
204
205void GaugeMetricProducer::onConditionChangedLocked(const bool conditionMet,
206                                                   const uint64_t eventTime) {
207    VLOG("Metric %lld onConditionChanged", (long long)mMetricId);
208    flushIfNeededLocked(eventTime);
209    mCondition = conditionMet;
210
211    // Push mode. No need to proactively pull the gauge data.
212    if (mPullTagId == -1) {
213        return;
214    }
215
216    bool triggerPuller = false;
217    switch(mSamplingType) {
218        // When the metric wants to do random sampling and there is already one gauge atom for the
219        // current bucket, do not do it again.
220        case GaugeMetric::RANDOM_ONE_SAMPLE: {
221            triggerPuller = mCondition && mCurrentSlicedBucket->empty();
222            break;
223        }
224        case GaugeMetric::ALL_CONDITION_CHANGES: {
225            triggerPuller = true;
226            break;
227        }
228        default:
229            break;
230    }
231    if (!triggerPuller) {
232        return;
233    }
234
235    vector<std::shared_ptr<LogEvent>> allData;
236    if (!mStatsPullerManager->Pull(mPullTagId, &allData)) {
237        ALOGE("Stats puller failed for tag: %d", mPullTagId);
238        return;
239    }
240    for (const auto& data : allData) {
241        onMatchedLogEventLocked(0, *data);
242    }
243    flushIfNeededLocked(eventTime);
244}
245
246void GaugeMetricProducer::onSlicedConditionMayChangeLocked(const uint64_t eventTime) {
247    VLOG("Metric %lld onSlicedConditionMayChange", (long long)mMetricId);
248}
249
250std::shared_ptr<vector<FieldValue>> GaugeMetricProducer::getGaugeFields(const LogEvent& event) {
251    if (mFieldMatchers.size() > 0) {
252        std::shared_ptr<vector<FieldValue>> gaugeFields = std::make_shared<vector<FieldValue>>();
253        filterGaugeValues(mFieldMatchers, event.getValues(), gaugeFields.get());
254        return gaugeFields;
255    } else {
256        return std::make_shared<vector<FieldValue>>(event.getValues());
257    }
258}
259
260void GaugeMetricProducer::onDataPulled(const std::vector<std::shared_ptr<LogEvent>>& allData) {
261    std::lock_guard<std::mutex> lock(mMutex);
262    if (allData.size() == 0) {
263        return;
264    }
265    for (const auto& data : allData) {
266        onMatchedLogEventLocked(0, *data);
267    }
268}
269
270bool GaugeMetricProducer::hitGuardRailLocked(const MetricDimensionKey& newKey) {
271    if (mCurrentSlicedBucket->find(newKey) != mCurrentSlicedBucket->end()) {
272        return false;
273    }
274    // 1. Report the tuple count if the tuple count > soft limit
275    if (mCurrentSlicedBucket->size() > StatsdStats::kDimensionKeySizeSoftLimit - 1) {
276        size_t newTupleCount = mCurrentSlicedBucket->size() + 1;
277        StatsdStats::getInstance().noteMetricDimensionSize(mConfigKey, mMetricId, newTupleCount);
278        // 2. Don't add more tuples, we are above the allowed threshold. Drop the data.
279        if (newTupleCount > StatsdStats::kDimensionKeySizeHardLimit) {
280            ALOGE("GaugeMetric %lld dropping data for dimension key %s",
281                (long long)mMetricId, newKey.c_str());
282            return true;
283        }
284    }
285
286    return false;
287}
288
289void GaugeMetricProducer::onMatchedLogEventInternalLocked(
290        const size_t matcherIndex, const MetricDimensionKey& eventKey,
291        const ConditionKey& conditionKey, bool condition,
292        const LogEvent& event) {
293    if (condition == false) {
294        return;
295    }
296    uint64_t eventTimeNs = event.GetTimestampNs();
297    mTagId = event.GetTagId();
298    if (eventTimeNs < mCurrentBucketStartTimeNs) {
299        VLOG("Skip event due to late arrival: %lld vs %lld", (long long)eventTimeNs,
300             (long long)mCurrentBucketStartTimeNs);
301        return;
302    }
303    flushIfNeededLocked(eventTimeNs);
304
305    // When gauge metric wants to randomly sample the output atom, we just simply use the first
306    // gauge in the given bucket.
307    if (mCurrentSlicedBucket->find(eventKey) != mCurrentSlicedBucket->end() &&
308        mSamplingType == GaugeMetric::RANDOM_ONE_SAMPLE) {
309        return;
310    }
311    if (hitGuardRailLocked(eventKey)) {
312        return;
313    }
314    GaugeAtom gaugeAtom(getGaugeFields(event), eventTimeNs);
315    (*mCurrentSlicedBucket)[eventKey].push_back(gaugeAtom);
316    // Anomaly detection on gauge metric only works when there is one numeric
317    // field specified.
318    if (mAnomalyTrackers.size() > 0) {
319        if (gaugeAtom.mFields->size() == 1) {
320            const Value& value = gaugeAtom.mFields->begin()->mValue;
321            long gaugeVal = 0;
322            if (value.getType() == INT) {
323                gaugeVal = (long)value.int_value;
324            } else if (value.getType() == LONG) {
325                gaugeVal = value.long_value;
326            }
327            for (auto& tracker : mAnomalyTrackers) {
328                tracker->detectAndDeclareAnomaly(eventTimeNs, mCurrentBucketNum, eventKey,
329                                                 gaugeVal);
330            }
331        }
332    }
333}
334
335void GaugeMetricProducer::updateCurrentSlicedBucketForAnomaly() {
336    for (const auto& slice : *mCurrentSlicedBucket) {
337        if (slice.second.empty()) {
338            continue;
339        }
340        const Value& value = slice.second.front().mFields->front().mValue;
341        long gaugeVal = 0;
342        if (value.getType() == INT) {
343            gaugeVal = (long)value.int_value;
344        } else if (value.getType() == LONG) {
345            gaugeVal = value.long_value;
346        }
347        (*mCurrentSlicedBucketForAnomaly)[slice.first] = gaugeVal;
348    }
349}
350
351// When a new matched event comes in, we check if event falls into the current
352// bucket. If not, flush the old counter to past buckets and initialize the new
353// bucket.
354// if data is pushed, onMatchedLogEvent will only be called through onConditionChanged() inside
355// the GaugeMetricProducer while holding the lock.
356void GaugeMetricProducer::flushIfNeededLocked(const uint64_t& eventTimeNs) {
357    uint64_t currentBucketEndTimeNs = getCurrentBucketEndTimeNs();
358
359    if (eventTimeNs < currentBucketEndTimeNs) {
360        VLOG("eventTime is %lld, less than next bucket start time %lld", (long long)eventTimeNs,
361             (long long)(mCurrentBucketStartTimeNs + mBucketSizeNs));
362        return;
363    }
364
365    flushCurrentBucketLocked(eventTimeNs);
366
367    // Adjusts the bucket start and end times.
368    int64_t numBucketsForward = 1 + (eventTimeNs - currentBucketEndTimeNs) / mBucketSizeNs;
369    mCurrentBucketStartTimeNs = currentBucketEndTimeNs + (numBucketsForward - 1) * mBucketSizeNs;
370    mCurrentBucketNum += numBucketsForward;
371    VLOG("metric %lld: new bucket start time: %lld", (long long)mMetricId,
372         (long long)mCurrentBucketStartTimeNs);
373}
374
375void GaugeMetricProducer::flushCurrentBucketLocked(const uint64_t& eventTimeNs) {
376    uint64_t fullBucketEndTimeNs = getCurrentBucketEndTimeNs();
377
378    GaugeBucket info;
379    info.mBucketStartNs = mCurrentBucketStartTimeNs;
380    if (eventTimeNs < fullBucketEndTimeNs) {
381        info.mBucketEndNs = eventTimeNs;
382    } else {
383        info.mBucketEndNs = fullBucketEndTimeNs;
384    }
385    info.mBucketNum = mCurrentBucketNum;
386
387    for (const auto& slice : *mCurrentSlicedBucket) {
388        info.mGaugeAtoms = slice.second;
389        auto& bucketList = mPastBuckets[slice.first];
390        bucketList.push_back(info);
391        VLOG("gauge metric %lld, dump key value: %s", (long long)mMetricId, slice.first.c_str());
392    }
393
394    // If we have anomaly trackers, we need to update the partial bucket values.
395    if (mAnomalyTrackers.size() > 0) {
396        updateCurrentSlicedBucketForAnomaly();
397
398        if (eventTimeNs > fullBucketEndTimeNs) {
399            // This is known to be a full bucket, so send this data to the anomaly tracker.
400            for (auto& tracker : mAnomalyTrackers) {
401                tracker->addPastBucket(mCurrentSlicedBucketForAnomaly, mCurrentBucketNum);
402            }
403            mCurrentSlicedBucketForAnomaly = std::make_shared<DimToValMap>();
404        }
405    }
406
407    mCurrentSlicedBucket = std::make_shared<DimToGaugeAtomsMap>();
408}
409
410size_t GaugeMetricProducer::byteSizeLocked() const {
411    size_t totalSize = 0;
412    for (const auto& pair : mPastBuckets) {
413        totalSize += pair.second.size() * kBucketSize;
414    }
415    return totalSize;
416}
417
418}  // namespace statsd
419}  // namespace os
420}  // namespace android
421