GaugeMetricProducer.cpp revision 5ee0787024cc446a21008ff5710dec19c6afc834
1/*
2* Copyright (C) 2017 The Android Open Source Project
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8*      http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*/
16
17#define DEBUG false  // STOPSHIP if true
18#include "Log.h"
19
20#include "GaugeMetricProducer.h"
21#include "guardrail/StatsdStats.h"
22#include "stats_log_util.h"
23
24#include <cutils/log.h>
25
26using android::util::FIELD_COUNT_REPEATED;
27using android::util::FIELD_TYPE_BOOL;
28using android::util::FIELD_TYPE_FLOAT;
29using android::util::FIELD_TYPE_INT32;
30using android::util::FIELD_TYPE_INT64;
31using android::util::FIELD_TYPE_MESSAGE;
32using android::util::FIELD_TYPE_STRING;
33using android::util::ProtoOutputStream;
34using std::map;
35using std::string;
36using std::unordered_map;
37using std::vector;
38using std::make_shared;
39using std::shared_ptr;
40
41namespace android {
42namespace os {
43namespace statsd {
44
// Proto field numbers used when serializing this metric's report.

// Fields of StatsLogReport.
constexpr int FIELD_ID_ID = 1;
constexpr int FIELD_ID_GAUGE_METRICS = 8;

// Fields of GaugeMetricDataWrapper.
constexpr int FIELD_ID_DATA = 1;

// Fields of GaugeMetricData.
constexpr int FIELD_ID_DIMENSION_IN_WHAT = 1;
constexpr int FIELD_ID_DIMENSION_IN_CONDITION = 2;
constexpr int FIELD_ID_BUCKET_INFO = 3;

// Fields of GaugeBucketInfo.
constexpr int FIELD_ID_START_BUCKET_ELAPSED_NANOS = 1;
constexpr int FIELD_ID_END_BUCKET_ELAPSED_NANOS = 2;
constexpr int FIELD_ID_ATOM = 3;
constexpr int FIELD_ID_ELAPSED_ATOM_TIMESTAMP = 4;
59
60GaugeMetricProducer::GaugeMetricProducer(const ConfigKey& key, const GaugeMetric& metric,
61                                         const int conditionIndex,
62                                         const sp<ConditionWizard>& wizard,
63                                         const int pullTagId, const uint64_t startTimeNs,
64                                         shared_ptr<StatsPullerManager> statsPullerManager)
65    : MetricProducer(metric.id(), key, startTimeNs, conditionIndex, wizard),
66      mStatsPullerManager(statsPullerManager),
67      mPullTagId(pullTagId) {
68    mCurrentSlicedBucket = std::make_shared<DimToGaugeAtomsMap>();
69    mCurrentSlicedBucketForAnomaly = std::make_shared<DimToValMap>();
70    int64_t bucketSizeMills = 0;
71    if (metric.has_bucket()) {
72        bucketSizeMills = TimeUnitToBucketSizeInMillisGuardrailed(key.GetUid(), metric.bucket());
73    } else {
74        bucketSizeMills = TimeUnitToBucketSizeInMillis(ONE_HOUR);
75    }
76    mBucketSizeNs = bucketSizeMills * 1000000;
77
78    mSamplingType = metric.sampling_type();
79    if (!metric.gauge_fields_filter().include_all()) {
80        translateFieldMatcher(metric.gauge_fields_filter().fields(), &mFieldMatchers);
81    }
82
83    // TODO: use UidMap if uid->pkg_name is required
84    if (metric.has_dimensions_in_what()) {
85        translateFieldMatcher(metric.dimensions_in_what(), &mDimensionsInWhat);
86    }
87
88    if (metric.has_dimensions_in_condition()) {
89        translateFieldMatcher(metric.dimensions_in_condition(), &mDimensionsInCondition);
90    }
91
92    if (metric.links().size() > 0) {
93        for (const auto& link : metric.links()) {
94            Metric2Condition mc;
95            mc.conditionId = link.condition();
96            translateFieldMatcher(link.fields_in_what(), &mc.metricFields);
97            translateFieldMatcher(link.fields_in_condition(), &mc.conditionFields);
98            mMetric2ConditionLinks.push_back(mc);
99        }
100    }
101    mConditionSliced = (metric.links().size() > 0) || (mDimensionsInCondition.size() > 0);
102
103    // Kicks off the puller immediately.
104    if (mPullTagId != -1 && mSamplingType == GaugeMetric::RANDOM_ONE_SAMPLE) {
105        mStatsPullerManager->RegisterReceiver(mPullTagId, this, bucketSizeMills);
106    }
107
108    VLOG("metric %lld created. bucket size %lld start_time: %lld", (long long)metric.id(),
109         (long long)mBucketSizeNs, (long long)mStartTimeNs);
110}
111
112// for testing
113GaugeMetricProducer::GaugeMetricProducer(const ConfigKey& key, const GaugeMetric& metric,
114                                         const int conditionIndex,
115                                         const sp<ConditionWizard>& wizard, const int pullTagId,
116                                         const int64_t startTimeNs)
117    : GaugeMetricProducer(key, metric, conditionIndex, wizard, pullTagId, startTimeNs,
118                          make_shared<StatsPullerManager>()) {
119}
120
121GaugeMetricProducer::~GaugeMetricProducer() {
122    VLOG("~GaugeMetricProducer() called");
123    if (mPullTagId != -1) {
124        mStatsPullerManager->UnRegisterReceiver(mPullTagId, this);
125    }
126}
127
128void GaugeMetricProducer::onDumpReportLocked(const uint64_t dumpTimeNs,
129                                             ProtoOutputStream* protoOutput) {
130    VLOG("gauge metric %lld report now...", (long long)mMetricId);
131
132    flushIfNeededLocked(dumpTimeNs);
133    if (mPastBuckets.empty()) {
134        return;
135    }
136
137    protoOutput->write(FIELD_TYPE_INT64 | FIELD_ID_ID, (long long)mMetricId);
138    uint64_t protoToken = protoOutput->start(FIELD_TYPE_MESSAGE | FIELD_ID_GAUGE_METRICS);
139
140    for (const auto& pair : mPastBuckets) {
141        const MetricDimensionKey& dimensionKey = pair.first;
142
143        VLOG("  dimension key %s", dimensionKey.c_str());
144        uint64_t wrapperToken =
145                protoOutput->start(FIELD_TYPE_MESSAGE | FIELD_COUNT_REPEATED | FIELD_ID_DATA);
146
147        // First fill dimension.
148        uint64_t dimensionToken = protoOutput->start(
149                FIELD_TYPE_MESSAGE | FIELD_ID_DIMENSION_IN_WHAT);
150        writeDimensionToProto(dimensionKey.getDimensionKeyInWhat(), protoOutput);
151        protoOutput->end(dimensionToken);
152
153        if (dimensionKey.hasDimensionKeyInCondition()) {
154            uint64_t dimensionInConditionToken = protoOutput->start(
155                    FIELD_TYPE_MESSAGE | FIELD_ID_DIMENSION_IN_CONDITION);
156            writeDimensionToProto(dimensionKey.getDimensionKeyInCondition(), protoOutput);
157            protoOutput->end(dimensionInConditionToken);
158        }
159
160        // Then fill bucket_info (GaugeBucketInfo).
161        for (const auto& bucket : pair.second) {
162            uint64_t bucketInfoToken = protoOutput->start(
163                    FIELD_TYPE_MESSAGE | FIELD_COUNT_REPEATED | FIELD_ID_BUCKET_INFO);
164            protoOutput->write(FIELD_TYPE_INT64 | FIELD_ID_START_BUCKET_ELAPSED_NANOS,
165                               (long long)bucket.mBucketStartNs);
166            protoOutput->write(FIELD_TYPE_INT64 | FIELD_ID_END_BUCKET_ELAPSED_NANOS,
167                               (long long)bucket.mBucketEndNs);
168
169            if (!bucket.mGaugeAtoms.empty()) {
170                uint64_t atomsToken =
171                    protoOutput->start(FIELD_TYPE_MESSAGE | FIELD_COUNT_REPEATED | FIELD_ID_ATOM);
172                for (const auto& atom : bucket.mGaugeAtoms) {
173                    writeFieldValueTreeToStream(mTagId, *(atom.mFields), protoOutput);
174                }
175                protoOutput->end(atomsToken);
176                for (const auto& atom : bucket.mGaugeAtoms) {
177                    const bool truncateTimestamp =
178                        android::util::kNotTruncatingTimestampAtomWhiteList.find(mTagId) ==
179                        android::util::kNotTruncatingTimestampAtomWhiteList.end();
180                    int64_t timestampNs =  truncateTimestamp ?
181                        truncateTimestampNsToFiveMinutes(atom.mTimestamps) : atom.mTimestamps;
182                    protoOutput->write(
183                        FIELD_TYPE_INT64 | FIELD_COUNT_REPEATED | FIELD_ID_ELAPSED_ATOM_TIMESTAMP,
184                        (long long)timestampNs);
185                }
186            }
187            protoOutput->end(bucketInfoToken);
188            VLOG("\t bucket [%lld - %lld] includes %d atoms.", (long long)bucket.mBucketStartNs,
189                 (long long)bucket.mBucketEndNs, (int)bucket.mGaugeAtoms.size());
190        }
191        protoOutput->end(wrapperToken);
192    }
193    protoOutput->end(protoToken);
194
195    mPastBuckets.clear();
196    // TODO: Clear mDimensionKeyMap once the report is dumped.
197}
198
199void GaugeMetricProducer::pullLocked() {
200    vector<std::shared_ptr<LogEvent>> allData;
201    if (!mStatsPullerManager->Pull(mPullTagId, &allData)) {
202        ALOGE("Stats puller failed for tag: %d", mPullTagId);
203        return;
204    }
205    for (const auto& data : allData) {
206        onMatchedLogEventLocked(0, *data);
207    }
208}
209
210void GaugeMetricProducer::onConditionChangedLocked(const bool conditionMet,
211                                                   const uint64_t eventTime) {
212    VLOG("Metric %lld onConditionChanged", (long long)mMetricId);
213    flushIfNeededLocked(eventTime);
214    mCondition = conditionMet;
215
216    // Push mode. No need to proactively pull the gauge data.
217    if (mPullTagId == -1) {
218        return;
219    }
220
221    bool triggerPuller = false;
222    switch(mSamplingType) {
223        // When the metric wants to do random sampling and there is already one gauge atom for the
224        // current bucket, do not do it again.
225        case GaugeMetric::RANDOM_ONE_SAMPLE: {
226            triggerPuller = mCondition && mCurrentSlicedBucket->empty();
227            break;
228        }
229        case GaugeMetric::ALL_CONDITION_CHANGES: {
230            triggerPuller = true;
231            break;
232        }
233        default:
234            break;
235    }
236    if (!triggerPuller) {
237        return;
238    }
239
240    vector<std::shared_ptr<LogEvent>> allData;
241    if (!mStatsPullerManager->Pull(mPullTagId, &allData)) {
242        ALOGE("Stats puller failed for tag: %d", mPullTagId);
243        return;
244    }
245    for (const auto& data : allData) {
246        onMatchedLogEventLocked(0, *data);
247    }
248    flushIfNeededLocked(eventTime);
249}
250
251void GaugeMetricProducer::onSlicedConditionMayChangeLocked(const uint64_t eventTime) {
252    VLOG("Metric %lld onSlicedConditionMayChange", (long long)mMetricId);
253}
254
255std::shared_ptr<vector<FieldValue>> GaugeMetricProducer::getGaugeFields(const LogEvent& event) {
256    if (mFieldMatchers.size() > 0) {
257        std::shared_ptr<vector<FieldValue>> gaugeFields = std::make_shared<vector<FieldValue>>();
258        filterGaugeValues(mFieldMatchers, event.getValues(), gaugeFields.get());
259        return gaugeFields;
260    } else {
261        return std::make_shared<vector<FieldValue>>(event.getValues());
262    }
263}
264
265void GaugeMetricProducer::onDataPulled(const std::vector<std::shared_ptr<LogEvent>>& allData) {
266    std::lock_guard<std::mutex> lock(mMutex);
267    if (allData.size() == 0) {
268        return;
269    }
270    for (const auto& data : allData) {
271        onMatchedLogEventLocked(0, *data);
272    }
273}
274
275bool GaugeMetricProducer::hitGuardRailLocked(const MetricDimensionKey& newKey) {
276    if (mCurrentSlicedBucket->find(newKey) != mCurrentSlicedBucket->end()) {
277        return false;
278    }
279    // 1. Report the tuple count if the tuple count > soft limit
280    if (mCurrentSlicedBucket->size() > StatsdStats::kDimensionKeySizeSoftLimit - 1) {
281        size_t newTupleCount = mCurrentSlicedBucket->size() + 1;
282        StatsdStats::getInstance().noteMetricDimensionSize(mConfigKey, mMetricId, newTupleCount);
283        // 2. Don't add more tuples, we are above the allowed threshold. Drop the data.
284        if (newTupleCount > StatsdStats::kDimensionKeySizeHardLimit) {
285            ALOGE("GaugeMetric %lld dropping data for dimension key %s",
286                (long long)mMetricId, newKey.c_str());
287            return true;
288        }
289    }
290
291    return false;
292}
293
294void GaugeMetricProducer::onMatchedLogEventInternalLocked(
295        const size_t matcherIndex, const MetricDimensionKey& eventKey,
296        const ConditionKey& conditionKey, bool condition,
297        const LogEvent& event) {
298    if (condition == false) {
299        return;
300    }
301    uint64_t eventTimeNs = event.GetElapsedTimestampNs();
302    mTagId = event.GetTagId();
303    if (eventTimeNs < mCurrentBucketStartTimeNs) {
304        VLOG("Skip event due to late arrival: %lld vs %lld", (long long)eventTimeNs,
305             (long long)mCurrentBucketStartTimeNs);
306        return;
307    }
308    flushIfNeededLocked(eventTimeNs);
309
310    // When gauge metric wants to randomly sample the output atom, we just simply use the first
311    // gauge in the given bucket.
312    if (mCurrentSlicedBucket->find(eventKey) != mCurrentSlicedBucket->end() &&
313        mSamplingType == GaugeMetric::RANDOM_ONE_SAMPLE) {
314        return;
315    }
316    if (hitGuardRailLocked(eventKey)) {
317        return;
318    }
319    GaugeAtom gaugeAtom(getGaugeFields(event), eventTimeNs);
320    (*mCurrentSlicedBucket)[eventKey].push_back(gaugeAtom);
321    // Anomaly detection on gauge metric only works when there is one numeric
322    // field specified.
323    if (mAnomalyTrackers.size() > 0) {
324        if (gaugeAtom.mFields->size() == 1) {
325            const Value& value = gaugeAtom.mFields->begin()->mValue;
326            long gaugeVal = 0;
327            if (value.getType() == INT) {
328                gaugeVal = (long)value.int_value;
329            } else if (value.getType() == LONG) {
330                gaugeVal = value.long_value;
331            }
332            for (auto& tracker : mAnomalyTrackers) {
333                tracker->detectAndDeclareAnomaly(eventTimeNs, mCurrentBucketNum, eventKey,
334                                                 gaugeVal);
335            }
336        }
337    }
338}
339
340void GaugeMetricProducer::updateCurrentSlicedBucketForAnomaly() {
341    for (const auto& slice : *mCurrentSlicedBucket) {
342        if (slice.second.empty()) {
343            continue;
344        }
345        const Value& value = slice.second.front().mFields->front().mValue;
346        long gaugeVal = 0;
347        if (value.getType() == INT) {
348            gaugeVal = (long)value.int_value;
349        } else if (value.getType() == LONG) {
350            gaugeVal = value.long_value;
351        }
352        (*mCurrentSlicedBucketForAnomaly)[slice.first] = gaugeVal;
353    }
354}
355
// Discards all finished buckets without reporting them.
void GaugeMetricProducer::dropDataLocked(const uint64_t dropTimeNs) {
    // Flush first so that a current bucket that has already ended at |dropTimeNs| is moved
    // into mPastBuckets and dropped together with the rest.
    flushIfNeededLocked(dropTimeNs);
    mPastBuckets.clear();
}
360
361// When a new matched event comes in, we check if event falls into the current
362// bucket. If not, flush the old counter to past buckets and initialize the new
363// bucket.
364// if data is pushed, onMatchedLogEvent will only be called through onConditionChanged() inside
365// the GaugeMetricProducer while holding the lock.
366void GaugeMetricProducer::flushIfNeededLocked(const uint64_t& eventTimeNs) {
367    uint64_t currentBucketEndTimeNs = getCurrentBucketEndTimeNs();
368
369    if (eventTimeNs < currentBucketEndTimeNs) {
370        VLOG("eventTime is %lld, less than next bucket start time %lld", (long long)eventTimeNs,
371             (long long)(mCurrentBucketStartTimeNs + mBucketSizeNs));
372        return;
373    }
374
375    flushCurrentBucketLocked(eventTimeNs);
376
377    // Adjusts the bucket start and end times.
378    int64_t numBucketsForward = 1 + (eventTimeNs - currentBucketEndTimeNs) / mBucketSizeNs;
379    mCurrentBucketStartTimeNs = currentBucketEndTimeNs + (numBucketsForward - 1) * mBucketSizeNs;
380    mCurrentBucketNum += numBucketsForward;
381    VLOG("metric %lld: new bucket start time: %lld", (long long)mMetricId,
382         (long long)mCurrentBucketStartTimeNs);
383}
384
385void GaugeMetricProducer::flushCurrentBucketLocked(const uint64_t& eventTimeNs) {
386    uint64_t fullBucketEndTimeNs = getCurrentBucketEndTimeNs();
387
388    GaugeBucket info;
389    info.mBucketStartNs = mCurrentBucketStartTimeNs;
390    if (eventTimeNs < fullBucketEndTimeNs) {
391        info.mBucketEndNs = eventTimeNs;
392    } else {
393        info.mBucketEndNs = fullBucketEndTimeNs;
394    }
395    info.mBucketNum = mCurrentBucketNum;
396
397    for (const auto& slice : *mCurrentSlicedBucket) {
398        info.mGaugeAtoms = slice.second;
399        auto& bucketList = mPastBuckets[slice.first];
400        bucketList.push_back(info);
401        VLOG("gauge metric %lld, dump key value: %s", (long long)mMetricId, slice.first.c_str());
402    }
403
404    // If we have anomaly trackers, we need to update the partial bucket values.
405    if (mAnomalyTrackers.size() > 0) {
406        updateCurrentSlicedBucketForAnomaly();
407
408        if (eventTimeNs > fullBucketEndTimeNs) {
409            // This is known to be a full bucket, so send this data to the anomaly tracker.
410            for (auto& tracker : mAnomalyTrackers) {
411                tracker->addPastBucket(mCurrentSlicedBucketForAnomaly, mCurrentBucketNum);
412            }
413            mCurrentSlicedBucketForAnomaly = std::make_shared<DimToValMap>();
414        }
415    }
416
417    mCurrentSlicedBucket = std::make_shared<DimToGaugeAtomsMap>();
418}
419
420size_t GaugeMetricProducer::byteSizeLocked() const {
421    size_t totalSize = 0;
422    for (const auto& pair : mPastBuckets) {
423        totalSize += pair.second.size() * kBucketSize;
424    }
425    return totalSize;
426}
427
428}  // namespace statsd
429}  // namespace os
430}  // namespace android
431