// GaugeMetricProducer.cpp revision 6189807c12e9cf32a36d32557725561c44b8aa5a
1/* 2* Copyright (C) 2017 The Android Open Source Project 3* 4* Licensed under the Apache License, Version 2.0 (the "License"); 5* you may not use this file except in compliance with the License. 6* You may obtain a copy of the License at 7* 8* http://www.apache.org/licenses/LICENSE-2.0 9* 10* Unless required by applicable law or agreed to in writing, software 11* distributed under the License is distributed on an "AS IS" BASIS, 12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13* See the License for the specific language governing permissions and 14* limitations under the License. 15*/ 16 17#define DEBUG false // STOPSHIP if true 18#include "Log.h" 19 20#include "GaugeMetricProducer.h" 21#include "guardrail/StatsdStats.h" 22#include "stats_log_util.h" 23 24#include <cutils/log.h> 25 26using android::util::FIELD_COUNT_REPEATED; 27using android::util::FIELD_TYPE_BOOL; 28using android::util::FIELD_TYPE_FLOAT; 29using android::util::FIELD_TYPE_INT32; 30using android::util::FIELD_TYPE_INT64; 31using android::util::FIELD_TYPE_MESSAGE; 32using android::util::FIELD_TYPE_STRING; 33using android::util::ProtoOutputStream; 34using std::map; 35using std::string; 36using std::unordered_map; 37using std::vector; 38using std::make_shared; 39using std::shared_ptr; 40 41namespace android { 42namespace os { 43namespace statsd { 44 45// for StatsLogReport 46const int FIELD_ID_ID = 1; 47const int FIELD_ID_GAUGE_METRICS = 8; 48// for GaugeMetricDataWrapper 49const int FIELD_ID_DATA = 1; 50// for GaugeMetricData 51const int FIELD_ID_DIMENSION_IN_WHAT = 1; 52const int FIELD_ID_DIMENSION_IN_CONDITION = 2; 53const int FIELD_ID_BUCKET_INFO = 3; 54// for GaugeBucketInfo 55const int FIELD_ID_START_BUCKET_ELAPSED_NANOS = 1; 56const int FIELD_ID_END_BUCKET_ELAPSED_NANOS = 2; 57const int FIELD_ID_ATOM = 3; 58const int FIELD_ID_ELAPSED_ATOM_TIMESTAMP = 4; 59 60GaugeMetricProducer::GaugeMetricProducer(const ConfigKey& key, const GaugeMetric& metric, 61 const int 
conditionIndex, 62 const sp<ConditionWizard>& wizard, 63 const int pullTagId, const uint64_t startTimeNs, 64 shared_ptr<StatsPullerManager> statsPullerManager) 65 : MetricProducer(metric.id(), key, startTimeNs, conditionIndex, wizard), 66 mStatsPullerManager(statsPullerManager), 67 mPullTagId(pullTagId) { 68 mCurrentSlicedBucket = std::make_shared<DimToGaugeAtomsMap>(); 69 mCurrentSlicedBucketForAnomaly = std::make_shared<DimToValMap>(); 70 int64_t bucketSizeMills = 0; 71 if (metric.has_bucket()) { 72 bucketSizeMills = TimeUnitToBucketSizeInMillis(metric.bucket()); 73 } else { 74 bucketSizeMills = TimeUnitToBucketSizeInMillis(ONE_HOUR); 75 } 76 mBucketSizeNs = bucketSizeMills * 1000000; 77 78 mSamplingType = metric.sampling_type(); 79 if (!metric.gauge_fields_filter().include_all()) { 80 translateFieldMatcher(metric.gauge_fields_filter().fields(), &mFieldMatchers); 81 } 82 83 // TODO: use UidMap if uid->pkg_name is required 84 if (metric.has_dimensions_in_what()) { 85 translateFieldMatcher(metric.dimensions_in_what(), &mDimensionsInWhat); 86 } 87 88 if (metric.has_dimensions_in_condition()) { 89 translateFieldMatcher(metric.dimensions_in_condition(), &mDimensionsInCondition); 90 } 91 92 if (metric.links().size() > 0) { 93 for (const auto& link : metric.links()) { 94 Metric2Condition mc; 95 mc.conditionId = link.condition(); 96 translateFieldMatcher(link.fields_in_what(), &mc.metricFields); 97 translateFieldMatcher(link.fields_in_condition(), &mc.conditionFields); 98 mMetric2ConditionLinks.push_back(mc); 99 } 100 } 101 mConditionSliced = (metric.links().size() > 0) || (mDimensionsInCondition.size() > 0); 102 103 // Kicks off the puller immediately. 104 if (mPullTagId != -1 && mSamplingType == GaugeMetric::RANDOM_ONE_SAMPLE) { 105 mStatsPullerManager->RegisterReceiver(mPullTagId, this, bucketSizeMills); 106 } 107 108 VLOG("metric %lld created. 
bucket size %lld start_time: %lld", (long long)metric.id(), 109 (long long)mBucketSizeNs, (long long)mStartTimeNs); 110} 111 112// for testing 113GaugeMetricProducer::GaugeMetricProducer(const ConfigKey& key, const GaugeMetric& metric, 114 const int conditionIndex, 115 const sp<ConditionWizard>& wizard, const int pullTagId, 116 const int64_t startTimeNs) 117 : GaugeMetricProducer(key, metric, conditionIndex, wizard, pullTagId, startTimeNs, 118 make_shared<StatsPullerManager>()) { 119} 120 121GaugeMetricProducer::~GaugeMetricProducer() { 122 VLOG("~GaugeMetricProducer() called"); 123 if (mPullTagId != -1) { 124 mStatsPullerManager->UnRegisterReceiver(mPullTagId, this); 125 } 126} 127 128void GaugeMetricProducer::onDumpReportLocked(const uint64_t dumpTimeNs, 129 ProtoOutputStream* protoOutput) { 130 VLOG("gauge metric %lld report now...", (long long)mMetricId); 131 132 flushIfNeededLocked(dumpTimeNs); 133 if (mPastBuckets.empty()) { 134 return; 135 } 136 137 protoOutput->write(FIELD_TYPE_INT64 | FIELD_ID_ID, (long long)mMetricId); 138 long long protoToken = protoOutput->start(FIELD_TYPE_MESSAGE | FIELD_ID_GAUGE_METRICS); 139 140 for (const auto& pair : mPastBuckets) { 141 const MetricDimensionKey& dimensionKey = pair.first; 142 143 VLOG(" dimension key %s", dimensionKey.c_str()); 144 long long wrapperToken = 145 protoOutput->start(FIELD_TYPE_MESSAGE | FIELD_COUNT_REPEATED | FIELD_ID_DATA); 146 147 // First fill dimension. 
148 long long dimensionToken = protoOutput->start( 149 FIELD_TYPE_MESSAGE | FIELD_ID_DIMENSION_IN_WHAT); 150 writeDimensionToProto(dimensionKey.getDimensionKeyInWhat(), protoOutput); 151 protoOutput->end(dimensionToken); 152 153 if (dimensionKey.hasDimensionKeyInCondition()) { 154 long long dimensionInConditionToken = protoOutput->start( 155 FIELD_TYPE_MESSAGE | FIELD_ID_DIMENSION_IN_CONDITION); 156 writeDimensionToProto(dimensionKey.getDimensionKeyInCondition(), protoOutput); 157 protoOutput->end(dimensionInConditionToken); 158 } 159 160 // Then fill bucket_info (GaugeBucketInfo). 161 for (const auto& bucket : pair.second) { 162 long long bucketInfoToken = protoOutput->start( 163 FIELD_TYPE_MESSAGE | FIELD_COUNT_REPEATED | FIELD_ID_BUCKET_INFO); 164 protoOutput->write(FIELD_TYPE_INT64 | FIELD_ID_START_BUCKET_ELAPSED_NANOS, 165 (long long)bucket.mBucketStartNs); 166 protoOutput->write(FIELD_TYPE_INT64 | FIELD_ID_END_BUCKET_ELAPSED_NANOS, 167 (long long)bucket.mBucketEndNs); 168 169 if (!bucket.mGaugeAtoms.empty()) { 170 long long atomsToken = 171 protoOutput->start(FIELD_TYPE_MESSAGE | FIELD_COUNT_REPEATED | FIELD_ID_ATOM); 172 for (const auto& atom : bucket.mGaugeAtoms) { 173 writeFieldValueTreeToStream(mTagId, *(atom.mFields), protoOutput); 174 } 175 protoOutput->end(atomsToken); 176 177 for (const auto& atom : bucket.mGaugeAtoms) { 178 protoOutput->write( 179 FIELD_TYPE_INT64 | FIELD_COUNT_REPEATED | FIELD_ID_ELAPSED_ATOM_TIMESTAMP, 180 (long long)atom.mTimestamps); 181 } 182 } 183 protoOutput->end(bucketInfoToken); 184 VLOG("\t bucket [%lld - %lld] includes %d atoms.", (long long)bucket.mBucketStartNs, 185 (long long)bucket.mBucketEndNs, (int)bucket.mGaugeAtoms.size()); 186 } 187 protoOutput->end(wrapperToken); 188 } 189 protoOutput->end(protoToken); 190 191 mPastBuckets.clear(); 192 // TODO: Clear mDimensionKeyMap once the report is dumped. 
193} 194 195void GaugeMetricProducer::pullLocked() { 196 vector<std::shared_ptr<LogEvent>> allData; 197 if (!mStatsPullerManager->Pull(mPullTagId, &allData)) { 198 ALOGE("Stats puller failed for tag: %d", mPullTagId); 199 return; 200 } 201 for (const auto& data : allData) { 202 onMatchedLogEventLocked(0, *data); 203 } 204} 205 206void GaugeMetricProducer::onConditionChangedLocked(const bool conditionMet, 207 const uint64_t eventTime) { 208 VLOG("Metric %lld onConditionChanged", (long long)mMetricId); 209 flushIfNeededLocked(eventTime); 210 mCondition = conditionMet; 211 212 // Push mode. No need to proactively pull the gauge data. 213 if (mPullTagId == -1) { 214 return; 215 } 216 217 bool triggerPuller = false; 218 switch(mSamplingType) { 219 // When the metric wants to do random sampling and there is already one gauge atom for the 220 // current bucket, do not do it again. 221 case GaugeMetric::RANDOM_ONE_SAMPLE: { 222 triggerPuller = mCondition && mCurrentSlicedBucket->empty(); 223 break; 224 } 225 case GaugeMetric::ALL_CONDITION_CHANGES: { 226 triggerPuller = true; 227 break; 228 } 229 default: 230 break; 231 } 232 if (!triggerPuller) { 233 return; 234 } 235 236 vector<std::shared_ptr<LogEvent>> allData; 237 if (!mStatsPullerManager->Pull(mPullTagId, &allData)) { 238 ALOGE("Stats puller failed for tag: %d", mPullTagId); 239 return; 240 } 241 for (const auto& data : allData) { 242 onMatchedLogEventLocked(0, *data); 243 } 244 flushIfNeededLocked(eventTime); 245} 246 247void GaugeMetricProducer::onSlicedConditionMayChangeLocked(const uint64_t eventTime) { 248 VLOG("Metric %lld onSlicedConditionMayChange", (long long)mMetricId); 249} 250 251std::shared_ptr<vector<FieldValue>> GaugeMetricProducer::getGaugeFields(const LogEvent& event) { 252 if (mFieldMatchers.size() > 0) { 253 std::shared_ptr<vector<FieldValue>> gaugeFields = std::make_shared<vector<FieldValue>>(); 254 filterGaugeValues(mFieldMatchers, event.getValues(), gaugeFields.get()); 255 return gaugeFields; 
256 } else { 257 return std::make_shared<vector<FieldValue>>(event.getValues()); 258 } 259} 260 261void GaugeMetricProducer::onDataPulled(const std::vector<std::shared_ptr<LogEvent>>& allData) { 262 std::lock_guard<std::mutex> lock(mMutex); 263 if (allData.size() == 0) { 264 return; 265 } 266 for (const auto& data : allData) { 267 onMatchedLogEventLocked(0, *data); 268 } 269} 270 271bool GaugeMetricProducer::hitGuardRailLocked(const MetricDimensionKey& newKey) { 272 if (mCurrentSlicedBucket->find(newKey) != mCurrentSlicedBucket->end()) { 273 return false; 274 } 275 // 1. Report the tuple count if the tuple count > soft limit 276 if (mCurrentSlicedBucket->size() > StatsdStats::kDimensionKeySizeSoftLimit - 1) { 277 size_t newTupleCount = mCurrentSlicedBucket->size() + 1; 278 StatsdStats::getInstance().noteMetricDimensionSize(mConfigKey, mMetricId, newTupleCount); 279 // 2. Don't add more tuples, we are above the allowed threshold. Drop the data. 280 if (newTupleCount > StatsdStats::kDimensionKeySizeHardLimit) { 281 ALOGE("GaugeMetric %lld dropping data for dimension key %s", 282 (long long)mMetricId, newKey.c_str()); 283 return true; 284 } 285 } 286 287 return false; 288} 289 290void GaugeMetricProducer::onMatchedLogEventInternalLocked( 291 const size_t matcherIndex, const MetricDimensionKey& eventKey, 292 const ConditionKey& conditionKey, bool condition, 293 const LogEvent& event) { 294 if (condition == false) { 295 return; 296 } 297 uint64_t eventTimeNs = event.GetElapsedTimestampNs(); 298 mTagId = event.GetTagId(); 299 if (eventTimeNs < mCurrentBucketStartTimeNs) { 300 VLOG("Skip event due to late arrival: %lld vs %lld", (long long)eventTimeNs, 301 (long long)mCurrentBucketStartTimeNs); 302 return; 303 } 304 flushIfNeededLocked(eventTimeNs); 305 306 // When gauge metric wants to randomly sample the output atom, we just simply use the first 307 // gauge in the given bucket. 
308 if (mCurrentSlicedBucket->find(eventKey) != mCurrentSlicedBucket->end() && 309 mSamplingType == GaugeMetric::RANDOM_ONE_SAMPLE) { 310 return; 311 } 312 if (hitGuardRailLocked(eventKey)) { 313 return; 314 } 315 GaugeAtom gaugeAtom(getGaugeFields(event), eventTimeNs); 316 (*mCurrentSlicedBucket)[eventKey].push_back(gaugeAtom); 317 // Anomaly detection on gauge metric only works when there is one numeric 318 // field specified. 319 if (mAnomalyTrackers.size() > 0) { 320 if (gaugeAtom.mFields->size() == 1) { 321 const Value& value = gaugeAtom.mFields->begin()->mValue; 322 long gaugeVal = 0; 323 if (value.getType() == INT) { 324 gaugeVal = (long)value.int_value; 325 } else if (value.getType() == LONG) { 326 gaugeVal = value.long_value; 327 } 328 for (auto& tracker : mAnomalyTrackers) { 329 tracker->detectAndDeclareAnomaly(eventTimeNs, mCurrentBucketNum, eventKey, 330 gaugeVal); 331 } 332 } 333 } 334} 335 336void GaugeMetricProducer::updateCurrentSlicedBucketForAnomaly() { 337 for (const auto& slice : *mCurrentSlicedBucket) { 338 if (slice.second.empty()) { 339 continue; 340 } 341 const Value& value = slice.second.front().mFields->front().mValue; 342 long gaugeVal = 0; 343 if (value.getType() == INT) { 344 gaugeVal = (long)value.int_value; 345 } else if (value.getType() == LONG) { 346 gaugeVal = value.long_value; 347 } 348 (*mCurrentSlicedBucketForAnomaly)[slice.first] = gaugeVal; 349 } 350} 351 352// When a new matched event comes in, we check if event falls into the current 353// bucket. If not, flush the old counter to past buckets and initialize the new 354// bucket. 355// if data is pushed, onMatchedLogEvent will only be called through onConditionChanged() inside 356// the GaugeMetricProducer while holding the lock. 
357void GaugeMetricProducer::flushIfNeededLocked(const uint64_t& eventTimeNs) { 358 uint64_t currentBucketEndTimeNs = getCurrentBucketEndTimeNs(); 359 360 if (eventTimeNs < currentBucketEndTimeNs) { 361 VLOG("eventTime is %lld, less than next bucket start time %lld", (long long)eventTimeNs, 362 (long long)(mCurrentBucketStartTimeNs + mBucketSizeNs)); 363 return; 364 } 365 366 flushCurrentBucketLocked(eventTimeNs); 367 368 // Adjusts the bucket start and end times. 369 int64_t numBucketsForward = 1 + (eventTimeNs - currentBucketEndTimeNs) / mBucketSizeNs; 370 mCurrentBucketStartTimeNs = currentBucketEndTimeNs + (numBucketsForward - 1) * mBucketSizeNs; 371 mCurrentBucketNum += numBucketsForward; 372 VLOG("metric %lld: new bucket start time: %lld", (long long)mMetricId, 373 (long long)mCurrentBucketStartTimeNs); 374} 375 376void GaugeMetricProducer::flushCurrentBucketLocked(const uint64_t& eventTimeNs) { 377 uint64_t fullBucketEndTimeNs = getCurrentBucketEndTimeNs(); 378 379 GaugeBucket info; 380 info.mBucketStartNs = mCurrentBucketStartTimeNs; 381 if (eventTimeNs < fullBucketEndTimeNs) { 382 info.mBucketEndNs = eventTimeNs; 383 } else { 384 info.mBucketEndNs = fullBucketEndTimeNs; 385 } 386 info.mBucketNum = mCurrentBucketNum; 387 388 for (const auto& slice : *mCurrentSlicedBucket) { 389 info.mGaugeAtoms = slice.second; 390 auto& bucketList = mPastBuckets[slice.first]; 391 bucketList.push_back(info); 392 VLOG("gauge metric %lld, dump key value: %s", (long long)mMetricId, slice.first.c_str()); 393 } 394 395 // If we have anomaly trackers, we need to update the partial bucket values. 396 if (mAnomalyTrackers.size() > 0) { 397 updateCurrentSlicedBucketForAnomaly(); 398 399 if (eventTimeNs > fullBucketEndTimeNs) { 400 // This is known to be a full bucket, so send this data to the anomaly tracker. 
401 for (auto& tracker : mAnomalyTrackers) { 402 tracker->addPastBucket(mCurrentSlicedBucketForAnomaly, mCurrentBucketNum); 403 } 404 mCurrentSlicedBucketForAnomaly = std::make_shared<DimToValMap>(); 405 } 406 } 407 408 mCurrentSlicedBucket = std::make_shared<DimToGaugeAtomsMap>(); 409} 410 411size_t GaugeMetricProducer::byteSizeLocked() const { 412 size_t totalSize = 0; 413 for (const auto& pair : mPastBuckets) { 414 totalSize += pair.second.size() * kBucketSize; 415 } 416 return totalSize; 417} 418 419} // namespace statsd 420} // namespace os 421} // namespace android 422