// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/tracked_objects.h"

#include <ctype.h>
#include <limits.h>
#include <stdlib.h>

#include "base/atomicops.h"
#include "base/base_switches.h"
#include "base/command_line.h"
#include "base/compiler_specific.h"
#include "base/debug/leak_annotations.h"
#include "base/logging.h"
#include "base/metrics/histogram_macros.h"
#include "base/numerics/safe_conversions.h"
#include "base/numerics/safe_math.h"
#include "base/process/process_handle.h"
#include "base/third_party/valgrind/memcheck.h"
#include "base/threading/worker_pool.h"
#include "base/tracking_info.h"
#include "build/build_config.h"

using base::TimeDelta;

namespace base {
class TimeDelta;
}

namespace tracked_objects {

namespace {

constexpr char kWorkerThreadSanitizedName[] = "WorkerThread-*";

// When ThreadData is first initialized, should we start in an ACTIVE state to
// record all of the startup-time tasks, or should we start up DEACTIVATED, so
// that we only record after parsing the command line flag --enable-tracking.
// Note that the flag may force either state, so this really controls only the
// period of time up until that flag is parsed.  If there is no flag seen, then
// this state may prevail for much or all of the process lifetime.
const ThreadData::Status kInitialStartupState = ThreadData::PROFILING_ACTIVE;

// Possible states of the profiler timing enabledness.
enum {
  UNDEFINED_TIMING,
  ENABLED_TIMING,
  DISABLED_TIMING,
};

// State of the profiler timing enabledness.
base::subtle::Atomic32 g_profiler_timing_enabled = UNDEFINED_TIMING;

// Returns whether profiler timing is enabled.  The default is true, but this
// may be overridden by a command-line flag.  Some platforms may
// programmatically set this command-line flag to the "off" value if it's not
// specified.
// This in turn can be overridden by explicitly calling
// ThreadData::EnableProfilerTiming, say, based on a field trial.
inline bool IsProfilerTimingEnabled() {
  // Reading |g_profiler_timing_enabled| is done without barrier because
  // multiple initialization is not an issue while the barrier can be relatively
  // costly given that this method is sometimes called in a tight loop.
  base::subtle::Atomic32 current_timing_enabled =
      base::subtle::NoBarrier_Load(&g_profiler_timing_enabled);
  if (current_timing_enabled == UNDEFINED_TIMING) {
    if (!base::CommandLine::InitializedForCurrentProcess())
      return true;
    current_timing_enabled =
        (base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
             switches::kProfilerTiming) ==
         switches::kProfilerTimingDisabledValue)
            ? DISABLED_TIMING
            : ENABLED_TIMING;
    base::subtle::NoBarrier_Store(&g_profiler_timing_enabled,
                                  current_timing_enabled);
  }
  return current_timing_enabled == ENABLED_TIMING;
}

// Sanitize a thread name by replacing a trailing sequence of digits with "*".
// Examples:
// 1. "BrowserBlockingWorker1/23857" => "BrowserBlockingWorker1/*"
// 2. "Chrome_IOThread" => "Chrome_IOThread"
std::string SanitizeThreadName(const std::string& thread_name) {
  size_t i = thread_name.length();

  while (i > 0 && isdigit(thread_name[i - 1]))
    --i;

  if (i == thread_name.length())
    return thread_name;

  return thread_name.substr(0, i) + '*';
}

}  // namespace

//------------------------------------------------------------------------------
// DeathData tallies durations when a death takes place.

DeathData::DeathData()
    : count_(0),
      sample_probability_count_(0),
      run_duration_sum_(0),
      queue_duration_sum_(0),
      run_duration_max_(0),
      queue_duration_max_(0),
      alloc_ops_(0),
      free_ops_(0),
      allocated_bytes_(0),
      freed_bytes_(0),
      alloc_overhead_bytes_(0),
      max_allocated_bytes_(0),
      run_duration_sample_(0),
      queue_duration_sample_(0),
      last_phase_snapshot_(nullptr) {}

DeathData::DeathData(const DeathData& other)
    : count_(other.count_),
      sample_probability_count_(other.sample_probability_count_),
      run_duration_sum_(other.run_duration_sum_),
      queue_duration_sum_(other.queue_duration_sum_),
      run_duration_max_(other.run_duration_max_),
      queue_duration_max_(other.queue_duration_max_),
      alloc_ops_(other.alloc_ops_),
      free_ops_(other.free_ops_),
      allocated_bytes_(other.allocated_bytes_),
      freed_bytes_(other.freed_bytes_),
      alloc_overhead_bytes_(other.alloc_overhead_bytes_),
      max_allocated_bytes_(other.max_allocated_bytes_),
      run_duration_sample_(other.run_duration_sample_),
      queue_duration_sample_(other.queue_duration_sample_),
      last_phase_snapshot_(nullptr) {
  // This constructor will be used by std::map when adding new DeathData values
  // to the map.  At that point, last_phase_snapshot_ is still NULL, so we don't
  // need to worry about ownership transfer.
  DCHECK(other.last_phase_snapshot_ == nullptr);
}

DeathData::~DeathData() {
  while (last_phase_snapshot_) {
    const DeathDataPhaseSnapshot* snapshot = last_phase_snapshot_;
    last_phase_snapshot_ = snapshot->prev;
    delete snapshot;
  }
}

// TODO(jar): I need to see if this macro to optimize branching is worth using.
//
// This macro has no branching, so it is surely fast, and is equivalent to:
//             if (assign_it)
//               target = source;
// We use a macro rather than a template to force this to inline.
// Related code for calculating max is discussed on the web.
#define CONDITIONAL_ASSIGN(assign_it, target, source) \
  ((target) ^= ((target) ^ (source)) & -static_cast<int32_t>(assign_it))
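// Illustrative trace of the macro above (example values; not used by any code
// in this file):
//   int32_t target = 7;
//   CONDITIONAL_ASSIGN(1, target, 9);  // Mask is -1 (all ones): target == 9.
//   CONDITIONAL_ASSIGN(0, target, 3);  // Mask is 0: target stays 9.
// With assign_it == 1, -static_cast<int32_t>(assign_it) is all ones, so the
// XOR with ((target) ^ (source)) rewrites target to source; with assign_it
// == 0 the mask zeroes that term and target is left unchanged.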

void DeathData::RecordDurations(const int32_t queue_duration,
                                const int32_t run_duration,
                                const uint32_t random_number) {
  // We'll just clamp at INT_MAX, but we should note this in the UI as such.
  if (count_ < INT_MAX)
    base::subtle::NoBarrier_Store(&count_, count_ + 1);

  int sample_probability_count =
      base::subtle::NoBarrier_Load(&sample_probability_count_);
  if (sample_probability_count < INT_MAX)
    ++sample_probability_count;
  base::subtle::NoBarrier_Store(&sample_probability_count_,
                                sample_probability_count);

  base::subtle::NoBarrier_Store(&queue_duration_sum_,
                                queue_duration_sum_ + queue_duration);
  base::subtle::NoBarrier_Store(&run_duration_sum_,
                                run_duration_sum_ + run_duration);

  if (queue_duration_max() < queue_duration)
    base::subtle::NoBarrier_Store(&queue_duration_max_, queue_duration);
  if (run_duration_max() < run_duration)
    base::subtle::NoBarrier_Store(&run_duration_max_, run_duration);

  // Take a uniformly distributed sample over all durations ever supplied during
  // the current profiling phase.
  // The probability that we (instead) use this new sample is
  // 1/sample_probability_count_. This results in a completely uniform selection
  // of the sample (at least when we don't clamp sample_probability_count_...
  // but that should be inconsequentially likely).  We ignore the fact that we
  // correlated our selection of a sample to the run and queue times (i.e., we
  // used them to generate random_number).
  CHECK_GT(sample_probability_count, 0);
  if (0 == (random_number % sample_probability_count)) {
    base::subtle::NoBarrier_Store(&queue_duration_sample_, queue_duration);
    base::subtle::NoBarrier_Store(&run_duration_sample_, run_duration);
  }
}
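// The sampling above is, in effect, reservoir sampling with a reservoir of
// size one: assuming random_number is uniformly distributed, the Nth recorded
// duration replaces the stored sample with probability 1/N, leaving each of
// the N durations retained with equal probability (ignoring the clamping of
// sample_probability_count_ mentioned above).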

void DeathData::RecordAllocations(const uint32_t alloc_ops,
                                  const uint32_t free_ops,
                                  const uint32_t allocated_bytes,
                                  const uint32_t freed_bytes,
                                  const uint32_t alloc_overhead_bytes,
                                  const uint32_t max_allocated_bytes) {
  // Use saturating arithmetic.
  SaturatingMemberAdd(alloc_ops, &alloc_ops_);
  SaturatingMemberAdd(free_ops, &free_ops_);
  SaturatingMemberAdd(allocated_bytes, &allocated_bytes_);
  SaturatingMemberAdd(freed_bytes, &freed_bytes_);
  SaturatingMemberAdd(alloc_overhead_bytes, &alloc_overhead_bytes_);

  int32_t max = base::saturated_cast<int32_t>(max_allocated_bytes);
  if (max > max_allocated_bytes_)
    base::subtle::NoBarrier_Store(&max_allocated_bytes_, max);
}

void DeathData::OnProfilingPhaseCompleted(int profiling_phase) {
  // Snapshotting and storing current state.
  last_phase_snapshot_ =
      new DeathDataPhaseSnapshot(profiling_phase, *this, last_phase_snapshot_);

  // Not touching fields for which a delta can be computed by comparing with a
  // snapshot from the previous phase. Resetting other fields.  Sample values
  // will be reset upon next death recording because sample_probability_count_
  // is set to 0.
  // We avoid resetting to 0 in favor of deltas whenever possible.  The reason
  // is that for incrementable fields, resetting to 0 from the snapshot thread
  // potentially in parallel with incrementing in the death thread may result in
  // significant data corruption that has a potential to grow with time.  Not
  // resetting incrementable fields and using deltas will cause any
  // off-by-little corruptions to be likely fixed at the next snapshot.
  // The max values are not incrementable, and cannot be deduced using deltas
  // for a given phase. Hence, we have to reset them to 0.  But the potential
  // damage is limited to getting the previous phase's max to apply for the next
  // phase, and the error doesn't have a potential to keep growing with new
  // resets.
  // sample_probability_count_ is incrementable, but must be reset to 0 at the
  // phase end, so that we start a new uniformly randomized sample selection
  // after the reset. These fields are updated using atomics. However, race
  // conditions are possible since these are updated individually and not
  // together atomically, resulting in the values being mutually inconsistent.
  // The damage is limited to selecting a wrong sample, which is not something
  // that can cause accumulating or cascading effects.
  // If there were no inconsistencies caused by race conditions, we never send a
  // sample for the previous phase in the next phase's snapshot because
  // ThreadData::SnapshotExecutedTasks doesn't send deltas with 0 count.
  base::subtle::NoBarrier_Store(&sample_probability_count_, 0);
  base::subtle::NoBarrier_Store(&run_duration_max_, 0);
  base::subtle::NoBarrier_Store(&queue_duration_max_, 0);
}

void DeathData::SaturatingMemberAdd(const uint32_t addend,
                                    base::subtle::Atomic32* sum) {
  // Bail quick if no work or already saturated.
  if (addend == 0U || *sum == INT_MAX)
    return;

  base::CheckedNumeric<int32_t> new_sum = *sum;
  new_sum += addend;
  base::subtle::NoBarrier_Store(sum, new_sum.ValueOrDefault(INT_MAX));
}
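// Example of the saturating behavior above (hypothetical values): if |*sum|
// currently holds INT_MAX - 5 and |addend| is 10, the checked addition is
// flagged as invalid, so ValueOrDefault(INT_MAX) stores INT_MAX rather than
// letting the value wrap around to a negative number.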

//------------------------------------------------------------------------------
DeathDataSnapshot::DeathDataSnapshot()
    : count(-1),
      run_duration_sum(-1),
      run_duration_max(-1),
      run_duration_sample(-1),
      queue_duration_sum(-1),
      queue_duration_max(-1),
      queue_duration_sample(-1),
      alloc_ops(-1),
      free_ops(-1),
      allocated_bytes(-1),
      freed_bytes(-1),
      alloc_overhead_bytes(-1),
      max_allocated_bytes(-1) {}

DeathDataSnapshot::DeathDataSnapshot(int count,
                                     int32_t run_duration_sum,
                                     int32_t run_duration_max,
                                     int32_t run_duration_sample,
                                     int32_t queue_duration_sum,
                                     int32_t queue_duration_max,
                                     int32_t queue_duration_sample,
                                     int32_t alloc_ops,
                                     int32_t free_ops,
                                     int32_t allocated_bytes,
                                     int32_t freed_bytes,
                                     int32_t alloc_overhead_bytes,
                                     int32_t max_allocated_bytes)
    : count(count),
      run_duration_sum(run_duration_sum),
      run_duration_max(run_duration_max),
      run_duration_sample(run_duration_sample),
      queue_duration_sum(queue_duration_sum),
      queue_duration_max(queue_duration_max),
      queue_duration_sample(queue_duration_sample),
      alloc_ops(alloc_ops),
      free_ops(free_ops),
      allocated_bytes(allocated_bytes),
      freed_bytes(freed_bytes),
      alloc_overhead_bytes(alloc_overhead_bytes),
      max_allocated_bytes(max_allocated_bytes) {}

DeathDataSnapshot::DeathDataSnapshot(const DeathData& death_data)
    : count(death_data.count()),
      run_duration_sum(death_data.run_duration_sum()),
      run_duration_max(death_data.run_duration_max()),
      run_duration_sample(death_data.run_duration_sample()),
      queue_duration_sum(death_data.queue_duration_sum()),
      queue_duration_max(death_data.queue_duration_max()),
      queue_duration_sample(death_data.queue_duration_sample()),
      alloc_ops(death_data.alloc_ops()),
      free_ops(death_data.free_ops()),
      allocated_bytes(death_data.allocated_bytes()),
      freed_bytes(death_data.freed_bytes()),
      alloc_overhead_bytes(death_data.alloc_overhead_bytes()),
      max_allocated_bytes(death_data.max_allocated_bytes()) {}

DeathDataSnapshot::DeathDataSnapshot(const DeathDataSnapshot& death_data) =
    default;

DeathDataSnapshot::~DeathDataSnapshot() {
}

DeathDataSnapshot DeathDataSnapshot::Delta(
    const DeathDataSnapshot& older) const {
  return DeathDataSnapshot(
      count - older.count, run_duration_sum - older.run_duration_sum,
      run_duration_max, run_duration_sample,
      queue_duration_sum - older.queue_duration_sum, queue_duration_max,
      queue_duration_sample, alloc_ops - older.alloc_ops,
      free_ops - older.free_ops, allocated_bytes - older.allocated_bytes,
      freed_bytes - older.freed_bytes,
      alloc_overhead_bytes - older.alloc_overhead_bytes, max_allocated_bytes);
}
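// Note on Delta(): the max and sample fields are passed through rather than
// differenced.  DeathData::OnProfilingPhaseCompleted() resets the max values
// to 0 and restarts sample selection at each phase boundary, so the values in
// the newer snapshot are already per-phase quantities and need no subtraction.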

//------------------------------------------------------------------------------
BirthOnThread::BirthOnThread(const Location& location,
                             const ThreadData& current)
    : location_(location),
      birth_thread_(&current) {
}

//------------------------------------------------------------------------------
BirthOnThreadSnapshot::BirthOnThreadSnapshot() {
}

BirthOnThreadSnapshot::BirthOnThreadSnapshot(const BirthOnThread& birth)
    : location(birth.location()),
      sanitized_thread_name(birth.birth_thread()->sanitized_thread_name()) {}

BirthOnThreadSnapshot::~BirthOnThreadSnapshot() {
}

//------------------------------------------------------------------------------
Births::Births(const Location& location, const ThreadData& current)
    : BirthOnThread(location, current),
      birth_count_(1) { }

int Births::birth_count() const { return birth_count_; }

void Births::RecordBirth() { ++birth_count_; }

//------------------------------------------------------------------------------
// ThreadData maintains the central data for all births and deaths on a single
// thread.

// TODO(jar): We should pull all these static vars together, into a struct, and
// optimize layout so that we benefit from locality of reference during accesses
// to them.

// static
ThreadData::NowFunction* ThreadData::now_function_for_testing_ = NULL;

// A TLS slot which points to the ThreadData instance for the current thread.
// We do a fake initialization here (zeroing out data), and then the real
// in-place construction happens when we call tls_index_.Initialize().
// static
base::ThreadLocalStorage::StaticSlot ThreadData::tls_index_ = TLS_INITIALIZER;

// static
int ThreadData::cleanup_count_ = 0;

// static
int ThreadData::incarnation_counter_ = 0;

// static
ThreadData* ThreadData::all_thread_data_list_head_ = NULL;

// static
ThreadData* ThreadData::first_retired_thread_data_ = NULL;

// static
base::LazyInstance<base::Lock>::Leaky
    ThreadData::list_lock_ = LAZY_INSTANCE_INITIALIZER;

// static
base::subtle::Atomic32 ThreadData::status_ = ThreadData::UNINITIALIZED;

ThreadData::ThreadData(const std::string& sanitized_thread_name)
    : next_(NULL),
      next_retired_thread_data_(NULL),
      sanitized_thread_name_(sanitized_thread_name),
      incarnation_count_for_pool_(-1),
      current_stopwatch_(NULL) {
  DCHECK(sanitized_thread_name_.empty() ||
         !isdigit(sanitized_thread_name_.back()));
  PushToHeadOfList();  // Which sets real incarnation_count_for_pool_.
}

ThreadData::~ThreadData() {
}

void ThreadData::PushToHeadOfList() {
  // Toss in a hint of randomness (atop the uninitialized value).
  (void)VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(&random_number_,
                                                 sizeof(random_number_));
  MSAN_UNPOISON(&random_number_, sizeof(random_number_));
  random_number_ += static_cast<uint32_t>(this - static_cast<ThreadData*>(0));
  random_number_ ^= (Now() - TrackedTime()).InMilliseconds();

  DCHECK(!next_);
  base::AutoLock lock(*list_lock_.Pointer());
  incarnation_count_for_pool_ = incarnation_counter_;
  next_ = all_thread_data_list_head_;
  all_thread_data_list_head_ = this;
}

// static
ThreadData* ThreadData::first() {
  base::AutoLock lock(*list_lock_.Pointer());
  return all_thread_data_list_head_;
}

ThreadData* ThreadData::next() const { return next_; }

// static
void ThreadData::InitializeThreadContext(const std::string& thread_name) {
  if (base::WorkerPool::RunsTasksOnCurrentThread())
    return;
  DCHECK_NE(thread_name, kWorkerThreadSanitizedName);
  EnsureTlsInitialization();
  ThreadData* current_thread_data =
      reinterpret_cast<ThreadData*>(tls_index_.Get());
  if (current_thread_data)
    return;  // Browser tests instigate this.
  current_thread_data =
      GetRetiredOrCreateThreadData(SanitizeThreadName(thread_name));
  tls_index_.Set(current_thread_data);
}

// static
ThreadData* ThreadData::Get() {
  if (!tls_index_.initialized())
    return NULL;  // For unittests only.
  ThreadData* registered = reinterpret_cast<ThreadData*>(tls_index_.Get());
  if (registered)
    return registered;

  // We must be a worker thread, since we didn't pre-register.
  ThreadData* worker_thread_data =
      GetRetiredOrCreateThreadData(kWorkerThreadSanitizedName);
  tls_index_.Set(worker_thread_data);
  return worker_thread_data;
}

// static
void ThreadData::OnThreadTermination(void* thread_data) {
  DCHECK(thread_data);  // TLS should *never* call us with a NULL.
  // We must NOT do any allocations during this callback.  There is a chance
  // that the allocator is no longer active on this thread.
  reinterpret_cast<ThreadData*>(thread_data)->OnThreadTerminationCleanup();
}

void ThreadData::OnThreadTerminationCleanup() {
  // We must NOT do any allocations during this callback. There is a chance that
  // the allocator is no longer active on this thread.

  // The list_lock_ was created when we registered the callback, so it won't be
  // allocated here despite the lazy reference.
  base::AutoLock lock(*list_lock_.Pointer());
  if (incarnation_counter_ != incarnation_count_for_pool_)
    return;  // ThreadData was constructed in an earlier unit test.
  ++cleanup_count_;

  // Add this ThreadData to a retired list so that it can be reused by a thread
  // with the same sanitized name in the future.
  // |next_retired_thread_data_| is expected to be nullptr for a ThreadData
  // associated with an active thread.
  DCHECK(!next_retired_thread_data_);
  next_retired_thread_data_ = first_retired_thread_data_;
  first_retired_thread_data_ = this;
}

// static
void ThreadData::Snapshot(int current_profiling_phase,
                          ProcessDataSnapshot* process_data_snapshot) {
  // Get an unchanging copy of a ThreadData list.
  ThreadData* my_list = ThreadData::first();

  // Gather data serially.
  // This hackish approach *can* get some slightly corrupt tallies, as we are
  // grabbing values without the protection of a lock, but it has the advantage
  // of working even with threads that don't have message loops.  If a user
  // sees any strangeness, they can always just run their stats gathering a
  // second time.
  BirthCountMap birth_counts;
  for (ThreadData* thread_data = my_list; thread_data;
       thread_data = thread_data->next()) {
    thread_data->SnapshotExecutedTasks(current_profiling_phase,
                                       &process_data_snapshot->phased_snapshots,
                                       &birth_counts);
  }

  // Add births that are still active -- i.e. objects that have tallied a birth,
  // but have not yet tallied a matching death, and hence must be either
  // running, queued up, or being held in limbo for future posting.
  auto* current_phase_tasks =
      &process_data_snapshot->phased_snapshots[current_profiling_phase].tasks;
  for (const auto& birth_count : birth_counts) {
    if (birth_count.second > 0) {
      current_phase_tasks->push_back(
          TaskSnapshot(BirthOnThreadSnapshot(*birth_count.first),
                       DeathDataSnapshot(birth_count.second, 0, 0, 0, 0, 0, 0,
                                         0, 0, 0, 0, 0, 0),
                       "Still_Alive"));
    }
  }
}

// static
void ThreadData::OnProfilingPhaseCompleted(int profiling_phase) {
  // Get an unchanging copy of a ThreadData list.
  ThreadData* my_list = ThreadData::first();

  // Add snapshots for all instances of death data in all threads serially.
  // This hackish approach *can* get some slightly corrupt tallies, as we are
  // grabbing values without the protection of a lock, but it has the advantage
  // of working even with threads that don't have message loops.  Any corruption
  // shouldn't cause "cascading damage" to anything else (in later phases).
  for (ThreadData* thread_data = my_list; thread_data;
       thread_data = thread_data->next()) {
    thread_data->OnProfilingPhaseCompletedOnThread(profiling_phase);
  }
}

Births* ThreadData::TallyABirth(const Location& location) {
  BirthMap::iterator it = birth_map_.find(location);
  Births* child;
  if (it != birth_map_.end()) {
    child = it->second;
    child->RecordBirth();
  } else {
    child = new Births(location, *this);  // Leak this.
    // Lock since the map may get relocated now, and other threads sometimes
    // snapshot it (but they lock before copying it).
    base::AutoLock lock(map_lock_);
    birth_map_[location] = child;
  }

  return child;
}

void ThreadData::TallyADeath(const Births& births,
                             int32_t queue_duration,
                             const TaskStopwatch& stopwatch) {
  int32_t run_duration = stopwatch.RunDurationMs();

  // Stir in some randomness, plus add constant in case durations are zero.
  const uint32_t kSomePrimeNumber = 2147483647;
  random_number_ += queue_duration + run_duration + kSomePrimeNumber;
  // An address is going to have some randomness to it as well ;-).
  random_number_ ^=
      static_cast<uint32_t>(&births - reinterpret_cast<Births*>(0));

  DeathMap::iterator it = death_map_.find(&births);
  DeathData* death_data;
  if (it != death_map_.end()) {
    death_data = &it->second;
  } else {
    base::AutoLock lock(map_lock_);  // Lock as the map may get relocated now.
    death_data = &death_map_[&births];
  }  // Release lock ASAP.
  death_data->RecordDurations(queue_duration, run_duration, random_number_);

#if BUILDFLAG(ENABLE_MEMORY_TASK_PROFILER)
  if (stopwatch.heap_tracking_enabled()) {
    base::debug::ThreadHeapUsage heap_usage = stopwatch.heap_usage().usage();
    // Saturate the 64 bit counts on conversion to 32 bit storage.
    death_data->RecordAllocations(
        base::saturated_cast<int32_t>(heap_usage.alloc_ops),
        base::saturated_cast<int32_t>(heap_usage.free_ops),
        base::saturated_cast<int32_t>(heap_usage.alloc_bytes),
        base::saturated_cast<int32_t>(heap_usage.free_bytes),
        base::saturated_cast<int32_t>(heap_usage.alloc_overhead_bytes),
        base::saturated_cast<int32_t>(heap_usage.max_allocated_bytes));
  }
#endif
}

// static
Births* ThreadData::TallyABirthIfActive(const Location& location) {
  if (!TrackingStatus())
    return NULL;
  ThreadData* current_thread_data = Get();
  if (!current_thread_data)
    return NULL;
  return current_thread_data->TallyABirth(location);
}

// static
void ThreadData::TallyRunOnNamedThreadIfTracking(
    const base::TrackingInfo& completed_task,
    const TaskStopwatch& stopwatch) {
  // Even if we have been DEACTIVATED, we will process any pending births so
  // that our data structures (which counted the outstanding births) remain
  // consistent.
  const Births* births = completed_task.birth_tally;
  if (!births)
    return;
  ThreadData* current_thread_data = stopwatch.GetThreadData();
  if (!current_thread_data)
    return;

  // Watch out for a race where status_ is changing, and hence one or both
  // of start_of_run or end_of_run is zero.  In that case, we didn't bother to
  // get a time value since we "weren't tracking" and we were trying to be
  // efficient by not calling for a genuine time value.  For simplicity, we'll
  // use a default zero duration when we can't calculate a true value.
  TrackedTime start_of_run = stopwatch.StartTime();
  int32_t queue_duration = 0;
  if (!start_of_run.is_null()) {
    queue_duration = (start_of_run - completed_task.EffectiveTimePosted())
        .InMilliseconds();
  }
  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}

// static
void ThreadData::TallyRunOnWorkerThreadIfTracking(
    const Births* births,
    const TrackedTime& time_posted,
    const TaskStopwatch& stopwatch) {
  // Even if we have been DEACTIVATED, we will process any pending births so
  // that our data structures (which counted the outstanding births) remain
  // consistent.
  if (!births)
    return;

  // TODO(jar): Support the option to coalesce all worker-thread activity under
  // one ThreadData instance that uses locks to protect *all* access.  This will
  // reduce memory (making it provably bounded), but run incrementally slower
  // (since we'll use locks on TallyABirth and TallyADeath).  The good news is
  // that the locks on TallyADeath will be *after* the worker thread has run,
  // and hence nothing will be waiting for the completion (...  besides some
  // other thread that might like to run).  Also, the worker threads' tasks are
  // generally longer, and hence the cost of the lock may perchance be amortized
  // over the long task's lifetime.
  ThreadData* current_thread_data = stopwatch.GetThreadData();
  if (!current_thread_data)
    return;

  TrackedTime start_of_run = stopwatch.StartTime();
  int32_t queue_duration = 0;
  if (!start_of_run.is_null()) {
    queue_duration = (start_of_run - time_posted).InMilliseconds();
  }
  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}

// static
void ThreadData::TallyRunInAScopedRegionIfTracking(
    const Births* births,
    const TaskStopwatch& stopwatch) {
  // Even if we have been DEACTIVATED, we will process any pending births so
  // that our data structures (which counted the outstanding births) remain
  // consistent.
  if (!births)
    return;

  ThreadData* current_thread_data = stopwatch.GetThreadData();
  if (!current_thread_data)
    return;

  int32_t queue_duration = 0;
  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}

void ThreadData::SnapshotExecutedTasks(
    int current_profiling_phase,
    PhasedProcessDataSnapshotMap* phased_snapshots,
    BirthCountMap* birth_counts) {
  // Get copy of data, so that the data will not change during the iterations
  // and processing.
  BirthMap birth_map;
  DeathsSnapshot deaths;
  SnapshotMaps(current_profiling_phase, &birth_map, &deaths);

  for (const auto& birth : birth_map) {
    (*birth_counts)[birth.second] += birth.second->birth_count();
  }

  for (const auto& death : deaths) {
    (*birth_counts)[death.first] -= death.first->birth_count();

    // For the current death data, walk through all its snapshots, starting from
    // the current one, then from the previous profiling phase etc., and for
    // each snapshot calculate the delta between the snapshot and the previous
    // phase, if any.  Store the deltas in the result.
    for (const DeathDataPhaseSnapshot* phase = &death.second; phase;
         phase = phase->prev) {
      const DeathDataSnapshot& death_data =
          phase->prev ? phase->death_data.Delta(phase->prev->death_data)
                      : phase->death_data;

      if (death_data.count > 0) {
        (*phased_snapshots)[phase->profiling_phase].tasks.push_back(
            TaskSnapshot(BirthOnThreadSnapshot(*death.first), death_data,
                         sanitized_thread_name()));
      }
    }
  }
}

// This may be called from another thread.
void ThreadData::SnapshotMaps(int profiling_phase,
                              BirthMap* birth_map,
                              DeathsSnapshot* deaths) {
  base::AutoLock lock(map_lock_);

  for (const auto& birth : birth_map_)
    (*birth_map)[birth.first] = birth.second;

  for (const auto& death : death_map_) {
    deaths->push_back(std::make_pair(
        death.first,
        DeathDataPhaseSnapshot(profiling_phase, death.second,
                               death.second.last_phase_snapshot())));
  }
}

void ThreadData::OnProfilingPhaseCompletedOnThread(int profiling_phase) {
  base::AutoLock lock(map_lock_);

  for (auto& death : death_map_) {
    death.second.OnProfilingPhaseCompleted(profiling_phase);
  }
}

void ThreadData::EnsureTlsInitialization() {
  if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED)
    return;  // Someone else did the initialization.
  // Due to racy lazy initialization in tests, we'll need to recheck status_
  // after we acquire the lock.

  // Ensure that we don't double initialize tls.  We are called when single
  // threaded in the product, but some tests may be racy and lazy about our
  // initialization.
  base::AutoLock lock(*list_lock_.Pointer());
  if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED)
    return;  // Someone raced in here and beat us.

  // Perform the "real" TLS initialization now, and leave it intact through
  // process termination.
  if (!tls_index_.initialized()) {  // Testing may have initialized this.
    DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), UNINITIALIZED);
    tls_index_.Initialize(&ThreadData::OnThreadTermination);
    DCHECK(tls_index_.initialized());
  } else {
    // TLS was initialized for us earlier.
    DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), DORMANT_DURING_TESTS);
  }

  // Incarnation counter is only significant to testing, as it otherwise will
  // never again change in this process.
  ++incarnation_counter_;

  // The lock is not critical for setting status_, but it doesn't hurt.  It also
  // ensures that if we have a racy initialization, that we'll bail as soon as
  // we get the lock earlier in this method.
  base::subtle::Release_Store(&status_, kInitialStartupState);
  DCHECK(base::subtle::NoBarrier_Load(&status_) != UNINITIALIZED);
}

// static
void ThreadData::InitializeAndSetTrackingStatus(Status status) {
  DCHECK_GE(status, DEACTIVATED);
  DCHECK_LE(status, PROFILING_ACTIVE);

  EnsureTlsInitialization();  // No-op if already initialized.

  if (status > DEACTIVATED)
    status = PROFILING_ACTIVE;

  base::subtle::Release_Store(&status_, status);
}

// static
ThreadData::Status ThreadData::status() {
  return static_cast<ThreadData::Status>(base::subtle::Acquire_Load(&status_));
}

// static
bool ThreadData::TrackingStatus() {
  return base::subtle::Acquire_Load(&status_) > DEACTIVATED;
}

// static
void ThreadData::EnableProfilerTiming() {
  base::subtle::NoBarrier_Store(&g_profiler_timing_enabled, ENABLED_TIMING);
}

// static
TrackedTime ThreadData::Now() {
  if (now_function_for_testing_)
    return TrackedTime::FromMilliseconds((*now_function_for_testing_)());
  if (IsProfilerTimingEnabled() && TrackingStatus())
    return TrackedTime::Now();
  return TrackedTime();  // Super fast when disabled, or not compiled.
}
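// Callers such as TallyRunOnNamedThreadIfTracking() treat the null
// TrackedTime returned here when timing or tracking is disabled as "no
// timestamp" and fall back to a zero queue duration, so toggling the status
// at runtime yields zero durations rather than bogus ones.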

// static
void ThreadData::EnsureCleanupWasCalled(int major_threads_shutdown_count) {
  base::AutoLock lock(*list_lock_.Pointer());

  // TODO(jar): until this is working on XP, don't run the real test.
#if 0
  // Verify that we've at least shut down/cleaned up the major named threads.
  // The caller should tell us how many thread shutdowns should have taken
  // place by now.
  CHECK_GT(cleanup_count_, major_threads_shutdown_count);
#endif
}

// static
void ThreadData::ShutdownSingleThreadedCleanup(bool leak) {
  // This is only called from test code, where we need to cleanup so that
  // additional tests can be run.
  // We must be single threaded... but be careful anyway.
  InitializeAndSetTrackingStatus(DEACTIVATED);

  ThreadData* thread_data_list;
  {
    base::AutoLock lock(*list_lock_.Pointer());
    thread_data_list = all_thread_data_list_head_;
    all_thread_data_list_head_ = NULL;
    ++incarnation_counter_;
    // To be clean, break apart the retired worker list (though we leak them).
    while (first_retired_thread_data_) {
      ThreadData* thread_data = first_retired_thread_data_;
      first_retired_thread_data_ = thread_data->next_retired_thread_data_;
      thread_data->next_retired_thread_data_ = nullptr;
    }
  }

  // Put most global statics back in pristine shape.
  cleanup_count_ = 0;
  tls_index_.Set(NULL);
  // Almost UNINITIALIZED.
  base::subtle::Release_Store(&status_, DORMANT_DURING_TESTS);

  // To avoid any chance of racing in unit tests, which is the only place we
  // call this function, we may sometimes leak all the data structures we
  // recovered, as they may still be in use on threads from prior tests!
  if (leak) {
    ThreadData* thread_data = thread_data_list;
    while (thread_data) {
      ANNOTATE_LEAKING_OBJECT_PTR(thread_data);
      thread_data = thread_data->next();
    }
    return;
  }

  // When we want to cleanup (on a single thread), here is what we do.

  // Do actual recursive delete in all ThreadData instances.
  while (thread_data_list) {
    ThreadData* next_thread_data = thread_data_list;
    thread_data_list = thread_data_list->next();

    for (BirthMap::iterator it = next_thread_data->birth_map_.begin();
         next_thread_data->birth_map_.end() != it; ++it)
      delete it->second;  // Delete the Birth Records.
    delete next_thread_data;  // Includes all Death Records.
  }
}

// static
ThreadData* ThreadData::GetRetiredOrCreateThreadData(
    const std::string& sanitized_thread_name) {
  SCOPED_UMA_HISTOGRAM_TIMER("TrackedObjects.GetRetiredOrCreateThreadData");

  {
    base::AutoLock lock(*list_lock_.Pointer());
    ThreadData** pcursor = &first_retired_thread_data_;
    ThreadData* cursor = first_retired_thread_data_;

    // Assuming that there aren't more than a few tens of retired ThreadData
    // instances, this lookup should be quick compared to the thread creation
    // time. Retired ThreadData instances cannot be stored in a map because
    // insertions are done from OnThreadTerminationCleanup() where allocations
    // are not allowed.
    //
    // Note: Test processes may have more than a few tens of retired ThreadData
    // instances.
    while (cursor) {
      if (cursor->sanitized_thread_name() == sanitized_thread_name) {
        DCHECK_EQ(*pcursor, cursor);
        *pcursor = cursor->next_retired_thread_data_;
        cursor->next_retired_thread_data_ = nullptr;
        return cursor;
      }
      pcursor = &cursor->next_retired_thread_data_;
      cursor = cursor->next_retired_thread_data_;
    }
  }

  return new ThreadData(sanitized_thread_name);
}

//------------------------------------------------------------------------------
TaskStopwatch::TaskStopwatch()
    : wallclock_duration_ms_(0),
      current_thread_data_(NULL),
      excluded_duration_ms_(0),
      parent_(NULL) {
#if DCHECK_IS_ON()
  state_ = CREATED;
  child_ = NULL;
#endif
#if BUILDFLAG(ENABLE_MEMORY_TASK_PROFILER)
  heap_tracking_enabled_ =
      base::debug::ThreadHeapUsageTracker::IsHeapTrackingEnabled();
#endif
}

TaskStopwatch::~TaskStopwatch() {
#if DCHECK_IS_ON()
  DCHECK(state_ != RUNNING);
  DCHECK(child_ == NULL);
#endif
}

void TaskStopwatch::Start() {
#if DCHECK_IS_ON()
  DCHECK(state_ == CREATED);
  state_ = RUNNING;
#endif

  start_time_ = ThreadData::Now();
#if BUILDFLAG(ENABLE_MEMORY_TASK_PROFILER)
  if (heap_tracking_enabled_)
    heap_usage_.Start();
#endif

  current_thread_data_ = ThreadData::Get();
  if (!current_thread_data_)
    return;

  parent_ = current_thread_data_->current_stopwatch_;
#if DCHECK_IS_ON()
  if (parent_) {
    DCHECK(parent_->state_ == RUNNING);
    DCHECK(parent_->child_ == NULL);
    parent_->child_ = this;
  }
#endif
  current_thread_data_->current_stopwatch_ = this;
}

void TaskStopwatch::Stop() {
  const TrackedTime end_time = ThreadData::Now();
#if DCHECK_IS_ON()
  DCHECK(state_ == RUNNING);
  state_ = STOPPED;
  DCHECK(child_ == NULL);
#endif
#if BUILDFLAG(ENABLE_MEMORY_TASK_PROFILER)
  if (heap_tracking_enabled_)
    heap_usage_.Stop(true);
#endif

  if (!start_time_.is_null() && !end_time.is_null()) {
    wallclock_duration_ms_ = (end_time - start_time_).InMilliseconds();
  }

  if (!current_thread_data_)
    return;

  DCHECK(current_thread_data_->current_stopwatch_ == this);
  current_thread_data_->current_stopwatch_ = parent_;
  if (!parent_)
    return;

#if DCHECK_IS_ON()
  DCHECK(parent_->state_ == RUNNING);
  DCHECK(parent_->child_ == this);
  parent_->child_ = NULL;
#endif
  parent_->excluded_duration_ms_ += wallclock_duration_ms_;
  parent_ = NULL;
}
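// Worked example of the exclusion bookkeeping above (hypothetical timings):
// if a parent task's stopwatch spans 10ms of wallclock time and a nested
// TaskStopwatch (e.g. a tracked scoped region) spans 4ms of that, the nested
// Stop() adds 4ms to the parent's excluded_duration_ms_, so the parent's
// RunDurationMs() below reports 10 - 4 = 6ms of exclusive run time.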

TrackedTime TaskStopwatch::StartTime() const {
#if DCHECK_IS_ON()
  DCHECK(state_ != CREATED);
#endif

  return start_time_;
}

int32_t TaskStopwatch::RunDurationMs() const {
#if DCHECK_IS_ON()
  DCHECK(state_ == STOPPED);
#endif

  return wallclock_duration_ms_ - excluded_duration_ms_;
}

ThreadData* TaskStopwatch::GetThreadData() const {
#if DCHECK_IS_ON()
  DCHECK(state_ != CREATED);
#endif

  return current_thread_data_;
}

//------------------------------------------------------------------------------
// DeathDataPhaseSnapshot

DeathDataPhaseSnapshot::DeathDataPhaseSnapshot(
    int profiling_phase,
    const DeathData& death,
    const DeathDataPhaseSnapshot* prev)
    : profiling_phase(profiling_phase), death_data(death), prev(prev) {}

//------------------------------------------------------------------------------
// TaskSnapshot

TaskSnapshot::TaskSnapshot() {
}

TaskSnapshot::TaskSnapshot(const BirthOnThreadSnapshot& birth,
                           const DeathDataSnapshot& death_data,
                           const std::string& death_sanitized_thread_name)
    : birth(birth),
      death_data(death_data),
      death_sanitized_thread_name(death_sanitized_thread_name) {}

TaskSnapshot::~TaskSnapshot() {
}

//------------------------------------------------------------------------------
// ProcessDataPhaseSnapshot

ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot() {
}

ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot(
    const ProcessDataPhaseSnapshot& other) = default;

ProcessDataPhaseSnapshot::~ProcessDataPhaseSnapshot() {
}

//------------------------------------------------------------------------------
// ProcessDataSnapshot

ProcessDataSnapshot::ProcessDataSnapshot()
#if !defined(OS_NACL)
    : process_id(base::GetCurrentProcId()) {
#else
    : process_id(base::kNullProcessId) {
#endif
}

ProcessDataSnapshot::ProcessDataSnapshot(const ProcessDataSnapshot& other) =
    default;

ProcessDataSnapshot::~ProcessDataSnapshot() {
}

}  // namespace tracked_objects