// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/tracked_objects.h"

#include <limits.h>
#include <stdlib.h>

#include "base/atomicops.h"
#include "base/base_switches.h"
#include "base/command_line.h"
#include "base/compiler_specific.h"
#include "base/debug/leak_annotations.h"
#include "base/logging.h"
#include "base/process/process_handle.h"
#include "base/strings/stringprintf.h"
#include "base/third_party/valgrind/memcheck.h"
#include "base/threading/worker_pool.h"
#include "base/tracking_info.h"
#include "build/build_config.h"

using base::TimeDelta;

namespace base {
class TimeDelta;
}

namespace tracked_objects {

namespace {
// When ThreadData is first initialized, should we start in an ACTIVE state to
// record all of the startup-time tasks, or should we start up DEACTIVATED, so
// that we only record after parsing the command line flag --enable-tracking.
// Note that the flag may force either state, so this really controls only the
// period of time up until that flag is parsed.  If there is no flag seen, then
// this state may prevail for much or all of the process lifetime.
const ThreadData::Status kInitialStartupState = ThreadData::PROFILING_ACTIVE;

// Possible states of the profiler timing enabledness.
enum {
  UNDEFINED_TIMING,
  ENABLED_TIMING,
  DISABLED_TIMING,
};

// State of the profiler timing enabledness.
base::subtle::Atomic32 g_profiler_timing_enabled = UNDEFINED_TIMING;

// Returns whether profiler timing is enabled.  The default is true, but this
// may be overridden by a command-line flag.  Some platforms may
// programmatically set this command-line flag to the "off" value if it's not
// specified.
// This in turn can be overridden by explicitly calling
// ThreadData::EnableProfilerTiming, say, based on a field trial.
inline bool IsProfilerTimingEnabled() {
  // |g_profiler_timing_enabled| is read without a barrier because multiple
  // initialization is not a problem, while a barrier would be relatively
  // costly given that this method is sometimes called in a tight loop.
  base::subtle::Atomic32 current_timing_enabled =
      base::subtle::NoBarrier_Load(&g_profiler_timing_enabled);
  if (current_timing_enabled == UNDEFINED_TIMING) {
    if (!base::CommandLine::InitializedForCurrentProcess())
      return true;
    current_timing_enabled =
        (base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
             switches::kProfilerTiming) ==
         switches::kProfilerTimingDisabledValue)
            ? DISABLED_TIMING
            : ENABLED_TIMING;
    base::subtle::NoBarrier_Store(&g_profiler_timing_enabled,
                                  current_timing_enabled);
  }
  return current_timing_enabled == ENABLED_TIMING;
}

}  // namespace

//------------------------------------------------------------------------------
// DeathData tallies durations when a death takes place.

DeathData::DeathData()
    : count_(0),
      sample_probability_count_(0),
      run_duration_sum_(0),
      queue_duration_sum_(0),
      run_duration_max_(0),
      queue_duration_max_(0),
      run_duration_sample_(0),
      queue_duration_sample_(0),
      last_phase_snapshot_(nullptr) {
}

DeathData::DeathData(const DeathData& other)
    : count_(other.count_),
      sample_probability_count_(other.sample_probability_count_),
      run_duration_sum_(other.run_duration_sum_),
      queue_duration_sum_(other.queue_duration_sum_),
      run_duration_max_(other.run_duration_max_),
      queue_duration_max_(other.queue_duration_max_),
      run_duration_sample_(other.run_duration_sample_),
      queue_duration_sample_(other.queue_duration_sample_),
      last_phase_snapshot_(nullptr) {
  // This constructor will be used by std::map when adding new DeathData values
  // to the map.  At that point, last_phase_snapshot_ is still NULL, so we don't
  // need to worry about ownership transfer.
  DCHECK(other.last_phase_snapshot_ == nullptr);
}

DeathData::~DeathData() {
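  // Free the singly-linked list of per-phase snapshots owned by this instance.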
  while (last_phase_snapshot_) {
    const DeathDataPhaseSnapshot* snapshot = last_phase_snapshot_;
    last_phase_snapshot_ = snapshot->prev;
    delete snapshot;
  }
}

// TODO(jar): I need to see if this macro to optimize branching is worth using.
//
// This macro has no branching, so it is surely fast, and is equivalent to:
//             if (assign_it)
//               target = source;
// We use a macro rather than a template to force this to inline.
// Related code for calculating max is discussed on the web.
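// The trick: -static_cast<int32_t>(assign_it) is 0 when |assign_it| is false
// and an all-ones mask (two's-complement -1) when it is true, so the XOR
// either leaves |target| unchanged or rewrites it to |source|.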
#define CONDITIONAL_ASSIGN(assign_it, target, source) \
  ((target) ^= ((target) ^ (source)) & -static_cast<int32_t>(assign_it))

void DeathData::RecordDeath(const int32_t queue_duration,
                            const int32_t run_duration,
                            const uint32_t random_number) {
  // We'll just clamp at INT_MAX, but we should note this in the UI as such.
  if (count_ < INT_MAX)
    base::subtle::NoBarrier_Store(&count_, count_ + 1);

  int sample_probability_count =
      base::subtle::NoBarrier_Load(&sample_probability_count_);
  if (sample_probability_count < INT_MAX)
    ++sample_probability_count;
  base::subtle::NoBarrier_Store(&sample_probability_count_,
                                sample_probability_count);

  base::subtle::NoBarrier_Store(&queue_duration_sum_,
                                queue_duration_sum_ + queue_duration);
  base::subtle::NoBarrier_Store(&run_duration_sum_,
                                run_duration_sum_ + run_duration);

  if (queue_duration_max() < queue_duration)
    base::subtle::NoBarrier_Store(&queue_duration_max_, queue_duration);
  if (run_duration_max() < run_duration)
    base::subtle::NoBarrier_Store(&run_duration_max_, run_duration);

  // Take a uniformly distributed sample over all durations ever supplied during
  // the current profiling phase.
  // The probability that we (instead) use this new sample is
  // 1/sample_probability_count_. This results in a completely uniform selection
  // of the sample (at least when we don't clamp sample_probability_count_...
  // but that should be inconsequentially likely).  We ignore the fact that we
  // correlated our selection of a sample to the run and queue times (i.e., we
  // used them to generate random_number).
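  // (This is effectively reservoir sampling with a reservoir of size one: the
  // i-th recorded value replaces the stored sample with probability 1/i, so
  // every value recorded in the phase is retained with equal probability.)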
  CHECK_GT(sample_probability_count, 0);
  if (0 == (random_number % sample_probability_count)) {
    base::subtle::NoBarrier_Store(&queue_duration_sample_, queue_duration);
    base::subtle::NoBarrier_Store(&run_duration_sample_, run_duration);
  }
}

void DeathData::OnProfilingPhaseCompleted(int profiling_phase) {
  // Snapshotting and storing current state.
  last_phase_snapshot_ = new DeathDataPhaseSnapshot(
      profiling_phase, count(), run_duration_sum(), run_duration_max(),
      run_duration_sample(), queue_duration_sum(), queue_duration_max(),
      queue_duration_sample(), last_phase_snapshot_);

  // Not touching fields for which a delta can be computed by comparing with a
  // snapshot from the previous phase. Resetting other fields.  Sample values
  // will be reset upon next death recording because sample_probability_count_
  // is set to 0.
  // We avoid resetting to 0 in favor of deltas whenever possible.  The reason
  // is that for incrementable fields, resetting to 0 from the snapshot thread
  // potentially in parallel with incrementing in the death thread may result in
  // significant data corruption that has a potential to grow with time.  Not
  // resetting incrementable fields and using deltas will cause any
  // off-by-little corruptions to be likely fixed at the next snapshot.
  // The max values are not incrementable, and cannot be deduced using deltas
  // for a given phase. Hence, we have to reset them to 0.  But the potential
  // damage is limited to getting the previous phase's max to apply for the next
  // phase, and the error doesn't have a potential to keep growing with new
  // resets.
  // sample_probability_count_ is incrementable, but must be reset to 0 at the
  // phase end, so that we start a new uniformly randomized sample selection
  // after the reset. These fields are updated using atomics. However, race
  // conditions are possible since these are updated individually and not
  // together atomically, resulting in the values being mutually inconsistent.
  // The damage is limited to selecting a wrong sample, which is not something
  // that can cause accumulating or cascading effects.
  // If there were no inconsistencies caused by race conditions, we never send a
  // sample for the previous phase in the next phase's snapshot because
  // ThreadData::SnapshotExecutedTasks doesn't send deltas with 0 count.
  base::subtle::NoBarrier_Store(&sample_probability_count_, 0);
  base::subtle::NoBarrier_Store(&run_duration_max_, 0);
  base::subtle::NoBarrier_Store(&queue_duration_max_, 0);
}

//------------------------------------------------------------------------------
DeathDataSnapshot::DeathDataSnapshot()
    : count(-1),
      run_duration_sum(-1),
      run_duration_max(-1),
      run_duration_sample(-1),
      queue_duration_sum(-1),
      queue_duration_max(-1),
      queue_duration_sample(-1) {
}

DeathDataSnapshot::DeathDataSnapshot(int count,
                                     int32_t run_duration_sum,
                                     int32_t run_duration_max,
                                     int32_t run_duration_sample,
                                     int32_t queue_duration_sum,
                                     int32_t queue_duration_max,
                                     int32_t queue_duration_sample)
    : count(count),
      run_duration_sum(run_duration_sum),
      run_duration_max(run_duration_max),
      run_duration_sample(run_duration_sample),
      queue_duration_sum(queue_duration_sum),
      queue_duration_max(queue_duration_max),
      queue_duration_sample(queue_duration_sample) {}

DeathDataSnapshot::~DeathDataSnapshot() {
}

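// Note that the max and sample fields are returned as-is rather than as
// deltas: DeathData resets the maxima at each phase boundary, and the samples
// are overwritten on the next recorded death (see
// DeathData::OnProfilingPhaseCompleted), so the newer values already pertain
// to the current phase.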
DeathDataSnapshot DeathDataSnapshot::Delta(
    const DeathDataSnapshot& older) const {
  return DeathDataSnapshot(count - older.count,
                           run_duration_sum - older.run_duration_sum,
                           run_duration_max, run_duration_sample,
                           queue_duration_sum - older.queue_duration_sum,
                           queue_duration_max, queue_duration_sample);
}

//------------------------------------------------------------------------------
BirthOnThread::BirthOnThread(const Location& location,
                             const ThreadData& current)
    : location_(location),
      birth_thread_(&current) {
}

//------------------------------------------------------------------------------
BirthOnThreadSnapshot::BirthOnThreadSnapshot() {
}

BirthOnThreadSnapshot::BirthOnThreadSnapshot(const BirthOnThread& birth)
    : location(birth.location()),
      thread_name(birth.birth_thread()->thread_name()) {
}

BirthOnThreadSnapshot::~BirthOnThreadSnapshot() {
}

//------------------------------------------------------------------------------
Births::Births(const Location& location, const ThreadData& current)
    : BirthOnThread(location, current),
      birth_count_(1) { }

int Births::birth_count() const { return birth_count_; }

void Births::RecordBirth() { ++birth_count_; }

//------------------------------------------------------------------------------
// ThreadData maintains the central data for all births and deaths on a single
// thread.

// TODO(jar): We should pull all these static vars together, into a struct, and
// optimize layout so that we benefit from locality of reference during accesses
// to them.

// static
ThreadData::NowFunction* ThreadData::now_function_for_testing_ = NULL;

// A TLS slot which points to the ThreadData instance for the current thread.
// We do a fake initialization here (zeroing out data), and then the real
// in-place construction happens when we call tls_index_.Initialize().
// static
base::ThreadLocalStorage::StaticSlot ThreadData::tls_index_ = TLS_INITIALIZER;

// static
int ThreadData::worker_thread_data_creation_count_ = 0;

// static
int ThreadData::cleanup_count_ = 0;

// static
int ThreadData::incarnation_counter_ = 0;

// static
ThreadData* ThreadData::all_thread_data_list_head_ = NULL;

// static
ThreadData* ThreadData::first_retired_worker_ = NULL;

// static
base::LazyInstance<base::Lock>::Leaky
    ThreadData::list_lock_ = LAZY_INSTANCE_INITIALIZER;

// static
base::subtle::Atomic32 ThreadData::status_ = ThreadData::UNINITIALIZED;

ThreadData::ThreadData(const std::string& suggested_name)
    : next_(NULL),
      next_retired_worker_(NULL),
      worker_thread_number_(0),
      incarnation_count_for_pool_(-1),
      current_stopwatch_(NULL) {
  DCHECK_GE(suggested_name.size(), 0u);
  thread_name_ = suggested_name;
  PushToHeadOfList();  // Which sets real incarnation_count_for_pool_.
}

ThreadData::ThreadData(int thread_number)
    : next_(NULL),
      next_retired_worker_(NULL),
      worker_thread_number_(thread_number),
      incarnation_count_for_pool_(-1),
      current_stopwatch_(NULL) {
  CHECK_GT(thread_number, 0);
  base::StringAppendF(&thread_name_, "WorkerThread-%d", thread_number);
  PushToHeadOfList();  // Which sets real incarnation_count_for_pool_.
}

ThreadData::~ThreadData() {
}

void ThreadData::PushToHeadOfList() {
  // Toss in a hint of randomness (atop the uninitialized value).
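  // |random_number_| is deliberately left uninitialized; the annotations below
  // tell Valgrind and MSan that reading its uninitialized bytes is intentional.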
  (void)VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(&random_number_,
                                                 sizeof(random_number_));
  MSAN_UNPOISON(&random_number_, sizeof(random_number_));
  random_number_ += static_cast<uint32_t>(this - static_cast<ThreadData*>(0));
  random_number_ ^= (Now() - TrackedTime()).InMilliseconds();

  DCHECK(!next_);
  base::AutoLock lock(*list_lock_.Pointer());
  incarnation_count_for_pool_ = incarnation_counter_;
  next_ = all_thread_data_list_head_;
  all_thread_data_list_head_ = this;
}

// static
ThreadData* ThreadData::first() {
  base::AutoLock lock(*list_lock_.Pointer());
  return all_thread_data_list_head_;
}

ThreadData* ThreadData::next() const { return next_; }

// static
void ThreadData::InitializeThreadContext(const std::string& suggested_name) {
  if (base::WorkerPool::RunsTasksOnCurrentThread())
    return;
  EnsureTlsInitialization();
  ThreadData* current_thread_data =
      reinterpret_cast<ThreadData*>(tls_index_.Get());
  if (current_thread_data)
    return;  // Browser tests instigate this.
  current_thread_data = new ThreadData(suggested_name);
  tls_index_.Set(current_thread_data);
}

// static
ThreadData* ThreadData::Get() {
  if (!tls_index_.initialized())
    return NULL;  // For unittests only.
  ThreadData* registered = reinterpret_cast<ThreadData*>(tls_index_.Get());
  if (registered)
    return registered;

  // We must be a worker thread, since we didn't pre-register.
  ThreadData* worker_thread_data = NULL;
  int worker_thread_number = 0;
  {
    base::AutoLock lock(*list_lock_.Pointer());
    if (first_retired_worker_) {
      worker_thread_data = first_retired_worker_;
      first_retired_worker_ = first_retired_worker_->next_retired_worker_;
      worker_thread_data->next_retired_worker_ = NULL;
    } else {
      worker_thread_number = ++worker_thread_data_creation_count_;
    }
  }

  // If we can't find a previously used instance, then we have to create one.
  if (!worker_thread_data) {
    DCHECK_GT(worker_thread_number, 0);
    worker_thread_data = new ThreadData(worker_thread_number);
  }
  DCHECK_GT(worker_thread_data->worker_thread_number_, 0);

  tls_index_.Set(worker_thread_data);
  return worker_thread_data;
}

// static
void ThreadData::OnThreadTermination(void* thread_data) {
  DCHECK(thread_data);  // TLS should *never* call us with a NULL.
  // We must NOT do any allocations during this callback.  There is a chance
  // that the allocator is no longer active on this thread.
  reinterpret_cast<ThreadData*>(thread_data)->OnThreadTerminationCleanup();
}

void ThreadData::OnThreadTerminationCleanup() {
  // The list_lock_ was created when we registered the callback, so it won't be
  // allocated here despite the lazy reference.
  base::AutoLock lock(*list_lock_.Pointer());
  if (incarnation_counter_ != incarnation_count_for_pool_)
    return;  // ThreadData was constructed in an earlier unit test.
  ++cleanup_count_;
  // Only worker threads need to be retired and reused.
  if (!worker_thread_number_) {
    return;
  }
  // We must NOT do any allocations during this callback.
  // Using the simple linked lists avoids all allocations.
  DCHECK_EQ(this->next_retired_worker_, reinterpret_cast<ThreadData*>(NULL));
  this->next_retired_worker_ = first_retired_worker_;
  first_retired_worker_ = this;
}

// static
void ThreadData::Snapshot(int current_profiling_phase,
                          ProcessDataSnapshot* process_data_snapshot) {
  // Get an unchanging copy of a ThreadData list.
  ThreadData* my_list = ThreadData::first();

  // Gather data serially.
  // This hackish approach *can* get some slightly corrupt tallies, as we are
  // grabbing values without the protection of a lock, but it has the advantage
  // of working even with threads that don't have message loops.  If a user
  // sees any strangeness, they can always just run their stats gathering a
  // second time.
  BirthCountMap birth_counts;
  for (ThreadData* thread_data = my_list; thread_data;
       thread_data = thread_data->next()) {
    thread_data->SnapshotExecutedTasks(current_profiling_phase,
                                       &process_data_snapshot->phased_snapshots,
                                       &birth_counts);
  }

  // Add births that are still active -- i.e. objects that have tallied a birth,
  // but have not yet tallied a matching death, and hence must be either
  // running, queued up, or being held in limbo for future posting.
  auto* current_phase_tasks =
      &process_data_snapshot->phased_snapshots[current_profiling_phase].tasks;
  for (const auto& birth_count : birth_counts) {
    if (birth_count.second > 0) {
      current_phase_tasks->push_back(
          TaskSnapshot(BirthOnThreadSnapshot(*birth_count.first),
                       DeathDataSnapshot(birth_count.second, 0, 0, 0, 0, 0, 0),
                       "Still_Alive"));
    }
  }
}

// static
void ThreadData::OnProfilingPhaseCompleted(int profiling_phase) {
  // Get an unchanging copy of a ThreadData list.
  ThreadData* my_list = ThreadData::first();

  // Add snapshots for all instances of death data in all threads serially.
  // This hackish approach *can* get some slightly corrupt tallies, as we are
  // grabbing values without the protection of a lock, but it has the advantage
  // of working even with threads that don't have message loops.  Any corruption
  // shouldn't cause "cascading damage" to anything else (in later phases).
  for (ThreadData* thread_data = my_list; thread_data;
       thread_data = thread_data->next()) {
    thread_data->OnProfilingPhaseCompletedOnThread(profiling_phase);
  }
}

Births* ThreadData::TallyABirth(const Location& location) {
  BirthMap::iterator it = birth_map_.find(location);
  Births* child;
  if (it != birth_map_.end()) {
    child = it->second;
485    child->RecordBirth();
486  } else {
487    child = new Births(location, *this);  // Leak this.
488    // Lock since the map may get relocated now, and other threads sometimes
489    // snapshot it (but they lock before copying it).
490    base::AutoLock lock(map_lock_);
491    birth_map_[location] = child;
492  }
493
494  return child;
495}
496
497void ThreadData::TallyADeath(const Births& births,
498                             int32_t queue_duration,
499                             const TaskStopwatch& stopwatch) {
500  int32_t run_duration = stopwatch.RunDurationMs();
501
502  // Stir in some randomness, plus add constant in case durations are zero.
503  const uint32_t kSomePrimeNumber = 2147483647;
504  random_number_ += queue_duration + run_duration + kSomePrimeNumber;
505  // An address is going to have some randomness to it as well ;-).
506  random_number_ ^=
507      static_cast<uint32_t>(&births - reinterpret_cast<Births*>(0));
508
509  DeathMap::iterator it = death_map_.find(&births);
510  DeathData* death_data;
511  if (it != death_map_.end()) {
512    death_data = &it->second;
513  } else {
514    base::AutoLock lock(map_lock_);  // Lock as the map may get relocated now.
515    death_data = &death_map_[&births];
516  }  // Release lock ASAP.
517  death_data->RecordDeath(queue_duration, run_duration, random_number_);
518}
519
520// static
521Births* ThreadData::TallyABirthIfActive(const Location& location) {
522  if (!TrackingStatus())
523    return NULL;
524  ThreadData* current_thread_data = Get();
525  if (!current_thread_data)
526    return NULL;
527  return current_thread_data->TallyABirth(location);
528}
529
530// static
531void ThreadData::TallyRunOnNamedThreadIfTracking(
532    const base::TrackingInfo& completed_task,
533    const TaskStopwatch& stopwatch) {
534  // Even if we have been DEACTIVATED, we will process any pending births so
535  // that our data structures (which counted the outstanding births) remain
536  // consistent.
537  const Births* births = completed_task.birth_tally;
538  if (!births)
539    return;
540  ThreadData* current_thread_data = stopwatch.GetThreadData();
541  if (!current_thread_data)
542    return;
543
544  // Watch out for a race where status_ is changing, and hence one or both
545  // of start_of_run or end_of_run is zero.  In that case, we didn't bother to
546  // get a time value since we "weren't tracking" and we were trying to be
547  // efficient by not calling for a genuine time value.  For simplicity, we'll
548  // use a default zero duration when we can't calculate a true value.
549  TrackedTime start_of_run = stopwatch.StartTime();
550  int32_t queue_duration = 0;
551  if (!start_of_run.is_null()) {
552    queue_duration = (start_of_run - completed_task.EffectiveTimePosted())
553        .InMilliseconds();
554  }
555  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
556}
557
558// static
559void ThreadData::TallyRunOnWorkerThreadIfTracking(
560    const Births* births,
561    const TrackedTime& time_posted,
562    const TaskStopwatch& stopwatch) {
563  // Even if we have been DEACTIVATED, we will process any pending births so
564  // that our data structures (which counted the outstanding births) remain
565  // consistent.
566  if (!births)
567    return;
568
569  // TODO(jar): Support the option to coalesce all worker-thread activity under
570  // one ThreadData instance that uses locks to protect *all* access.  This will
571  // reduce memory (making it provably bounded), but run incrementally slower
572  // (since we'll use locks on TallyABirth and TallyADeath).  The good news is
573  // that the locks on TallyADeath will be *after* the worker thread has run,
574  // and hence nothing will be waiting for the completion (...  besides some
  // other thread that might like to run).  Also, the worker threads' tasks are
  // generally longer, and hence the cost of the lock may perchance be amortized
  // over the long task's lifetime.
  ThreadData* current_thread_data = stopwatch.GetThreadData();
  if (!current_thread_data)
    return;

  TrackedTime start_of_run = stopwatch.StartTime();
  int32_t queue_duration = 0;
  if (!start_of_run.is_null()) {
    queue_duration = (start_of_run - time_posted).InMilliseconds();
  }
  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}

// static
void ThreadData::TallyRunInAScopedRegionIfTracking(
    const Births* births,
    const TaskStopwatch& stopwatch) {
  // Even if we have been DEACTIVATED, we will process any pending births so
  // that our data structures (which counted the outstanding births) remain
  // consistent.
  if (!births)
    return;

  ThreadData* current_thread_data = stopwatch.GetThreadData();
  if (!current_thread_data)
    return;

  int32_t queue_duration = 0;
  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}

void ThreadData::SnapshotExecutedTasks(
    int current_profiling_phase,
    PhasedProcessDataSnapshotMap* phased_snapshots,
    BirthCountMap* birth_counts) {
  // Get copy of data, so that the data will not change during the iterations
  // and processing.
  BirthMap birth_map;
  DeathsSnapshot deaths;
  SnapshotMaps(current_profiling_phase, &birth_map, &deaths);

  for (const auto& birth : birth_map) {
    (*birth_counts)[birth.second] += birth.second->birth_count();
  }

  for (const auto& death : deaths) {
    (*birth_counts)[death.first] -= death.first->birth_count();

    // For the current death data, walk through all its snapshots, starting from
    // the current one, then from the previous profiling phase etc., and for
    // each snapshot calculate the delta between the snapshot and the previous
    // phase, if any.  Store the deltas in the result.
    for (const DeathDataPhaseSnapshot* phase = &death.second; phase;
         phase = phase->prev) {
      const DeathDataSnapshot& death_data =
          phase->prev ? phase->death_data.Delta(phase->prev->death_data)
                      : phase->death_data;

      if (death_data.count > 0) {
        (*phased_snapshots)[phase->profiling_phase].tasks.push_back(
            TaskSnapshot(BirthOnThreadSnapshot(*death.first), death_data,
                         thread_name()));
      }
    }
  }
}

// This may be called from another thread.
void ThreadData::SnapshotMaps(int profiling_phase,
                              BirthMap* birth_map,
                              DeathsSnapshot* deaths) {
  base::AutoLock lock(map_lock_);

  for (const auto& birth : birth_map_)
    (*birth_map)[birth.first] = birth.second;

  for (const auto& death : death_map_) {
    deaths->push_back(std::make_pair(
        death.first,
        DeathDataPhaseSnapshot(profiling_phase, death.second.count(),
                               death.second.run_duration_sum(),
                               death.second.run_duration_max(),
                               death.second.run_duration_sample(),
                               death.second.queue_duration_sum(),
                               death.second.queue_duration_max(),
                               death.second.queue_duration_sample(),
                               death.second.last_phase_snapshot())));
  }
}

void ThreadData::OnProfilingPhaseCompletedOnThread(int profiling_phase) {
  base::AutoLock lock(map_lock_);

  for (auto& death : death_map_) {
    death.second.OnProfilingPhaseCompleted(profiling_phase);
  }
}

void ThreadData::EnsureTlsInitialization() {
  if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED)
    return;  // Someone else did the initialization.
  // Due to racy lazy initialization in tests, we'll need to recheck status_
  // after we acquire the lock.

  // Ensure that we don't double initialize tls.  We are called when single
  // threaded in the product, but some tests may be racy and lazy about our
  // initialization.
  base::AutoLock lock(*list_lock_.Pointer());
  if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED)
    return;  // Someone raced in here and beat us.

  // Perform the "real" TLS initialization now, and leave it intact through
  // process termination.
  if (!tls_index_.initialized()) {  // Testing may have initialized this.
    DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), UNINITIALIZED);
    tls_index_.Initialize(&ThreadData::OnThreadTermination);
    DCHECK(tls_index_.initialized());
  } else {
    // TLS was initialized for us earlier.
    DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), DORMANT_DURING_TESTS);
  }

  // Incarnation counter is only significant to testing, as it otherwise will
  // never again change in this process.
  ++incarnation_counter_;

  // The lock is not critical for setting status_, but it doesn't hurt.  It
  // also ensures that if we have a racy initialization, we'll bail as soon as
  // we get the lock earlier in this method.
  base::subtle::Release_Store(&status_, kInitialStartupState);
  DCHECK(base::subtle::NoBarrier_Load(&status_) != UNINITIALIZED);
}

// static
void ThreadData::InitializeAndSetTrackingStatus(Status status) {
  DCHECK_GE(status, DEACTIVATED);
  DCHECK_LE(status, PROFILING_ACTIVE);

  EnsureTlsInitialization();  // No-op if already initialized.

  if (status > DEACTIVATED)
    status = PROFILING_ACTIVE;
  base::subtle::Release_Store(&status_, status);
}

// static
ThreadData::Status ThreadData::status() {
  return static_cast<ThreadData::Status>(base::subtle::Acquire_Load(&status_));
}

// static
bool ThreadData::TrackingStatus() {
  return base::subtle::Acquire_Load(&status_) > DEACTIVATED;
}

// static
void ThreadData::EnableProfilerTiming() {
  base::subtle::NoBarrier_Store(&g_profiler_timing_enabled, ENABLED_TIMING);
}

// static
TrackedTime ThreadData::Now() {
  if (now_function_for_testing_)
    return TrackedTime::FromMilliseconds((*now_function_for_testing_)());
  if (IsProfilerTimingEnabled() && TrackingStatus())
    return TrackedTime::Now();
  return TrackedTime();  // Super fast when disabled, or not compiled.
}

// static
void ThreadData::EnsureCleanupWasCalled(int /*major_threads_shutdown_count*/) {
  base::AutoLock lock(*list_lock_.Pointer());
  if (worker_thread_data_creation_count_ == 0)
    return;  // We haven't really run much, and couldn't have leaked.

  // TODO(jar): until this is working on XP, don't run the real test.
#if 0
  // Verify that we've at least shut down/cleaned up the major named threads.
  // The caller should tell us how many thread shutdowns should have taken
  // place by now.
  CHECK_GT(cleanup_count_, major_threads_shutdown_count);
#endif
}

// static
void ThreadData::ShutdownSingleThreadedCleanup(bool leak) {
  // This is only called from test code, where we need to clean up so that
  // additional tests can be run.
  // We must be single threaded... but be careful anyway.
  InitializeAndSetTrackingStatus(DEACTIVATED);

  ThreadData* thread_data_list;
  {
    base::AutoLock lock(*list_lock_.Pointer());
    thread_data_list = all_thread_data_list_head_;
    all_thread_data_list_head_ = NULL;
    ++incarnation_counter_;
    // To be clean, break apart the retired worker list (though we leak them).
    while (first_retired_worker_) {
      ThreadData* worker = first_retired_worker_;
      CHECK_GT(worker->worker_thread_number_, 0);
      first_retired_worker_ = worker->next_retired_worker_;
      worker->next_retired_worker_ = NULL;
    }
  }

  // Put most global statics back in pristine shape.
  worker_thread_data_creation_count_ = 0;
  cleanup_count_ = 0;
  tls_index_.Set(NULL);
  // Almost UNINITIALIZED.
  base::subtle::Release_Store(&status_, DORMANT_DURING_TESTS);

  // To avoid any chance of racing in unit tests, which is the only place we
  // call this function, we may sometimes leak all the data structures we
  // recovered, as they may still be in use on threads from prior tests!
  if (leak) {
    ThreadData* thread_data = thread_data_list;
    while (thread_data) {
      ANNOTATE_LEAKING_OBJECT_PTR(thread_data);
      thread_data = thread_data->next();
    }
    return;
  }

  // When we want to clean up (on a single thread), here is what we do.

  // Do actual recursive delete in all ThreadData instances.
  while (thread_data_list) {
    ThreadData* next_thread_data = thread_data_list;
    thread_data_list = thread_data_list->next();

    for (BirthMap::iterator it = next_thread_data->birth_map_.begin();
         next_thread_data->birth_map_.end() != it; ++it)
      delete it->second;  // Delete the Birth Records.
    delete next_thread_data;  // Includes all Death Records.
  }
}

//------------------------------------------------------------------------------
TaskStopwatch::TaskStopwatch()
    : wallclock_duration_ms_(0),
      current_thread_data_(NULL),
      excluded_duration_ms_(0),
      parent_(NULL) {
#if DCHECK_IS_ON()
  state_ = CREATED;
  child_ = NULL;
#endif
}

TaskStopwatch::~TaskStopwatch() {
#if DCHECK_IS_ON()
  DCHECK(state_ != RUNNING);
  DCHECK(child_ == NULL);
#endif
}

void TaskStopwatch::Start() {
#if DCHECK_IS_ON()
  DCHECK(state_ == CREATED);
  state_ = RUNNING;
#endif

  start_time_ = ThreadData::Now();

  current_thread_data_ = ThreadData::Get();
  if (!current_thread_data_)
    return;

  parent_ = current_thread_data_->current_stopwatch_;
#if DCHECK_IS_ON()
  if (parent_) {
    DCHECK(parent_->state_ == RUNNING);
    DCHECK(parent_->child_ == NULL);
    parent_->child_ = this;
  }
#endif
  current_thread_data_->current_stopwatch_ = this;
}

void TaskStopwatch::Stop() {
  const TrackedTime end_time = ThreadData::Now();
#if DCHECK_IS_ON()
  DCHECK(state_ == RUNNING);
  state_ = STOPPED;
  DCHECK(child_ == NULL);
#endif

  if (!start_time_.is_null() && !end_time.is_null()) {
    wallclock_duration_ms_ = (end_time - start_time_).InMilliseconds();
  }

  if (!current_thread_data_)
    return;

  DCHECK(current_thread_data_->current_stopwatch_ == this);
  current_thread_data_->current_stopwatch_ = parent_;
  if (!parent_)
    return;

#if DCHECK_IS_ON()
  DCHECK(parent_->state_ == RUNNING);
  DCHECK(parent_->child_ == this);
  parent_->child_ = NULL;
#endif
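  // Credit this stopwatch's wall-clock time to the parent's excluded time, so
  // that the parent's RunDurationMs() reports only its own exclusive run time.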
  parent_->excluded_duration_ms_ += wallclock_duration_ms_;
  parent_ = NULL;
}

TrackedTime TaskStopwatch::StartTime() const {
#if DCHECK_IS_ON()
  DCHECK(state_ != CREATED);
#endif

  return start_time_;
}

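// Returns the run duration exclusive of nested stopwatches: total wall-clock
// time minus the time accumulated by child stopwatches (see Stop()).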
int32_t TaskStopwatch::RunDurationMs() const {
#if DCHECK_IS_ON()
  DCHECK(state_ == STOPPED);
#endif

  return wallclock_duration_ms_ - excluded_duration_ms_;
}

ThreadData* TaskStopwatch::GetThreadData() const {
#if DCHECK_IS_ON()
  DCHECK(state_ != CREATED);
#endif

  return current_thread_data_;
}

//------------------------------------------------------------------------------
// DeathDataPhaseSnapshot

DeathDataPhaseSnapshot::DeathDataPhaseSnapshot(
    int profiling_phase,
    int count,
    int32_t run_duration_sum,
    int32_t run_duration_max,
    int32_t run_duration_sample,
    int32_t queue_duration_sum,
    int32_t queue_duration_max,
    int32_t queue_duration_sample,
    const DeathDataPhaseSnapshot* prev)
    : profiling_phase(profiling_phase),
      death_data(count,
                 run_duration_sum,
                 run_duration_max,
                 run_duration_sample,
                 queue_duration_sum,
                 queue_duration_max,
                 queue_duration_sample),
      prev(prev) {}

//------------------------------------------------------------------------------
// TaskSnapshot

TaskSnapshot::TaskSnapshot() {
}

TaskSnapshot::TaskSnapshot(const BirthOnThreadSnapshot& birth,
                           const DeathDataSnapshot& death_data,
                           const std::string& death_thread_name)
    : birth(birth),
      death_data(death_data),
      death_thread_name(death_thread_name) {
}

TaskSnapshot::~TaskSnapshot() {
}

//------------------------------------------------------------------------------
// ProcessDataPhaseSnapshot

ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot() {
}

ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot(
    const ProcessDataPhaseSnapshot& other) = default;

ProcessDataPhaseSnapshot::~ProcessDataPhaseSnapshot() {
}

//------------------------------------------------------------------------------
// ProcessDataSnapshot

ProcessDataSnapshot::ProcessDataSnapshot()
#if !defined(OS_NACL)
    : process_id(base::GetCurrentProcId()) {
#else
    : process_id(base::kNullProcessId) {
#endif
}

ProcessDataSnapshot::ProcessDataSnapshot(const ProcessDataSnapshot& other) =
    default;

ProcessDataSnapshot::~ProcessDataSnapshot() {
}

}  // namespace tracked_objects