# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Timeline visualization for TensorFlow using Chrome Trace Format."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import copy
import json
import re

# The timeline target is usually imported as part of BUILD target
# "platform_test", which also includes the "platform" dependency.
# This is why the logging import here is okay.
from tensorflow.python.platform import tf_logging as logging


class AllocationMaximum(collections.namedtuple(
    'AllocationMaximum', ('timestamp', 'num_bytes', 'tensors'))):
  """Stores the maximum allocation for a given allocator within the timeline.

  Parameters:
    timestamp: `tensorflow::Env::NowMicros()` when this maximum was reached.
    num_bytes: the total memory used at this time.
    tensors: the set of tensors allocated at this time.
  """
  pass


class StepStatsAnalysis(collections.namedtuple(
    'StepStatsAnalysis', ('chrome_trace', 'allocator_maximums'))):
  """Stores the step stats analysis output.

  Parameters:
    chrome_trace: A '_ChromeTraceFormatter' containing the chrome trace
      analysis.
    allocator_maximums: A dict mapping allocator names to AllocationMaximum.
  """
  pass


class _ChromeTraceFormatter(object):
  """A helper class for generating traces in Chrome Trace Format."""

  def __init__(self, show_memory=False):
    """Constructs a new Chrome Trace formatter."""
    self._show_memory = show_memory
    self._events = []
    self._metadata = []

  def _create_event(self, ph, category, name, pid, tid, timestamp):
    """Creates a new Chrome Trace event.

    For details of the file format, see:
    https://github.com/catapult-project/catapult/blob/master/tracing/README.md

    Args:
      ph:  The type of event - usually a single character.
      category: The event category as a string.
      name:  The event name as a string.
      pid:  Identifier of the process generating this event as an integer.
      tid:  Identifier of the thread generating this event as an integer.
      timestamp:  The timestamp of this event as a long integer.

    Returns:
      A JSON compatible event object.
    """
    event = {}
    event['ph'] = ph
    event['cat'] = category
    event['name'] = name
    event['pid'] = pid
    event['tid'] = tid
    event['ts'] = timestamp
    return event
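
  # For reference, a complete-duration event assembled from _create_event plus
  # the extra fields added by emit_region below looks roughly like this
  # (values are illustrative only):
  #
  #   {'ph': 'X', 'cat': 'Op', 'name': 'MatMul', 'pid': 1, 'tid': 0,
  #    'ts': 1459205424000, 'dur': 42, 'args': {'name': 'matmul_1'}}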

  def emit_pid(self, name, pid):
    """Adds a process metadata event to the trace.

    Args:
      name:  The process name as a string.
      pid:  Identifier of the process as an integer.
    """
    event = {}
    event['name'] = 'process_name'
    event['ph'] = 'M'
    event['pid'] = pid
    event['args'] = {'name': name}
    self._metadata.append(event)

  def emit_tid(self, name, pid, tid):
    """Adds a thread metadata event to the trace.

    Args:
      name:  The thread name as a string.
      pid:  Identifier of the process as an integer.
      tid:  Identifier of the thread as an integer.
    """
    event = {}
    event['name'] = 'thread_name'
    event['ph'] = 'M'
    event['pid'] = pid
    event['tid'] = tid
    event['args'] = {'name': name}
    self._metadata.append(event)

  def emit_region(self, timestamp, duration, pid, tid, category, name, args):
    """Adds a region event to the trace.

    Args:
      timestamp:  The start timestamp of this region as a long integer.
      duration:  The duration of this region as a long integer.
      pid:  Identifier of the process generating this event as an integer.
      tid:  Identifier of the thread generating this event as an integer.
      category: The event category as a string.
      name:  The event name as a string.
      args:  A JSON-compatible dictionary of event arguments.
    """
    event = self._create_event('X', category, name, pid, tid, timestamp)
    event['dur'] = duration
    event['args'] = args
    self._events.append(event)

  def emit_obj_create(self, category, name, timestamp, pid, tid, object_id):
    """Adds an object creation event to the trace.

    Args:
      category: The event category as a string.
      name:  The event name as a string.
      timestamp:  The timestamp of this event as a long integer.
      pid:  Identifier of the process generating this event as an integer.
      tid:  Identifier of the thread generating this event as an integer.
      object_id: Identifier of the object as an integer.
    """
    event = self._create_event('N', category, name, pid, tid, timestamp)
    event['id'] = object_id
    self._events.append(event)

  def emit_obj_delete(self, category, name, timestamp, pid, tid, object_id):
    """Adds an object deletion event to the trace.

    Args:
      category: The event category as a string.
      name:  The event name as a string.
      timestamp:  The timestamp of this event as a long integer.
      pid:  Identifier of the process generating this event as an integer.
      tid:  Identifier of the thread generating this event as an integer.
      object_id: Identifier of the object as an integer.
    """
    event = self._create_event('D', category, name, pid, tid, timestamp)
    event['id'] = object_id
    self._events.append(event)

  def emit_obj_snapshot(self, category, name, timestamp, pid, tid, object_id,
                        snapshot):
    """Adds an object snapshot event to the trace.

    Args:
      category: The event category as a string.
      name:  The event name as a string.
      timestamp:  The timestamp of this event as a long integer.
      pid:  Identifier of the process generating this event as an integer.
      tid:  Identifier of the thread generating this event as an integer.
      object_id: Identifier of the object as an integer.
      snapshot:  A JSON-compatible representation of the object.
    """
    event = self._create_event('O', category, name, pid, tid, timestamp)
    event['id'] = object_id
    event['args'] = {'snapshot': snapshot}
    self._events.append(event)

  def emit_flow_start(self, name, timestamp, pid, tid, flow_id):
    """Adds a flow start event to the trace.

    When matched with a flow end event (with the same 'flow_id') this will
    cause the trace viewer to draw an arrow between the start and end events.

    Args:
      name:  The event name as a string.
      timestamp:  The timestamp of this event as a long integer.
      pid:  Identifier of the process generating this event as an integer.
      tid:  Identifier of the thread generating this event as an integer.
      flow_id: Identifier of the flow as an integer.
    """
    event = self._create_event('s', 'DataFlow', name, pid, tid, timestamp)
    event['id'] = flow_id
    self._events.append(event)

  def emit_flow_end(self, name, timestamp, pid, tid, flow_id):
    """Adds a flow end event to the trace.

    When matched with a flow start event (with the same 'flow_id') this will
    cause the trace viewer to draw an arrow between the start and end events.

    Args:
      name:  The event name as a string.
      timestamp:  The timestamp of this event as a long integer.
      pid:  Identifier of the process generating this event as an integer.
      tid:  Identifier of the thread generating this event as an integer.
      flow_id: Identifier of the flow as an integer.
    """
    event = self._create_event('t', 'DataFlow', name, pid, tid, timestamp)
    event['id'] = flow_id
    self._events.append(event)

  def emit_counter(self, category, name, pid, timestamp, counter, value):
    """Emits a record for a single counter.

    Args:
      category: The event category as a string.
      name:  The event name as a string.
      pid:  Identifier of the process generating this event as an integer.
      timestamp:  The timestamp of this event as a long integer.
      counter: Name of the counter as a string.
      value:  Value of the counter as an integer.
    """
    event = self._create_event('C', category, name, pid, 0, timestamp)
    event['args'] = {counter: value}
    self._events.append(event)

  def emit_counters(self, category, name, pid, timestamp, counters):
    """Emits a counter record for the dictionary 'counters'.

    Args:
      category: The event category as a string.
      name:  The event name as a string.
      pid:  Identifier of the process generating this event as an integer.
      timestamp:  The timestamp of this event as a long integer.
      counters: Dictionary of counter values.
    """
    event = self._create_event('C', category, name, pid, 0, timestamp)
    event['args'] = counters.copy()
    self._events.append(event)
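
  # As a rough illustration (hypothetical values), emit_counter('Memory',
  # 'cpu', 9, 1000, 'cpu', 2048) appends an event of the form
  #   {'ph': 'C', 'cat': 'Memory', 'name': 'cpu', 'pid': 9, 'tid': 0,
  #    'ts': 1000, 'args': {'cpu': 2048}}
  # which the trace viewer renders as a counter track.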

  def format_to_string(self, pretty=False):
    """Formats the chrome trace to a string.

    Args:
      pretty: (Optional.)  If True, produce human-readable JSON output.

    Returns:
      A JSON-formatted string in Chrome Trace format.
    """
    trace = {}
    trace['traceEvents'] = self._metadata + self._events
    if pretty:
      return json.dumps(trace, indent=4, separators=(',', ': '))
    else:
      return json.dumps(trace, separators=(',', ':'))
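
  # The resulting string is a JSON object of the form
  #   {"traceEvents": [ ...metadata events..., ...timing events... ]}
  # and can be loaded directly into Chrome's chrome://tracing viewer for
  # inspection.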


class _TensorTracker(object):
  """An internal class to track the lifetime of a Tensor."""

  def __init__(self, name, object_id, timestamp, pid, allocator, num_bytes):
    """Creates an object to track tensor references.

    This class is not thread safe and is intended only for internal use by
    the 'Timeline' class in this file.

    Args:
      name:  The name of the Tensor as a string.
      object_id:  Chrome Trace object identifier assigned for this Tensor.
      timestamp:  The creation timestamp of this event as a long integer.
      pid:  Process identifier of the associated device, as an integer.
      allocator:  Name of the allocator used to create the Tensor.
      num_bytes:  Number of bytes allocated (long integer).

    Returns:
      A '_TensorTracker' object.
    """
    self._name = name
    self._pid = pid
    self._object_id = object_id
    self._create_time = timestamp
    self._allocator = allocator
    self._num_bytes = num_bytes
    self._ref_times = []
    self._unref_times = []

  @property
  def name(self):
    """Name of this tensor."""
    return self._name

  @property
  def pid(self):
    """ID of the process which created this tensor (an integer)."""
    return self._pid

  @property
  def create_time(self):
    """Timestamp when this tensor was created (long integer)."""
    return self._create_time

  @property
  def object_id(self):
    """Returns the object identifier of this tensor (integer)."""
    return self._object_id

  @property
  def num_bytes(self):
    """Size of this tensor in bytes (long integer)."""
    return self._num_bytes

  @property
  def allocator(self):
    """Name of the allocator used to create this tensor (string)."""
    return self._allocator

  @property
  def last_unref(self):
    """Last unreference timestamp of this tensor (long integer)."""
    return max(self._unref_times)

  def add_ref(self, timestamp):
    """Adds a reference to this tensor with the specified timestamp.

    Args:
      timestamp:  Timestamp of object reference as an integer.
    """
    self._ref_times.append(timestamp)

  def add_unref(self, timestamp):
    """Adds an unref to this tensor with the specified timestamp.

    Args:
      timestamp:  Timestamp of object unreference as an integer.
    """
    self._unref_times.append(timestamp)
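
  # Minimal sketch of the intended bookkeeping (hypothetical timestamps):
  #
  #   t = _TensorTracker('add:0', 0, 10, pid=1, allocator='cpu', num_bytes=16)
  #   t.add_unref(25)
  #   t.add_unref(40)
  #   t.last_unref  # == 40, i.e. the point at which the tensor is considered
  #                 # dead for the timeline's memory accounting.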


class Timeline(object):
  """A class for visualizing execution timelines of TensorFlow steps."""

  def __init__(self, step_stats, graph=None):
    """Constructs a new Timeline.

    A 'Timeline' is used for visualizing the execution of a TensorFlow
    computation.  It shows the timings and concurrency of execution at
    the granularity of TensorFlow Ops.
    This class is not thread safe.

    Args:
      step_stats: The 'StepStats' proto recording execution times.
      graph: (Optional) The 'Graph' that was executed.
    """

    self._step_stats = step_stats
    self._graph = graph
    self._chrome_trace = _ChromeTraceFormatter()
    self._next_pid = 0
    self._device_pids = {}  # device name -> pid for compute activity.
    self._tensor_pids = {}  # device name -> pid for tensors.
    self._tensors = {}  # tensor_name -> _TensorTracker
    self._next_flow_id = 0
    self._flow_starts = {}  # tensor_name -> (timestamp, pid, tid)
    self._alloc_times = {}  # tensor_name -> (time, allocator, size)
    self._allocator_maximums = {}  # allocator name -> AllocationMaximum

  def _alloc_pid(self):
    """Allocate a process Id."""
    pid = self._next_pid
    self._next_pid += 1
    return pid

  def _alloc_flow_id(self):
    """Allocate a flow Id."""
    flow_id = self._next_flow_id
    self._next_flow_id += 1
    return flow_id

  def _parse_op_label(self, label):
    """Parses the fields in a node timeline label."""
    # Expects labels of the form: name = op(arg, arg, ...).
    match = re.match(r'(.*) = (.*)\((.*)\)', label)
    if match is None:
      return 'unknown', 'unknown', []
    nn, op, inputs = match.groups()
    if not inputs:
      inputs = []
    else:
      inputs = inputs.split(', ')
    return nn, op, inputs
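
  # For example (illustrative label), a timeline_label of
  #   'add_1 = Add(input_a, input_b)'
  # parses to ('add_1', 'Add', ['input_a', 'input_b']), while a label that
  # does not match the pattern yields ('unknown', 'unknown', []).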

  def _assign_lanes(self):
    """Assigns non-overlapping lanes for the activities on each device."""
    for device_stats in self._step_stats.dev_stats:
      # TODO(pbar): Genuine thread IDs in NodeExecStats might be helpful.
      lanes = [0]
      for ns in device_stats.node_stats:
        l = -1
        for (i, lts) in enumerate(lanes):
          if ns.all_start_micros > lts:
            l = i
            lanes[l] = ns.all_start_micros + ns.all_end_rel_micros
            break
        if l < 0:
          l = len(lanes)
          lanes.append(ns.all_start_micros + ns.all_end_rel_micros)
        ns.thread_id = l
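
  # Sketch of the assignment (hypothetical times, in microseconds): an op
  # spanning [5, 15) takes lane 0; an op spanning [8, 12) overlaps it and is
  # pushed to a new lane 1; an op starting at 20 begins after lane 0 is free
  # and reuses lane 0.  The lane index is stored as the node's thread_id.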

  def _emit_op(self, nodestats, pid, is_gputrace):
    """Generates a Chrome Trace event to show Op execution.

    Args:
      nodestats: The 'NodeExecStats' proto recording op execution.
      pid: The pid assigned for the device where this op ran.
      is_gputrace: If True then this op came from the GPUTracer.
    """
    node_name = nodestats.node_name
    start = nodestats.all_start_micros
    duration = nodestats.all_end_rel_micros
    tid = nodestats.thread_id
    inputs = []
    if is_gputrace:
      # Node names should always have the form 'name:op'.
      fields = node_name.split(':') + ['unknown']
      node_name, op = fields[:2]
    elif node_name == 'RecvTensor':
      # RPC tracing does not use the standard timeline_label format.
      op = 'RecvTensor'
    else:
      _, op, inputs = self._parse_op_label(nodestats.timeline_label)
    args = {'name': node_name, 'op': op}
    for i, iname in enumerate(inputs):
      args['input%d' % i] = iname
    self._chrome_trace.emit_region(start, duration, pid, tid, 'Op', op, args)
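
  # For a (hypothetical) label 'add_1 = Add(x, y)' this emits a complete
  # event in the 'Op' category named 'Add', with
  #   args == {'name': 'add_1', 'op': 'Add', 'input0': 'x', 'input1': 'y'}
  # so each input edge can be recovered from the trace viewer's detail pane.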

  def _emit_tensor_snapshot(self, tensor, timestamp, pid, tid, value):
    """Generate Chrome Trace snapshot event for a computed Tensor.

    Args:
      tensor: A '_TensorTracker' object.
      timestamp:  The timestamp of this snapshot as a long integer.
      pid: The pid assigned for showing the device where this op ran.
      tid: The tid of the thread computing the tensor snapshot.
      value: A JSON-compliant snapshot of the object.
    """
    desc = str(value.tensor_description).replace('"', '')
    snapshot = {'tensor_description': desc}
    self._chrome_trace.emit_obj_snapshot('Tensor', tensor.name, timestamp, pid,
                                         tid, tensor.object_id, snapshot)

  def _produce_tensor(self, name, timestamp, tensors_pid, allocator, num_bytes):
    """Creates and registers a '_TensorTracker' for the named tensor."""
    object_id = len(self._tensors)
    tensor = _TensorTracker(name, object_id, timestamp, tensors_pid, allocator,
                            num_bytes)
    self._tensors[name] = tensor
    return tensor

  def _is_gputrace_device(self, device_name):
    """Returns true if this device is part of the GPUTracer logging."""
    return '/stream:' in device_name or '/memcpy' in device_name
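
  # The GPUTracer reports its activity on pseudo-devices whose names contain
  # '/stream:' or '/memcpy' (for example, something like '/gpu:0/stream:all';
  # the exact form is illustrative here), so a simple substring test is
  # sufficient to tell them apart from ordinary compute devices.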

  def _allocate_pids(self):
    """Allocate fake process ids for each device in the StepStats."""
    self._allocators_pid = self._alloc_pid()
    self._chrome_trace.emit_pid('Allocators', self._allocators_pid)

    # Add processes in the Chrome trace to show compute and data activity.
    for dev_stats in self._step_stats.dev_stats:
      device_pid = self._alloc_pid()
      self._device_pids[dev_stats.device] = device_pid
      tensors_pid = self._alloc_pid()
      self._tensor_pids[dev_stats.device] = tensors_pid
      self._chrome_trace.emit_pid(dev_stats.device + ' Compute', device_pid)
      self._chrome_trace.emit_pid(dev_stats.device + ' Tensors', tensors_pid)

  def _analyze_tensors(self, show_memory):
    """Analyze tensor references to track dataflow."""
    for dev_stats in self._step_stats.dev_stats:
      device_pid = self._device_pids[dev_stats.device]
      tensors_pid = self._tensor_pids[dev_stats.device]
      for node_stats in dev_stats.node_stats:
        tid = node_stats.thread_id
        node_name = node_stats.node_name
        start_time = node_stats.all_start_micros
        end_time = node_stats.all_start_micros + node_stats.all_end_rel_micros
        for index, output in enumerate(node_stats.output):
          if index:
            output_name = '%s:%d' % (node_name, index)
          else:
            output_name = node_name

          allocation = output.tensor_description.allocation_description
          num_bytes = allocation.requested_bytes
          allocator_name = allocation.allocator_name
          tensor = self._produce_tensor(output_name, start_time, tensors_pid,
                                        allocator_name, num_bytes)
          tensor.add_ref(start_time)
          tensor.add_unref(end_time)
          self._flow_starts[output_name] = (end_time, device_pid, tid)

          if show_memory:
            self._chrome_trace.emit_obj_create('Tensor', output_name,
                                               start_time, tensors_pid, tid,
                                               tensor.object_id)
            self._emit_tensor_snapshot(tensor, end_time - 1, tensors_pid, tid,
                                       output)
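
  # Naming follows TensorFlow's output-slot convention: the first output of a
  # node such as 'conv1' (name chosen here only for illustration) is tracked
  # simply as 'conv1', while its second output is tracked as 'conv1:1'
  # (node name, colon, output index).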

  def _show_compute(self, show_dataflow):
    """Visualize the computation activity."""
    for dev_stats in self._step_stats.dev_stats:
      device_name = dev_stats.device
      device_pid = self._device_pids[device_name]
      is_gputrace = self._is_gputrace_device(device_name)

      for node_stats in dev_stats.node_stats:
        tid = node_stats.thread_id
        start_time = node_stats.all_start_micros
        end_time = node_stats.all_start_micros + node_stats.all_end_rel_micros
        self._emit_op(node_stats, device_pid, is_gputrace)

        if is_gputrace or node_stats.node_name == 'RecvTensor':
          continue

        _, _, inputs = self._parse_op_label(node_stats.timeline_label)
        for input_name in inputs:
          if input_name not in self._tensors:
            # This can happen when partitioning has inserted a Send/Recv.
            # We remove the numeric suffix so that the dataflow appears to
            # come from the original node.  Ideally, the StepStats would
            # contain logging for the Send and Recv nodes.
            index = input_name.rfind('/_')
            if index > 0:
              input_name = input_name[:index]

          if input_name in self._tensors:
            tensor = self._tensors[input_name]
            tensor.add_ref(start_time)
            tensor.add_unref(end_time - 1)

            if show_dataflow:
              # We use a different flow ID for every graph edge.
              create_time, create_pid, create_tid = self._flow_starts[
                  input_name]
              # Don't add flows when producer and consumer ops are on the same
              # pid/tid since the horizontal arrows clutter the visualization.
              if create_pid != device_pid or create_tid != tid:
                flow_id = self._alloc_flow_id()
                self._chrome_trace.emit_flow_start(input_name, create_time,
                                                   create_pid, create_tid,
                                                   flow_id)
                self._chrome_trace.emit_flow_end(input_name, start_time,
                                                 device_pid, tid, flow_id)
          else:
            logging.vlog(1, 'Can\'t find tensor %s - removed by CSE?',
                         input_name)
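
  # The suffix stripping above means that an input rewritten by graph
  # partitioning, e.g. a (hypothetical) name like 'conv1/_42', is attributed
  # back to the producing node 'conv1', so the dataflow arrow still points at
  # the original op rather than at the inserted Send/Recv pair.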

  def _show_memory_counters(self):
    """Produce a counter series for each memory allocator."""
    # Iterate over all tensor trackers to build a list of allocations and
    # frees for each allocator. Then sort the lists and emit a cumulative
    # counter series for each allocator.
    allocations = {}
    for name in self._tensors:
      tensor = self._tensors[name]
      self._chrome_trace.emit_obj_delete('Tensor', name, tensor.last_unref,
                                         tensor.pid, 0, tensor.object_id)
      allocator = tensor.allocator
      if allocator not in allocations:
        allocations[allocator] = []
      num_bytes = tensor.num_bytes
      allocations[allocator].append((tensor.create_time, num_bytes, name))
      allocations[allocator].append((tensor.last_unref, -num_bytes, name))

    alloc_maxes = {}

    # Generate a counter series showing total allocations for each allocator.
    for allocator in allocations:
      alloc_list = allocations[allocator]
      alloc_list.sort()
      total_bytes = 0
      alloc_tensor_set = set()
      alloc_maxes[allocator] = AllocationMaximum(
          timestamp=0, num_bytes=0, tensors=set())
      for time, num_bytes, name in alloc_list:
        total_bytes += num_bytes
        if num_bytes < 0:
          alloc_tensor_set.discard(name)
        else:
          alloc_tensor_set.add(name)

        if total_bytes > alloc_maxes[allocator].num_bytes:
          alloc_maxes[allocator] = AllocationMaximum(
              timestamp=time,
              num_bytes=total_bytes,
              tensors=copy.deepcopy(alloc_tensor_set))

        self._chrome_trace.emit_counter('Memory', allocator,
                                        self._allocators_pid, time, allocator,
                                        total_bytes)
    self._allocator_maximums = alloc_maxes
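
  # Worked example (hypothetical tensors on one allocator): tensor A of 100
  # bytes alive over [1, 5] and tensor B of 50 bytes alive over [3, 8] yield
  # the sorted delta list [(1, +100), (3, +50), (5, -100), (8, -50)], hence
  # counter values 100, 150, 50, 0 and an AllocationMaximum of 150 bytes at
  # time 3 with tensors {A, B}.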

  def analyze_step_stats(self, show_dataflow=True, show_memory=True):
    """Analyzes the step stats and constructs the Chrome trace.

    Args:
      show_dataflow: (Optional.) If True, add flow events to the trace
        connecting producers and consumers of tensors.
      show_memory: (Optional.) If True, add object snapshot events and memory
        counters to the trace.

    Returns:
      A 'StepStatsAnalysis' containing the chrome trace and the per-allocator
      allocation maximums.
    """
    self._allocate_pids()
    self._assign_lanes()
    self._analyze_tensors(show_memory)
    self._show_compute(show_dataflow)
    if show_memory:
      self._show_memory_counters()
    return StepStatsAnalysis(
        chrome_trace=self._chrome_trace,
        allocator_maximums=self._allocator_maximums)

  def generate_chrome_trace_format(self, show_dataflow=True, show_memory=False):
    """Produces a trace in Chrome Trace Format.

    Args:
      show_dataflow: (Optional.) If True, add flow events to the trace
        connecting producers and consumers of tensors.
      show_memory: (Optional.) If True, add object snapshot events to the trace
        showing the sizes and lifetimes of tensors.

    Returns:
      A JSON formatted string in Chrome Trace format.
    """
    step_stats_analysis = self.analyze_step_stats(
        show_dataflow=show_dataflow, show_memory=show_memory)

    return step_stats_analysis.chrome_trace.format_to_string(pretty=True)
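

# Example usage (a minimal sketch): StepStats are typically collected by
# running a Session step with full tracing enabled and then passed to this
# module.  The 'sess' and 'fetches' names below are placeholders.
#
#   from tensorflow.python.client import timeline
#
#   run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
#   run_metadata = tf.RunMetadata()
#   sess.run(fetches, options=run_options, run_metadata=run_metadata)
#
#   tl = timeline.Timeline(run_metadata.step_stats)
#   with open('timeline.json', 'w') as f:
#     f.write(tl.generate_chrome_trace_format(show_memory=True))
#
# The resulting 'timeline.json' file can then be loaded in chrome://tracing.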