1# Copyright 2016 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import json
6import os
7import re
8import subprocess
9from threading import Timer
10
11from py_trace_event import trace_time
12from telemetry.internal.platform import tracing_agent
13from tracing.trace_data import trace_data
14
15
16def _ParsePsProcessString(line):
17  """Parses a process line from the output of `ps`.
18
19  Example of `ps` command output:
20  '3.4 8.0 31887 31447 com.app.Webkit'
21  """
22  token_list = line.strip().split()
23  if len(token_list) < 5:
24    raise ValueError('Line has too few tokens: %s.' % token_list)
25
26  return {
27    'pCpu': float(token_list[0]),
28    'pMem': float(token_list[1]),
29    'pid': int(token_list[2]),
30    'ppid': int(token_list[3]),
31    'name': ' '.join(token_list[4:])
32  }
33
34
class ProcessCollector(object):
  """Base class for collectors that report per-process CPU and memory usage.

  Subclasses provide the raw per-process strings and the parser for them;
  GetProcesses() combines the two.
  """

  def _GetProcessesAsStrings(self):
    """Returns a list of strings, one per process, describing that process.

    Must be overridden by subclasses.
    """
    raise NotImplementedError

  # pylint: disable=unused-argument
  def _ParseProcessString(self, proc_string):
    """Parses one of the strings returned by _GetProcessesAsStrings().

    Must be overridden by subclasses.

    Args:
      proc_string: A single element of the list returned by
          _GetProcessesAsStrings().

    Returns:
      A dictionary with the keys 'pid' (an integer process ID), 'ppid' (an
      integer parent process ID), 'name' (a string for the process name),
      'pCpu' (a float for the percent CPU load incurred by the process), and
      'pMem' (a float for the percent memory load caused by the process).
    """
    raise NotImplementedError

  def Init(self):
    """Hook for any setup required before tracing starts.

    The base implementation does nothing.
    """

  def GetProcesses(self):
    """Fetches and parses information about the current processes.

    Returns:
      A list of dictionaries, one per string returned by
      _GetProcessesAsStrings(), each being the result of passing that string
      to _ParseProcessString().
    """
    parsed_processes = []
    for raw_process in self._GetProcessesAsStrings():
      parsed_processes.append(self._ParseProcessString(raw_process))
    return parsed_processes
71
72
class WindowsProcessCollector(ProcessCollector):
  """Class for collecting information about processes on Windows.

  Example of Windows command output:
  '3644      1724   chrome#1                 8           84497'
  '3644      832    chrome#2                 4           34872'
  """
  _GET_PERF_DATA_SHELL_COMMAND = [
    'wmic',
    'path', # Retrieve a WMI object from the following path.
    'Win32_PerfFormattedData_PerfProc_Process', # Contains process perf data.
    'get',
    'CreatingProcessID,IDProcess,Name,PercentProcessorTime,WorkingSet'
  ]

  _GET_COMMANDS_SHELL_COMMAND = [
    'wmic',
    'Process',
    'get',
    'CommandLine,ProcessID',
    # Formatting the result as a CSV means that if no CommandLine is available,
    # we can at least tell by the lack of data between commas.
    '/format:csv'
  ]

  _GET_PHYSICAL_MEMORY_BYTES_SHELL_COMMAND = [
    'wmic',
    'ComputerSystem',
    'get',
    'TotalPhysicalMemory'
  ]

  def __init__(self):
    # Total physical memory of the machine in bytes; filled in by Init().
    self._physicalMemoryBytes = None

  def Init(self):
    """Caches the physical memory size and warms up the perf data command."""
    if not self._physicalMemoryBytes:
      self._physicalMemoryBytes = self._GetPhysicalMemoryBytes()

    # The command to get the per-process perf data takes significantly longer
    # the first time that it's run (~10s, compared to ~60ms for subsequent
    # runs). In order to avoid having this affect tracing, we run it once ahead
    # of time.
    self._GetProcessesAsStrings()

  def GetProcesses(self):
    """Fetches the current processes, using full commands as names if known.

    Returns:
      A list of dictionaries as described in ProcessCollector.GetProcesses().
      For every process whose PID has a non-empty command line available, the
      'name' entry is replaced by that command line.
    """
    processes = super(WindowsProcessCollector, self).GetProcesses()

    # On Windows, the absolute minimal name of the process is given
    # (e.g. "python" for Telemetry). In order to make this more useful, we check
    # if a more descriptive command is available for each PID and use that
    # command if it is.
    pid_to_command_dict = self._GetPidToCommandDict()
    for process in processes:
      if process['pid'] in pid_to_command_dict:
        process['name'] = pid_to_command_dict[process['pid']]

    return processes

  def _GetPhysicalMemoryBytes(self):
    """Returns the number of bytes of physical memory on the computer."""
    raw_output = subprocess.check_output(
        self._GET_PHYSICAL_MEMORY_BYTES_SHELL_COMMAND)
    # The bytes of physical memory is on the second row (after the header row).
    return int(raw_output.strip().split('\n')[1])

  def _GetProcessesAsStrings(self):
    """Returns the rows of the perf data command output, one per process."""
    # Skip the header and total rows and strip the trailing newline.
    return subprocess.check_output(
        self._GET_PERF_DATA_SHELL_COMMAND).strip().split('\n')[2:]

  def _ParseProcessString(self, proc_string):
    """Parses one row of the perf data command output.

    Args:
      proc_string: A row holding, in order and separated by whitespace, the
          parent PID, the PID, the process name (which may contain spaces),
          the percent processor time, and the working set size in bytes.

    Returns:
      A dictionary with the keys 'ppid', 'pid', 'name', 'pCpu', and 'pMem', as
      described in ProcessCollector._ParseProcessString().

    Raises:
      ValueError: If the row has fewer than five whitespace-separated tokens.
    """
    assert self._physicalMemoryBytes, 'Must call Init() before using collector'

    token_list = proc_string.strip().split()
    if len(token_list) < 5:
      raise ValueError('Line has too few tokens: %s.' % token_list)

    # Process names are given in the form:
    #
    #   windowsUpdate
    #   Windows Explorer
    #   chrome#1
    #   chrome#2
    #
    # In order to match other platforms, where multiple processes can have the
    # same name and can be easily grouped based on that name, we strip any
    # pound sign and number.
    name = ' '.join(token_list[2:-2])
    name = re.sub(r'#[0-9]+$', '', name)
    # The working set size (roughly equivalent to the resident set size on Unix)
    # is given in bytes. In order to convert this to percent of physical memory
    # occupied by the process, we divide by the amount of total physical memory
    # on the machine.
    percent_memory = float(token_list[-1]) / self._physicalMemoryBytes * 100

    return {
      'ppid': int(token_list[0]),
      'pid': int(token_list[1]),
      'name': name,
      'pCpu': float(token_list[-2]),
      'pMem': percent_memory
    }

  def _GetPidToCommandDict(self):
    """Returns a dictionary from the PID of a process to the full command used
    to launch that process. If no full command is available for a given process,
    that process is omitted from the returned dictionary.

    Rows of the command output that can't be parsed are skipped.
    """
    # Skip the header row and strip the trailing newline.
    process_strings = subprocess.check_output(
        self._GET_COMMANDS_SHELL_COMMAND).strip().split('\n')[1:]
    command_by_pid = {}
    for process_string in process_strings:
      process_string = process_string.strip()
      try:
        command = self._ParseCommandString(process_string)
      except ValueError:
        # The full command is only used to make process names more
        # descriptive, so a single malformed row (e.g. an empty one, or a
        # fragment of a command line that contains a newline) shouldn't abort
        # the whole snapshot. The affected process keeps its short name.
        continue

      # Only return additional information about the command if it's available.
      if command['command']:
        command_by_pid[command['pid']] = command['command']

    return command_by_pid

  def _ParseCommandString(self, command_string):
    """Parses one CSV row of the command listing.

    Args:
      command_string: A row of the form '<hostname>,<command>,<pid>'. The
          command may be empty and may itself contain commas.

    Returns:
      A dictionary with the keys 'pid' (an integer process ID) and 'command'
      (the command string, which is empty if none was reported).

    Raises:
      ValueError: If the row doesn't have the expected form.
    """
    match = re.match(r'^([^,]+),(.*),([0-9]+)$', command_string)
    # re.match() returns None when the row doesn't fit the pattern; report that
    # with the same exception type the other parsers in this file use instead
    # of failing with an AttributeError on None.
    if match is None:
      raise ValueError('Unable to parse command string: %s.' % command_string)

    groups = match.groups()
    return {
      # Ignore groups[0]: it's the hostname.
      'pid': int(groups[2]),
      'command': groups[1]
    }
203
204
class LinuxProcessCollector(ProcessCollector):
  """Collects information about processes on Linux by running `ps`.

  Example of a row of the Linux command output:
  '3.4 8.0 31887 31447 com.app.Webkit'
  """
  _SHELL_COMMAND = [
    'ps',
    '-a', # Include processes that aren't session leaders.
    '-x', # List all processes, even those not owned by the user.
    '-o', # Show the output in the specified format.
    'pcpu,pmem,pid,ppid,cmd'
  ]

  def _GetProcessesAsStrings(self):
    """Runs `ps` and returns its output rows, one per process."""
    ps_output = subprocess.check_output(self._SHELL_COMMAND)
    # Stripping before splitting removes the trailing newline, so the split
    # doesn't produce an empty final row.
    rows = ps_output.strip().split('\n')
    # The first row is the column header rather than a process.
    return rows[1:]

  def _ParseProcessString(self, proc_string):
    """Parses one `ps` output row via _ParsePsProcessString()."""
    return _ParsePsProcessString(proc_string)
225
226
class MacProcessCollector(ProcessCollector):
  """Collects information about processes on Mac by running `ps`.

  Example of a row of the Mac command output:
  '3.4 8.0 31887 31447 com.app.Webkit'
  """

  _SHELL_COMMAND = [
    'ps',
    '-a', # Include all users' processes.
    '-ww', # Don't limit the length of each line.
    '-x', # Include processes that aren't associated with a terminal.
    '-o', # Show the output in the specified format.
    '%cpu %mem pid ppid command' # Put the command last to avoid truncation.
  ]

  def _GetProcessesAsStrings(self):
    """Runs `ps` and returns its output rows, one per process."""
    ps_output = subprocess.check_output(self._SHELL_COMMAND)
    # Stripping before splitting removes the trailing newline, so the split
    # doesn't produce an empty final row.
    rows = ps_output.strip().split('\n')
    # The first row is the column header rather than a process.
    return rows[1:]

  def _ParseProcessString(self, proc_string):
    """Parses one `ps` output row via _ParsePsProcessString()."""
    return _ParsePsProcessString(proc_string)
249
250
class CpuTracingAgent(tracing_agent.TracingAgent):
  """Tracing agent that periodically records a snapshot of running processes.

  While tracing is active, the process list is sampled through a
  platform-specific ProcessCollector on a recurring timer. The samples are
  emitted as trace events when the trace data is collected.
  """

  # Seconds to wait between two consecutive snapshots, keyed by OS name.
  _SNAPSHOT_INTERVAL_BY_OS = {
    # Sampling via wmic on Windows is about twice as expensive as sampling via
    # ps on Linux and Mac, so we halve the sampling frequency.
    'win': 2.0,
    'mac': 1.0,
    'linux': 1.0
  }

  def __init__(self, platform_backend):
    super(CpuTracingAgent, self).__init__(platform_backend)
    self._snapshot_ongoing = False
    # List of (processes, timestamp) tuples, one per snapshot taken.
    self._snapshots = []
    self._os_name = platform_backend.GetOSName()
    # Any OS name other than 'win' or 'mac' falls back to the Linux collector.
    collector_class = {
      'win': WindowsProcessCollector,
      'mac': MacProcessCollector,
    }.get(self._os_name, LinuxProcessCollector)
    self._collector = collector_class()

  @classmethod
  def IsSupported(cls, platform_backend):
    """Returns True if the backend's OS is one of Mac, Linux, or Windows."""
    return platform_backend.GetOSName() in ('mac', 'linux', 'win')

  def StartAgentTracing(self, config, timeout):
    """Starts taking snapshots if CPU tracing is enabled in the config.

    Args:
      config: Tracing config; only its enable_cpu_trace attribute is read.
      timeout: Not used by this agent.

    Returns:
      True if snapshotting was started, False if CPU tracing is disabled.
    """
    assert not self._snapshot_ongoing, (
           'Agent is already taking snapshots when tracing is started.')
    if config.enable_cpu_trace:
      self._collector.Init()
      self._snapshot_ongoing = True
      self._KeepTakingSnapshots()
      return True
    return False

  def _KeepTakingSnapshots(self):
    """Takes a snapshot and schedules the next one while tracing is ongoing.

    The delay before the next snapshot is the _SNAPSHOT_INTERVAL_BY_OS entry
    for the current OS. Once tracing has been stopped, this does nothing and
    schedules no further calls.
    """
    if self._snapshot_ongoing:
      # Assume CpuTracingAgent shares the same clock domain as telemetry
      processes = self._collector.GetProcesses()
      self._snapshots.append((processes, trace_time.Now()))
      seconds_until_next = self._SNAPSHOT_INTERVAL_BY_OS[self._os_name]
      next_snapshot_timer = Timer(seconds_until_next,
                                  self._KeepTakingSnapshots)
      next_snapshot_timer.start()

  def StopAgentTracing(self):
    """Stops the agent from taking any further snapshots."""
    assert self._snapshot_ongoing, (
           'Agent is not taking snapshots when tracing is stopped.')
    self._snapshot_ongoing = False

  def CollectAgentTraceData(self, trace_data_builder, timeout=None):
    """Adds the recorded snapshots, serialized as JSON, to the builder.

    Args:
      trace_data_builder: Builder whose AddTraceFor() receives the data under
          trace_data.CPU_TRACE_DATA.
      timeout: Not used by this agent.
    """
    assert not self._snapshot_ongoing, (
           'Agent is still taking snapshots when data is collected.')
    self._snapshot_ongoing = False
    snapshot_events = self._FormatSnapshotsData()
    serialized_events = json.dumps(snapshot_events)
    trace_data_builder.AddTraceFor(trace_data.CPU_TRACE_DATA,
                                   serialized_events)

  def _FormatSnapshotsData(self):
    """Format raw data into Object Event specified in Trace Format document.

    Returns:
      A list with one event dictionary per recorded snapshot. Each event
      carries the snapshot's timestamp, the PID of the current process, and
      the list of sampled processes.
    """
    agent_pid = os.getpid()
    events = []
    for processes, timestamp in self._snapshots:
      events.append({
        'name': 'CPUSnapshots',
        'ph': 'O',
        'id': '0x1000',
        'local': True,
        'ts': timestamp,
        'pid': agent_pid,
        'tid': None,
        'args': {
          'snapshot': {
            'processes': processes
          }
        }
      })
    return events
327