# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import json
import os
import re
import subprocess
from threading import Timer

from py_trace_event import trace_time
from telemetry.internal.platform import tracing_agent
from tracing.trace_data import trace_data


def _CheckOutputAsText(command):
  """Runs |command| and returns its standard output as a native string.

  subprocess.check_output() returns bytes on Python 3, which cannot be split
  on a str separator. The output is therefore decoded whenever it is not
  already a str; on Python 2 the output is returned untouched.

  Args:
    command: The command to run, as a list of arguments.

  Returns:
    The standard output of the command as a str.

  Raises:
    subprocess.CalledProcessError: If the command exits with a non-zero code.
  """
  output = subprocess.check_output(command)
  if not isinstance(output, str):
    # Replace undecodable bytes instead of raising so that a single oddly
    # encoded process name cannot abort a whole snapshot.
    output = output.decode('utf-8', 'replace')
  return output


def _ParsePsProcessString(line):
  """Parses a process line from the output of `ps`.

  Example of `ps` command output:
    '3.4 8.0 31887 31447 com.app.Webkit'

  Args:
    line: A single line of `ps` output with the columns %cpu, %mem, pid, ppid
        and command, in that order.

  Returns:
    A dictionary with the keys 'pCpu', 'pMem', 'pid', 'ppid' and 'name'.

  Raises:
    ValueError: If the line has fewer than five whitespace-separated tokens or
        if a numeric column cannot be converted.
  """
  token_list = line.strip().split()
  if len(token_list) < 5:
    raise ValueError('Line has too few tokens: %s.' % token_list)

  return {
      'pCpu': float(token_list[0]),
      'pMem': float(token_list[1]),
      'pid': int(token_list[2]),
      'ppid': int(token_list[3]),
      # The command may itself contain spaces, so it is everything that
      # follows the four numeric columns.
      'name': ' '.join(token_list[4:])
  }


class ProcessCollector(object):
  """Base class for the per-OS collectors of process information."""

  def _GetProcessesAsStrings(self):
    """Returns a list of strings, each of which contains info about a
    process.
    """
    raise NotImplementedError

  # pylint: disable=unused-argument
  def _ParseProcessString(self, proc_string):
    """Parses an individual process string returned by _GetProcessesAsStrings().

    Returns:
      A dictionary containing keys of 'pid' (an integer process ID), 'ppid' (an
      integer parent process ID), 'name' (a string for the process name), 'pCpu'
      (a float for the percent CPU load incurred by the process), and 'pMem' (a
      float for the percent memory load caused by the process).
    """
    raise NotImplementedError

  def Init(self):
    """Performs any required initialization before starting tracing."""
    pass

  def GetProcesses(self):
    """Fetches a snapshot of the processes reported by the collector.

    Returns:
      A list of dictionaries, each containing 'pid' (an integer process ID),
      'ppid' (an integer parent process ID), 'name' (a string for the process
      name), 'pCpu' (a float for the percent CPU load incurred by the process),
      and 'pMem' (a float for the percent memory load caused by the process).
    """
    proc_strings = self._GetProcessesAsStrings()
    return [
        self._ParseProcessString(proc_string) for proc_string in proc_strings
    ]


class WindowsProcessCollector(ProcessCollector):
  """Class for collecting information about processes on Windows.

  Example of Windows command output:
    '3644      1724   chrome#1                 8           84497'
    '3644      832    chrome#2                 4           34872'
  """
  _GET_PERF_DATA_SHELL_COMMAND = [
      'wmic',
      'path',  # Retrieve a WMI object from the following path.
      'Win32_PerfFormattedData_PerfProc_Process',  # Contains process perf data.
      'get',
      'CreatingProcessID,IDProcess,Name,PercentProcessorTime,WorkingSet'
  ]

  _GET_COMMANDS_SHELL_COMMAND = [
      'wmic',
      'Process',
      'get',
      'CommandLine,ProcessID',
      # Formatting the result as a CSV means that if no CommandLine is available,
      # we can at least tell by the lack of data between commas.
      '/format:csv'
  ]

  _GET_PHYSICAL_MEMORY_BYTES_SHELL_COMMAND = [
      'wmic',
      'ComputerSystem',
      'get',
      'TotalPhysicalMemory'
  ]

  def __init__(self):
    # Total physical memory in bytes; populated lazily by Init().
    self._physicalMemoryBytes = None

  def Init(self):
    """Caches the physical memory size and warms up the perf data command."""
    if not self._physicalMemoryBytes:
      self._physicalMemoryBytes = self._GetPhysicalMemoryBytes()

    # The command to get the per-process perf data takes significantly longer
    # the first time that it's run (~10s, compared to ~60ms for subsequent
    # runs). In order to avoid having this affect tracing, we run it once ahead
    # of time.
    self._GetProcessesAsStrings()

  def GetProcesses(self):
    """Fetches the processes, preferring full command lines as names.

    Returns:
      The list of process dictionaries produced by the base class, where
      'name' is replaced by the full command line whenever one is available
      for that PID.
    """
    processes = super(WindowsProcessCollector, self).GetProcesses()

    # On Windows, the absolute minimal name of the process is given
    # (e.g. "python" for Telemetry). In order to make this more useful, we check
    # if a more descriptive command is available for each PID and use that
    # command if it is.
    pid_to_command_dict = self._GetPidToCommandDict()
    for process in processes:
      if process['pid'] in pid_to_command_dict:
        process['name'] = pid_to_command_dict[process['pid']]

    return processes

  def _GetPhysicalMemoryBytes(self):
    """Returns the number of bytes of physical memory on the computer."""
    raw_output = _CheckOutputAsText(
        self._GET_PHYSICAL_MEMORY_BYTES_SHELL_COMMAND)
    # The bytes of physical memory is on the second row (after the header row).
    return int(raw_output.strip().split('\n')[1])

  def _GetProcessesAsStrings(self):
    # Skip the header and total rows and strip the trailing newline.
    return _CheckOutputAsText(
        self._GET_PERF_DATA_SHELL_COMMAND).strip().split('\n')[2:]

  def _ParseProcessString(self, proc_string):
    """Parses one row of the wmic perf data output.

    Args:
      proc_string: A row with the columns CreatingProcessID, IDProcess, Name,
          PercentProcessorTime and WorkingSet, in that order.

    Returns:
      A dictionary with the keys 'ppid', 'pid', 'name', 'pCpu' and 'pMem'.

    Raises:
      ValueError: If the row has fewer than five whitespace-separated tokens
          or if a numeric column cannot be converted.
    """
    assert self._physicalMemoryBytes, 'Must call Init() before using collector'

    token_list = proc_string.strip().split()
    if len(token_list) < 5:
      raise ValueError('Line has too few tokens: %s.' % token_list)

    # Process names are given in the form:
    #
    #   windowsUpdate
    #   Windows Explorer
    #   chrome#1
    #   chrome#2
    #
    # In order to match other platforms, where multiple processes can have the
    # same name and can be easily grouped based on that name, we strip any
    # pound sign and number.
    name = ' '.join(token_list[2:-2])
    name = re.sub(r'#[0-9]+$', '', name)
    # The working set size (roughly equivalent to the resident set size on Unix)
    # is given in bytes. In order to convert this to percent of physical memory
    # occupied by the process, we divide by the amount of total physical memory
    # on the machine.
    percent_memory = float(token_list[-1]) / self._physicalMemoryBytes * 100

    return {
        'ppid': int(token_list[0]),
        'pid': int(token_list[1]),
        'name': name,
        'pCpu': float(token_list[-2]),
        'pMem': percent_memory
    }

  def _GetPidToCommandDict(self):
    """Returns a dictionary from the PID of a process to the full command used
    to launch that process. If no full command is available for a given process,
    that process is omitted from the returned dictionary.
    """
    # Skip the header row and strip the trailing newline.
    process_strings = _CheckOutputAsText(
        self._GET_COMMANDS_SHELL_COMMAND).strip().split('\n')[1:]
    command_by_pid = {}
    for process_string in process_strings:
      process_string = process_string.strip()
      # A blank row carries no process and would not match the CSV pattern in
      # _ParseCommandString(), so skip it rather than failing the snapshot.
      if not process_string:
        continue
      command = self._ParseCommandString(process_string)

      # Only return additional information about the command if it's available.
      if command['command']:
        command_by_pid[command['pid']] = command['command']

    return command_by_pid

  def _ParseCommandString(self, command_string):
    """Parses one CSV row of the form 'hostname,command line,pid'.

    Returns:
      A dictionary with the keys 'pid' (an integer) and 'command' (a string
      that is empty when no command line is available).
    """
    groups = re.match(r'^([^,]+),(.*),([0-9]+)$', command_string).groups()
    return {
        # Ignore groups[0]: it's the hostname.
        'pid': int(groups[2]),
        'command': groups[1]
    }


class LinuxProcessCollector(ProcessCollector):
  """Class for collecting information about processes on Linux.

  Example of Linux command output:
    '3.4 8.0 31887 31447 com.app.Webkit'
  """
  _SHELL_COMMAND = [
      'ps',
      '-a',  # Include processes that aren't session leaders.
      '-x',  # List all processes, even those not owned by the user.
      '-o',  # Show the output in the specified format.
      'pcpu,pmem,pid,ppid,cmd'
  ]

  def _GetProcessesAsStrings(self):
    # Skip the header row and strip the trailing newline.
    return _CheckOutputAsText(self._SHELL_COMMAND).strip().split('\n')[1:]

  def _ParseProcessString(self, proc_string):
    return _ParsePsProcessString(proc_string)


class MacProcessCollector(ProcessCollector):
  """Class for collecting information about processes on Mac.

  Example of Mac command output:
    '3.4 8.0 31887 31447 com.app.Webkit'
  """

  _SHELL_COMMAND = [
      'ps',
      '-a',  # Include all users' processes.
      '-ww',  # Don't limit the length of each line.
      '-x',  # Include processes that aren't associated with a terminal.
      '-o',  # Show the output in the specified format.
      '%cpu %mem pid ppid command'  # Put the command last to avoid truncation.
  ]

  def _GetProcessesAsStrings(self):
    # Skip the header row and strip the trailing newline.
    return _CheckOutputAsText(self._SHELL_COMMAND).strip().split('\n')[1:]

  def _ParseProcessString(self, proc_string):
    return _ParsePsProcessString(proc_string)


class CpuTracingAgent(tracing_agent.TracingAgent):
  """Tracing agent that periodically records a snapshot of all processes."""

  # Number of seconds between two consecutive snapshots, keyed by OS name.
  _SNAPSHOT_INTERVAL_BY_OS = {
      # Sampling via wmic on Windows is about twice as expensive as sampling via
      # ps on Linux and Mac, so we halve the sampling frequency.
      'win': 2.0,
      'mac': 1.0,
      'linux': 1.0
  }

  def __init__(self, platform_backend):
    super(CpuTracingAgent, self).__init__(platform_backend)
    self._snapshot_ongoing = False
    # List of (processes, timestamp) tuples, one per snapshot taken.
    self._snapshots = []
    # The timer that will trigger the next snapshot, if one is scheduled.
    self._timer = None
    self._os_name = platform_backend.GetOSName()
    if self._os_name == 'win':
      self._collector = WindowsProcessCollector()
    elif self._os_name == 'mac':
      self._collector = MacProcessCollector()
    else:
      self._collector = LinuxProcessCollector()

  @classmethod
  def IsSupported(cls, platform_backend):
    """Returns whether CPU tracing is available for the backend's OS."""
    os_name = platform_backend.GetOSName()
    return (os_name in ['mac', 'linux', 'win'])

  def StartAgentTracing(self, config, timeout):
    """Starts taking periodic snapshots if CPU tracing is enabled.

    Returns:
      True if snapshotting was started, False if |config| has CPU tracing
      disabled.
    """
    assert not self._snapshot_ongoing, (
        'Agent is already taking snapshots when tracing is started.')
    if not config.enable_cpu_trace:
      return False

    self._collector.Init()
    self._snapshot_ongoing = True
    self._KeepTakingSnapshots()
    return True

  def _KeepTakingSnapshots(self):
    """Takes a CPU snapshot and schedules the next one.

    The delay between snapshots is the _SNAPSHOT_INTERVAL_BY_OS entry for the
    current OS. The chain ends once _snapshot_ongoing is cleared.
    """
    if not self._snapshot_ongoing:
      return
    # Assume CpuTracingAgent shares the same clock domain as telemetry
    self._snapshots.append(
        (self._collector.GetProcesses(), trace_time.Now()))
    # Collecting the processes runs a subprocess and takes a while, so tracing
    # may have been stopped in the meantime; don't reschedule in that case.
    if not self._snapshot_ongoing:
      return
    interval = self._SNAPSHOT_INTERVAL_BY_OS[self._os_name]
    timer = Timer(interval, self._KeepTakingSnapshots)
    # A daemon timer cannot keep the interpreter alive if the agent is never
    # stopped.
    timer.daemon = True
    self._timer = timer
    timer.start()

  def StopAgentTracing(self):
    """Stops taking snapshots and cancels the pending timer, if any."""
    assert self._snapshot_ongoing, (
        'Agent is not taking snapshots when tracing is stopped.')
    self._snapshot_ongoing = False
    # Cancel the pending timer. Otherwise, restarting tracing before it fires
    # would leave two independent snapshot chains running side by side.
    if self._timer is not None:
      self._timer.cancel()
      self._timer = None

  def CollectAgentTraceData(self, trace_data_builder, timeout=None):
    """Adds the JSON-serialized snapshots to |trace_data_builder|."""
    assert not self._snapshot_ongoing, (
        'Agent is still taking snapshots when data is collected.')
    self._snapshot_ongoing = False
    data = json.dumps(self._FormatSnapshotsData())
    trace_data_builder.AddTraceFor(trace_data.CPU_TRACE_DATA, data)

  def _FormatSnapshotsData(self):
    """Format raw data into Object Event specified in Trace Format document."""
    pid = os.getpid()
    return [{
        'name': 'CPUSnapshots',
        'ph': 'O',
        'id': '0x1000',
        'local': True,
        'ts': timestamp,
        'pid': pid,
        'tid': None,
        'args': {
            'snapshot': {
                'processes': snapshot
            }
        }
    } for snapshot, timestamp in self._snapshots]