procfs.py revision a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7
1#!/usr/bin/env python
2# Copyright 2013 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6# A Python library to read and store procfs (/proc) information on Linux.
7#
# Each information storage class in this file keeps the data it reads as close
# to its original form as reasonably possible. Translation (e.g. to integers)
# is done only when requested, so the original data can always be inspected.
11
12
13import collections
14import logging
15import os
16import re
17import struct
18import sys
19
20
class _NullHandler(logging.Handler):
  """A logging handler that discards every record it is given."""

  def emit(self, record):
    """Ignores |record|; nothing is written anywhere."""
    return None
24
25
# Logger shared by this module. Only a do-nothing handler is attached here;
# main() below attaches a StreamHandler when the file is run as a script.
_LOGGER = logging.getLogger('procfs')
_LOGGER.addHandler(_NullHandler())
28
29
class ProcStat(object):
  """Reads and stores information in /proc/pid/stat.

  The lines read from the file are kept untouched in |raw|.  The PID, VSIZE
  and RSS fields are kept as the matched strings and are converted to integers
  only when read through the corresponding properties.
  """
  # Every piece is a raw string so that regex escapes such as "\(" are never
  # treated as (invalid) string escapes.
  # STATE accepts any single letter because the set of state letters differs
  # between kernel versions (see proc(5)).
  # PRIORITY and NICE are signed: real-time priorities and negative nice values
  # are written with a leading minus sign.
  _PATTERN = re.compile(r'^'
                        r'(?P<PID>-?[0-9]+) '
                        r'\((?P<COMM>.+)\) '
                        r'(?P<STATE>[A-Za-z]) '
                        r'(?P<PPID>-?[0-9]+) '
                        r'(?P<PGRP>-?[0-9]+) '
                        r'(?P<SESSION>-?[0-9]+) '
                        r'(?P<TTY_NR>-?[0-9]+) '
                        r'(?P<TPGID>-?[0-9]+) '
                        r'(?P<FLAGS>[0-9]+) '
                        r'(?P<MINFIT>[0-9]+) '
                        r'(?P<CMINFIT>[0-9]+) '
                        r'(?P<MAJFIT>[0-9]+) '
                        r'(?P<CMAJFIT>[0-9]+) '
                        r'(?P<UTIME>[0-9]+) '
                        r'(?P<STIME>[0-9]+) '
                        r'(?P<CUTIME>[0-9]+) '
                        r'(?P<CSTIME>[0-9]+) '
                        r'(?P<PRIORITY>-?[0-9]+) '
                        r'(?P<NICE>-?[0-9]+) '
                        r'(?P<NUM_THREADS>[0-9]+) '
                        r'(?P<ITREALVALUE>[0-9]+) '
                        r'(?P<STARTTIME>[0-9]+) '
                        r'(?P<VSIZE>[0-9]+) '
                        r'(?P<RSS>[0-9]+) '
                        r'(?P<RSSLIM>[0-9]+) '
                        r'(?P<STARTCODE>[0-9]+) '
                        r'(?P<ENDCODE>[0-9]+) '
                        r'(?P<STARTSTACK>[0-9]+) '
                        r'(?P<KSTKESP>[0-9]+) '
                        r'(?P<KSTKEIP>[0-9]+) '
                        r'(?P<SIGNAL>[0-9]+) '
                        r'(?P<BLOCKED>[0-9]+) '
                        r'(?P<SIGIGNORE>[0-9]+) '
                        r'(?P<SIGCATCH>[0-9]+) '
                        r'(?P<WCHAN>[0-9]+) '
                        r'(?P<NSWAP>[0-9]+) '
                        r'(?P<CNSWAP>[0-9]+) '
                        r'(?P<EXIT_SIGNAL>[0-9]+) '
                        r'(?P<PROCESSOR>[0-9]+) '
                        r'(?P<RT_PRIORITY>[0-9]+) '
                        r'(?P<POLICY>[0-9]+) '
                        r'(?P<DELAYACCT_BLKIO_TICKS>[0-9]+) '
                        r'(?P<GUEST_TIME>[0-9]+) '
                        r'(?P<CGUEST_TIME>[0-9]+)', re.IGNORECASE)

  def __init__(self, raw, pid, vsize, rss):
    """Stores the raw lines and the unconverted PID, VSIZE and RSS values."""
    self._raw = raw
    self._pid = pid
    self._vsize = vsize
    self._rss = rss

  @staticmethod
  def load_file(stat_f):
    """Parses an open stat file object and returns a ProcStat.

    Only the first line is matched against _PATTERN; all lines are kept in
    |raw|.

    Raises:
      SyntaxError: the file is empty or its first line does not match.
    """
    raw = stat_f.readlines()
    stat = ProcStat._PATTERN.match(raw[0]) if raw else None
    if not stat:
      raise SyntaxError('Unknown /proc/pid/stat format.')
    fields = stat.groupdict()
    return ProcStat(raw, fields['PID'], fields['VSIZE'], fields['RSS'])

  @staticmethod
  def load(pid):
    """Opens /proc/<pid>/stat and parses it with load_file()."""
    with open(os.path.join('/proc', str(pid), 'stat'), 'r') as stat_f:
      return ProcStat.load_file(stat_f)

  @property
  def raw(self):
    """The list of lines exactly as read from the file."""
    return self._raw

  @property
  def pid(self):
    """The PID field converted to an integer."""
    return int(self._pid)

  @property
  def vsize(self):
    """The VSIZE field converted to an integer."""
    return int(self._vsize)

  @property
  def rss(self):
    """The RSS field converted to an integer."""
    return int(self._rss)
113
114
class ProcStatm(object):
  """Reads and stores information in /proc/pid/statm.

  The seven space-separated numbers on the first line are stored as matched
  strings and converted to integers by the properties.
  """
  _PATTERN = re.compile(
      r'^(?P<SIZE>[0-9]+) (?P<RESIDENT>[0-9]+) (?P<SHARE>[0-9]+) '
      r'(?P<TEXT>[0-9]+) (?P<LIB>[0-9]+) (?P<DATA>[0-9]+) (?P<DT>[0-9]+)',
      re.IGNORECASE)

  def __init__(self, raw, size, resident, share, text, lib, data, dt):
    """Keeps the raw lines together with the seven unconverted fields."""
    self._raw = raw
    self._size, self._resident, self._share = size, resident, share
    self._text, self._lib = text, lib
    self._data, self._dt = data, dt

  @staticmethod
  def load_file(statm_f):
    """Builds a ProcStatm from an open statm file object."""
    lines = statm_f.readlines()
    matched = ProcStatm._PATTERN.match(lines[0])
    values = matched.group(
        'SIZE', 'RESIDENT', 'SHARE', 'TEXT', 'LIB', 'DATA', 'DT')
    return ProcStatm(lines, *values)

  @staticmethod
  def load(pid):
    """Opens /proc/<pid>/statm and hands it to load_file()."""
    statm_path = os.path.join('/proc', str(pid), 'statm')
    with open(statm_path, 'r') as statm_f:
      return ProcStatm.load_file(statm_f)

  @property
  def raw(self):
    """The lines exactly as read from the file."""
    return self._raw

  @property
  def size(self):
    """The SIZE field as an integer."""
    return int(self._size)

  @property
  def resident(self):
    """The RESIDENT field as an integer."""
    return int(self._resident)

  @property
  def share(self):
    """The SHARE field as an integer."""
    return int(self._share)

  @property
  def text(self):
    """The TEXT field as an integer."""
    return int(self._text)

  @property
  def lib(self):
    """The LIB field as an integer."""
    return int(self._lib)

  @property
  def data(self):
    """The DATA field as an integer."""
    return int(self._data)

  @property
  def dt(self):
    """The DT field as an integer."""
    return int(self._dt)
185
186
class ProcStatus(object):
  """Reads and stores information in /proc/pid/status.

  Every line is expected to look like "Name:<whitespace>value".  The values of
  a fixed set of names are kept as strings; the properties convert them.
  """
  _PATTERN = re.compile(r'^(?P<NAME>[A-Za-z0-9_]+):\s+(?P<VALUE>.*)')

  def __init__(self, raw, dct):
    """Picks the known entries out of |dct|; absent names are stored as None."""
    lookup = dct.get
    self._raw = raw
    self._pid = lookup('Pid')
    self._name = lookup('Name')
    self._vm_peak = lookup('VmPeak')
    self._vm_size = lookup('VmSize')
    self._vm_lck = lookup('VmLck')
    self._vm_pin = lookup('VmPin')
    self._vm_hwm = lookup('VmHWM')
    self._vm_rss = lookup('VmRSS')
    self._vm_data = lookup('VmData')
    self._vm_stack = lookup('VmStk')
    self._vm_exe = lookup('VmExe')
    self._vm_lib = lookup('VmLib')
    self._vm_pte = lookup('VmPTE')
    self._vm_swap = lookup('VmSwap')

  @staticmethod
  def load_file(status_f):
    """Parses an open status file object into a ProcStatus.

    Raises:
      SyntaxError: some line does not match the "Name: value" pattern.
    """
    lines = status_f.readlines()
    fields = {}
    for entry in lines:
      matched = ProcStatus._PATTERN.match(entry)
      if not matched:
        raise SyntaxError('Unknown /proc/pid/status format.')
      fields[matched.group('NAME')] = matched.group('VALUE')
    return ProcStatus(lines, fields)

  @staticmethod
  def load(pid):
    """Opens /proc/<pid>/status and hands it to load_file()."""
    status_path = os.path.join('/proc', str(pid), 'status')
    with open(status_path, 'r') as status_f:
      return ProcStatus.load_file(status_f)

  @staticmethod
  def _kilobytes(value, label):
    """Returns the leading number of a "<number> kB" string as an integer.

    Raises:
      ValueError: |value| does not end with 'kB'; |label| names the field.
    """
    if not value.endswith('kB'):
      raise ValueError('%s is not in kB.' % label)
    return int(value.split()[0])

  @property
  def raw(self):
    """The lines exactly as read from the file."""
    return self._raw

  @property
  def pid(self):
    """The Pid entry as an integer."""
    return int(self._pid)

  @property
  def vm_peak(self):
    """Returns a high-water (peak) virtual memory size in kilo-bytes."""
    return ProcStatus._kilobytes(self._vm_peak, 'VmPeak')

  @property
  def vm_size(self):
    """Returns a virtual memory size in kilo-bytes."""
    return ProcStatus._kilobytes(self._vm_size, 'VmSize')

  @property
  def vm_hwm(self):
    """Returns a high-water (peak) resident set size (RSS) in kilo-bytes."""
    return ProcStatus._kilobytes(self._vm_hwm, 'VmHWM')

  @property
  def vm_rss(self):
    """Returns a resident set size (RSS) in kilo-bytes."""
    return ProcStatus._kilobytes(self._vm_rss, 'VmRSS')
261
262
class ProcMapsEntry(object):
  """A class representing one line in /proc/pid/maps.

  It is a plain record: each constructor argument is stored unchanged in the
  attribute of the same name.
  """

  # Attribute names in the order in which as_dict() emits them.
  _KEYS = ('begin', 'end', 'readable', 'writable', 'executable', 'private',
           'offset', 'major', 'minor', 'inode', 'name')

  def __init__(
      self, begin, end, readable, writable, executable, private, offset,
      major, minor, inode, name):
    self.begin, self.end = begin, end
    self.readable, self.writable = readable, writable
    self.executable, self.private = executable, private
    self.offset = offset
    self.major, self.minor = major, minor
    self.inode = inode
    self.name = name

  def as_dict(self):
    """Returns a new dict mapping every attribute name to its value."""
    return dict((key, getattr(self, key)) for key in self._KEYS)
295
296
class ProcMaps(object):
  """Reads and stores information in /proc/pid/maps.

  Entries are keyed by their begin address and are yielded in ascending order
  of that address.  Lines that do not match MAPS_PATTERN are ignored.
  """

  MAPS_PATTERN = re.compile(
      r'^([a-f0-9]+)-([a-f0-9]+)\s+(.)(.)(.)(.)\s+([a-f0-9]+)\s+(\S+):(\S+)\s+'
      r'(\d+)\s*(.*)$', re.IGNORECASE)

  # Shared by the three predicates below.  Matched from the start of the name:
  # non-space characters followed by ".so", ".dll", ".dylib", ".bundle" or
  # "chrome", optionally followed by a dotted version.  Written as a raw string
  # so that "\S" and "\." are regex escapes rather than invalid string escapes.
  _BINARY_NAME_PATTERN = re.compile(
      r'\S+(\.(so|dll|dylib|bundle)|chrome)((\.\d+)+\w*(\.\d+){0,3})?')

  def __init__(self):
    self._sorted_indexes = []  # begin addresses; sorted lazily on iteration
    self._dictionary = {}      # begin address -> entry
    self._sorted = True        # False while _sorted_indexes is out of order

  def _ensure_sorted(self):
    """Sorts the begin addresses if an out-of-order append happened."""
    if not self._sorted:
      self._sorted_indexes.sort()
      self._sorted = True

  def iter(self, condition):
    """Yields entries in address order for which |condition| is true.

    Args:
      condition: a callable taking an entry, or a false value to yield every
        entry.
    """
    for entry in self:
      if not condition or condition(entry):
        yield entry

  def __iter__(self):
    """Yields every entry in ascending order of begin address."""
    self._ensure_sorted()
    for index in self._sorted_indexes:
      yield self._dictionary[index]

  @staticmethod
  def load_file(maps_f):
    """Builds a ProcMaps from the lines of an open maps file object."""
    table = ProcMaps()
    for line in maps_f:
      table.append_line(line)
    return table

  @staticmethod
  def load(pid):
    """Opens /proc/<pid>/maps and parses it with load_file()."""
    with open(os.path.join('/proc', str(pid), 'maps'), 'r') as maps_f:
      return ProcMaps.load_file(maps_f)

  def append_line(self, line):
    """Parses |line| and stores the entry if it is a maps line.

    Returns:
      The stored ProcMapsEntry, or None when |line| did not match.
    """
    entry = self.parse_line(line)
    if entry:
      self._append_entry(entry)
    return entry

  @staticmethod
  def parse_line(line):
    """Returns a ProcMapsEntry for |line|, or None if it does not match.

    begin, end and offset are parsed as hexadecimal and inode as decimal; the
    permission characters, major, minor and name are kept as strings.
    """
    matched = ProcMaps.MAPS_PATTERN.match(line)
    if not matched:
      return None
    return ProcMapsEntry(  # pylint: disable=W0212
        int(matched.group(1), 16),  # begin
        int(matched.group(2), 16),  # end
        matched.group(3),           # readable
        matched.group(4),           # writable
        matched.group(5),           # executable
        matched.group(6),           # private
        int(matched.group(7), 16),  # offset
        matched.group(8),           # major
        matched.group(9),           # minor
        int(matched.group(10), 10), # inode
        matched.group(11)           # name
        )

  @staticmethod
  def constants(entry):
    """True-ish for non-writable, non-executable entries with a binary name."""
    return (entry.writable == '-' and entry.executable == '-' and
            ProcMaps._BINARY_NAME_PATTERN.match(entry.name))

  @staticmethod
  def executable(entry):
    """True-ish for executable entries with a binary name."""
    return (entry.executable == 'x' and
            ProcMaps._BINARY_NAME_PATTERN.match(entry.name))

  @staticmethod
  def executable_and_constants(entry):
    """True-ish for entries accepted by constants() or executable()."""
    return (((entry.writable == '-' and entry.executable == '-') or
             entry.executable == 'x') and
            ProcMaps._BINARY_NAME_PATTERN.match(entry.name))

  def _append_entry(self, entry):
    """Stores |entry| under its begin address.

    An entry whose begin address is already present replaces the stored one
    without adding a second index, so iteration never yields it twice.
    """
    if entry.begin not in self._dictionary:
      if self._sorted_indexes and self._sorted_indexes[-1] > entry.begin:
        self._sorted = False
      self._sorted_indexes.append(entry.begin)
    self._dictionary[entry.begin] = entry
386
387
class ProcSmaps(object):
  """Reads and stores information in /proc/pid/smaps.

  The file is a sequence of maps-style header lines, each followed by
  "Name: value" detail lines.  Per-mapping details are kept in VMA objects and
  the numeric details are summed per name into process-wide totals.
  """
  _SMAPS_PATTERN = re.compile(r'^(?P<NAME>[A-Za-z0-9_]+):\s+(?P<VALUE>.*)')

  class VMA(object):
    """Detail values of one mapping, stored as read and converted on access."""

    # smaps detail name -> attribute that stores its value.
    _ATTRIBUTES = {
        'Size': '_size',
        'Rss': '_rss',
        'Pss': '_pss',
        'Referenced': '_referenced',
        'Private_Clean': '_private_clean',
        'Shared_Clean': '_shared_clean',
        'KernelPageSize': '_kernel_page_size',
        'MMUPageSize': '_mmu_page_size',
    }

    def __init__(self):
      self._size = 0
      self._rss = 0
      self._pss = 0

    def append(self, name, value):
      """Stores |value| if |name| is a known detail; otherwise ignores it."""
      attribute = self._ATTRIBUTES.get(name)
      if attribute:
        setattr(self, attribute, value)

    @staticmethod
    def _as_int(value):
      """Converts a stored value to an integer.

      Strings ending in 'kB' yield their leading number.  Anything else,
      including the integer defaults set by __init__, goes through int().
      """
      if hasattr(value, 'endswith') and value.endswith('kB'):
        return int(value.split()[0])
      return int(value)

    @property
    def size(self):
      """The Size detail as an integer (0 if it was never appended)."""
      return self._as_int(self._size)

    @property
    def rss(self):
      """The Rss detail as an integer (0 if it was never appended)."""
      return self._as_int(self._rss)

    @property
    def pss(self):
      """The Pss detail as an integer (0 if it was never appended)."""
      return self._as_int(self._pss)

  def __init__(self, raw, total_dct, maps, vma_internals):
    """Stores the raw lines, the per-name totals and the per-mapping details.

    Args:
      raw: the lines of the smaps file.
      total_dct: a mapping from detail name to its summed value; the eight
        names read below must be present (load_file() passes a defaultdict).
      maps: the ProcMaps built from the header lines.
      vma_internals: an ordered mapping from maps entry to ProcSmaps.VMA.
    """
    self._raw = raw
    self._size = total_dct['Size']
    self._rss = total_dct['Rss']
    self._pss = total_dct['Pss']
    self._referenced = total_dct['Referenced']
    self._shared_clean = total_dct['Shared_Clean']
    self._private_clean = total_dct['Private_Clean']
    self._kernel_page_size = total_dct['KernelPageSize']
    self._mmu_page_size = total_dct['MMUPageSize']
    self._maps = maps
    self._vma_internals = vma_internals

  @staticmethod
  def _leading_int(value):
    """Returns the first whitespace-separated token of |value| as an integer.

    Returns None when |value| is empty or its first token is not a number
    (for example the flag list of a "VmFlags:" line), so that such lines are
    left out of the totals instead of aborting the whole load.
    """
    tokens = value.split()
    if not tokens:
      return None
    try:
      return int(tokens[0])
    except ValueError:
      return None

  @staticmethod
  def load_file(smaps_f):
    """Parses an open smaps file object and returns a ProcSmaps."""
    raw = smaps_f.readlines()

    vma = None
    vma_internals = collections.OrderedDict()
    total_dct = collections.defaultdict(int)
    maps = ProcMaps()
    for line in raw:
      if ProcMaps.MAPS_PATTERN.match(line):
        # A header line starts a new mapping; later details belong to it.
        vma = maps.append_line(line.strip())
        vma_internals[vma] = ProcSmaps.VMA()
        continue
      smaps_match = ProcSmaps._SMAPS_PATTERN.match(line)
      if not smaps_match or vma is None:
        # Not a detail line, or a detail line before any mapping header.
        continue
      name = smaps_match.group('NAME')
      value = smaps_match.group('VALUE')
      vma_internals[vma].append(name, value)
      amount = ProcSmaps._leading_int(value)
      if amount is not None:
        total_dct[name] += amount

    return ProcSmaps(raw, total_dct, maps, vma_internals)

  @staticmethod
  def load(pid):
    """Opens /proc/<pid>/smaps and parses it with load_file()."""
    with open(os.path.join('/proc', str(pid), 'smaps'), 'r') as smaps_f:
      return ProcSmaps.load_file(smaps_f)

  @property
  def size(self):
    """Sum of the Size details of all mappings."""
    return self._size

  @property
  def rss(self):
    """Sum of the Rss details of all mappings."""
    return self._rss

  @property
  def referenced(self):
    """Sum of the Referenced details of all mappings."""
    return self._referenced

  @property
  def pss(self):
    """Sum of the Pss details of all mappings."""
    return self._pss

  @property
  def private_clean(self):
    """Sum of the Private_Clean details of all mappings."""
    return self._private_clean

  @property
  def shared_clean(self):
    """Sum of the Shared_Clean details of all mappings."""
    return self._shared_clean

  @property
  def kernel_page_size(self):
    """Sum of the KernelPageSize details of all mappings."""
    return self._kernel_page_size

  @property
  def mmu_page_size(self):
    """Sum of the MMUPageSize details of all mappings."""
    return self._mmu_page_size

  @property
  def vma_internals(self):
    """The ordered mapping from maps entry to its ProcSmaps.VMA."""
    return self._vma_internals
501
502
class ProcPagemap(object):
  """Reads and stores partial information in /proc/pid/pagemap.

  It picks up virtual addresses to read based on ProcMaps (/proc/pid/maps).
  See https://www.kernel.org/doc/Documentation/vm/pagemap.txt for details.
  """
  _BYTES_PER_PAGEMAP_VALUE = 8
  _BYTES_PER_OS_PAGE = 4096
  # Floor division keeps this (and every offset derived from it) an integer on
  # both Python 2 and Python 3; os.lseek() and os.read() reject floats.
  _VIRTUAL_TO_PAGEMAP_OFFSET = _BYTES_PER_OS_PAGE // _BYTES_PER_PAGEMAP_VALUE

  _MASK_PRESENT = 1 << 63
  _MASK_SWAPPED = 1 << 62
  _MASK_FILEPAGE_OR_SHAREDANON = 1 << 61
  _MASK_SOFTDIRTY = 1 << 55
  _MASK_PFN = (1 << 55) - 1

  class VMA(object):
    """Byte counts and page-frame usage collected for one mapping."""

    def __init__(self, vsize, present, swapped, pageframes):
      self._vsize = vsize
      self._present = present
      self._swapped = swapped
      self._pageframes = pageframes

    @property
    def vsize(self):
      """Bytes covered by the pagemap values read for this mapping."""
      return int(self._vsize)

    @property
    def present(self):
      """Bytes of present pages, counting each page frame number once."""
      return int(self._present)

    @property
    def swapped(self):
      """Bytes of pages whose swapped bit is set."""
      return int(self._swapped)

    @property
    def pageframes(self):
      """Mapping from page frame number to how often it occurs here."""
      return self._pageframes

  def __init__(self, vsize, present, swapped, vma_internals, in_process_dup):
    """Stores the process-wide totals and the per-mapping VMA objects."""
    self._vsize = vsize
    self._present = present
    self._swapped = swapped
    self._vma_internals = vma_internals
    self._in_process_dup = in_process_dup

  @staticmethod
  def load(pid, maps):
    """Reads the pagemap values of every mapping in |maps|.

    Args:
      pid: the process whose /proc/<pid>/pagemap is read.
      maps: an iterable of entries with |begin| and |end| addresses, such as
        a ProcMaps.

    Returns:
      A ProcPagemap with totals over all mappings and a VMA per mapping.
    """
    total_present = 0
    total_swapped = 0
    total_vsize = 0
    in_process_dup = 0
    vma_internals = collections.OrderedDict()
    process_pageframe_set = set()
    value_size = ProcPagemap._BYTES_PER_PAGEMAP_VALUE
    page_size = ProcPagemap._BYTES_PER_OS_PAGE

    pagemap_fd = os.open(
        os.path.join('/proc', str(pid), 'pagemap'), os.O_RDONLY)
    try:
      for vma in maps:
        present = 0
        swapped = 0
        vsize = 0
        pageframes = collections.defaultdict(int)
        begin_offset = ProcPagemap._offset(vma.begin)
        chunk_size = ProcPagemap._offset(vma.end) - begin_offset
        os.lseek(pagemap_fd, begin_offset, os.SEEK_SET)
        buf = os.read(pagemap_fd, chunk_size)
        if len(buf) < chunk_size:
          _LOGGER.warning(
              'Failed to read pagemap at 0x%x in %d.', vma.begin, pid)
        # Only whole 8-byte values are decoded; a trailing partial value left
        # by a short read would make struct.unpack() raise.
        value_count = len(buf) // value_size
        pagemap_values = struct.unpack(
            '=%dQ' % value_count, buf[:value_count * value_size])
        for pagemap_value in pagemap_values:
          vsize += page_size
          if pagemap_value & ProcPagemap._MASK_PRESENT:
            pfn = pagemap_value & ProcPagemap._MASK_PFN
            # A frame already seen anywhere in this process is a duplicate.
            if pfn in process_pageframe_set:
              in_process_dup += page_size
            else:
              process_pageframe_set.add(pfn)
            # Within one mapping each frame counts as present only once.
            if pfn not in pageframes:
              present += page_size
            pageframes[pfn] += 1
          if pagemap_value & ProcPagemap._MASK_SWAPPED:
            swapped += page_size
        vma_internals[vma] = ProcPagemap.VMA(
            vsize, present, swapped, pageframes)
        total_present += present
        total_swapped += swapped
        total_vsize += vsize
    finally:
      # Close the descriptor even if a seek, read or unpack fails.
      os.close(pagemap_fd)

    return ProcPagemap(total_vsize, total_present, total_swapped,
                       vma_internals, in_process_dup)

  @staticmethod
  def _offset(virtual_address):
    """Returns the byte offset in pagemap of the value for |virtual_address|.

    There is one 8-byte value per 4096-byte page, so the offset is the address
    divided by 512, rounded down.
    """
    return virtual_address // ProcPagemap._VIRTUAL_TO_PAGEMAP_OFFSET

  @property
  def vsize(self):
    """Sum of the vsize of all mappings."""
    return int(self._vsize)

  @property
  def present(self):
    """Sum of the present bytes of all mappings."""
    return int(self._present)

  @property
  def swapped(self):
    """Sum of the swapped bytes of all mappings."""
    return int(self._swapped)

  @property
  def vma_internals(self):
    """The ordered mapping from maps entry to its ProcPagemap.VMA."""
    return self._vma_internals
613
614
class _ProcessMemory(object):
  """Aggregates process memory information from /proc for manual testing.

  Each read_*() method loads one /proc/<pid> file through the matching Proc*
  class and keeps the result for the property of the same name.
  """

  def __init__(self, pid):
    self._pid = pid
    self._smaps = []
    self._maps = self._pagemap = None
    self._stat = self._status = self._statm = None

  def _read(self, proc_file):
    """Returns the lines of /proc/<pid>/<proc_file>."""
    path = os.path.join('/proc', str(self._pid), proc_file)
    with open(path, 'r') as proc_f:
      return proc_f.readlines()

  def read_all(self):
    """Loads everything; maps is loaded before pagemap, which needs it."""
    for reader in (self.read_stat, self.read_statm, self.read_status,
                   self.read_smaps, self.read_maps):
      reader()
    self.read_pagemap(self._maps)

  def read_maps(self):
    """Loads /proc/<pid>/maps."""
    self._maps = ProcMaps.load(self._pid)

  def read_pagemap(self, maps):
    """Loads /proc/<pid>/pagemap for the mappings in |maps|."""
    self._pagemap = ProcPagemap.load(self._pid, maps)

  def read_smaps(self):
    """Loads /proc/<pid>/smaps."""
    self._smaps = ProcSmaps.load(self._pid)

  def read_stat(self):
    """Loads /proc/<pid>/stat."""
    self._stat = ProcStat.load(self._pid)

  def read_statm(self):
    """Loads /proc/<pid>/statm."""
    self._statm = ProcStatm.load(self._pid)

  def read_status(self):
    """Loads /proc/<pid>/status."""
    self._status = ProcStatus.load(self._pid)

  @property
  def pid(self):
    """The pid given to the constructor."""
    return self._pid

  @property
  def maps(self):
    """The loaded ProcMaps, or None before read_maps()."""
    return self._maps

  @property
  def pagemap(self):
    """The loaded ProcPagemap, or None before read_pagemap()."""
    return self._pagemap

  @property
  def smaps(self):
    """The loaded ProcSmaps, or an empty list before read_smaps()."""
    return self._smaps

  @property
  def stat(self):
    """The loaded ProcStat, or None before read_stat()."""
    return self._stat

  @property
  def statm(self):
    """The loaded ProcStatm, or None before read_statm()."""
    return self._statm

  @property
  def status(self):
    """The loaded ProcStatus, or None before read_status()."""
    return self._status
685
686
def main(argv):
  """The main function for manual testing.

  For every pid given on the command line, prints the virtual size and then
  the resident size as reported by stat, statm, status, smaps and pagemap, all
  converted to bytes.

  Args:
    argv: the command line; argv[1:] are the pids to inspect.

  Returns:
    0 on completion.

  Raises:
    SyntaxError: an argument is not an integer.
  """
  _LOGGER.setLevel(logging.WARNING)
  handler = logging.StreamHandler()
  handler.setLevel(logging.WARNING)
  handler.setFormatter(logging.Formatter(
      '%(asctime)s:%(name)s:%(levelname)s:%(message)s'))
  _LOGGER.addHandler(handler)

  # Validate every argument before reading anything from /proc.
  pids = []
  for arg in argv[1:]:
    try:
      pids.append(int(arg))
    except ValueError:
      raise SyntaxError("%s is not an integer." % arg)

  # Each print() below takes one pre-formatted string, so the statement works
  # identically as a Python 2 print statement and a Python 3 function call.
  for pid in pids:
    proc = _ProcessMemory(pid)
    proc.read_all()

    print('=== PID: %d ===' % pid)

    # Virtual size. statm counts 4096-byte pages; status and smaps report kB.
    print('   stat: %d' % proc.stat.vsize)
    print('  statm: %d' % (proc.statm.size * 4096))
    print(' status: %d (Peak:%d)' % (proc.status.vm_size * 1024,
                                     proc.status.vm_peak * 1024))
    print('  smaps: %d' % (proc.smaps.size * 1024))
    print('pagemap: %d' % proc.pagemap.vsize)

    # Resident size, with the same unit conversions (stat rss is in pages).
    print('   stat: %d' % (proc.stat.rss * 4096))
    print('  statm: %d' % (proc.statm.resident * 4096))
    print(' status: %d (Peak:%d)' % (proc.status.vm_rss * 1024,
                                     proc.status.vm_hwm * 1024))
    print('  smaps: %d' % (proc.smaps.rss * 1024))
    print('pagemap: %d' % proc.pagemap.present)

  return 0
726
727
if __name__ == '__main__':
  # Use main()'s return value as the process exit status.
  exit_code = main(sys.argv)
  sys.exit(exit_code)
730