1#!/usr/bin/env python
2# Copyright 2013 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6# A Python library to read and store procfs (/proc) information on Linux.
7#
# Each information storage class in this file stores the original data as
# faithfully as reasonably possible. Translation is done only when requested,
# so that it is always possible to probe the original data.
11
12
13import collections
14import logging
15import os
16import re
17import struct
18import sys
19
20
class _NullHandler(logging.Handler):
  """A logging handler that discards every record it is given.

  NOTE(review): presumably exists so that the module logger always has a
  handler even when the importing application configures none -- confirm.
  """

  def emit(self, record):
    """Ignores |record|; nothing is written anywhere."""
    return None
24
25
# Module-level logger named 'procfs'. A no-op handler is attached here so the
# logger always has at least one handler; main() adds a real StreamHandler
# when this file is run as a script.
_LOGGER = logging.getLogger('procfs')
_LOGGER.addHandler(_NullHandler())
28
29
class ProcStat(object):
  """Reads and stores information in /proc/pid/stat.

  The raw lines are kept exactly as read. The PID, VSIZE and RSS fields are
  stored as the matched strings and converted to int only when the
  corresponding property is accessed.
  """
  # Every fragment is a raw string so that '\(' reaches the regex engine
  # verbatim instead of relying on Python passing unknown escapes through.
  #
  # Fields that proc(5) documents as signed accept a leading '-'. In
  # particular NICE is negative for high-priority processes and PRIORITY is
  # negative for real-time ones; without the '-?' the whole line fails to
  # match for such processes.
  #
  # STATE also accepts X, K, P and I, which proc(5) lists in addition to the
  # classic R/S/D/Z/T/W letters (lower-case variants are covered by
  # re.IGNORECASE).
  _PATTERN = re.compile(r'^'
                        r'(?P<PID>-?[0-9]+) '
                        r'\((?P<COMM>.+)\) '
                        r'(?P<STATE>[RSDZTWXKPI]) '
                        r'(?P<PPID>-?[0-9]+) '
                        r'(?P<PGRP>-?[0-9]+) '
                        r'(?P<SESSION>-?[0-9]+) '
                        r'(?P<TTY_NR>-?[0-9]+) '
                        r'(?P<TPGID>-?[0-9]+) '
                        r'(?P<FLAGS>[0-9]+) '
                        r'(?P<MINFIT>[0-9]+) '
                        r'(?P<CMINFIT>[0-9]+) '
                        r'(?P<MAJFIT>[0-9]+) '
                        r'(?P<CMAJFIT>[0-9]+) '
                        r'(?P<UTIME>[0-9]+) '
                        r'(?P<STIME>[0-9]+) '
                        r'(?P<CUTIME>-?[0-9]+) '
                        r'(?P<CSTIME>-?[0-9]+) '
                        r'(?P<PRIORITY>-?[0-9]+) '
                        r'(?P<NICE>-?[0-9]+) '
                        r'(?P<NUM_THREADS>-?[0-9]+) '
                        r'(?P<ITREALVALUE>-?[0-9]+) '
                        r'(?P<STARTTIME>[0-9]+) '
                        r'(?P<VSIZE>[0-9]+) '
                        r'(?P<RSS>-?[0-9]+) '
                        r'(?P<RSSLIM>[0-9]+) '
                        r'(?P<STARTCODE>[0-9]+) '
                        r'(?P<ENDCODE>[0-9]+) '
                        r'(?P<STARTSTACK>[0-9]+) '
                        r'(?P<KSTKESP>[0-9]+) '
                        r'(?P<KSTKEIP>[0-9]+) '
                        r'(?P<SIGNAL>[0-9]+) '
                        r'(?P<BLOCKED>[0-9]+) '
                        r'(?P<SIGIGNORE>[0-9]+) '
                        r'(?P<SIGCATCH>[0-9]+) '
                        r'(?P<WCHAN>[0-9]+) '
                        r'(?P<NSWAP>[0-9]+) '
                        r'(?P<CNSWAP>[0-9]+) '
                        r'(?P<EXIT_SIGNAL>-?[0-9]+) '
                        r'(?P<PROCESSOR>-?[0-9]+) '
                        r'(?P<RT_PRIORITY>[0-9]+) '
                        r'(?P<POLICY>[0-9]+) '
                        r'(?P<DELAYACCT_BLKIO_TICKS>[0-9]+) '
                        r'(?P<GUEST_TIME>[0-9]+) '
                        r'(?P<CGUEST_TIME>-?[0-9]+)', re.IGNORECASE)

  def __init__(self, raw, pid, vsize, rss):
    """Stores the raw lines and the still-unconverted field values.

    Args:
      raw: The list of lines read from the stat file.
      pid: The PID field (anything int() accepts).
      vsize: The VSIZE field (anything int() accepts).
      rss: The RSS field (anything int() accepts).
    """
    self._raw = raw
    self._pid = pid
    self._vsize = vsize
    self._rss = rss

  @staticmethod
  def load_file(stat_f):
    """Parses an already opened /proc/pid/stat file object.

    Args:
      stat_f: A file-like object whose first line is a stat record.

    Returns:
      A ProcStat, or None when the file is empty or its first line does not
      look like a stat record.
    """
    raw = stat_f.readlines()
    if not raw:
      return None
    stat = ProcStat._PATTERN.match(raw[0])
    if not stat:
      return None
    fields = stat.groupdict()
    return ProcStat(raw, fields['PID'], fields['VSIZE'], fields['RSS'])

  @staticmethod
  def load(pid):
    """Loads /proc/<pid>/stat.

    Returns:
      A ProcStat, or None when the file cannot be opened/read or parsed.
    """
    try:
      with open(os.path.join('/proc', str(pid), 'stat'), 'r') as stat_f:
        return ProcStat.load_file(stat_f)
    except (IOError, OSError):
      return None

  @property
  def raw(self):
    """The list of lines exactly as read from the file."""
    return self._raw

  @property
  def pid(self):
    """The PID field as an int."""
    return int(self._pid)

  @property
  def vsize(self):
    """The VSIZE field as an int."""
    return int(self._vsize)

  @property
  def rss(self):
    """The RSS field as an int."""
    return int(self._rss)
116
117
class ProcStatm(object):
  """Reads and stores information in /proc/pid/statm.

  The seven space-separated counters of the first line are stored as the
  matched strings and converted to int only when a property is accessed.
  """
  _PATTERN = re.compile(r'^'
                        r'(?P<SIZE>[0-9]+) '
                        r'(?P<RESIDENT>[0-9]+) '
                        r'(?P<SHARE>[0-9]+) '
                        r'(?P<TEXT>[0-9]+) '
                        r'(?P<LIB>[0-9]+) '
                        r'(?P<DATA>[0-9]+) '
                        r'(?P<DT>[0-9]+)', re.IGNORECASE)

  def __init__(self, raw, size, resident, share, text, lib, data, dt):
    """Stores the raw lines and the still-unconverted field values."""
    self._raw = raw
    self._size = size
    self._resident = resident
    self._share = share
    self._text = text
    self._lib = lib
    self._data = data
    self._dt = dt

  @staticmethod
  def load_file(statm_f):
    """Parses an already opened /proc/pid/statm file object.

    Args:
      statm_f: A file-like object whose first line is a statm record.

    Returns:
      A ProcStatm, or None when reading fails, the file is empty, or the first
      line does not look like a statm record.
    """
    try:
      raw = statm_f.readlines()
    except (IOError, OSError):
      return None
    if not raw:
      return None
    statm = ProcStatm._PATTERN.match(raw[0])
    if not statm:
      return None
    fields = statm.groupdict()
    return ProcStatm(raw,
                     fields['SIZE'],
                     fields['RESIDENT'],
                     fields['SHARE'],
                     fields['TEXT'],
                     fields['LIB'],
                     fields['DATA'],
                     fields['DT'])

  @staticmethod
  def load(pid):
    """Loads /proc/<pid>/statm.

    Returns:
      A ProcStatm, or None when the file cannot be opened, read or parsed.
    """
    try:
      with open(os.path.join('/proc', str(pid), 'statm'), 'r') as statm_f:
        return ProcStatm.load_file(statm_f)
    except (IOError, OSError):
      return None

  @property
  def raw(self):
    """The list of lines exactly as read from the file."""
    return self._raw

  @property
  def size(self):
    """The SIZE field (first column) as an int."""
    return int(self._size)

  @property
  def resident(self):
    """The RESIDENT field (second column) as an int."""
    return int(self._resident)

  @property
  def share(self):
    """The SHARE field (third column) as an int."""
    return int(self._share)

  @property
  def text(self):
    """The TEXT field (fourth column) as an int."""
    return int(self._text)

  @property
  def lib(self):
    """The LIB field (fifth column) as an int."""
    return int(self._lib)

  @property
  def data(self):
    """The DATA field (sixth column) as an int."""
    return int(self._data)

  @property
  def dt(self):
    """The DT field (seventh column) as an int."""
    return int(self._dt)
194
195
class ProcStatus(object):
  """Reads and stores information in /proc/pid/status.

  Each "Name: value" line is collected into a dictionary; a fixed set of
  entries is kept as raw strings and converted only when a property is read.
  """
  _PATTERN = re.compile(r'^(?P<NAME>[A-Za-z0-9_]+):\s+(?P<VALUE>.*)')

  def __init__(self, raw, dct):
    """Keeps |raw| and picks the known entries out of |dct|.

    Entries missing from |dct| are stored as None.
    """
    lookup = dct.get
    self._raw = raw
    self._pid = lookup('Pid')
    self._name = lookup('Name')
    self._vm_peak = lookup('VmPeak')
    self._vm_size = lookup('VmSize')
    self._vm_lck = lookup('VmLck')
    self._vm_pin = lookup('VmPin')
    self._vm_hwm = lookup('VmHWM')
    self._vm_rss = lookup('VmRSS')
    self._vm_data = lookup('VmData')
    self._vm_stack = lookup('VmStk')
    self._vm_exe = lookup('VmExe')
    self._vm_lib = lookup('VmLib')
    self._vm_pte = lookup('VmPTE')
    self._vm_swap = lookup('VmSwap')

  @staticmethod
  def load_file(status_f):
    """Parses an already opened status file object.

    Raises:
      SyntaxError: if any line is not of the form "Name: value".
    """
    lines = status_f.readlines()
    entries = {}
    for text in lines:
      found = ProcStatus._PATTERN.match(text)
      if found is None:
        raise SyntaxError('Unknown /proc/pid/status format.')
      entries[found.group('NAME')] = found.group('VALUE')
    return ProcStatus(lines, entries)

  @staticmethod
  def load(pid):
    """Loads /proc/<pid>/status; errors from open() propagate to the caller."""
    status_path = os.path.join('/proc', str(pid), 'status')
    with open(status_path, 'r') as status_f:
      return ProcStatus.load_file(status_f)

  @property
  def raw(self):
    """The list of lines exactly as read from the file."""
    return self._raw

  @property
  def pid(self):
    """The 'Pid' entry as an int."""
    return int(self._pid)

  @property
  def vm_peak(self):
    """Returns a high-water (peak) virtual memory size in kilo-bytes."""
    text = self._vm_peak
    if not text.endswith('kB'):
      raise ValueError('VmPeak is not in kB.')
    return int(text.split()[0])

  @property
  def vm_size(self):
    """Returns a virtual memory size in kilo-bytes."""
    text = self._vm_size
    if not text.endswith('kB'):
      raise ValueError('VmSize is not in kB.')
    return int(text.split()[0])

  @property
  def vm_hwm(self):
    """Returns a high-water (peak) resident set size (RSS) in kilo-bytes."""
    text = self._vm_hwm
    if not text.endswith('kB'):
      raise ValueError('VmHWM is not in kB.')
    return int(text.split()[0])

  @property
  def vm_rss(self):
    """Returns a resident set size (RSS) in kilo-bytes."""
    text = self._vm_rss
    if not text.endswith('kB'):
      raise ValueError('VmRSS is not in kB.')
    return int(text.split()[0])
270
271
class ProcMapsEntry(object):
  """A class representing one line in /proc/pid/maps.

  All constructor arguments are stored unchanged as public attributes of the
  same name.
  """

  # Attribute names, in the order as_dict() reports them.
  _FIELDS = ('begin', 'end', 'readable', 'writable', 'executable', 'private',
             'offset', 'major', 'minor', 'inode', 'name')

  def __init__(
      self, begin, end, readable, writable, executable, private, offset,
      major, minor, inode, name):
    # Address range of the mapping.
    self.begin, self.end = begin, end
    # The four permission columns.
    self.readable, self.writable = readable, writable
    self.executable, self.private = executable, private
    # Offset, device numbers, inode and name columns.
    self.offset = offset
    self.major, self.minor = major, minor
    self.inode = inode
    self.name = name

  def as_dict(self):
    """Returns a new dict mapping each attribute name to its current value."""
    return {field: getattr(self, field) for field in self._FIELDS}
304
305
class ProcMaps(object):
  """Reads and stores information in /proc/pid/maps.

  Entries are kept in a dictionary keyed by their begin address, together with
  a list of those addresses that is sorted lazily before iteration.
  """

  MAPS_PATTERN = re.compile(
      r'^([a-f0-9]+)-([a-f0-9]+)\s+(.)(.)(.)(.)\s+([a-f0-9]+)\s+(\S+):(\S+)\s+'
      r'(\d+)\s*(.*)$', re.IGNORECASE)

  EXECUTABLE_PATTERN = re.compile(
      r'\S+\.(so|dll|dylib|bundle)((\.\d+)+\w*(\.\d+){0,3})?')

  def __init__(self):
    self._sorted_indexes = []
    self._dictionary = {}
    self._sorted = True

  def _ensure_sorted(self):
    """Sorts the begin-address list if an out-of-order append happened."""
    if not self._sorted:
      self._sorted_indexes.sort()
      self._sorted = True

  def iter(self, condition):
    """Yields entries in ascending begin-address order.

    Args:
      condition: A callable taking an entry and returning a truth value, or a
          false value (e.g. None) to yield every entry.
    """
    self._ensure_sorted()
    for index in self._sorted_indexes:
      entry = self._dictionary[index]
      if not condition or condition(entry):
        yield entry

  def __iter__(self):
    """Iterates over every entry in ascending begin-address order."""
    return self.iter(None)

  @staticmethod
  def load_file(maps_f):
    """Builds a ProcMaps from an iterable of maps lines.

    Lines that do not match MAPS_PATTERN are skipped.
    """
    table = ProcMaps()
    for line in maps_f:
      table.append_line(line)
    return table

  @staticmethod
  def load(pid):
    """Loads /proc/<pid>/maps; returns None if it cannot be opened or read."""
    try:
      with open(os.path.join('/proc', str(pid), 'maps'), 'r') as maps_f:
        return ProcMaps.load_file(maps_f)
    except (IOError, OSError):
      return None

  def append_line(self, line):
    """Parses |line| and stores the resulting entry.

    Returns:
      The stored ProcMapsEntry, or None if |line| is not a maps line.
    """
    entry = self.parse_line(line)
    if entry:
      self._append_entry(entry)
    return entry

  @staticmethod
  def parse_line(line):
    """Converts one maps line into a ProcMapsEntry.

    Addresses and the offset are parsed as hexadecimal, the inode as decimal;
    the remaining columns are kept as strings.

    Returns:
      A ProcMapsEntry, or None if |line| does not match MAPS_PATTERN.
    """
    matched = ProcMaps.MAPS_PATTERN.match(line)
    if not matched:
      return None
    return ProcMapsEntry(  # pylint: disable=W0212
        int(matched.group(1), 16),  # begin
        int(matched.group(2), 16),  # end
        matched.group(3),           # readable
        matched.group(4),           # writable
        matched.group(5),           # executable
        matched.group(6),           # private
        int(matched.group(7), 16),  # offset
        matched.group(8),           # major
        matched.group(9),           # minor
        int(matched.group(10), 10), # inode
        matched.group(11)           # name
        )

  @staticmethod
  def constants(entry):
    """Condition for iter(): neither writable nor executable."""
    return entry.writable == '-' and entry.executable == '-'

  @staticmethod
  def executable(entry):
    """Condition for iter(): executable."""
    return entry.executable == 'x'

  @staticmethod
  def executable_and_constants(entry):
    """Condition for iter(): matches constants() or executable()."""
    return ((entry.writable == '-' and entry.executable == '-') or
            entry.executable == 'x')

  def _append_entry(self, entry):
    """Stores |entry| under its begin address.

    An entry whose begin address is already stored replaces the old one. The
    address is not added to the index list a second time, otherwise iteration
    would yield the same mapping twice.
    """
    if entry.begin in self._dictionary:
      self._dictionary[entry.begin] = entry
      return
    if self._sorted_indexes and self._sorted_indexes[-1] > entry.begin:
      self._sorted = False
    self._sorted_indexes.append(entry.begin)
    self._dictionary[entry.begin] = entry
395
396
class ProcSmaps(object):
  """Reads and stores information in /proc/pid/smaps.

  An smaps file is a sequence of maps-style header lines, each followed by
  "Name: value" property lines for that mapping. Per-mapping values are kept
  in VMA objects; numeric values are also summed per name over all mappings.
  """
  _SMAPS_PATTERN = re.compile(r'^(?P<NAME>[A-Za-z0-9_]+):\s+(?P<VALUE>.*)')

  class VMA(object):
    """Holds the property values of a single mapping."""

    # smaps property name -> attribute that stores its raw value.
    _ATTRIBUTES = {
        'Size': '_size',
        'Rss': '_rss',
        'Pss': '_pss',
        'Referenced': '_referenced',
        'Private_Clean': '_private_clean',
        'Shared_Clean': '_shared_clean',
        'KernelPageSize': '_kernel_page_size',
        'MMUPageSize': '_mmu_page_size',
    }

    def __init__(self):
      self._size = 0
      self._rss = 0
      self._pss = 0

    def append(self, name, value):
      """Records |value| for the property |name|; unknown names are ignored."""
      attribute = self._ATTRIBUTES.get(name)
      if attribute is not None:
        setattr(self, attribute, value)

    @staticmethod
    def _to_int(value):
      """Converts a stored value ('12 kB', '12' or the int default) to int."""
      # The defaults set in __init__ are ints, so normalize to text first
      # instead of calling .endswith() on whatever is stored.
      text = str(value)
      if text.endswith('kB'):
        return int(text.split()[0])
      return int(text)

    @property
    def size(self):
      """The 'Size' value as an int (0 if it was never appended)."""
      return self._to_int(self._size)

    @property
    def rss(self):
      """The 'Rss' value as an int (0 if it was never appended)."""
      return self._to_int(self._rss)

    @property
    def pss(self):
      """The 'Pss' value as an int (0 if it was never appended)."""
      return self._to_int(self._pss)

  def __init__(self, raw, total_dct, maps, vma_internals):
    """Stores the parsed data.

    Args:
      raw: The list of lines read from the smaps file.
      total_dct: A mapping from property name to its sum over all mappings. It
          is indexed directly, so it must provide every name read below (load
          passes a defaultdict(int)).
      maps: The ProcMaps built from the header lines.
      vma_internals: An ordered mapping from maps entry to ProcSmaps.VMA.
    """
    self._raw = raw
    self._size = total_dct['Size']
    self._rss = total_dct['Rss']
    self._pss = total_dct['Pss']
    self._referenced = total_dct['Referenced']
    self._shared_clean = total_dct['Shared_Clean']
    self._private_clean = total_dct['Private_Clean']
    self._kernel_page_size = total_dct['KernelPageSize']
    self._mmu_page_size = total_dct['MMUPageSize']
    self._maps = maps
    self._vma_internals = vma_internals

  @staticmethod
  def load_file(smaps_f):
    """Parses an already opened smaps file object.

    Property lines whose value does not start with an integer (for example
    'VmFlags: rd ex mr') are still passed to the VMA but are left out of the
    totals. Property lines that appear before any header line are ignored.
    """
    raw = smaps_f.readlines()

    vma = None
    vma_internals = collections.OrderedDict()
    total_dct = collections.defaultdict(int)
    maps = ProcMaps()
    for line in raw:
      if ProcMaps.MAPS_PATTERN.match(line):
        vma = maps.append_line(line.strip())
        vma_internals[vma] = ProcSmaps.VMA()
        continue
      smaps_match = ProcSmaps._SMAPS_PATTERN.match(line)
      if not smaps_match or vma is None:
        continue
      name = smaps_match.group('NAME')
      value = smaps_match.group('VALUE')
      vma_internals[vma].append(name, value)
      try:
        total_dct[name] += int(value.split()[0])
      except (IndexError, ValueError):
        # Empty or non-numeric value: nothing to add to the totals.
        pass

    return ProcSmaps(raw, total_dct, maps, vma_internals)

  @staticmethod
  def load(pid):
    """Loads /proc/<pid>/smaps; errors from open() propagate to the caller."""
    with open(os.path.join('/proc', str(pid), 'smaps'), 'r') as smaps_f:
      return ProcSmaps.load_file(smaps_f)

  @property
  def size(self):
    """Sum of 'Size' over all mappings."""
    return self._size

  @property
  def rss(self):
    """Sum of 'Rss' over all mappings."""
    return self._rss

  @property
  def referenced(self):
    """Sum of 'Referenced' over all mappings."""
    return self._referenced

  @property
  def pss(self):
    """Sum of 'Pss' over all mappings."""
    return self._pss

  @property
  def private_clean(self):
    """Sum of 'Private_Clean' over all mappings."""
    return self._private_clean

  @property
  def shared_clean(self):
    """Sum of 'Shared_Clean' over all mappings."""
    return self._shared_clean

  @property
  def kernel_page_size(self):
    """Sum of 'KernelPageSize' over all mappings."""
    return self._kernel_page_size

  @property
  def mmu_page_size(self):
    """Sum of 'MMUPageSize' over all mappings."""
    return self._mmu_page_size

  @property
  def vma_internals(self):
    """Ordered mapping from maps entry to its ProcSmaps.VMA."""
    return self._vma_internals
510
511
class ProcPagemap(object):
  """Reads and stores partial information in /proc/pid/pagemap.

  It picks up virtual addresses to read based on ProcMaps (/proc/pid/maps).
  See https://www.kernel.org/doc/Documentation/vm/pagemap.txt for details.
  """
  _BYTES_PER_PAGEMAP_VALUE = 8
  _BYTES_PER_OS_PAGE = 4096
  # Floor division keeps this (and every file offset derived from it) an int
  # on Python 3 as well; os.lseek() and os.read() do not accept floats.
  _VIRTUAL_TO_PAGEMAP_OFFSET = _BYTES_PER_OS_PAGE // _BYTES_PER_PAGEMAP_VALUE

  _MASK_PRESENT = 1 << 63
  _MASK_SWAPPED = 1 << 62
  _MASK_FILEPAGE_OR_SHAREDANON = 1 << 61
  _MASK_SOFTDIRTY = 1 << 55
  _MASK_PFN = (1 << 55) - 1

  class VMA(object):
    """Byte counters and page-frame usage of a single mapping."""

    def __init__(self, vsize, present, swapped, pageframes):
      self._vsize = vsize
      self._present = present
      self._swapped = swapped
      self._pageframes = pageframes

    @property
    def vsize(self):
      """Bytes covered by the pagemap values read for this mapping."""
      return int(self._vsize)

    @property
    def present(self):
      """Bytes of present pages, counting each page frame once."""
      return int(self._present)

    @property
    def swapped(self):
      """Bytes of pages flagged as swapped."""
      return int(self._swapped)

    @property
    def pageframes(self):
      """Mapping from page frame number to its use count in this mapping."""
      return self._pageframes

  def __init__(self, vsize, present, swapped, vma_internals, in_process_dup):
    self._vsize = vsize
    self._present = present
    self._swapped = swapped
    self._vma_internals = vma_internals
    self._in_process_dup = in_process_dup

  @staticmethod
  def load(pid, maps):
    """Reads the pagemap values for every mapping in |maps|.

    Args:
      pid: The process ID whose /proc/<pid>/pagemap is read.
      maps: An iterable of entries with integer |begin| and |end| attributes
          (e.g. a ProcMaps). None is accepted and yields None.

    Returns:
      A ProcPagemap, or None when |maps| is None or when opening, seeking,
      reading or closing the pagemap file fails.
    """
    if maps is None:
      return None
    try:
      pagemap_fd = os.open(
          os.path.join('/proc', str(pid), 'pagemap'), os.O_RDONLY)
    except (IOError, OSError):
      return None

    pagemap = None
    try:
      pagemap = ProcPagemap._load_from_fd(pagemap_fd, pid, maps)
    finally:
      # Always release the descriptor, including on early exits and on
      # unexpected exceptions from the parsing code.
      try:
        os.close(pagemap_fd)
      except OSError:
        pagemap = None
    return pagemap

  @staticmethod
  def _load_from_fd(pagemap_fd, pid, maps):
    """Does the actual reading for load() on an open file descriptor."""
    page_size = ProcPagemap._BYTES_PER_OS_PAGE
    value_size = ProcPagemap._BYTES_PER_PAGEMAP_VALUE
    pfn_mask = ProcPagemap._MASK_PFN

    total_present = 0
    total_swapped = 0
    total_vsize = 0
    in_process_dup = 0
    vma_internals = collections.OrderedDict()
    process_pageframe_set = set()

    for vma in maps:
      present = 0
      swapped = 0
      vsize = 0
      pageframes = collections.defaultdict(int)
      begin_offset = ProcPagemap._offset(vma.begin)
      chunk_size = ProcPagemap._offset(vma.end) - begin_offset
      try:
        os.lseek(pagemap_fd, begin_offset, os.SEEK_SET)
        buf = os.read(pagemap_fd, chunk_size)
      except (IOError, OSError):
        return None
      if len(buf) < chunk_size:
        _LOGGER.warning(
            'Failed to read pagemap at 0x%x in %d.', vma.begin, pid)
      # Only decode whole 64-bit values; a short read may end mid-value.
      value_count = len(buf) // value_size
      pagemap_values = struct.unpack(
          '=%dQ' % value_count, buf[:value_count * value_size])
      for pagemap_value in pagemap_values:
        vsize += page_size
        if pagemap_value & ProcPagemap._MASK_PRESENT:
          pageframe = pagemap_value & pfn_mask
          # A frame already seen anywhere in this process is a duplicate.
          if pageframe in process_pageframe_set:
            in_process_dup += page_size
          else:
            process_pageframe_set.add(pageframe)
          # Within one mapping each distinct frame counts as present once.
          if pageframe not in pageframes:
            present += page_size
          pageframes[pageframe] += 1
        if pagemap_value & ProcPagemap._MASK_SWAPPED:
          swapped += page_size
      vma_internals[vma] = ProcPagemap.VMA(vsize, present, swapped, pageframes)
      total_present += present
      total_swapped += swapped
      total_vsize += vsize

    return ProcPagemap(total_vsize, total_present, total_swapped,
                       vma_internals, in_process_dup)

  @staticmethod
  def _offset(virtual_address):
    """Returns the pagemap file offset of the value for |virtual_address|."""
    return virtual_address // ProcPagemap._VIRTUAL_TO_PAGEMAP_OFFSET

  @property
  def vsize(self):
    """Sum of the per-mapping vsize values, in bytes."""
    return int(self._vsize)

  @property
  def present(self):
    """Sum of the per-mapping present values, in bytes."""
    return int(self._present)

  @property
  def swapped(self):
    """Sum of the per-mapping swapped values, in bytes."""
    return int(self._swapped)

  @property
  def vma_internals(self):
    """Ordered mapping from maps entry to its ProcPagemap.VMA."""
    return self._vma_internals
631
632
class _ProcessMemory(object):
  """Aggregates process memory information from /proc for manual testing.

  Nothing is read at construction time; call read_all() or the individual
  read_*() methods, then inspect the matching properties.
  """

  def __init__(self, pid):
    self._pid = pid
    self._stat = None
    self._statm = None
    self._status = None
    self._smaps = []
    self._maps = None
    self._pagemap = None

  def _read(self, proc_file):
    """Returns the lines of /proc/<pid>/<proc_file>."""
    path = os.path.join('/proc', str(self._pid), proc_file)
    with open(path, 'r') as proc_f:
      return proc_f.readlines()

  def read_all(self):
    """Reads every supported file; pagemap is read last, using the maps."""
    readers = (self.read_stat, self.read_statm, self.read_status,
               self.read_smaps, self.read_maps)
    for reader in readers:
      reader()
    self.read_pagemap(self._maps)

  def read_maps(self):
    """Loads /proc/<pid>/maps via ProcMaps."""
    self._maps = ProcMaps.load(self._pid)

  def read_pagemap(self, maps):
    """Loads /proc/<pid>/pagemap via ProcPagemap for the given |maps|."""
    self._pagemap = ProcPagemap.load(self._pid, maps)

  def read_smaps(self):
    """Loads /proc/<pid>/smaps via ProcSmaps."""
    self._smaps = ProcSmaps.load(self._pid)

  def read_stat(self):
    """Loads /proc/<pid>/stat via ProcStat."""
    self._stat = ProcStat.load(self._pid)

  def read_statm(self):
    """Loads /proc/<pid>/statm via ProcStatm."""
    self._statm = ProcStatm.load(self._pid)

  def read_status(self):
    """Loads /proc/<pid>/status via ProcStatus."""
    self._status = ProcStatus.load(self._pid)

  @property
  def pid(self):
    """The process ID given at construction."""
    return self._pid

  @property
  def maps(self):
    """Result of the last read_maps(), or None."""
    return self._maps

  @property
  def pagemap(self):
    """Result of the last read_pagemap(), or None."""
    return self._pagemap

  @property
  def smaps(self):
    """Result of the last read_smaps(), or an empty list before any read."""
    return self._smaps

  @property
  def stat(self):
    """Result of the last read_stat(), or None."""
    return self._stat

  @property
  def statm(self):
    """Result of the last read_statm(), or None."""
    return self._statm

  @property
  def status(self):
    """Result of the last read_status(), or None."""
    return self._status
703
704
def main(argv):
  """The main function for manual testing.

  For every process ID given on the command line, reads all supported /proc
  files and prints the virtual size and then the resident size as reported by
  stat, statm, status, smaps and pagemap, each converted to bytes.

  Args:
    argv: The command line; argv[1:] are the process IDs to inspect.

  Returns:
    0.

  Raises:
    SyntaxError: if an argument is not an integer.
  """
  _LOGGER.setLevel(logging.WARNING)
  handler = logging.StreamHandler()
  handler.setLevel(logging.WARNING)
  handler.setFormatter(logging.Formatter(
      '%(asctime)s:%(name)s:%(levelname)s:%(message)s'))
  _LOGGER.addHandler(handler)

  pids = []
  for arg in argv[1:]:
    try:
      pid = int(arg)
    except ValueError:
      raise SyntaxError("%s is not an integer." % arg)
    else:
      pids.append(pid)

  procs = {}
  for pid in pids:
    proc = procs[pid] = _ProcessMemory(pid)
    proc.read_all()

    # print is called with a single parenthesized argument so that the same
    # source prints identically under Python 2 and Python 3.
    print('=== PID: %d ===' % pid)

    # Virtual size. stat reports bytes, statm pages, status and smaps kB.
    print('   stat: %d' % proc.stat.vsize)
    print('  statm: %d' % (proc.statm.size * 4096))
    print(' status: %d (Peak:%d)' % (proc.status.vm_size * 1024,
                                     proc.status.vm_peak * 1024))
    print('  smaps: %d' % (proc.smaps.size * 1024))
    print('pagemap: %d' % proc.pagemap.vsize)
    # Resident size. stat and statm report pages, status and smaps kB.
    print('   stat: %d' % (proc.stat.rss * 4096))
    print('  statm: %d' % (proc.statm.resident * 4096))
    print(' status: %d (Peak:%d)' % (proc.status.vm_rss * 1024,
                                     proc.status.vm_hwm * 1024))
    print('  smaps: %d' % (proc.smaps.rss * 1024))
    print('pagemap: %d' % proc.pagemap.present)

  return 0
744
745
# When run as a script, execute the manual test driver; main()'s return value
# becomes the process exit status.
if __name__ == '__main__':
  sys.exit(main(sys.argv))
748