1# Copyright 2012 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Start and stop Web Page Replay.
6
7Of the public module names, the following one is key:
8  ReplayServer: a class to start/stop Web Page Replay.
9"""
10
11import logging
12import os
13import re
14import signal
15import subprocess
16import sys
17import urllib
18
19from telemetry.core import util
20
# Root of the Chromium source checkout: four directory levels above the
# directory that contains this file.
_CHROME_SRC_DIR = os.path.abspath(
    os.path.join(os.path.dirname(__file__), *([os.pardir] * 4)))

# Default directory holding the Web Page Replay sources (replay.py).
REPLAY_DIR = os.path.join(_CHROME_SRC_DIR, 'third_party', 'webpagereplay')

# Default file that receives the output of the Web Page Replay process.
LOG_PATH = os.path.join(_CHROME_SRC_DIR, 'webpagereplay_logs', 'logs.txt')
27
28
def GetChromeFlags(replay_host, http_port, https_port):
  """Build the Chrome options that make it work with Web Page Replay.

  Args:
    replay_host: the host that every hostname except localhost is mapped to.
    http_port: the port Chrome is told to use for HTTP traffic.
    https_port: the port Chrome is told to use for HTTPS traffic.

  Returns:
    A list of Chrome command-line flag strings.
  """
  assert replay_host and http_port and https_port, 'All arguments required'
  resolver_rules_flag = (
      '--host-resolver-rules=MAP * %s,EXCLUDE localhost' % replay_host)
  http_port_flag = '--testing-fixed-http-port=%s' % http_port
  https_port_flag = '--testing-fixed-https-port=%s' % https_port
  return [
      resolver_rules_flag,
      http_port_flag,
      https_port_flag,
      '--ignore-certificate-errors',
      ]
38
39
def ResetInterruptHandler():
  """Restore the default SIGINT handler in the calling process.

  Signal masks on Linux are inherited from parent processes.  If anything
  invoking us accidentally masks SIGINT (e.g. by putting a process in the
  background from a shell script), sending a SIGINT to the child will fail
  to terminate it.  Running this handler reset before execing should fix
  that problem.
  """
  default_action = signal.SIG_DFL
  signal.signal(signal.SIGINT, default_action)
47
48
class ReplayError(Exception):
  """Base class for every exception defined by this module."""
52
53
class ReplayNotFoundError(ReplayError):
  """Reports a path that does not exist.

  The exception's args are the (label, path) pair given to the constructor;
  label describes what the path is used for.
  """

  def __init__(self, label, path):
    # Passing both values up stores them as self.args == (label, path).
    super(ReplayNotFoundError, self).__init__(label, path)

  def __str__(self):
    what, where = self.args
    return 'Path does not exist for %s: %s' % (what, where)
62
63
class ReplayNotStartedError(ReplayError):
  """Signals that Web Page Replay could not be started."""
66
67
class ReplayServer(object):
  """Start and Stop Web Page Replay.

  Web Page Replay is a proxy that can record and "replay" web pages with
  simulated network characteristics -- without having to edit the pages
  by hand. With WPR, tests can use "real" web content, and catch
  performance issues that may result from introducing network delays and
  bandwidth throttling.

  Example:
     with ReplayServer(archive_path, replay_host, dns_port, http_port,
                       https_port):
       self.NavigateToURL(start_url)
       self.WaitUntil(...)

  Environment Variables (for development):
    WPR_ARCHIVE_PATH: path to alternate archive file (e.g. '/tmp/foo.wpr').
    WPR_RECORD: if set, puts Web Page Replay in record mode instead of replay.
    WPR_REPLAY_DIR: path to alternate Web Page Replay source.
  """

  # Matches the log lines in which Web Page Replay announces a started server
  # together with the host and port it is bound to.
  _PORT_RE = re.compile(
      r'.*?(?P<protocol>[A-Z]+) server started on (?P<host>.*):(?P<port>\d+)')

  def __init__(self, archive_path, replay_host, dns_port, http_port, https_port,
               replay_options=None, replay_dir=None,
               log_path=None):
    """Initialize ReplayServer.

    Args:
      archive_path: a path to a specific WPR archive (required).
      replay_host: the hostname to serve traffic.
      dns_port: an integer port on which to serve DNS traffic. May be zero
          to let the OS choose an available port. If None, no '--dns_port'
          option is passed to replay.py.
      http_port: an integer port on which to serve HTTP traffic. May be zero
          to let the OS choose an available port.
      https_port: an integer port on which to serve HTTPS traffic. May be zero
          to let the OS choose an available port.
      replay_options: an iterable of options strings to forward to replay.py.
      replay_dir: directory that has replay.py and related modules.
      log_path: a path to a log file.

    Raises:
      ReplayNotFoundError: if the archive (or, in record mode, the directory
          that will contain it) or replay.py does not exist.
    """
    self.archive_path = os.environ.get('WPR_ARCHIVE_PATH', archive_path)
    self.replay_options = list(replay_options or ())
    self.replay_dir = os.environ.get('WPR_REPLAY_DIR', replay_dir or REPLAY_DIR)
    self.log_path = log_path or LOG_PATH
    self.dns_port = dns_port
    self.http_port = http_port
    self.https_port = https_port
    self._replay_host = replay_host

    if 'WPR_RECORD' in os.environ and '--record' not in self.replay_options:
      self.replay_options.append('--record')
    self.is_record_mode = '--record' in self.replay_options
    self._AddDefaultReplayOptions()

    self.replay_py = os.path.join(self.replay_dir, 'replay.py')

    if self.is_record_mode:
      # Recording creates the archive, so only its directory must exist. A
      # bare file name has an empty dirname, which means the current directory.
      archive_dir = os.path.dirname(self.archive_path) or os.curdir
      self._CheckPath('archive directory', archive_dir)
    else:
      self._CheckPath('archive file', self.archive_path)
    self._CheckPath('replay script', self.replay_py)

    self.replay_process = None

  def _AddDefaultReplayOptions(self):
    """Set WPR command-line options. Can be overridden if needed."""
    self.replay_options = [
        '--host', str(self._replay_host),
        '--port', str(self.http_port),
        '--ssl_port', str(self.https_port),
        '--use_closest_match',
        '--no-dns_forwarding',
        '--log_level', 'warning'
        ] + self.replay_options
    if self.dns_port is not None:
      self.replay_options.extend(['--dns_port', str(self.dns_port)])

  def _IsDnsForwardingEnabled(self):
    """Returns True unless '--no-dns_forwarding' is among the WPR options."""
    return '--no-dns_forwarding' not in self.replay_options

  def _CheckPath(self, label, path):
    """Raises ReplayNotFoundError if |path| does not exist."""
    if not os.path.exists(path):
      raise ReplayNotFoundError(label, path)

  def _OpenLogFile(self):
    """Opens the log file for writing, creating its directory if needed.

    Returns:
      A file object for self.log_path, opened in 'w' mode.
    """
    log_dir = os.path.dirname(self.log_path)
    # An empty dirname means the log lives in the current directory;
    # os.makedirs('') would raise.
    if log_dir and not os.path.exists(log_dir):
      os.makedirs(log_dir)
    return open(self.log_path, 'w')

  def _ReadPortsFromLog(self):
    """Fills in ports that are still unset from the WPR log output."""
    with open(self.log_path) as f:
      for line in f:
        m = self._PORT_RE.match(line.strip())
        if not m:
          continue
        protocol = m.group('protocol')
        port = int(m.group('port'))
        if not self.http_port and protocol == 'HTTP':
          self.http_port = port
        elif not self.https_port and protocol == 'HTTPS':
          self.https_port = port
        elif not self.dns_port and protocol == 'DNS':
          self.dns_port = port

  def _GetStatusCode(self, url):
    """Fetches |url| without proxies and returns the HTTP status code."""
    # The empty dict as third argument disables any configured proxy.
    response = urllib.urlopen(url, None, {})
    try:
      return response.getcode()
    finally:
      response.close()

  def IsStarted(self):
    """Checks to see if the server is up and running.

    As a side effect, ports that are still unset (zero or None) are filled in
    from the WPR log once WPR has reported them there.

    Returns:
      True if the replay process is alive and both the HTTP and the HTTPS
      endpoints answer the status probe with 200; False otherwise, including
      when the server was never started.
    """
    if not self.replay_process or self.replay_process.poll() is not None:
      return False

    # Read the ports from the WPR log.
    if not (self.http_port and self.https_port and self.dns_port):
      self._ReadPortsFromLog()

    if not (self.http_port and self.https_port):
      return False

    # Try to connect to the WPR ports.
    up_url = '%s://%s:%s/web-page-replay-generate-200'
    http_up_url = up_url % ('http', self._replay_host, self.http_port)
    https_up_url = up_url % ('https', self._replay_host, self.https_port)
    try:
      return (200 == self._GetStatusCode(http_up_url) and
              200 == self._GetStatusCode(https_up_url))
    except IOError:
      return False

  def StartServer(self):
    """Start Web Page Replay and verify that it started.

    The child's stdout and stderr are redirected to self.log_path.

    Raises:
      ReplayNotStartedError: if Replay start-up fails.
    """
    cmd_line = [sys.executable, self.replay_py]
    cmd_line.extend(self.replay_options)
    cmd_line.append(self.archive_path)

    logging.debug('Starting Web-Page-Replay: %s', cmd_line)
    with self._OpenLogFile() as log_fh:
      kwargs = {'stdout': log_fh, 'stderr': subprocess.STDOUT}
      if sys.platform.startswith('linux') or sys.platform == 'darwin':
        kwargs['preexec_fn'] = ResetInterruptHandler
      self.replay_process = subprocess.Popen(cmd_line, **kwargs)

    try:
      util.WaitFor(self.IsStarted, 30)
    except util.TimeoutException:
      with open(self.log_path) as f:
        log = f.read()
      # The child may still be alive (e.g. running but unreachable). When
      # used as a context manager __exit__ is never called after a failed
      # __enter__, so stop the process here instead of leaking it.
      self.StopServer()
      raise ReplayNotStartedError(
          'Web Page Replay failed to start. Log output:\n%s' % log)

  def StopServer(self):
    """Stop Web Page Replay.

    Asks the server to exit over HTTP first, then falls back to SIGINT and
    finally to terminate(). Does nothing if the server was never started.
    """
    if not self.replay_process:
      return

    logging.debug('Trying to stop Web-Page-Replay gracefully')
    try:
      urllib.urlopen('http://%s:%s/web-page-replay-command-exit' % (
          self._replay_host, self.http_port), None, {}).close()
    except IOError:
      # IOError is possible because the server might exit without response.
      pass

    try:
      util.WaitFor(lambda: self.replay_process.poll() is not None, 10)
    except util.TimeoutException:
      try:
        # Use a SIGINT so that it can do graceful cleanup.
        self.replay_process.send_signal(signal.SIGINT)
      except Exception:  # pylint: disable=W0703
        # On Windows, we are left with no other option than terminate().
        if self._IsDnsForwardingEnabled():
          logging.warning('DNS configuration might not be restored!')
        try:
          self.replay_process.terminate()
        except Exception:  # pylint: disable=W0703
          # Best effort: the process may already be gone.
          pass
      self.replay_process.wait()

  def __enter__(self):
    """Add support for with-statement."""
    self.StartServer()
    return self

  def __exit__(self, unused_exc_type, unused_exc_val, unused_exc_tb):
    """Add support for with-statement."""
    self.StopServer()
250