1558790d6acca3451cf3a6b497803a5f07d0bec58Ben Murdoch#!/usr/bin/env python
2558790d6acca3451cf3a6b497803a5f07d0bec58Ben Murdoch
3558790d6acca3451cf3a6b497803a5f07d0bec58Ben Murdoch# Copyright 2013 The Chromium Authors. All rights reserved.
4558790d6acca3451cf3a6b497803a5f07d0bec58Ben Murdoch# Use of this source code is governed by a BSD-style license that can be
5558790d6acca3451cf3a6b497803a5f07d0bec58Ben Murdoch# found in the LICENSE file.
6558790d6acca3451cf3a6b497803a5f07d0bec58Ben Murdoch
7558790d6acca3451cf3a6b497803a5f07d0bec58Ben Murdochimport csv
8558790d6acca3451cf3a6b497803a5f07d0bec58Ben Murdochimport datetime
9558790d6acca3451cf3a6b497803a5f07d0bec58Ben Murdochimport json
10558790d6acca3451cf3a6b497803a5f07d0bec58Ben Murdochimport os
11558790d6acca3451cf3a6b497803a5f07d0bec58Ben Murdochimport shlex
12558790d6acca3451cf3a6b497803a5f07d0bec58Ben Murdochimport subprocess
13558790d6acca3451cf3a6b497803a5f07d0bec58Ben Murdochimport sys
14558790d6acca3451cf3a6b497803a5f07d0bec58Ben Murdochfrom optparse import OptionParser
15558790d6acca3451cf3a6b497803a5f07d0bec58Ben Murdoch
"""Start a client to fetch web pages using either wget or quic_client.

If --use_wget is set, wget is used; otherwise quic_client is used.

Usage: This invocation
  run_client.py --quic_binary_dir=../../../../out/Debug \
      --address=127.0.0.1 --port=5000 --infile=test_urls.json \
      --delay_file=delay.csv --packets_file=packets.csv
  fetches the pages listed in test_urls.json from a quic server running at
  127.0.0.1 on port 5000 using the quic binary
  ../../../../out/Debug/quic_client, and stores the delay in delay.csv and
  the max received packet number (for QUIC) in packets.csv.
  If --use_wget is present, it fetches the URLs using wget and ignores the
  flags --address, --port, --quic_binary_dir, etc.
"""
29558790d6acca3451cf3a6b497803a5f07d0bec58Ben Murdoch
def Timestamp(datetm=None):
  """Convert a naive UTC datetime into microseconds since the Unix epoch.

  Args:
    datetm: the date and time to convert, measured against
      1970-01-01 00:00:00. When omitted (or falsy), the current UTC time
      is used instead.
  Returns:
    The number of microseconds between the epoch and datetm, as an integer.
    The value is negative for datetimes that precede the epoch.
  """
  if not datetm:
    datetm = datetime.datetime.utcnow()
  epoch = datetime.datetime(1970, 1, 1)
  elapsed = datetm - epoch
  # A timedelta is normalized into days / seconds / microseconds; fold the
  # first two into whole seconds before scaling up to microseconds.
  whole_seconds = elapsed.days * 86400 + elapsed.seconds
  return whole_seconds * 1000000 + elapsed.microseconds
42558790d6acca3451cf3a6b497803a5f07d0bec58Ben Murdoch
class PageloadExperiment(object):
  """Downloads emulated web pages with wget or quic_client and logs results.

  A "page" is a list of URLs (the main document followed by its resources)
  that is fetched by a single client invocation. For every page the wall
  clock download time is recorded and, when quic_client is used, the largest
  packet number found in the client's stderr log.
  """

  # Substring identifying a received-packet line in quic_client's stderr log.
  _PACKET_LOG_MARKER = 'Client: Got packet'

  def __init__(self, use_wget, quic_binary_dir, quic_server_address,
               quic_server_port):
    """Initialize PageloadExperiment.

    Args:
      use_wget: Whether to use wget.
      quic_binary_dir: Directory for quic_binary.
      quic_server_address: IP address of quic server.
      quic_server_port: Port of the quic server.
    Raises:
      IOError: If quic_client is required (use_wget is false) but there is
        no such file in quic_binary_dir.
    """
    self.use_wget = use_wget
    self.quic_binary_dir = quic_binary_dir
    self.quic_server_address = quic_server_address
    self.quic_server_port = quic_server_port
    if not use_wget and not os.path.isfile(self._QuicClientPath()):
      raise IOError('There is no quic_client in the given dir: %s.'
                    % quic_binary_dir)

  def _QuicClientPath(self):
    """Return the path of the quic_client binary inside quic_binary_dir."""
    return os.path.join(self.quic_binary_dir, 'quic_client')

  @classmethod
  def ReadPages(cls, json_file):
    """Return the list of URLs from the json_file.

    The file must contain a JSON object with a 'pages' list. Every page has
    a 'url' entry and may have a 'resources' list whose items are appended
    to the directory part of the page url.

    Args:
      json_file: Path of the json file describing the pages.
    Returns:
      A list with one entry per page. Each entry is a list of URLs: the page
      url first, followed by the URL of every resource (if any).
    """
    with open(json_file) as f:
      data = json.load(f)
    page_list = []
    for page in data['pages']:
      url = page['url']
      # For url http://x.com/z/y.html, url_dir is http://x.com/z
      url_dir = url.rsplit('/', 1)[0]
      # A missing, null or empty 'resources' entry all mean "no resources".
      resources = page.get('resources') or []
      page_list.append(
          [url] + [url_dir + '/' + resource for resource in resources])
    return page_list

  def _BuildCommand(self, urls):
    """Return the argument list of the client command that fetches urls.

    The command is assembled directly as a list instead of being formatted
    into a string and split again, so that a quic_binary_dir containing
    spaces or quotes is passed through as a single argument.
    """
    if self.use_wget:
      cmd_in_list = ['wget', '-O', '-']
    else:
      cmd_in_list = [self._QuicClientPath(),
                     '--port=%s' % self.quic_server_port,
                     '--address=%s' % self.quic_server_address]
    cmd_in_list.extend(urls)
    return cmd_in_list

  @classmethod
  def _MaxPacketNumber(cls, log_output):
    """Return the largest packet number reported in quic_client's log.

    Args:
      log_output: The stderr output of quic_client, as bytes or text.
    Returns:
      The maximum packet number over all received-packet lines, or 0 if
      there is no such line.
    """
    # Popen pipes yield bytes on Python 3; decode so that the text search
    # below works on both Python 2 and Python 3.
    if isinstance(log_output, bytes):
      log_output = log_output.decode('utf-8', 'replace')
    max_packets = 0
    for line in log_output.splitlines():
      if cls._PACKET_LOG_MARKER not in line:
        continue
      elems = line.split()
      try:
        # The packet number is the fifth whitespace-separated field.
        packet_num = int(elems[4])
      except (IndexError, ValueError):
        # A truncated or unexpected log line must not abort the experiment.
        continue
      max_packets = max(max_packets, packet_num)
    return max_packets

  def DownloadOnePage(self, urls):
    """Download a page emulated by a list of urls.

    Args:
      urls: list of URLs to fetch.
    Returns:
      A tuple (page download time, max packet number). The time is in
      microseconds; the packet number is always 0 when wget is used.
    """
    cmd_in_list = self._BuildCommand(urls)
    start_time = Timestamp()
    ps_proc = subprocess.Popen(cmd_in_list,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    # communicate() drains both pipes, so the child cannot block on a full
    # pipe buffer; the downloaded content itself is discarded.
    _std_out, std_err = ps_proc.communicate()
    delta_time = Timestamp() - start_time
    if self.use_wget:
      return delta_time, 0
    return delta_time, self._MaxPacketNumber(std_err)

  @staticmethod
  def _WriteCsv(path, header, rows):
    """Write header followed by rows to path as comma-separated values."""
    with open(path, 'w') as f:
      csv_writer = csv.writer(f, delimiter=',')
      csv_writer.writerow(header)
      csv_writer.writerows(rows)

  def RunExperiment(self, infile, delay_file, packets_file=None, num_it=1):
    """Run the pageload experiment.

    Every page of infile is downloaded num_it times. Each output file gets
    a header row and one row per iteration; the first column holds the
    client name ('wget' or 'quic') in the header and the iteration index in
    the data rows.

    Args:
      infile: Input json file describing the page list.
      delay_file: Output file storing delay in csv format.
      packets_file: Output file storing max packet number in csv format.
        Nothing is written if it is empty or None.
      num_it: Number of iterations to run in this experiment.
    """
    page_list = self.ReadPages(infile)
    # Title each column with the last path component of the page's main URL
    # ([-1] keeps a URL without any '/' from raising IndexError).
    header = ['wget' if self.use_wget else 'quic']
    header.extend(urls[0].rsplit('/', 1)[-1] for urls in page_list)

    plt_list = []
    packets_list = []
    for i in range(num_it):
      plt_one_row = [str(i)]
      packets_one_row = [str(i)]
      for urls in page_list:
        time_micros, num_packets = self.DownloadOnePage(urls)
        time_secs = time_micros / 1000000.0
        plt_one_row.append('%6.3f' % time_secs)
        packets_one_row.append('%5d' % num_packets)
      plt_list.append(plt_one_row)
      packets_list.append(packets_one_row)

    self._WriteCsv(delay_file, header, plt_list)
    if packets_file:
      self._WriteCsv(packets_file, header, packets_list)
157558790d6acca3451cf3a6b497803a5f07d0bec58Ben Murdoch
158558790d6acca3451cf3a6b497803a5f07d0bec58Ben Murdoch
def main():
  """Parse the command line flags and run one pageload experiment.

  The flags select the client (wget or quic_client), the location of the
  quic binary and server, the input json file and the two csv output files.
  Positional arguments are ignored.
  """
  parser = OptionParser()
  parser.add_option('--use_wget', dest='use_wget', action='store_true',
                    default=False)
  # (flag, destination attribute, default value) of each string option, in
  # the order they are registered with the parser.
  string_options = (
      # Note that only debug version generates the log containing packets
      # information.
      ('--quic_binary_dir', 'quic_binary_dir', '../../../../out/Debug'),
      # For whatever server address you specify, you need to run the
      # quic_server on that machine and populate it with the cache containing
      # the URLs requested in the --infile.
      ('--address', 'quic_server_address', '127.0.0.1'),
      ('--port', 'quic_server_port', '5002'),
      ('--delay_file', 'delay_file', 'delay.csv'),
      ('--packets_file', 'packets_file', 'packets.csv'),
      ('--infile', 'infile', 'test_urls.json'),
  )
  for flag, dest, default in string_options:
    parser.add_option(flag, dest=dest, default=default)
  options, _unused_args = parser.parse_args()

  experiment = PageloadExperiment(options.use_wget,
                                  options.quic_binary_dir,
                                  options.quic_server_address,
                                  options.quic_server_port)
  experiment.RunExperiment(options.infile, options.delay_file,
                           options.packets_file)


if __name__ == '__main__':
  sys.exit(main())
187