1# Copyright 2012 Google Inc. All Rights Reserved.
2# Author: mrdmnd@ (Matt Redmond)
3"""A client to pull data from Bartlett.
4
5Inspired by //depot/google3/experimental/mobile_gwp/database/app_engine_pull.py
6
The server houses perf.data.gz, board, and chrome version for each upload.
This script first authenticates with a proper @google.com account, then
downloads each requested sample (if it's not already cached) and unzips its
perf.data.
10
  Authenticate(): Gets login info and returns an auth token.
  DownloadSamples(): Downloads and unzips samples.
  _BuildFilenameFromParams(): Builds the local filename for a sample.
  _DownloadSampleFromServer(): Downloads a local compressed copy of a sample.
  _UncompressSample(): Decompresses a sample, deleting the compressed version.
  _DeleteSampleFromServer(): Opens the /del page for a sample key.
  _GetServePage(): Pulls /serve page from the app engine server.
16"""
17import cookielib
18import getpass
19import gzip
20import optparse
21import os
22import urllib
23import urllib2
24
# Base URL of the App Engine server that stores the uploaded samples; main()
# passes it to Authenticate() and DownloadSamples().
# NOTE(review): with plain http the auth token travels unencrypted in the
# /_ah/login query string -- consider switching to https.
SERVER_NAME = 'http://chromeoswideprofiling.appspot.com'
# Sent as the 'source' field of the ClientLogin request in Authenticate().
APP_NAME = 'chromeoswideprofiling'
# Separates the fields of each entry on the /serve page and the parts of the
# local sample filenames.
DELIMITER = '~'
28
29
def Authenticate(server_name):
  """Prompts for Google credentials and exchanges them for an auth token.

  Also installs a process-wide urllib2 opener backed by a cookie jar, so that
  cookies received by later urllib2.urlopen() calls are kept for the session.

  TODO: Accept OAuth2 instead of password.

  Args:
    server_name: (string) URL that the app engine code is living on. The
                          ClientLogin request itself does not use it.
  Returns:
    authtoken: (string) The authorization token that can be used
                        to grab other pages, or None if the login request
                        failed or the response carried no Auth field.
  """
  # Grab username and password from user through stdin.
  username = raw_input('Email (must be @google.com account): ')
  password = getpass.getpass('Password: ')
  # Use a cookie to authenticate with GAE. install_opener() makes this opener
  # the default for every later urllib2.urlopen() call in the process.
  cookiejar = cookielib.LWPCookieJar()
  opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))
  urllib2.install_opener(opener)
  # Get an AuthToken from Google accounts service.
  auth_uri = 'https://www.google.com/accounts/ClientLogin'
  authreq_data = urllib.urlencode({'Email': username,
                                   'Passwd': password,
                                   'service': 'ah',
                                   'source': APP_NAME,
                                   'accountType': 'HOSTED_OR_GOOGLE'})
  auth_req = urllib2.Request(auth_uri, data=authreq_data)
  try:
    auth_resp = urllib2.urlopen(auth_req)
  except urllib2.URLError:
    print('Error logging in to Google accounts service.')
    return None
  try:
    body = auth_resp.read()
  finally:
    auth_resp.close()
  # The response is a list of KEY=value lines; we care about the Auth line.
  # Split on the first '=' only, because a value may itself contain '=', and
  # ignore lines that carry no '=' at all.
  auth_fields = dict(line.split('=', 1)
                     for line in body.splitlines() if '=' in line)
  authtoken = auth_fields.get('Auth')
  if not authtoken:
    print('Login response did not contain an auth token.')
    return None
  return authtoken
68
69
def DownloadSamples(server_name, authtoken, output_dir, start, stop):
  """Downloads the selected samples and writes them unzipped to output_dir.

  The /serve page is fetched, split into one entry per sample, and the entries
  in [start, stop) are downloaded and uncompressed one after another.

  Args:
    server_name: (string) URL that the app engine code is living on.
    authtoken:   (string) Authorization token.
    output_dir:  (string) Filepath to write output to.
    start:       (int)    Index to start downloading from, starting at top.
    stop:        (int)    Index to stop downloading, non-inclusive. -1 for end.
  Returns:
    None
  """
  server_name = server_name.rstrip('/')

  serve_page_string = _GetServePage(server_name, authtoken)
  if serve_page_string is None:
    print('Error getting /serve page.')
    return

  # Entries on the /serve page are separated by '</br>'. Drop blank pieces
  # (such as whatever follows the last separator) so that the indices below
  # count real samples only.
  sample_list = [entry for entry in serve_page_string.split('</br>')
                 if entry.strip()]
  # The indices may arrive as strings, e.g. straight from the command line.
  start = int(start)
  stop = int(stop)
  if stop == -1:
    # -1 is the documented "through the last sample" marker; a raw slice
    # would silently drop the final entry instead.
    stop = None
  sample_list_subset = sample_list[start:stop]

  print('Will download:')
  for sample in sample_list_subset:
    print(sample)
  for sample in sample_list_subset:
    sample_info = [s.strip() for s in sample.split(DELIMITER)]
    if len(sample_info) < 5:
      # Fields used below are key, time, (md5), board and version.
      print('Skipping malformed sample entry: %s' % sample)
      continue
    key = sample_info[0]
    # No space between date and time.
    timestamp = sample_info[1].replace(' ', '_')
    # sample_info[2] is the sample's md5, which is currently unused.
    board = sample_info[3]
    version = sample_info[4]

    # Put a compressed copy of the sample in the output directory, then
    # replace it with the uncompressed data.
    _DownloadSampleFromServer(server_name, authtoken, key, timestamp, board,
                              version, output_dir)
    _UncompressSample(key, timestamp, board, version, output_dir)
110
111
def _BuildFilenameFromParams(key, time, board, version):
  """Builds the local filename that identifies one sample.

  The four fields are concatenated in the order key, time, board, version
  with DELIMITER between neighbouring fields.

  Args:
    key:  (string) Key indexing our sample in the datastore.
    time: (string) Date that the sample was uploaded.
    board: (string) Board that the sample was taken on.
    version: (string) Version string from /etc/lsb-release
  Returns:
    filename (string)
  """
  parts = (key, time, board, version)
  return DELIMITER.join(parts)
124
125
def _DownloadSampleFromServer(server_name, authtoken, key, time, board, version,
                              output_dir):
  """Downloads the compressed sample for key into output_dir.

  The data is stored under the name built by _BuildFilenameFromParams() with
  '.gz' appended. Nothing is downloaded when the uncompressed file of that
  name (without '.gz') already exists in output_dir.

  Args:
    server_name: (string) URL that the app engine code is living on.
    authtoken:   (string) Authorization token.
    key:  (string) Key indexing our sample in the datastore
    time: (string) Date that the sample was uploaded.
    board: (string) Board that the sample was taken on.
    version: (string) Version string from /etc/lsb-release
    output_dir:  (string) Filepath to write to output to.
  Returns:
    None
  """
  filename = _BuildFilenameFromParams(key, time, board, version)
  if os.path.exists(os.path.join(output_dir, filename)):
    print('Already downloaded %s, skipping.' % filename)
    return

  # The request goes to /_ah/login with the auth token and with the sample's
  # /serve URL as the 'continue' target.
  serv_uri = server_name + '/serve/' + key
  serv_args = {'continue': serv_uri, 'auth': authtoken}
  full_serv_uri = server_name + '/_ah/login?%s' % urllib.urlencode(serv_args)
  serv_resp = urllib2.urlopen(urllib2.Request(full_serv_uri))
  try:
    # Fetch the whole payload before creating the file, so that a failed
    # transfer does not leave an empty .gz behind.
    payload = serv_resp.read()
  finally:
    serv_resp.close()
  # Binary mode: the payload is gzip data and must not be newline-translated.
  with open(os.path.join(output_dir, filename + '.gz'), 'wb') as gz_file:
    gz_file.write(payload)
155
156
def _UncompressSample(key, time, board, version, output_dir):
  """Uncompresses a given sample.gz file and deletes the compressed version.

  Does nothing if the uncompressed file is already present in output_dir.

  Args:
    key: (string) Sample key to uncompress.
    time: (string) Date that the sample was uploaded.
    board: (string) Board that the sample was taken on.
    version: (string) Version string from /etc/lsb-release
    output_dir: (string) Filepath to find sample key in.
  Returns:
    None
  """
  filename = _BuildFilenameFromParams(key, time, board, version)
  uncompressed_path = os.path.join(output_dir, filename)
  compressed_path = uncompressed_path + '.gz'

  if os.path.exists(uncompressed_path):
    print('Already decompressed %s, skipping.' % filename)
    return

  # Decompress completely before creating the output file. An output file
  # created up front would survive a missing or corrupt .gz, and its mere
  # existence makes later runs treat the sample as already downloaded.
  in_file = gzip.open(compressed_path, 'rb')
  try:
    data = in_file.read()
  finally:
    in_file.close()
  with open(uncompressed_path, 'wb') as out_file:
    out_file.write(data)
  # Only drop the compressed copy once the uncompressed one is written.
  os.remove(compressed_path)
181
182
def _DeleteSampleFromServer(server_name, authtoken, key):
  """Opens the /del page with the specified key
     to delete the sample off the datastore.

  Args:
    server_name: (string) URL that the app engine code is living on.
    authtoken:   (string) Authorization token.
    key:  (string) Key to delete.
  Returns:
    None
  """
  # The request goes to /_ah/login with the auth token and with the key's
  # /del URL as the 'continue' target.
  serv_uri = server_name + '/del/' + key
  serv_args = {'continue': serv_uri, 'auth': authtoken}
  full_serv_uri = server_name + '/_ah/login?%s' % urllib.urlencode(serv_args)
  # The response body is not needed; close the response right away instead of
  # leaving it open.
  urllib2.urlopen(urllib2.Request(full_serv_uri)).close()
199
200
def _GetServePage(server_name, authtoken):
  """Opens the /serve page and lists all keys.

  Args:
    server_name: (string) URL the app engine code is living on.
    authtoken:   (string) Authorization token.
  Returns:
    The text of the /serve page (including HTML tags), or None if the page
    could not be opened.
  """
  # The request goes to /_ah/login with the auth token and with the /serve
  # URL as the 'continue' target.
  serv_uri = server_name + '/serve'
  serv_args = {'continue': serv_uri, 'auth': authtoken}
  full_serv_uri = server_name + '/_ah/login?%s' % urllib.urlencode(serv_args)
  try:
    serv_resp = urllib2.urlopen(urllib2.Request(full_serv_uri))
  except urllib2.URLError:
    # DownloadSamples() tests the result against None, so report failure that
    # way. HTTPError is a URLError subclass and is covered as well.
    return None
  try:
    return serv_resp.read()
  finally:
    serv_resp.close()
216
217
def main():
  """Parses the command line, authenticates and downloads the samples.

  Returns:
    0 after the download step ran; 1 if --output_dir is missing or does not
    exist, or if no auth token could be obtained.
  """
  parser = optparse.OptionParser()
  parser.add_option('--output_dir',
                    dest='output_dir',
                    action='store',
                    help='Path to output perf data files.')
  # type='int' is required: without it, values given on the command line stay
  # strings and cannot be used as slice indices by DownloadSamples().
  parser.add_option('--start',
                    dest='start_ind',
                    action='store',
                    type='int',
                    default=0,
                    help='Start index.')
  parser.add_option('--stop',
                    dest='stop_ind',
                    action='store',
                    type='int',
                    default=-1,
                    help='Stop index.')
  # parse_args() returns (options, positional args); only options are used.
  options = parser.parse_args()[0]
  if not options.output_dir:
    print('Must specify --output_dir.')
    return 1
  if not os.path.exists(options.output_dir):
    print('Specified output_dir does not exist.')
    return 1

  authtoken = Authenticate(SERVER_NAME)
  if not authtoken:
    print('Could not obtain authtoken, exiting.')
    return 1
  DownloadSamples(SERVER_NAME, authtoken, options.output_dir, options.start_ind,
                  options.stop_ind)
  print('Downloaded samples.')
  return 0
250
251
if __name__ == '__main__':
  # Use main()'s return value as the process exit status. SystemExit is raised
  # directly because the exit() helper is injected by the site module for
  # interactive use and is not guaranteed to exist (e.g. under python -S).
  raise SystemExit(main())
254