app_engine_pull.py revision f81680c018729fd4499e1e200d04b48c4b90127c
1#!/usr/bin/python
2# Copyright 2012 Google Inc. All Rights Reserved.
3# Author: mrdmnd@ (Matt Redmond)
4"""A client to pull data from Bartlett.
5
6Inspired by //depot/google3/experimental/mobile_gwp/database/app_engine_pull.py
7
8The server houses perf.data.gz, board, chrome version for each upload.
9This script first authenticates with a proper @google.com account, then
10downloads a sample (if it's not already cached) and unzips perf.data
11
12  Authenticate(): Gets login info and returns an auth token
13  DownloadSamples(): Download and unzip samples.
14  _GetServePage(): Pulls /serve page from the app engine server
15  _DownloadSampleFromServer(): Downloads a local compressed copy of a sample
16  _UncompressSample(): Decompresses a sample, deleting the compressed version.
17"""
18import cookielib
19import getpass
20import gzip
21import optparse
22import os
23import urllib
24import urllib2
25
# Base URL of the App Engine application that stores the uploaded samples.
# HTTPS is required: the auth token is sent in the query string of every
# /_ah/login request and must not travel in cleartext.
SERVER_NAME = "https://chromeoswideprofiling.appspot.com"
# Identifier reported as "source" to the Google accounts login service.
APP_NAME = "chromeoswideprofiling"
# Separates the fields of a /serve page entry and of local sample filenames.
DELIMITER = "~"
29
30
def Authenticate(server_name):
  """Prompts for Google credentials and exchanges them for an auth token.

  As a side effect a cookie-aware urllib2 opener is installed globally, so
  that later urllib2.urlopen() calls share one cookie jar.

  TODO: Accept OAuth2 instead of password.

  Args:
    server_name: (string) URL that the app engine code is living on.
                 Not used by the login request itself; kept so existing
                 callers keep working.
  Returns:
    authtoken: (string) The authorization token that can be used
                        to grab other pages, or None when the login request
                        fails or the response carries no Auth field.
  """
  # Grab username and password from user through stdin.
  username = raw_input("Email (must be @google.com account): ")
  password = getpass.getpass("Password: ")
  # Use a cookie to authenticate with GAE.
  cookiejar = cookielib.LWPCookieJar()
  opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))
  urllib2.install_opener(opener)
  # Get an AuthToken from Google accounts service.
  auth_uri = "https://www.google.com/accounts/ClientLogin"
  authreq_data = urllib.urlencode({"Email": username,
                                   "Passwd": password,
                                   "service": "ah",
                                   "source": APP_NAME,
                                   "accountType": "HOSTED_OR_GOOGLE"})
  auth_req = urllib2.Request(auth_uri, data=authreq_data)
  try:
    auth_resp = urllib2.urlopen(auth_req)
  except urllib2.URLError:
    print("Error logging in to Google accounts service.")
    return None
  try:
    body = auth_resp.read()
  finally:
    # Release the connection even if reading the body fails.
    auth_resp.close()

  authtoken = _ParseAuthToken(body)
  if authtoken is None:
    print("Login response did not contain an auth token.")
  return authtoken


def _ParseAuthToken(body):
  """Extracts the value of the Auth field from a login response body.

  Args:
    body: (string) Response text made of NAME=value lines.
  Returns:
    (string) The text after "Auth=", or None if no such line exists.
  """
  for line in body.splitlines():
    # Split on the first "=" only: the value itself may contain "=", and
    # lines without any "=" are simply ignored.
    name, separator, value = line.partition("=")
    if separator and name.strip() == "Auth":
      return value.strip()
  return None
69
70
def DownloadSamples(server_name, authtoken, output_dir, start, stop):
  """Downloads a range of samples and writes them unzipped to output_dir.

  The /serve page is fetched and split on "</br>" into one entry per sample.
  Entries [start:stop] are then downloaded and decompressed one at a time.
  Blank or malformed entries (for instance the empty string that follows a
  trailing "</br>") are skipped instead of aborting the whole run.

  Args:
    server_name: (string) URL that the app engine code is living on.
    authtoken:   (string) Authorization token.
    output_dir:  (string) Filepath to write output to.
    start:       (int)    Index to start downloading from, starting at top.
    stop:        (int)    Index to stop downloading, non-inclusive. -1 for end.
  Returns:
    None
  """
  server_name = server_name.rstrip("/")

  serve_page_string = _GetServePage(server_name, authtoken)
  if serve_page_string is None:
    print("Error getting /serve page.")
    return

  # Indices may arrive as strings when they come straight from the command
  # line; slicing needs real integers (or None).
  start = None if start is None else int(start)
  stop = None if stop is None else int(stop)
  if stop == -1:
    # -1 is the documented "through the end" sentinel. Used as a literal
    # slice bound it would silently drop the final entry.
    stop = None

  samples = []
  for entry in serve_page_string.split("</br>")[start:stop]:
    parsed = _ParseSampleEntry(entry)
    if parsed is None:
      if entry.strip():
        print("Skipping malformed sample entry: %s" % entry.strip())
      continue
    samples.append((entry, parsed))

  print("Will download:")
  for entry, _ in samples:
    print(entry)
  for _, (key, timestamp, board, version) in samples:
    # Put a compressed copy of the sample in the output directory, then
    # replace it with the uncompressed data.
    _DownloadSampleFromServer(server_name, authtoken, key, timestamp, board,
                              version, output_dir)
    _UncompressSample(key, timestamp, board, version, output_dir)


def _ParseSampleEntry(entry):
  """Splits one /serve page entry into its sample fields.

  Args:
    entry: (string) Text of a single entry, fields separated by DELIMITER.
  Returns:
    A (key, time, board, version) tuple with the spaces in the time field
    replaced by "_", or None when the entry has fewer than five fields or
    an empty key.
  """
  fields = [field.strip() for field in entry.split(DELIMITER)]
  if len(fields) < 5 or not fields[0]:
    return None
  # fields[2] is the sample's md5, which is not needed locally.
  key, timestamp, board, version = fields[0], fields[1], fields[3], fields[4]
  # No space between date and time.
  return key, timestamp.replace(" ", "_"), board, version
111
112
def _BuildFilenameFromParams(key, time, board, version):
  """Compose the local filename under which a sample is stored.

  The four fields are concatenated in the order key, time, board, version
  with DELIMITER placed between neighbouring fields.

  Args:
    key:  (string) Key indexing our sample in the datastore.
    time: (string) Date that the sample was uploaded.
    board: (string) Board that the sample was taken on.
    version: (string) Version string from /etc/lsb-release
  Returns:
    (string) The joined name, without directory or extension.
  """
  name_parts = (key, time, board, version)
  return DELIMITER.join(name_parts)
125
126
def _DownloadSampleFromServer(server_name, authtoken, key, time, board,
                              version, output_dir):
  """Downloads the compressed sample for key into output_dir.

  The data is stored as <filename>.gz, where <filename> is built from key,
  time, board and version. Nothing is fetched when the uncompressed
  <filename> is already present in output_dir.

  Args:
    server_name: (string) URL that the app engine code is living on.
    authtoken:   (string) Authorization token.
    key:  (string) Key indexing our sample in the datastore
    time: (string) Date that the sample was uploaded.
    board: (string) Board that the sample was taken on.
    version: (string) Version string from /etc/lsb-release
    output_dir:  (string) Filepath to write to output to.
  Returns:
    None
  """
  filename = _BuildFilenameFromParams(key, time, board, version)
  compressed_path = os.path.join(output_dir, filename + ".gz")

  if os.path.exists(os.path.join(output_dir, filename)):
    print("Already downloaded %s, skipping." % filename)
    return

  serv_uri = server_name + "/serve/" + key
  serv_args = {"continue": serv_uri, "auth": authtoken}
  full_serv_uri = server_name + "/_ah/login?%s" % urllib.urlencode(serv_args)
  serv_resp = urllib2.urlopen(urllib2.Request(full_serv_uri))
  chunk_size = 64 * 1024
  try:
    # The payload is compressed binary data, so the file must be opened in
    # binary mode; text mode can rewrite newline bytes on some platforms.
    with open(compressed_path, "wb") as out_file:
      # Stream in pieces so a large sample is never held in memory whole.
      while True:
        chunk = serv_resp.read(chunk_size)
        if not chunk:
          break
        out_file.write(chunk)
  finally:
    serv_resp.close()
156
157
def _UncompressSample(key, time, board, version, output_dir):
  """Uncompresses a given sample.gz file and deletes the compressed version.

  If decompression fails, the partially written output is removed and the
  compressed file is left in place, so a later run does not mistake a
  truncated sample for a finished one.

  Args:
    key: (string) Sample key to uncompress.
    time: (string) Date that the sample was uploaded.
    board: (string) Board that the sample was taken on.
    version: (string) Version string from /etc/lsb-release
    output_dir: (string) Filepath to find sample key in.
  Returns:
    None
  """
  filename = _BuildFilenameFromParams(key, time, board, version)
  sample_path = os.path.join(output_dir, filename)
  compressed_path = sample_path + ".gz"

  if os.path.exists(sample_path):
    print("Already decompressed %s, skipping." % filename)
    return

  chunk_size = 64 * 1024
  # Open the input first: if the archive is missing, no output file has been
  # created yet.
  in_file = gzip.open(compressed_path, "rb")
  try:
    with open(sample_path, "wb") as out_file:
      # Copy in pieces instead of reading the whole sample into memory.
      while True:
        chunk = in_file.read(chunk_size)
        if not chunk:
          break
        out_file.write(chunk)
  except BaseException:
    # A half-written sample would satisfy the "already decompressed" and
    # "already downloaded" existence checks on the next run.
    if os.path.exists(sample_path):
      os.remove(sample_path)
    raise
  finally:
    in_file.close()
  os.remove(compressed_path)
182
183
def _DeleteSampleFromServer(server_name, authtoken, key):
  """Request removal of one sample from the datastore.

  The request goes through the /_ah/login endpoint, carrying the auth token
  and a "continue" target of <server_name>/del/<key>. The response is not
  inspected.

  Args:
    server_name: (string) URL that the app engine code is living on.
    authtoken:   (string) Authorization token.
    key:  (string) Key to delete.
  Returns:
    None
  """
  delete_target = "".join((server_name, "/del/", key))
  login_query = urllib.urlencode({"continue": delete_target,
                                  "auth": authtoken})
  login_uri = "".join((server_name, "/_ah/login?", login_query))
  urllib2.urlopen(urllib2.Request(login_uri))
200
201
def _GetServePage(server_name, authtoken):
  """Opens the /serve page and lists all keys.

  The page is requested through the /_ah/login endpoint with the auth token
  and a "continue" target of <server_name>/serve.

  Args:
    server_name: (string) URL the app engine code is living on.
    authtoken:   (string) Authorization token.
  Returns:
    The text of the /serve page (including HTML tags), or None if the page
    could not be opened.
  """
  serv_uri = server_name + "/serve"
  serv_args = {"continue": serv_uri, "auth": authtoken}
  full_serv_uri = server_name + "/_ah/login?%s" % urllib.urlencode(serv_args)
  try:
    serv_resp = urllib2.urlopen(urllib2.Request(full_serv_uri))
  except urllib2.URLError as err:
    # The caller treats None as "page unavailable"; report the cause here so
    # it is not lost.
    print("Could not open %s: %s" % (serv_uri, err))
    return None
  try:
    return serv_resp.read()
  finally:
    serv_resp.close()
217
218
def main():
  """Parses the command line, logs in and downloads the requested samples.

  Returns:
    (int) Process exit status: 0 on success, 1 when --output_dir is missing
          or not a directory, or when no auth token could be obtained.
  """
  parser = optparse.OptionParser()
  parser.add_option("--output_dir", dest="output_dir", action="store",
                    help="Path to output perf data files.")
  # type="int" makes optparse convert (and validate) the indices; without it
  # user-supplied values reach the list slice as strings.
  parser.add_option("--start", dest="start_ind", action="store", type="int",
                    default=0, help="Start index.")
  parser.add_option("--stop", dest="stop_ind", action="store", type="int",
                    default=-1, help="Stop index (non-inclusive), -1 for end.")
  options, _ = parser.parse_args()
  if not options.output_dir:
    print("Must specify --output_dir.")
    return 1
  # A regular file would pass a plain existence check and only fail later,
  # after the user has already typed credentials.
  if not os.path.isdir(options.output_dir):
    print("Specified output_dir does not exist or is not a directory.")
    return 1

  authtoken = Authenticate(SERVER_NAME)
  if not authtoken:
    print("Could not obtain authtoken, exiting.")
    return 1
  DownloadSamples(SERVER_NAME, authtoken, options.output_dir,
                  options.start_ind, options.stop_ind)
  print("Downloaded samples.")
  return 0
243
if __name__ == "__main__":
  # sys.exit is always available; the bare exit() helper is injected by the
  # site module for interactive use and is missing when site is disabled.
  import sys
  sys.exit(main())
246