app_engine_pull.py revision f81680c018729fd4499e1e200d04b48c4b90127c
#!/usr/bin/python
# Copyright 2012 Google Inc. All Rights Reserved.
# Author: mrdmnd@ (Matt Redmond)
"""A client to pull data from Bartlett.

Inspired by //depot/google3/experimental/mobile_gwp/database/app_engine_pull.py

The server houses perf.data.gz, board, chrome version for each upload.
This script first authenticates with a proper @google.com account, then
downloads a sample (if it's not already cached) and unzips perf.data

  Authenticate(): Gets login info and returns an auth token
  DownloadSamples(): Download and unzip samples.
  _GetServePage(): Pulls /serve page from the app engine server
  _DownloadSampleFromServer(): Downloads a local compressed copy of a sample
  _UncompressSample(): Decompresses a sample, deleting the compressed version.
"""
import contextlib
import getpass
import gzip
import optparse
import os
import shutil
import sys

try:  # Python 2 module names, as the script was originally written.
    import cookielib
    import urllib
    import urllib2
    _urlencode = urllib.urlencode
except ImportError:  # Python 3 moved these modules.
    import http.cookiejar as cookielib
    import urllib.parse
    import urllib.request as urllib2
    _urlencode = urllib.parse.urlencode

try:
    _input = raw_input  # Python 2
except NameError:
    _input = input  # Python 3

SERVER_NAME = "http://chromeoswideprofiling.appspot.com"
APP_NAME = "chromeoswideprofiling"
DELIMITER = "~"


def _ToText(data):
    """Return an HTTP payload as native text.

    Args:
      data: (str or bytes) Payload as returned by a response's read().
    Returns:
      data unchanged if it is already a native str, else decoded as UTF-8.
    """
    if isinstance(data, str):
        return data
    return data.decode("utf-8")


def _ParseAuthResponse(body):
    """Parse a "name=value"-per-line login response into a dict.

    Args:
      body: (string) Text of the login response.
    Returns:
      dict mapping each field name to its value. Lines without "=" are ignored.
    """
    fields = {}
    for line in body.splitlines():
        # Split on the first "=" only: the value itself may contain "=".
        name, separator, value = line.partition("=")
        if separator:
            fields[name] = value
    return fields


def _BuildLoginUri(server_name, path, authtoken):
    """Build the /_ah/login URI that authenticates and redirects to path.

    Args:
      server_name: (string) URL that the app engine code is living on.
      path: (string) Server-relative path to continue to, e.g. "/serve".
      authtoken: (string) Authorization token.
    Returns:
      (string) The full login URI.
    """
    serv_args = {"continue": server_name + path, "auth": authtoken}
    return server_name + "/_ah/login?%s" % _urlencode(serv_args)


def _SelectSamples(serve_page_string, start, stop):
    """Split the /serve page into sample entries and pick a sub-range.

    Args:
      serve_page_string: (string) Text of the /serve page.
      start: (int) Index of the first entry to keep.
      stop: (int) Index to stop at, non-inclusive. -1 for end.
    Returns:
      List of stripped, non-blank entries in [start, stop).
    """
    samples = [entry.strip() for entry in serve_page_string.split("</br>")]
    # A trailing "</br>" leaves a blank tail entry; it is not a sample.
    samples = [entry for entry in samples if entry]
    # -1 is documented as "to the end"; as a raw slice bound it would
    # silently drop the last sample instead.
    end = None if stop == -1 else stop
    return samples[start:end]


def Authenticate(server_name):
    """Gets credentials from user and attempts to retrieve auth token.

    TODO: Accept OAuth2 instead of password.

    Prompts on stdin for the credentials and installs a cookie-aware urllib
    opener globally so later requests share the session.

    Args:
      server_name: (string) URL that the app engine code is living on.
        Accepted for interface compatibility; the login endpoint does not
        depend on it.
    Returns:
      authtoken: (string) The authorization token that can be used
                 to grab other pages, or None if login failed.
    """
    # Grab username and password from user through stdin.
    username = _input("Email (must be @google.com account): ")
    password = getpass.getpass("Password: ")
    # Use a cookie to authenticate with GAE.
    cookiejar = cookielib.LWPCookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))
    urllib2.install_opener(opener)
    # Get an AuthToken from Google accounts service.
    auth_uri = "https://www.google.com/accounts/ClientLogin"
    authreq_data = _urlencode({"Email": username,
                               "Passwd": password,
                               "service": "ah",
                               "source": APP_NAME,
                               "accountType": "HOSTED_OR_GOOGLE"})
    # urlencode output is pure ASCII; POST bodies must be bytes on Python 3.
    auth_req = urllib2.Request(auth_uri, data=authreq_data.encode("ascii"))
    try:
        with contextlib.closing(urllib2.urlopen(auth_req)) as auth_resp:
            body = _ToText(auth_resp.read())
    except urllib2.URLError:
        print("Error logging in to Google accounts service.")
        return None
    # Auth response contains several fields.
    # We care about the part after Auth=
    authtoken = _ParseAuthResponse(body).get("Auth")
    if not authtoken:
        print("Login response did not contain an Auth token.")
        return None
    return authtoken


def DownloadSamples(server_name, authtoken, output_dir, start, stop):
    """Download every sample and write unzipped version to output directory.

    Args:
      server_name: (string) URL that the app engine code is living on.
      authtoken: (string) Authorization token.
      output_dir (string) Filepath to write output to.
      start: (int) Index to start downloading from, starting at top.
      stop: (int) Index to stop downloading, non-inclusive. -1 for end.
    Returns:
      None
    """
    server_name = server_name.rstrip("/")

    serve_page_string = _GetServePage(server_name, authtoken)
    if serve_page_string is None:
        print("Error getting /serve page.")
        return

    sample_list_subset = _SelectSamples(serve_page_string, start, stop)
    print("Will download:")
    for sample in sample_list_subset:
        print(sample)
    for sample in sample_list_subset:
        sample_info = [s.strip() for s in sample.split(DELIMITER)]
        if len(sample_info) < 5:
            print("Skipping malformed sample entry: %s" % sample)
            continue
        # Field 2 is the sample's md5, which is not used here.
        key, upload_time, _, board, version = sample_info[:5]
        # No space between date and time.
        upload_time = upload_time.replace(" ", "_")

        # Put a compressed copy of the samples in output directory.
        _DownloadSampleFromServer(server_name, authtoken, key, upload_time,
                                  board, version, output_dir)
        _UncompressSample(key, upload_time, board, version, output_dir)


def _BuildFilenameFromParams(key, time, board, version):
    """Return the filename for our sample.

    Args:
      key: (string) Key indexing our sample in the datastore.
      time: (string) Date that the sample was uploaded.
      board: (string) Board that the sample was taken on.
      version: (string) Version string from /etc/lsb-release
    Returns:
      filename (string): the four fields joined with DELIMITER.
    """
    return DELIMITER.join([key, time, board, version])


def _DownloadSampleFromServer(server_name, authtoken, key, time, board,
                              version, output_dir):
    """Downloads the compressed sample into output_dir as <filename>.gz.

    Does nothing if the uncompressed sample already exists in output_dir.

    Args:
      server_name: (string) URL that the app engine code is living on.
      authtoken: (string) Authorization token.
      key: (string) Key indexing our sample in the datastore
      time: (string) Date that the sample was uploaded.
      board: (string) Board that the sample was taken on.
      version: (string) Version string from /etc/lsb-release
      output_dir: (string) Filepath to write to output to.
    Returns:
      None
    """
    filename = _BuildFilenameFromParams(key, time, board, version)

    if os.path.exists(os.path.join(output_dir, filename)):
        print("Already downloaded %s, skipping." % filename)
        return

    compressed_path = os.path.join(output_dir, filename + ".gz")
    serv_req = urllib2.Request(
        _BuildLoginUri(server_name, "/serve/" + key, authtoken))
    with contextlib.closing(urllib2.urlopen(serv_req)) as serv_resp:
        # The payload is gzip data: it must be written in binary mode.
        with open(compressed_path, "wb") as compressed_file:
            shutil.copyfileobj(serv_resp, compressed_file)


def _UncompressSample(key, time, board, version, output_dir):
    """Uncompresses a given sample.gz file and deletes the compressed version.

    Does nothing if the uncompressed sample already exists in output_dir.

    Args:
      key: (string) Sample key to uncompress.
      time: (string) Date that the sample was uploaded.
      board: (string) Board that the sample was taken on.
      version: (string) Version string from /etc/lsb-release
      output_dir: (string) Filepath to find sample key in.
    Returns:
      None
    """
    filename = _BuildFilenameFromParams(key, time, board, version)
    out_path = os.path.join(output_dir, filename)
    compressed_path = out_path + ".gz"

    if os.path.exists(out_path):
        print("Already decompressed %s, skipping." % filename)
        return

    try:
        with contextlib.closing(gzip.open(compressed_path, "rb")) as in_file:
            with open(out_path, "wb") as out_file:
                shutil.copyfileobj(in_file, out_file)
    except Exception:
        # The output file doubles as the "already cached" marker, so a
        # truncated one must not survive a failed decompression.
        if os.path.exists(out_path):
            os.remove(out_path)
        raise
    os.remove(compressed_path)


def _DeleteSampleFromServer(server_name, authtoken, key):
    """Opens the /del page with the specified key.

    This is meant to delete the sample off the datastore.

    Args:
      server_name: (string) URL that the app engine code is living on.
      authtoken: (string) Authorization token.
      key: (string) Key to delete.
    Returns:
      None
    """
    serv_req = urllib2.Request(
        _BuildLoginUri(server_name, "/del/" + key, authtoken))
    urllib2.urlopen(serv_req).close()


def _GetServePage(server_name, authtoken):
    """Opens the /serve page and lists all keys.

    Args:
      server_name: (string) URL the app engine code is living on.
      authtoken: (string) Authorization token.
    Returns:
      The text of the /serve page (including HTML tags), or None if the
      page could not be fetched.
    """
    serv_req = urllib2.Request(_BuildLoginUri(server_name, "/serve", authtoken))
    try:
        with contextlib.closing(urllib2.urlopen(serv_req)) as serv_resp:
            return _ToText(serv_resp.read())
    except urllib2.URLError:
        return None


def main():
    """Parse the command line, authenticate and download the samples.

    Returns:
      (int) Process exit status: 0 on success, 1 on a usage or login error.
    """
    parser = optparse.OptionParser()
    parser.add_option("--output_dir", dest="output_dir", action="store",
                      help="Path to output perf data files.")
    # type="int": these are used as slice bounds, so strings would not do.
    parser.add_option("--start", dest="start_ind", action="store",
                      type="int", default=0, help="Start index.")
    parser.add_option("--stop", dest="stop_ind", action="store",
                      type="int", default=-1, help="Stop index.")
    options = parser.parse_args()[0]
    if not options.output_dir:
        print("Must specify --output_dir.")
        return 1
    if not os.path.exists(options.output_dir):
        print("Specified output_dir does not exist.")
        return 1

    authtoken = Authenticate(SERVER_NAME)
    if not authtoken:
        print("Could not obtain authtoken, exiting.")
        return 1
    DownloadSamples(SERVER_NAME, authtoken, options.output_dir,
                    options.start_ind, options.stop_ind)
    print("Downloaded samples.")
    return 0


if __name__ == "__main__":
    sys.exit(main())