# redirector.py revision 1320f92c476a1ad9d19dba2a48c72b75566198e9
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import posixpath
from urlparse import urlsplit

from file_system import FileNotFoundError
from future import All
from path_util import Segment, Join, SplitParent


class Redirector(object):
  ''' Resolves redirects, first from a hard-coded old-host rule and then from
  redirects.json files read through a JSON cache of |file_system|.
  '''

  def __init__(self, compiled_fs_factory, file_system):
    ''' |compiled_fs_factory| must provide ForJson(file_system); the object it
    returns is used to read every redirects.json file. |file_system| is kept
    so that Refresh() can walk it.
    '''
    self._file_system = file_system
    self._cache = compiled_fs_factory.ForJson(file_system)

  def Redirect(self, host, path):
    ''' Check if a path should be redirected, first according to host
    redirection rules, then from rules in redirects.json files.

    Returns the path that should be redirected to, or None if no redirection
    should occur.
    '''
    return self._RedirectOldHosts(host, path) or self._RedirectFromConfig(path)

  def _RedirectFromConfig(self, url):
    ''' Look up redirects.json file in the directory hierarchy of |url|.
    Directory-level redirects occur first, followed by the specific file
    redirects. Returns the URL to the redirect, if any exist, or None.

    A file rule whose target starts with '/' or has an http/https scheme is
    returned verbatim; any other target is resolved against the (possibly
    redirected) directory of |url|.
    '''
    dirname, filename = posixpath.split(url)
    redirected_dirname = self._RedirectDirectory(dirname)

    # If the directory itself moved, the file moves with it even when no
    # file-level rule applies. Otherwise there is nothing to fall back to.
    default_redirect = None
    if redirected_dirname != dirname:
      default_redirect = posixpath.normpath(Join(redirected_dirname, filename))

    rules_path = posixpath.normpath(Join(redirected_dirname, 'redirects.json'))
    try:
      rules = self._cache.GetFromFile(rules_path).Get()
    except FileNotFoundError:
      return default_redirect

    redirect = rules.get(filename)
    if redirect is None:
      return default_redirect

    # Absolute paths and full http(s) URLs are used exactly as written.
    if (redirect.startswith('/') or
        urlsplit(redirect).scheme in ('http', 'https')):
      return redirect

    return posixpath.normpath(Join(redirected_dirname, redirect))

  def _RedirectDirectory(self, real_url):
    ''' Returns the final redirected directory after all directory hops.
    If there is a circular redirection, it skips the redirection that would
    cause the infinite loop.
    If no redirection rule is matched, the base directory is returned.

    Directory rules are the redirects.json entries whose key is
    '<dirname>/...'. A trailing '...' on the rule's target is dropped before
    the target is joined onto the directory holding the redirects.json file.
    '''
    # Targets already followed during this lookup; shared by every recursive
    # hop so that a cycle of rules is detected.
    seen_redirects = set()

    def lookup_redirect(url):
      # NOTE(review): Segment() comes from path_util and is not visible here;
      # it is assumed to yield (head, tail) splits of the path - confirm.
      for sub_url, _ in Segment(url):
        for base, filename in Segment(sub_url):
          try:
            redirects = self._cache.GetFromFile(posixpath.normpath(
                posixpath.join(base, 'redirects.json'))).Get()
          except FileNotFoundError:
            continue

          redirect = redirects.get(posixpath.join(filename, '...'))
          if redirect is None:
            continue

          # Remove only the literal trailing '...' marker. str.rstrip('...')
          # would strip *every* trailing dot and so mangle a target that
          # ends in '..'.
          if redirect.endswith('...'):
            redirect = redirect[:-len('...')]
          redirect = Join(base, redirect)

          # Avoid infinite redirection loops by breaking if seen before.
          if redirect in seen_redirects:
            break
          seen_redirects.add(redirect)

          # Re-attach the part of |url| below the matched directory and keep
          # following rules from the new location.
          return lookup_redirect(
              Join(redirect, posixpath.relpath(url, sub_url)))
      return url

    return lookup_redirect(real_url)

  def _RedirectOldHosts(self, host, path):
    ''' Redirect paths from the old code.google.com to the new
    developer.chrome.com.

    Returns None unless urlsplit(host).hostname is 'code.google.com'; note
    that urlsplit only reports a hostname when |host| carries a scheme or a
    leading '//'. Otherwise returns 'https://developer.chrome.com/' followed
    by |path| with a leading 'chrome' segment removed.
    '''
    if urlsplit(host).hostname != 'code.google.com':
      return None

    # str.split() always yields at least one element, so indexing is safe.
    segments = path.split('/')
    if segments[0] == 'chrome':
      segments.pop(0)

    # posixpath.join() requires at least one argument. A |path| of exactly
    # 'chrome' leaves no segments and maps to the site root.
    tail = posixpath.join(*segments) if segments else ''
    return 'https://developer.chrome.com/' + tail

  def Refresh(self):
    ''' Load files during a cron run.

    Walks the whole file system, requests every redirects.json through the
    cache, and returns All() of the resulting requests.
    '''
    futures = [
        self._cache.GetFromFile(Join(root, 'redirects.json'))
        for root, _, files in self._file_system.Walk('')
        if 'redirects.json' in files]
    return All(futures)