# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import posixpath
from urlparse import urlsplit

from file_system import FileNotFoundError
from future import All
from path_util import Segment, Join, SplitParent

class Redirector(object):
  ''' Resolves redirects for request paths, using a hard-coded rule for the
  old code.google.com host and the rules stored in redirects.json files.
  '''

  def __init__(self, compiled_fs_factory, file_system):
    ''' |compiled_fs_factory| must provide ForJson(file_system); the object it
    returns is used to read every redirects.json file.
    |file_system| must provide Walk(path); it is only walked by Refresh().
    '''
    self._file_system = file_system
    self._cache = compiled_fs_factory.ForJson(file_system)

  def Redirect(self, host, path):
    ''' Check if a path should be redirected, first according to host
    redirection rules, then from rules in redirects.json files.

    Returns the path that should be redirected to, or None if no redirection
    should occur.
    '''
    return self._RedirectOldHosts(host, path) or self._RedirectFromConfig(path)

  def _RedirectFromConfig(self, url):
    ''' Look up redirects.json file in the directory hierarchy of |url|.
    Directory-level redirects occur first, followed by the specific file
    redirects. Returns the URL to the redirect, if any exist, or None.
    '''
    dirname, filename = posixpath.split(url)
    redirected_dirname = self._RedirectDirectory(dirname)

    # If only the directory was redirected, the file keeps its name inside the
    # redirected directory. If nothing was redirected at all, this stays None.
    default_redirect = None
    if redirected_dirname != dirname:
      default_redirect = posixpath.normpath(Join(redirected_dirname, filename))

    # File-level rules are read from the redirects.json that lives in the
    # (possibly redirected) directory.
    rules_path = posixpath.normpath(Join(redirected_dirname, 'redirects.json'))
    try:
      rules = self._cache.GetFromFile(rules_path).Get()
    except FileNotFoundError:
      return default_redirect

    redirect = rules.get(filename)
    if redirect is None:
      return default_redirect

    # Targets starting with '/' and http(s) URLs are returned untouched;
    # anything else is resolved against the redirected directory.
    if (redirect.startswith('/') or
        urlsplit(redirect).scheme in ('http', 'https')):
      return redirect

    return posixpath.normpath(Join(redirected_dirname, redirect))

  def _RedirectDirectory(self, real_url):
    ''' Returns the final redirected directory after all directory hops.
    If there is a circular redirection, it skips the redirection that would
    cause the infinite loop.
    If no redirection rule is matched, the base directory is returned.

    A directory rule is a redirects.json entry whose key is '<name>/...'; a
    trailing '...' on the rule's value is removed before it is joined onto the
    directory that holds the redirects.json.
    '''
    # Shared by every recursive hop so that a cycle is detected no matter how
    # many hops it spans.
    seen_redirects = set()

    def lookup_redirect(url):
      # NOTE(review): Segment() comes from path_util and is not visible here;
      # presumably it yields each (head, tail) split of the path - confirm.
      for sub_url, _ in Segment(url):
        for base, filename in Segment(sub_url):
          try:
            redirects = self._cache.GetFromFile(posixpath.normpath(
                posixpath.join(base, 'redirects.json'))).Get()
          except FileNotFoundError:
            continue

          redirect = redirects.get(posixpath.join(filename, '...'))
          if redirect is None:
            continue

          # Remove only the literal '...' suffix. str.rstrip('...') treats its
          # argument as a set of characters and would strip every trailing
          # '.', mangling targets that end in a '.' or '..' path component.
          if redirect.endswith('...'):
            redirect = redirect[:-len('...')]
          redirect = Join(base, redirect)

          # Avoid infinite redirection loops by breaking if seen before.
          if redirect in seen_redirects:
            break
          seen_redirects.add(redirect)

          # Re-attach the part of |url| below the matched directory, then
          # keep following redirects from the new location.
          return lookup_redirect(
              Join(redirect, posixpath.relpath(url, sub_url)))
      return url

    return lookup_redirect(real_url)

  def _RedirectOldHosts(self, host, path):
    ''' Redirect paths from the old code.google.com to the new
    developer.chrome.com. A leading 'chrome' path segment is dropped and the
    rest of |path| (e.g. the channel) is kept. The result always uses https.

    |host| is parsed with urlsplit(), so it needs a scheme (or a leading '//')
    for its hostname to be recognised.

    Returns the URL to redirect to, or None if |host| is not code.google.com.
    '''
    if urlsplit(host).hostname != 'code.google.com':
      return None

    # str.split() with a separator always returns at least one element.
    segments = path.split('/')
    if segments[0] == 'chrome':
      segments.pop(0)

    # posixpath.join() requires at least one argument. A |path| of exactly
    # 'chrome' leaves no segments and maps to the site root.
    new_path = posixpath.join(*segments) if segments else ''
    return 'https://developer.chrome.com/' + new_path

  def Refresh(self):
    ''' Load files during a cron run.

    Requests every redirects.json found while walking the file system from its
    root through the cache, and returns All() of the results.
    '''
    # NOTE(review): GetFromFile() presumably returns a Future and All()
    # combines them into one - confirm against the future module.
    return All([
        self._cache.GetFromFile(Join(root, 'redirects.json'))
        for root, _, files in self._file_system.Walk('')
        if 'redirects.json' in files])