redirector.py revision 116680a4aac90f2aa7413d9095a592090648e557
1# Copyright 2013 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import posixpath
6from urlparse import urlsplit
7
8from file_system import FileNotFoundError
9from future import Future
10from path_util import Segment, Join, SplitParent
11
class Redirector(object):
  ''' Computes redirects for requested paths, using a hard-coded rule for the
  old code.google.com host plus rules read from redirects.json files found in
  |file_system|.
  '''
  def __init__(self, compiled_fs_factory, file_system):
    ''' |compiled_fs_factory| supplies the JSON cache (via ForJson) used to read
    redirects.json files out of |file_system|; |file_system| itself is kept so
    that Cron() can walk it.
    '''
    self._file_system = file_system
    self._cache = compiled_fs_factory.ForJson(file_system)

  def Redirect(self, host, path):
    ''' Check if a path should be redirected, first according to host
    redirection rules, then from rules in redirects.json files.

    Returns the path that should be redirected to, or None if no redirection
    should occur.
    '''
    return self._RedirectOldHosts(host, path) or self._RedirectFromConfig(path)

  def _RedirectFromConfig(self, url):
    ''' Look up redirects.json file in the directory hierarchy of |url|.
    Directory-level redirects occur first, followed by the specific file
    redirects. Returns the URL to the redirect, if any exist, or None.
    '''
    dirname, filename = posixpath.split(url)
    redirected_dirname = self._RedirectDirectory(dirname)

    # If only the directory was redirected, the file keeps its name and moves
    # to the redirected directory. Otherwise there is nothing to redirect yet.
    default_redirect = None
    if redirected_dirname != dirname:
      default_redirect = posixpath.normpath(Join(redirected_dirname, filename))

    # File-level rules live in the redirects.json of the (already redirected)
    # directory; a missing file simply means there are no such rules.
    try:
      rules = self._cache.GetFromFile(
        posixpath.normpath(Join(redirected_dirname,
                                          'redirects.json'))).Get()
    except FileNotFoundError:
      return default_redirect

    redirect = rules.get(filename)
    if redirect is None:
      return default_redirect
    # Absolute paths and full http(s) URLs are returned untouched; anything
    # else is resolved relative to the redirected directory.
    if (redirect.startswith('/') or
        urlsplit(redirect).scheme in ('http', 'https')):
      return redirect

    return posixpath.normpath(Join(redirected_dirname, redirect))

  def _RedirectDirectory(self, real_url):
    ''' Returns the final redirected directory after all directory hops.
    If there is a circular redirection, it skips the redirection that would
    cause the infinite loop.
    If no redirection rule is matched, the base directory is returned.

    Directory rules are redirects.json entries whose key is '<dirname>/...';
    their value is the target directory, optionally suffixed with '...'.
    '''
    # Redirect targets already followed, shared across the recursive lookups
    # below so that a cycle of directory redirects terminates.
    seen_redirects = set()

    def lookup_redirect(url):
      # NOTE(review): Segment comes from path_util and is presumed to yield
      # successive (prefix, remainder) splits of a path - confirm there.
      for sub_url, _ in Segment(url):
        for base, filename in Segment(sub_url):
          try:
            redirects = self._cache.GetFromFile(posixpath.normpath(
                posixpath.join(base, 'redirects.json'))).Get()
          except FileNotFoundError:
            continue

          redirect = redirects.get(posixpath.join(filename, '...'))

          if redirect is None:
            continue

          # Drop only the trailing '...' wildcard marker. str.rstrip('...')
          # treats its argument as a set of characters and would strip every
          # trailing '.', mangling targets that end in '.' or '..'.
          if redirect.endswith('...'):
            redirect = redirect[:-len('...')]
          redirect = Join(base, redirect)

          # Avoid infinite redirection loops by breaking if seen before.
          if redirect in seen_redirects:
            break
          seen_redirects.add(redirect)
          # Re-attach whatever part of |url| lies below the matched directory
          # and keep following redirects from the new location.
          return lookup_redirect(
              Join(redirect, posixpath.relpath(url, sub_url)))
      return url

    return lookup_redirect(real_url)

  def _RedirectOldHosts(self, host, path):
    ''' Redirect paths from the old code.google.com to the new
    developer.chrome.com. A leading 'chrome' path segment is dropped and the
    result always uses https.

    Returns the absolute URL to redirect to, or None if |host| is not
    code.google.com.
    '''
    if urlsplit(host).hostname != 'code.google.com':
      return None

    segments = path.split('/')
    if segments and segments[0] == 'chrome':
      segments.pop(0)

    # |segments| is empty when |path| was exactly 'chrome'; posixpath.join()
    # needs at least one argument, so redirect to the site root in that case.
    new_path = posixpath.join(*segments) if segments else ''
    return 'https://developer.chrome.com/' + new_path

  def Cron(self):
    ''' Load files during a cron run.

    Requests every redirects.json found while walking the file system through
    the cache, and returns a Future whose callback resolves all of them.
    '''
    futures = []
    for root, _dirs, files in self._file_system.Walk(''):
      if 'redirects.json' in files:
        futures.append(
            self._cache.GetFromFile(posixpath.join(root, 'redirects.json')))
    return Future(callback=lambda: [f.Get() for f in futures])
114