# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
4eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch
5eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdochimport posixpath
6eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdochfrom urlparse import urlsplit
7eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch
8eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdochfrom file_system import FileNotFoundError
91320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccifrom future import All
10116680a4aac90f2aa7413d9095a592090648e557Ben Murdochfrom path_util import Segment, Join, SplitParent
11eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch
class Redirector(object):
  ''' Works out where a requested path should be redirected to, using
  host-based rules for the old code.google.com site and the rules found in
  redirects.json files of the given file system.
  '''

  def __init__(self, compiled_fs_factory, file_system):
    ''' |file_system| is walked by Refresh(); |compiled_fs_factory| supplies
    the JSON cache (via ForJson) through which redirects.json files are read.
    '''
    self._file_system = file_system
    self._cache = compiled_fs_factory.ForJson(file_system)

  def Redirect(self, host, path):
    ''' Check if a path should be redirected, first according to host
    redirection rules, then from rules in redirects.json files.

    Returns the path that should be redirected to, or None if no redirection
    should occur.
    '''
    return self._RedirectOldHosts(host, path) or self._RedirectFromConfig(path)

  def _RedirectFromConfig(self, url):
    ''' Look up redirects.json file in the directory hierarchy of |url|.
    Directory-level redirects occur first, followed by the specific file
    redirects. Returns the URL to the redirect, if any exist, or None.
    '''
    dirname, filename = posixpath.split(url)
    redirected_dirname = self._RedirectDirectory(dirname)

    # If only the directory was redirected, the file keeps its name inside the
    # new directory. This is the answer unless a file-level rule overrides it.
    default_redirect = None
    if redirected_dirname != dirname:
      default_redirect = posixpath.normpath(Join(redirected_dirname, filename))

    try:
      rules = self._cache.GetFromFile(
          posixpath.normpath(Join(redirected_dirname,
                                  'redirects.json'))).Get()
    except FileNotFoundError:
      # No rules file in the (redirected) directory.
      return default_redirect

    redirect = rules.get(filename)
    if redirect is None:
      return default_redirect

    # Absolute paths and full http(s) URLs are returned untouched; anything
    # else is resolved relative to the (redirected) directory.
    if (redirect.startswith('/') or
        urlsplit(redirect).scheme in ('http', 'https')):
      return redirect

    return posixpath.normpath(Join(redirected_dirname, redirect))

  def _RedirectDirectory(self, real_url):
    ''' Returns the final redirected directory after all directory hops.
    If there is a circular redirection, it skips the redirection that would
    cause the infinite loop.
    If no redirection rule is matched, the base directory is returned.

    Directory rules are redirects.json entries whose key is a directory name
    followed by '/...'; a trailing '...' on the target is dropped.
    '''
    wildcard = '...'
    # Shared across the recursive calls so that a target which has already
    # been followed is never followed again.
    seen_redirects = set()

    def lookup_redirect(url):
      # NOTE(review): Segment() comes from path_util and is not visible here;
      # presumably it yields (head, tail) pairs for each way of splitting a
      # path. Confirm against path_util.
      for sub_url, _ in Segment(url):
        for base, filename in Segment(sub_url):
          try:
            redirects = self._cache.GetFromFile(posixpath.normpath(
                posixpath.join(base, 'redirects.json'))).Get()
          except FileNotFoundError:
            continue

          redirect = redirects.get(posixpath.join(filename, wildcard))
          if redirect is None:
            continue

          # Drop only a literal trailing '...'. str.rstrip('...') would treat
          # its argument as a set of characters and strip *every* trailing
          # dot, mangling targets such as '..'.
          if redirect.endswith(wildcard):
            redirect = redirect[:-len(wildcard)]
          redirect = Join(base, redirect)

          # Avoid infinite redirection loops by breaking if seen before.
          if redirect in seen_redirects:
            break
          seen_redirects.add(redirect)
          # Re-attach the part of |url| below the matched directory and keep
          # following redirects from the new location.
          return lookup_redirect(
              Join(redirect, posixpath.relpath(url, sub_url)))
      return url

    return lookup_redirect(real_url)

  def _RedirectOldHosts(self, host, path):
    ''' Redirect paths from the old code.google.com to the new
    developer.chrome.com. A leading 'chrome' path segment is dropped and the
    remaining segments are kept; the result is always an https URL.

    Returns None if |host| is not code.google.com.
    '''
    if urlsplit(host).hostname != 'code.google.com':
      return None

    segments = path.split('/')
    if segments[0] == 'chrome':
      segments.pop(0)

    # |path| == 'chrome' leaves nothing to join, and posixpath.join() raises
    # TypeError when called without arguments, so go to the site root.
    if not segments:
      return 'https://developer.chrome.com/'

    return 'https://developer.chrome.com/' + posixpath.join(*segments)

  def Refresh(self):
    ''' Load files during a cron run.

    Requests every redirects.json found while walking the file system through
    the cache and returns All() of the resulting futures.
    '''
    # NOTE(review): All() comes from the project's future module; presumably
    # it combines the individual futures into one. Confirm against future.py.
    futures = [self._cache.GetFromFile(Join(root, 'redirects.json'))
               for root, _, files in self._file_system.Walk('')
               if 'redirects.json' in files]
    return All(futures)
113