1# Copyright (c) 2012 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4import sys
5import os
6import re
7
8class DepsException(Exception):
9  pass
10
11"""
12The core of this script is the calc_load_sequence function. In total, this
13walks over the provided javascript files and figures out their dependencies
14using the module definitions provided in each file. This allows us to, for
15example, have a trio of modules:
16
17foo.js:
18   base.require('bar');
19and bar.js:
20   base.require('baz');
21
22calc_load_sequence(['foo'], '.') will yield:
23   [Module('baz'), Module('bar'), Module('foo')]
24
25which is, based on the dependencies, the correct sequence in which to load
26those modules.
27"""
28
29class ResourceFinder(object):
30  """Helper code for finding a module given a name and current module.
31
32  The dependency resolution code in Module.resolve will find bits of code in the
33  actual javascript that says things require('bar'). This
34  code is responsible for figuring out what filename corresponds to 'bar' given
35  a Module('foo').
36  """
37  def __init__(self, root_dir):
38    self._root_dir = root_dir
39    pass
40
41  @property
42  def root_dir(self):
43    return self._root_dir
44
45  def _find_and_load_filename(self, absolute_path):
46    if not os.path.exists(absolute_path):
47      return None, None
48
49    f = open(absolute_path, 'r')
50    contents = f.read()
51    f.close()
52
53    return absolute_path, contents
54
55  def _find_and_load(self, current_module, requested_name, extension):
56    assert current_module.filename
57    pathy_name = requested_name.replace(".", os.sep)
58    filename = pathy_name + extension
59    absolute_path = os.path.join(self._root_dir, filename)
60    return self._find_and_load_filename(absolute_path)
61
62  def find_and_load_module(self, current_module, requested_module_name):
63    return self._find_and_load(current_module, requested_module_name, ".js")
64
65  def find_and_load_raw_script(self, current_module, filename):
66    absolute_path = os.path.join(self._root_dir, filename)
67    return self._find_and_load_filename(absolute_path)
68
69  def find_and_load_style_sheet(self,
70                                current_module, requested_style_sheet_name):
71    return self._find_and_load(
72      current_module, requested_style_sheet_name, ".css")
73
74
75class StyleSheet(object):
76  """Represents a stylesheet resource referenced by a module via the
77  base.requireStylesheet(xxx) directive."""
78  def __init__(self, name, filename, contents):
79    self.name = name
80    self.filename = filename
81    self.contents = contents
82
83  def __repr__(self):
84    return "StyleSheet(%s)" % self.name
85
86class RawScript(object):
87  """Represents a raw script resource referenced by a module via the
88  base.requireRawScript(xxx) directive."""
89  def __init__(self, name, filename, contents):
90    self.name = name
91    self.filename = filename
92    self.contents = contents
93
94  def __repr__(self):
95    return "RawScript(%s)" % self.name
96
97def _tokenize_js(text):
98  rest = text
99  tokens = ["//", "/*", "*/", "\n"]
100  while len(rest):
101    indices = [rest.find(token) for token in tokens]
102    found_indices = [index for index in indices if index >= 0]
103
104    if len(found_indices) == 0:
105      # end of string
106      yield rest
107      return
108
109    min_index = min(found_indices)
110    token_with_min = tokens[indices.index(min_index)]
111
112    if min_index > 0:
113      yield rest[:min_index]
114
115    yield rest[min_index:min_index + len(token_with_min)]
116    rest = rest[min_index + len(token_with_min):]
117
118def _strip_js_comments(text):
119  result_tokens = []
120  token_stream = _tokenize_js(text).__iter__()
121  while True:
122    try:
123      t = token_stream.next()
124    except StopIteration:
125      break
126
127    if t == "//":
128      while True:
129        try:
130          t2 = token_stream.next()
131          if t2 == "\n":
132            break
133        except StopIteration:
134          break
135    elif t == '/*':
136      nesting = 1
137      while True:
138        try:
139          t2 = token_stream.next()
140          if t2 == "/*":
141            nesting += 1
142          elif t2 == "*/":
143            nesting -= 1
144            if nesting == 0:
145              break
146        except StopIteration:
147          break
148    else:
149      result_tokens.append(t)
150  return "".join(result_tokens)
151
152def _MangleRawScriptFilenameToModuleName(filename):
153  name = filename
154  name = name.replace(os.sep, ':')
155  name = name.replace('..', '!!')
156  return name
157
158class Module(object):
159  """Represents a javascript module. It can either be directly requested, e.g.
160  passed in by name to calc_load_sequence, or created by being referenced a
161  module via the base.require(xxx) directive.
162
163  Interesting properties on this object are:
164
165  - filename: the file of the actual module
166  - contents: the actual text contents of the module
167  - style_sheets: StyleSheet objects that this module relies on for styling
168    information.
169  - dependent_modules: other modules that this module needs in order to run
170  """
171  def __init__(self, name = None):
172    self.name = name
173    self.filename = None
174    self.contents = None
175
176    self.dependent_module_names = []
177    self.dependent_modules = []
178    self.dependent_raw_script_names = []
179    self.dependent_raw_scripts = []
180    self.style_sheet_names = []
181    self.style_sheets = []
182
183  def __repr__(self):
184    return "Module(%s)" % self.name
185
186  def load_and_parse(self, module_filename,
187                     module_contents = None,
188                     decl_required = True):
189    if not module_contents:
190      f = open(module_filename, 'r')
191      self.contents = f.read()
192      f.close()
193    else:
194      self.contents = module_contents
195    self.filename = module_filename
196    stripped_text = _strip_js_comments(self.contents)
197    self.validate_uses_strict_mode_(stripped_text)
198    self.parse_definition_(stripped_text, decl_required)
199
200  def resolve(self, all_resources, resource_finder):
201    if "scripts" not in all_resources:
202      all_resources["scripts"] = {}
203    if "style_sheets" not in all_resources:
204      all_resources["style_sheets"] = {}
205    if "raw_scripts" not in all_resources:
206      all_resources["raw_scripts"] = {}
207
208    assert self.filename
209
210    for name in self.dependent_module_names:
211      if name in all_resources["scripts"]:
212        assert all_resources["scripts"][name].contents
213        self.dependent_modules.append(all_resources["scripts"][name])
214        continue
215
216      filename, contents = resource_finder.find_and_load_module(self, name)
217      if not filename:
218        raise DepsException("No file for module %(name)s needed by %(dep)s" %
219          {"name": name, "dep": self.filename})
220
221      module = Module(name)
222      all_resources["scripts"][name] = module
223      self.dependent_modules.append(module)
224      try:
225        module.load_and_parse(filename, contents)
226      except Exception, e:
227        raise Exception('While processing ' + filename + ': ' + e.message)
228      module.resolve(all_resources, resource_finder)
229
230    for name in self.dependent_raw_script_names:
231      filename, contents = resource_finder.find_and_load_raw_script(self, name)
232      if not filename:
233        raise DepsException("Could not find a file for raw script %s" % name)
234
235      if name in all_resources["raw_scripts"]:
236        assert all_resources["raw_scripts"][name].contents
237        self.dependent_raw_scripts.append(all_resources["raw_scripts"][name])
238        continue
239
240      raw_script = RawScript(name, filename, contents)
241      all_resources["raw_scripts"][name] = raw_script
242      self.dependent_raw_scripts.append(raw_script)
243
244    for name in self.style_sheet_names:
245      if name in all_resources["style_sheets"]:
246        assert all_resources["style_sheets"][name].contents
247        self.style_sheets.append(all_resources["style_sheets"][name])
248        continue
249
250      filename, contents = resource_finder.find_and_load_style_sheet(self, name)
251      if not filename:
252        raise DepsException("Could not find a file for stylesheet %s" % name)
253
254      style_sheet = StyleSheet(name, filename, contents)
255      all_resources["style_sheets"][name] = style_sheet
256      self.style_sheets.append(style_sheet)
257
258  def compute_load_sequence_recursive(self, load_sequence, already_loaded_set):
259    for dependent_module in self.dependent_modules:
260      dependent_module.compute_load_sequence_recursive(load_sequence,
261                                                       already_loaded_set)
262    if self.name not in already_loaded_set:
263      already_loaded_set.add(self.name)
264      load_sequence.append(self)
265
266  def validate_uses_strict_mode_(self, stripped_text):
267    lines = stripped_text.split('\n')
268    for line in lines:
269      line = line.strip()
270      if len(line.strip()) == 0:
271        continue
272      if line.strip() == """'use strict';""":
273        break
274      raise DepsException('%s must use strict mode' % self.name)
275
276  def parse_definition_(self, stripped_text, decl_required = True):
277    if not decl_required and not self.name:
278      raise Exception("Module.name must be set for decl_required to be false.")
279
280    rest = stripped_text
281    while True:
282      # Things to search for.
283      m_r = re.search("""base\s*\.\s*require\((["'])(.+?)\\1\)""",
284                      rest, re.DOTALL)
285      m_s = re.search("""base\s*\.\s*requireStylesheet\((["'])(.+?)\\1\)""",
286                      rest, re.DOTALL)
287      m_irs = re.search("""base\s*\.\s*requireRawScript\((["'])(.+?)\\1\)""",
288                      rest, re.DOTALL)
289      matches = [m for m in [m_r, m_s, m_irs] if m]
290
291      # Figure out which was first.
292      matches.sort(key=lambda x: x.start())
293      if len(matches):
294        m = matches[0]
295      else:
296        break
297
298      if m == m_r:
299        dependent_module_name = m.group(2)
300        if '/' in dependent_module_name:
301          raise DepsException("Slashes are not allowed in module names. "
302                              "Use '.' instead: %s" % dependent_module_name)
303        if dependent_module_name.endswith('js'):
304          raise DepsException("module names shouldn't end with .js"
305                              "The module system will append that for you: %s" %
306                              dependent_module_name)
307        self.dependent_module_names.append(dependent_module_name)
308      elif m == m_s:
309        style_sheet_name = m.group(2)
310        if '/' in style_sheet_name:
311          raise DepsException("Slashes are not allowed in style sheet names. "
312                              "Use '.' instead: %s" % style_sheet_name)
313        if style_sheet_name.endswith('.css'):
314          raise DepsException("Style sheets should not end in .css. "
315                              "The module system will append that for you" %
316                              style_sheet_name)
317        self.style_sheet_names.append(style_sheet_name)
318      elif m == m_irs:
319        name = m.group(2)
320        self.dependent_raw_script_names.append(name)
321
322      rest = rest[m.end():]
323
324
325def calc_load_sequence(filenames, toplevel_dir):
326  """Given a list of starting javascript files, figure out all the Module
327  objects that need to be loaded to satisfiy their dependencies.
328
329  The javascript files shoud specify their dependencies in a format that is
330  textually equivalent to base.js' require syntax, namely:
331
332     base.require(module1);
333     base.require(module2);
334     base.requireStylesheet(stylesheet);
335
336  The output of this function is an array of Module objects ordered by
337  dependency.
338  """
339  all_resources = {}
340  all_resources["scripts"] = {}
341  resource_finder = ResourceFinder(os.path.abspath(toplevel_dir))
342  initial_module_name_indices = {}
343  for filename in filenames:
344    if not os.path.exists(filename):
345      raise Exception("Could not find %s" % filename)
346
347    rel_filename = os.path.relpath(filename, toplevel_dir)
348    dirname = os.path.dirname(rel_filename)
349    modname  = os.path.splitext(os.path.basename(rel_filename))[0]
350    if len(dirname):
351      name = dirname.replace('/', '.') + '.' + modname
352    else:
353      name = modname
354
355    if name in all_resources["scripts"]:
356      continue
357
358    module = Module(name)
359    initial_module_name_indices[module.name] = len(initial_module_name_indices)
360    module.load_and_parse(filename, decl_required = False)
361    all_resources["scripts"][module.name] = module
362    module.resolve(all_resources, resource_finder)
363
364  # Find the root modules: ones that have no dependencies. While doing that,
365  # sort the dependent module list so that the computed load order is stable.
366  module_ref_counts = {}
367  for module in all_resources["scripts"].values():
368    module.dependent_modules.sort(lambda x, y: cmp(x.name, y.name))
369    module_ref_counts[module.name] = 0
370
371  def inc_ref_count(name):
372    module_ref_counts[name] = module_ref_counts[name] + 1
373  for module in all_resources["scripts"].values():
374    for dependent_module in module.dependent_modules:
375      inc_ref_count(dependent_module.name)
376
377  root_modules = [all_resources["scripts"][name]
378                  for name, ref_count in module_ref_counts.items()
379                  if ref_count == 0]
380
381  # Sort root_modules by the order they were originally requested,
382  # then sort everything else by name.
383  def compare_root_module(x, y):
384    n = len(initial_module_name_indices);
385    iX = initial_module_name_indices.get(x.name, n)
386    iY = initial_module_name_indices.get(y.name, n)
387    if cmp(iX, iY) != 0:
388      return cmp(iX, iY)
389    return cmp(x.name, y.name)
390  root_modules.sort(compare_root_module)
391
392  already_loaded_set = set()
393  load_sequence = []
394  for module in root_modules:
395    module.compute_load_sequence_recursive(load_sequence, already_loaded_set)
396  return load_sequence
397