1a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#!/usr/bin/python2.4
2a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
3a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block# Copyright 2009 the V8 project authors. All rights reserved.
4a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block# Redistribution and use in source and binary forms, with or without
5a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block# modification, are permitted provided that the following conditions are
6a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block# met:
7a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#
8a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#     * Redistributions of source code must retain the above copyright
9a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#       notice, this list of conditions and the following disclaimer.
10a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#     * Redistributions in binary form must reproduce the above
11a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#       copyright notice, this list of conditions and the following
12a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#       disclaimer in the documentation and/or other materials provided
13a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#       with the distribution.
14a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#     * Neither the name of Google Inc. nor the names of its
15a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#       contributors may be used to endorse or promote products derived
16a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#       from this software without specific prior written permission.
17a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#
18a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
"""A JavaScript minifier.

It is far from being a complete JS parser, so there are many valid
JavaScript programs that will be ruined by it.  Another strangeness is that
it accepts $ and % as parts of identifiers.  It doesn't merge lines or strip
out blank lines in order to ease debugging.  Variables at the top scope are
properties of the global object so we can't rename them.  It is assumed that
you introduce variables with var as if JavaScript followed C++ scope rules
around curly braces, so the declaration must be above the first use.

Use as:
import jsmin
minifier = jsmin.JavaScriptMinifier()
program1 = minifier.JSMinify(program1)
program2 = minifier.JSMinify(program2)
"""
46a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
47a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockimport re
48a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
49a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
50a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass JavaScriptMinifier(object):
51a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block  """An object that you can feed code snippets to to get them minified."""
52a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
53a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block  def __init__(self):
54a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    # We prepopulate the list of identifiers that shouldn't be used.  These
55a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    # short language keywords could otherwise be used by the script as variable
56a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    # names.
57a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    self.seen_identifiers = {"do": True, "in": True}
58a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    self.identifier_counter = 0
59a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    self.in_comment = False
60a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    self.map = {}
61a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    self.nesting = 0
62a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
63a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block  def LookAtIdentifier(self, m):
64a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    """Records identifiers or keywords that we see in use.
65a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
66a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    (So we can avoid renaming variables to these strings.)
67a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    Args:
68a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      m: The match object returned by re.search.
69a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
70a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    Returns:
71a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      Nothing.
72a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    """
73a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    identifier = m.group(1)
74a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    self.seen_identifiers[identifier] = True
75a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
76a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block  def Push(self):
77a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    """Called when we encounter a '{'."""
78a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    self.nesting += 1
79a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
80a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block  def Pop(self):
81a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    """Called when we encounter a '}'."""
82a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    self.nesting -= 1
83a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    # We treat each top-level opening brace as a single scope that can span
84a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    # several sets of nested braces.
85a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    if self.nesting == 0:
86a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      self.map = {}
87a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      self.identifier_counter = 0
88a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
89a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block  def Declaration(self, m):
90a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    """Rewrites bits of the program selected by a regexp.
91a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
92a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    These can be curly braces, literal strings, function declarations and var
93a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    declarations.  (These last two must be on one line including the opening
94a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    curly brace of the function for their variables to be renamed).
95a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
96a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    Args:
97a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      m: The match object returned by re.search.
98a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
99a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    Returns:
100a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      The string that should replace the match in the rewritten program.
101a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    """
102a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    matched_text = m.group(0)
103a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    if matched_text == "{":
104a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      self.Push()
105a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      return matched_text
106a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    if matched_text == "}":
107a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      self.Pop()
108a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      return matched_text
109a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    if re.match("[\"'/]", matched_text):
110a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      return matched_text
111a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    m = re.match(r"var ", matched_text)
112a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    if m:
113a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      var_names = matched_text[m.end():]
114a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      var_names = re.split(r",", var_names)
115a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      return "var " + ",".join(map(self.FindNewName, var_names))
116a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    m = re.match(r"(function\b[^(]*)\((.*)\)\{$", matched_text)
117a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    if m:
118a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      up_to_args = m.group(1)
119a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      args = m.group(2)
120a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      args = re.split(r",", args)
121a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      self.Push()
122a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      return up_to_args + "(" + ",".join(map(self.FindNewName, args)) + "){"
123a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
124a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    if matched_text in self.map:
125a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      return self.map[matched_text]
126a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
127a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    return matched_text
128a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
129a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block  def CharFromNumber(self, number):
130a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    """A single-digit base-52 encoding using a-zA-Z."""
131a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    if number < 26:
132a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      return chr(number + 97)
133a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    number -= 26
134a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    return chr(number + 65)
135a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
136a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block  def FindNewName(self, var_name):
137a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    """Finds a new 1-character or 2-character name for a variable.
138a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
139a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    Enters it into the mapping table for this scope.
140a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
141a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    Args:
142a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      var_name: The name of the variable before renaming.
143a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
144a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    Returns:
145a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      The new name of the variable.
146a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    """
147a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    new_identifier = ""
148a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    # Variable names that end in _ are member variables of the global object,
149a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    # so they can be visible from code in a different scope.  We leave them
150a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    # alone.
151a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    if var_name in self.map:
152a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      return self.map[var_name]
153a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    if self.nesting == 0:
154a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      return var_name
155a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    while True:
156a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      identifier_first_char = self.identifier_counter % 52
157a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      identifier_second_char = self.identifier_counter / 52
158a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      new_identifier = self.CharFromNumber(identifier_first_char)
159a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      if identifier_second_char != 0:
160a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block        new_identifier = (
161a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block            self.CharFromNumber(identifier_second_char - 1) + new_identifier)
162a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      self.identifier_counter += 1
163a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      if not new_identifier in self.seen_identifiers:
164a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block        break
165a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
166a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    self.map[var_name] = new_identifier
167a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    return new_identifier
168a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
169a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block  def RemoveSpaces(self, m):
170a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    """Returns literal strings unchanged, replaces other inputs with group 2.
171a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
172a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    Other inputs are replaced with the contents of capture 1.  This is either
173a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    a single space or an empty string.
174a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
175a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    Args:
176a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      m: The match object returned by re.search.
177a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
178a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    Returns:
179a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      The string that should be inserted instead of the matched text.
180a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    """
181a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    entire_match = m.group(0)
182a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    replacement = m.group(1)
183a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    if re.match(r"'.*'$", entire_match):
184a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      return entire_match
185a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    if re.match(r'".*"$', entire_match):
186a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      return entire_match
187a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    if re.match(r"/.+/$", entire_match):
188a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      return entire_match
189a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    return replacement
190a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
191a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block  def JSMinify(self, text):
192a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    """The main entry point.  Takes a text and returns a compressed version.
193a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
194a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    The compressed version hopefully does the same thing.  Line breaks are
195a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    preserved.
196a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
197a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    Args:
198a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      text: The text of the code snippet as a multiline string.
199a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
200a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    Returns:
201a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      The compressed text of the code snippet as a multiline string.
202a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    """
203a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    new_lines = []
204a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    for line in re.split(r"\n", text):
205a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      line = line.replace("\t", " ")
206a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      if self.in_comment:
207a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block        m = re.search(r"\*/", line)
208a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block        if m:
209a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block          line = line[m.end():]
210a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block          self.in_comment = False
211a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block        else:
212a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block          new_lines.append("")
213a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block          continue
214a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
215a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      if not self.in_comment:
216a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block        line = re.sub(r"/\*.*?\*/", " ", line)
217a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block        line = re.sub(r"//.*", "", line)
218a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block        m = re.search(r"/\*", line)
219a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block        if m:
220a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block          line = line[:m.start()]
221a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block          self.in_comment = True
222a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
223a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      # Strip leading and trailing spaces.
224a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      line = re.sub(r"^ +", "", line)
225a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      line = re.sub(r" +$", "", line)
226a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      # A regexp that matches a literal string surrounded by "double quotes".
227a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      # This regexp can handle embedded backslash-escaped characters including
228a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      # embedded backslash-escaped double quotes.
229a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      double_quoted_string = r'"(?:[^"\\]|\\.)*"'
230a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      # A regexp that matches a literal string surrounded by 'double quotes'.
231a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      single_quoted_string = r"'(?:[^'\\]|\\.)*'"
232a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      # A regexp that matches a regexp literal surrounded by /slashes/.
233e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke      # Don't allow a regexp to have a ) before the first ( since that's a
234e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke      # syntax error and it's probably just two unrelated slashes.
2353ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch      # Also don't allow it to come after anything that can only be the
2363ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch      # end of a primary expression.
2373ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch      slash_quoted_regexp = r"(?<![\w$'\")\]])/(?:(?=\()|(?:[^()/\\]|\\.)+)(?:\([^/\\]|\\.)*/"
238a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      # Replace multiple spaces with a single space.
239a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      line = re.sub("|".join([double_quoted_string,
240a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block                              single_quoted_string,
241a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block                              slash_quoted_regexp,
242a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block                              "( )+"]),
243a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block                    self.RemoveSpaces,
244a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block                    line)
245a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      # Strip single spaces unless they have an identifier character both before
246a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      # and after the space.  % and $ are counted as identifier characters.
247a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      line = re.sub("|".join([double_quoted_string,
248a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block                              single_quoted_string,
249a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block                              slash_quoted_regexp,
250a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block                              r"(?<![a-zA-Z_0-9$%]) | (?![a-zA-Z_0-9$%])()"]),
251a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block                    self.RemoveSpaces,
252a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block                    line)
253a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      # Collect keywords and identifiers that are already in use.
254a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      if self.nesting == 0:
255a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block        re.sub(r"([a-zA-Z0-9_$%]+)", self.LookAtIdentifier, line)
256a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      function_declaration_regexp = (
257a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block          r"\bfunction"              # Function definition keyword...
258a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block          r"( [\w$%]+)?"             # ...optional function name...
259a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block          r"\([\w$%,]+\)\{")         # ...argument declarations.
260a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      # Unfortunately the keyword-value syntax { key:value } makes the key look
261a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      # like a variable where in fact it is a literal string.  We use the
262a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      # presence or absence of a question mark to try to distinguish between
263a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      # this case and the ternary operator: "condition ? iftrue : iffalse".
264a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      if re.search(r"\?", line):
265a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block        block_trailing_colon = r""
266a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      else:
267a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block        block_trailing_colon = r"(?![:\w$%])"
268a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      # Variable use.  Cannot follow a period precede a colon.
269a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      variable_use_regexp = r"(?<![.\w$%])[\w$%]+" + block_trailing_colon
270a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      line = re.sub("|".join([double_quoted_string,
271a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block                              single_quoted_string,
272a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block                              slash_quoted_regexp,
273a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block                              r"\{",                  # Curly braces.
274a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block                              r"\}",
275a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block                              r"\bvar [\w$%,]+",      # var declarations.
276a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block                              function_declaration_regexp,
277a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block                              variable_use_regexp]),
278a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block                    self.Declaration,
279a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block                    line)
280a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block      new_lines.append(line)
281a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
282a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block    return "\n".join(new_lines) + "\n"
283