1c514574143c1bf74d4fb6e7dccb175fe9ff2f5d3sgjesse@chromium.org#!/usr/bin/python2.4
2c514574143c1bf74d4fb6e7dccb175fe9ff2f5d3sgjesse@chromium.org
30e3f88bd850f46930aa95684377fab02a394ae41ulan@chromium.org# Copyright 2012 the V8 project authors. All rights reserved.
4c514574143c1bf74d4fb6e7dccb175fe9ff2f5d3sgjesse@chromium.org# Redistribution and use in source and binary forms, with or without
5c514574143c1bf74d4fb6e7dccb175fe9ff2f5d3sgjesse@chromium.org# modification, are permitted provided that the following conditions are
6c514574143c1bf74d4fb6e7dccb175fe9ff2f5d3sgjesse@chromium.org# met:
7c514574143c1bf74d4fb6e7dccb175fe9ff2f5d3sgjesse@chromium.org#
8c514574143c1bf74d4fb6e7dccb175fe9ff2f5d3sgjesse@chromium.org#     * Redistributions of source code must retain the above copyright
9c514574143c1bf74d4fb6e7dccb175fe9ff2f5d3sgjesse@chromium.org#       notice, this list of conditions and the following disclaimer.
10c514574143c1bf74d4fb6e7dccb175fe9ff2f5d3sgjesse@chromium.org#     * Redistributions in binary form must reproduce the above
11c514574143c1bf74d4fb6e7dccb175fe9ff2f5d3sgjesse@chromium.org#       copyright notice, this list of conditions and the following
12c514574143c1bf74d4fb6e7dccb175fe9ff2f5d3sgjesse@chromium.org#       disclaimer in the documentation and/or other materials provided
13c514574143c1bf74d4fb6e7dccb175fe9ff2f5d3sgjesse@chromium.org#       with the distribution.
14c514574143c1bf74d4fb6e7dccb175fe9ff2f5d3sgjesse@chromium.org#     * Neither the name of Google Inc. nor the names of its
15c514574143c1bf74d4fb6e7dccb175fe9ff2f5d3sgjesse@chromium.org#       contributors may be used to endorse or promote products derived
16c514574143c1bf74d4fb6e7dccb175fe9ff2f5d3sgjesse@chromium.org#       from this software without specific prior written permission.
17c514574143c1bf74d4fb6e7dccb175fe9ff2f5d3sgjesse@chromium.org#
18c514574143c1bf74d4fb6e7dccb175fe9ff2f5d3sgjesse@chromium.org# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19c514574143c1bf74d4fb6e7dccb175fe9ff2f5d3sgjesse@chromium.org# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20c514574143c1bf74d4fb6e7dccb175fe9ff2f5d3sgjesse@chromium.org# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21c514574143c1bf74d4fb6e7dccb175fe9ff2f5d3sgjesse@chromium.org# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22c514574143c1bf74d4fb6e7dccb175fe9ff2f5d3sgjesse@chromium.org# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23c514574143c1bf74d4fb6e7dccb175fe9ff2f5d3sgjesse@chromium.org# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24c514574143c1bf74d4fb6e7dccb175fe9ff2f5d3sgjesse@chromium.org# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25c514574143c1bf74d4fb6e7dccb175fe9ff2f5d3sgjesse@chromium.org# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26c514574143c1bf74d4fb6e7dccb175fe9ff2f5d3sgjesse@chromium.org# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27c514574143c1bf74d4fb6e7dccb175fe9ff2f5d3sgjesse@chromium.org# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28c514574143c1bf74d4fb6e7dccb175fe9ff2f5d3sgjesse@chromium.org# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29c514574143c1bf74d4fb6e7dccb175fe9ff2f5d3sgjesse@chromium.org
"""A JavaScript minifier.

It is far from being a complete JS parser, so there are many valid
JavaScript programs that will be ruined by it.  Another strangeness is that
it accepts $ and % as parts of identifiers.  It doesn't merge lines or strip
out blank lines in order to ease debugging.  Variables at the top scope are
properties of the global object so we can't rename them.  It is assumed that
you introduce variables with var as if JavaScript followed C++ scope rules
around curly braces, so the declaration must be above the first use.

Use as:
import jsmin
minifier = jsmin.JavaScriptMinifier()
program1 = minifier.JSMinify(program1)
program2 = minifier.JSMinify(program2)
"""
46c514574143c1bf74d4fb6e7dccb175fe9ff2f5d3sgjesse@chromium.org
47c514574143c1bf74d4fb6e7dccb175fe9ff2f5d3sgjesse@chromium.orgimport re
48c514574143c1bf74d4fb6e7dccb175fe9ff2f5d3sgjesse@chromium.org
49c514574143c1bf74d4fb6e7dccb175fe9ff2f5d3sgjesse@chromium.org
class JavaScriptMinifier(object):
  """An object that you can feed code snippets to to get them minified.

  The minifier works line by line with regular expressions.  It strips
  comments and redundant spaces, and renames variables that are introduced
  with "var" or as function arguments inside a top-level pair of braces.

  State is kept on the instance between calls to JSMinify: the set of
  identifiers seen at the top level, the brace nesting depth, the renaming
  table of the current scope and whether we are inside a /* */ comment.
  """

  # Reserved words of JavaScript.  Generated names consist of letters only, so
  # any of these could otherwise be produced by the name generator.
  _RESERVED_WORDS = (
      "break", "case", "catch", "class", "const", "continue", "debugger",
      "default", "delete", "do", "else", "enum", "export", "extends", "false",
      "finally", "for", "function", "if", "implements", "import", "in",
      "instanceof", "interface", "let", "new", "null", "package", "private",
      "protected", "public", "return", "static", "super", "switch", "this",
      "throw", "true", "try", "typeof", "var", "void", "while", "with",
      "yield")

  # A regexp that matches a literal string surrounded by "double quotes".
  # This regexp can handle embedded backslash-escaped characters including
  # embedded backslash-escaped double quotes.
  _DOUBLE_QUOTED_STRING = r'"(?:[^"\\]|\\.)*"'
  # A regexp that matches a literal string surrounded by 'single quotes'.
  _SINGLE_QUOTED_STRING = r"'(?:[^'\\]|\\.)*'"
  # A regexp that matches a regexp literal surrounded by /slashes/.
  # Don't allow a regexp to have a ) before the first ( since that's a
  # syntax error and it's probably just two unrelated slashes.
  # Also don't allow it to come after anything that can only be the
  # end of a primary expression.
  _SLASH_QUOTED_REGEXP = (
      r"(?<![\w$'\")\]])/(?:(?=\()|(?:[^()/\\]|\\.)+)(?:\([^/\\]|\\.)*/")
  # Literals are always the first alternatives so that their contents are
  # consumed as a unit and never rewritten.
  _LITERALS = [_DOUBLE_QUOTED_STRING,
               _SINGLE_QUOTED_STRING,
               _SLASH_QUOTED_REGEXP]

  # Runs of spaces; capture 1 is a single space.
  _MULTIPLE_SPACES_RE = re.compile("|".join(_LITERALS + ["( )+"]))
  # Single spaces that do not have an identifier character both before and
  # after them.  % and $ are counted as identifier characters.  Capture 1 is
  # empty or unset, so the space is dropped.
  _REMOVABLE_SPACE_RE = re.compile("|".join(
      _LITERALS + [r"(?<![a-zA-Z_0-9$%]) | (?![a-zA-Z_0-9$%])()"]))
  # Keywords and identifiers.
  _IDENTIFIER_RE = re.compile(r"([a-zA-Z0-9_$%]+)")

  _FUNCTION_DECLARATION = (
      r"\bfunction"              # Function definition keyword...
      r"( [\w$%]+)?"             # ...optional function name...
      r"\([\w$%,]+\)\{")         # ...argument declarations.
  # Everything that Declaration() has to look at.  The last alternative is a
  # variable use, which cannot follow a period.
  _DECLARATION_PATTERN = "|".join(
      _LITERALS + [r"\{",                  # Curly braces.
                   r"\}",
                   r"\bvar [\w$%,]+",      # var declarations.
                   _FUNCTION_DECLARATION,
                   r"(?<![.\w$%])[\w$%]+"])
  # Unfortunately the keyword-value syntax { key:value } makes the key look
  # like a variable where in fact it is a literal string.  On lines without a
  # question mark a variable use may therefore not precede a colon; lines
  # with a question mark are assumed to contain the ternary operator
  # "condition ? iftrue : iffalse" and get no such restriction.
  _DECLARATION_RE = re.compile(_DECLARATION_PATTERN + r"(?![:\w$%])")
  _DECLARATION_TERNARY_RE = re.compile(_DECLARATION_PATTERN)

  def __init__(self):
    # We prepopulate the list of identifiers that shouldn't be used as new
    # names: the reserved words of the language.
    self.seen_identifiers = dict.fromkeys(self._RESERVED_WORDS, True)
    self.identifier_counter = 0
    self.in_comment = False
    self.map = {}
    self.nesting = 0

  def LookAtIdentifier(self, m):
    """Records identifiers or keywords that we see in use.

    (So we can avoid renaming variables to these strings.)
    Args:
      m: A match object whose capture 1 is the identifier.

    Returns:
      Nothing.
    """
    self.seen_identifiers[m.group(1)] = True

  def Push(self):
    """Called when we encounter a '{'."""
    self.nesting += 1

  def Pop(self):
    """Called when we encounter a '}'."""
    self.nesting -= 1
    # We treat each top-level opening brace as a single scope that can span
    # several sets of nested braces.
    if self.nesting == 0:
      self.map = {}
      self.identifier_counter = 0

  def Declaration(self, m):
    """Rewrites bits of the program selected by a regexp.

    These can be curly braces, literal strings, function declarations and var
    declarations.  (These last two must be on one line including the opening
    curly brace of the function for their variables to be renamed).

    Args:
      m: The match object returned by re.search.

    Returns:
      The string that should replace the match in the rewritten program.
    """
    matched_text = m.group(0)
    if matched_text == "{":
      self.Push()
      return matched_text
    if matched_text == "}":
      self.Pop()
      return matched_text
    # String and regexp literals are passed through untouched.
    if matched_text[0] in "\"'/":
      return matched_text
    if matched_text.startswith("var "):
      var_names = matched_text[len("var "):].split(",")
      return "var " + ",".join([self.FindNewName(name) for name in var_names])
    function_match = re.match(r"(function\b[^(]*)\((.*)\)\{$", matched_text)
    if function_match:
      up_to_args = function_match.group(1)
      args = function_match.group(2).split(",")
      # The opening brace is part of the match, so it is counted here.
      self.Push()
      new_args = [self.FindNewName(arg) for arg in args]
      return up_to_args + "(" + ",".join(new_args) + "){"
    # A plain identifier: use its new name if it has one in this scope.
    return self.map.get(matched_text, matched_text)

  def CharFromNumber(self, number):
    """A single-digit base-52 encoding using a-zA-Z.

    Args:
      number: The digit to encode, expected to be in the range 0 to 51.

    Returns:
      A lower case letter for 0-25 and an upper case letter for 26-51.
    """
    if number < 26:
      return chr(number + 97)
    return chr(number - 26 + 65)

  def _NameFromCounter(self, counter):
    """Returns the counter'th name in the sequence a..Z, aa..ZZ, aaa.. ."""
    name = self.CharFromNumber(counter % 52)
    counter //= 52
    while counter:
      # There is no zero digit in the leading positions ("a" alone already
      # stands for 0), hence the adjustment by one for every extra letter.
      counter -= 1
      name = self.CharFromNumber(counter % 52) + name
      counter //= 52
    return name

  def FindNewName(self, var_name):
    """Finds a new short name for a variable.

    Enters it into the mapping table for this scope.  Names are handed out
    shortest first, skipping reserved words and identifiers that have been
    seen at the top level.

    Args:
      var_name: The name of the variable before renaming.

    Returns:
      The new name of the variable.  The name is returned unchanged at the
      top level, where variables are properties of the global object, and
      when it is empty.
    """
    # NOTE(review): an earlier comment here said that names ending in _ are
    # left alone; the code does not special-case them.
    if var_name in self.map:
      return self.map[var_name]
    # An empty name is what is left over from a trailing comma, for instance
    # in a var declaration that continues on the next line.  Giving it a name
    # would insert a variable that isn't in the program.
    if not var_name or self.nesting == 0:
      return var_name
    while True:
      new_identifier = self._NameFromCounter(self.identifier_counter)
      self.identifier_counter += 1
      if new_identifier not in self.seen_identifiers:
        break

    self.map[var_name] = new_identifier
    return new_identifier

  def RemoveSpaces(self, m):
    """Returns literals unchanged, replaces other inputs with capture 1.

    Literals are quoted strings and regexp literals.  For other inputs,
    capture 1 is either a single space or an empty string; it can also be
    unset, in which case None is returned and re.sub inserts nothing.

    Args:
      m: The match object returned by re.search.

    Returns:
      The string that should be inserted instead of the matched text.
    """
    entire_match = m.group(0)
    for literal in (r"'.*'$", r'".*"$', r"/.+/$"):
      if re.match(literal, entire_match):
        return entire_match
    return m.group(1)

  def _StripComments(self, line):
    """Removes comments from a line, tracking /* */ comments across lines.

    Args:
      line: One line of the program with tabs already turned into spaces.

    Returns:
      The line without comments, or None if the whole line is inside a
      multiline comment.
    """
    if self.in_comment:
      end = line.find("*/")
      if end < 0:
        return None
      line = line[end + len("*/"):]
      self.in_comment = False
    line = re.sub(r"/\*.*?\*/", " ", line)
    line = re.sub(r"//.*", "", line)
    # Anything left is the start of a comment that ends on a later line.
    start = line.find("/*")
    if start >= 0:
      line = line[:start]
      self.in_comment = True
    return line

  def JSMinify(self, text):
    """The main entry point.  Takes a text and returns a compressed version.

    The compressed version hopefully does the same thing.  Line breaks are
    preserved.

    Args:
      text: The text of the code snippet as a multiline string.

    Returns:
      The compressed text of the code snippet as a multiline string.
    """
    new_lines = []
    for line in text.split("\n"):
      line = self._StripComments(line.replace("\t", " "))
      if line is None:
        # Keep an empty line so that line numbers stay the same.
        new_lines.append("")
        continue

      # Strip leading and trailing spaces.
      line = line.strip(" ")
      # Replace multiple spaces with a single space.
      line = self._MULTIPLE_SPACES_RE.sub(self.RemoveSpaces, line)
      # Strip single spaces unless they have an identifier character both
      # before and after the space.
      line = self._REMOVABLE_SPACE_RE.sub(self.RemoveSpaces, line)
      # Collect keywords and identifiers that are already in use.
      if self.nesting == 0:
        for identifier_match in self._IDENTIFIER_RE.finditer(line):
          self.LookAtIdentifier(identifier_match)
      # The presence or absence of a question mark decides whether a name in
      # front of a colon is treated as a variable or as an object literal key.
      if "?" in line:
        declaration_re = self._DECLARATION_TERNARY_RE
      else:
        declaration_re = self._DECLARATION_RE
      new_lines.append(declaration_re.sub(self.Declaration, line))

    return "\n".join(new_lines) + "\n"
283