#!/usr/bin/python2.4

# Copyright 2009 the V8 project authors. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
#       copyright notice, this list of conditions and the following
#       disclaimer in the documentation and/or other materials provided
#       with the distribution.
#     * Neither the name of Google Inc. nor the names of its
#       contributors may be used to endorse or promote products derived
#       from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""A JavaScript minifier.

It is far from being a complete JS parser, so there are many valid
JavaScript programs that will be ruined by it. Another strangeness is that
it accepts $ and % as parts of identifiers. It doesn't merge lines or strip
out blank lines in order to ease debugging. Variables at the top scope are
properties of the global object so we can't rename them. It is assumed that
you introduce variables with var as if JavaScript followed C++ scope rules
around curly braces, so the declaration must be above the first use.

Use as:
import jsmin
minifier = jsmin.JavaScriptMinifier()
program1 = minifier.JSMinify(program1)
program2 = minifier.JSMinify(program2)
"""

import re


# A regexp that matches a literal string surrounded by "double quotes".
# This regexp can handle embedded backslash-escaped characters including
# embedded backslash-escaped double quotes.
_DOUBLE_QUOTED_STRING = r'"(?:[^"\\]|\\.)*"'
# A regexp that matches a literal string surrounded by 'single quotes'.
_SINGLE_QUOTED_STRING = r"'(?:[^'\\]|\\.)*'"
# A regexp that matches a regexp literal surrounded by /slashes/.
# Don't allow a regexp to have a ) before the first ( since that's a
# syntax error and it's probably just two unrelated slashes.
# Also don't allow it to come after anything that can only be the
# end of a primary expression.
_SLASH_QUOTED_REGEXP = (
    r"(?<![\w$'\")\]])/(?:(?=\()|(?:[^()/\\]|\\.)+)(?:\([^/\\]|\\.)*/")

# Every rewriting pass lists the literal patterns first so that the contents
# of strings and regexp literals are consumed whole and left untouched.
_LITERALS = [_DOUBLE_QUOTED_STRING, _SINGLE_QUOTED_STRING,
             _SLASH_QUOTED_REGEXP]


def _CompileWithLiterals(*alternatives):
  """Compiles an alternation of the literal patterns and alternatives."""
  return re.compile("|".join(_LITERALS + list(alternatives)))


# Runs of spaces; group 1 captures the single space that replaces the run.
_MULTIPLE_SPACES_RE = _CompileWithLiterals("( )+")
# A single space that does not have an identifier character on both sides.
# % and $ are counted as identifier characters. Group 1 is either empty or
# did not participate in the match, so the space is always dropped.
_REMOVABLE_SPACE_RE = _CompileWithLiterals(
    r"(?<![a-zA-Z_0-9$%]) | (?![a-zA-Z_0-9$%])()")

_FUNCTION_DECLARATION = (
    r"\bfunction"              # Function definition keyword...
    r"( [\w$%]+)?"             # ...optional function name...
    r"\([\w$%,]+\)\{")         # ...argument declarations.
# Variable use. Cannot follow a period.
_VARIABLE_USE = r"(?<![.\w$%])[\w$%]+"
_STRUCTURE = [r"\{",                  # Curly braces.
              r"\}",
              r"\bvar [\w$%,]+",      # var declarations.
              _FUNCTION_DECLARATION]
# Used on lines containing a question mark: a name followed by a colon may be
# the middle operand of "condition ? iftrue : iffalse", so it is renamed.
_DECLARATION_WITH_TERNARY_RE = _CompileWithLiterals(
    *(_STRUCTURE + [_VARIABLE_USE]))
# Used on all other lines: a name followed by a colon is taken to be the key
# of an object literal { key:value } and is left alone.
_DECLARATION_RE = _CompileWithLiterals(
    *(_STRUCTURE + [_VARIABLE_USE + r"(?![:\w$%])"]))

_IDENTIFIER_RE = re.compile(r"([a-zA-Z0-9_$%]+)")
_WHOLE_LITERAL_RE = re.compile(r"'.*'$|\".*\"$|/.+/$")
_FUNCTION_PARTS_RE = re.compile(r"(function\b[^(]*)\((.*)\)\{$")
_BLOCK_COMMENT_RE = re.compile(r"/\*.*?\*/")
_LINE_COMMENT_RE = re.compile(r"//.*")
_COMMENT_START_RE = re.compile(r"/\*")
_COMMENT_END_RE = re.compile(r"\*/")


class JavaScriptMinifier(object):
  """An object that you can feed code snippets to to get them minified.

  The minifier is stateful: the identifiers seen at the top level, the current
  brace nesting depth and whether we are inside a /* */ comment all carry over
  from one JSMinify call to the next.
  """

  def __init__(self):
    # We prepopulate the list of identifiers that shouldn't be used. These
    # short language keywords could otherwise be used by the script as variable
    # names.
    self.seen_identifiers = {"do": True, "in": True}
    # Index of the next candidate short name within the current scope.
    self.identifier_counter = 0
    # True while we are between a /* and its closing */ on a later line.
    self.in_comment = False
    # Maps original variable names to short names for the current scope.
    self.map = {}
    # Current depth of curly brace nesting; 0 means top level.
    self.nesting = 0

  def LookAtIdentifier(self, m):
    """Records identifiers or keywords that we see in use.

    (So we can avoid renaming variables to these strings.)
    Args:
      m: The match object returned by re.search.

    Returns:
      Nothing.
    """
    self.seen_identifiers[m.group(1)] = True

  def Push(self):
    """Called when we encounter a '{'."""
    self.nesting += 1

  def Pop(self):
    """Called when we encounter a '}'."""
    self.nesting -= 1
    # We treat each top-level opening brace as a single scope that can span
    # several sets of nested braces.
    if self.nesting == 0:
      self.map = {}
      self.identifier_counter = 0

  def Declaration(self, m):
    """Rewrites bits of the program selected by a regexp.

    These can be curly braces, literal strings, function declarations and var
    declarations. (These last two must be on one line including the opening
    curly brace of the function for their variables to be renamed).

    Args:
      m: The match object returned by re.search.

    Returns:
      The string that should replace the match in the rewritten program.
    """
    matched_text = m.group(0)
    if matched_text == "{":
      self.Push()
      return matched_text
    if matched_text == "}":
      self.Pop()
      return matched_text
    # String and regexp literals are passed through untouched.
    if matched_text[:1] in ("\"", "'", "/"):
      return matched_text
    if matched_text.startswith("var "):
      var_names = matched_text[len("var "):].split(",")
      return "var " + ",".join([self.FindNewName(name) for name in var_names])
    function_parts = _FUNCTION_PARTS_RE.match(matched_text)
    if function_parts:
      up_to_args = function_parts.group(1)
      args = function_parts.group(2).split(",")
      # Enter the function's scope before naming the arguments so that they
      # are renamed even when the function is declared at the top level.
      self.Push()
      new_args = [self.FindNewName(arg) for arg in args]
      return up_to_args + "(" + ",".join(new_args) + "){"
    # Anything else is a use of a variable (or a keyword or a number); it is
    # replaced only if it has been given a new name in this scope.
    return self.map.get(matched_text, matched_text)

  def CharFromNumber(self, number):
    """A single-digit base-52 encoding using a-zA-Z.

    Args:
      number: An integer from 0 to 51 inclusive.

    Returns:
      'a'-'z' for 0-25 and 'A'-'Z' for 26-51.
    """
    if number < 26:
      return chr(number + 97)
    return chr(number - 26 + 65)

  def FindNewName(self, var_name):
    """Finds a new 1-character or 2-character name for a variable.

    Enters it into the mapping table for this scope.

    Args:
      var_name: The name of the variable before renaming.

    Returns:
      The new name of the variable. A variable that already has a new name in
      this scope keeps it, and a variable declared at the top level (outside
      all curly braces) keeps its original name.
    """
    if var_name in self.map:
      return self.map[var_name]
    if self.nesting == 0:
      return var_name
    while True:
      # Floor division keeps both digits integral on Python 2 and Python 3;
      # chr() in CharFromNumber does not accept a float.
      identifier_first_char = self.identifier_counter % 52
      identifier_second_char = self.identifier_counter // 52
      new_identifier = self.CharFromNumber(identifier_first_char)
      if identifier_second_char != 0:
        new_identifier = (
            self.CharFromNumber(identifier_second_char - 1) + new_identifier)
      self.identifier_counter += 1
      # Skip candidates that collide with keywords or top-level identifiers.
      if new_identifier not in self.seen_identifiers:
        break

    self.map[var_name] = new_identifier
    return new_identifier

  def RemoveSpaces(self, m):
    """Returns literal strings unchanged, replaces other inputs with group 1.

    Other inputs are replaced with the contents of capture 1. This is either
    a single space or an empty string.

    Args:
      m: The match object returned by re.search.

    Returns:
      The string that should be inserted instead of the matched text.
    """
    entire_match = m.group(0)
    if _WHOLE_LITERAL_RE.match(entire_match):
      return entire_match
    # Group 1 is None when the alternative that matched does not contain it;
    # that case means "delete the space", so return the empty string.
    return m.group(1) or ""

  def JSMinify(self, text):
    """The main entry point. Takes a text and returns a compressed version.

    The compressed version hopefully does the same thing. Line breaks are
    preserved.

    Args:
      text: The text of the code snippet as a multiline string.

    Returns:
      The compressed text of the code snippet as a multiline string.
    """
    new_lines = []
    for line in text.split("\n"):
      line = line.replace("\t", " ")
      if self.in_comment:
        comment_end = _COMMENT_END_RE.search(line)
        if not comment_end:
          # The whole line is inside a comment; keep an empty line so that
          # line numbers still match the original.
          new_lines.append("")
          continue
        line = line[comment_end.end():]
        self.in_comment = False

      line = _BLOCK_COMMENT_RE.sub(" ", line)
      line = _LINE_COMMENT_RE.sub("", line)
      comment_start = _COMMENT_START_RE.search(line)
      if comment_start:
        line = line[:comment_start.start()]
        self.in_comment = True

      # Strip leading and trailing spaces.
      line = line.strip(" ")
      # Replace multiple spaces with a single space.
      line = _MULTIPLE_SPACES_RE.sub(self.RemoveSpaces, line)
      # Strip single spaces unless they have an identifier character both
      # before and after the space.
      line = _REMOVABLE_SPACE_RE.sub(self.RemoveSpaces, line)
      # Collect keywords and identifiers that are already in use.
      if self.nesting == 0:
        for identifier in _IDENTIFIER_RE.finditer(line):
          self.LookAtIdentifier(identifier)
      # Unfortunately the keyword-value syntax { key:value } makes the key look
      # like a variable where in fact it is a literal string. We use the
      # presence or absence of a question mark to try to distinguish between
      # this case and the ternary operator: "condition ? iftrue : iffalse".
      if "?" in line:
        declaration_re = _DECLARATION_WITH_TERNARY_RE
      else:
        declaration_re = _DECLARATION_RE
      line = declaration_re.sub(self.Declaration, line)
      new_lines.append(line)

    return "\n".join(new_lines) + "\n"