#!/usr/bin/env python
# -*- coding: ascii -*-
r"""
=====================
 Javascript Minifier
=====================

rJSmin is a javascript minifier written in python.

The minifier is based on the semantics of `jsmin.c by Douglas Crockford`_\\.

:Copyright:

 Copyright 2011 - 2014
 Andr\xe9 Malo or his licensors, as applicable

:License:

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.

The module is a re-implementation aiming for speed, so it can be used at
runtime (rather than during a preprocessing step). Usually it produces the
same results as the original ``jsmin.c``. It differs in the following ways:

- there is no error detection: unterminated string, regex and comment
  literals are treated as regular javascript code and minified as such.
- Control characters inside string and regex literals are left untouched; they
  are not converted to spaces (nor to \\n)
- Newline characters are not allowed inside string and regex literals, except
  for line continuations in string literals (ECMA-5).
- "return /regex/" is recognized correctly.
- "+ +" and "- -" sequences are not collapsed to '++' or '--'
- Newlines before ! operators are removed more sensibly
- Comments starting with an exclamation mark (``!``) can be kept optionally
- rJSmin does not handle streams, but only complete strings. (However, the
  module provides a "streamy" interface).

Since most parts of the logic are handled by the regex engine it's way faster
than the original python port of ``jsmin.c`` by Baruch Even. The speed factor
varies between about 6 and 55 depending on input and python version (it gets
faster the more compressed the input already is). Compared to the
speed-refactored python port by Dave St.Germain the performance gain is less
dramatic but still between 3 and 50 (for huge inputs). See the docs/BENCHMARKS
file for details.

rjsmin.c is a reimplementation of rjsmin.py in C and speeds it up even more.

Both python 2 and python 3 are supported.

.. _jsmin.c by Douglas Crockford:
   http://www.crockford.com/javascript/jsmin.c
"""
if __doc__:
    # The docstring is a raw ascii string; expand its \x escapes so that
    # the author name is presented properly.
    # pylint: disable = W0622
    __doc__ = __doc__.encode('ascii').decode('unicode_escape')
__author__ = r"Andr\xe9 Malo".encode('ascii').decode('unicode_escape')
__docformat__ = "restructuredtext en"
__license__ = "Apache License, Version 2.0"
__version__ = '1.0.10'
__all__ = ['jsmin']

import re as _re


def _make_jsmin(python_only=False):
    """
    Generate JS minifier based on `jsmin.c by Douglas Crockford`_

    The minifier is built from a set of small regex fragments which are
    combined into one big alternation. Two variants are compiled: one that
    drops every comment and one that keeps ``/*!...*/`` comments.

    .. _jsmin.c by Douglas Crockford:
       http://www.crockford.com/javascript/jsmin.c

    :Parameters:
      `python_only` : ``bool``
        Use only the python variant. If true, the c extension is not even
        tried to be loaded.

    :Return: Minifier
    :Rtype: ``callable``
    """
    # pylint: disable = R0912, R0914, W0612

    if not python_only:
        try:
            import _rjsmin  # pylint: disable = F0401
        except ImportError:
            pass
        else:
            return _rjsmin.jsmin

    # NOTE: the fragment names below are looked up by name via locals()
    # when the final patterns are assembled - do not rename them.

    # Whitespace without \n (012) and \r (015); those are "newline".
    space_chars = r'[\000-\011\013\014\016-\040]'

    line_comment = r'(?://[^\r\n]*)'
    space_comment = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
    space_comment_nobang = r'(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/)'
    bang_comment = r'(?:/\*![^*]*\*+(?:[^/*][^*]*\*+)*/)'

    # String literals; a backslash may escape a line terminator (ECMA-5
    # line continuation), bare line terminators are not allowed.
    string1 = \
        r'(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^\047\\\r\n]*)*\047)'
    string2 = r'(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^"\\\r\n]*)*")'
    strings = r'(?:%s|%s)' % (string1, string2)

    # Regex literals; a "/" inside a character class does not terminate.
    charclass = r'(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\])'
    nospecial = r'[^/\\\[\r\n]'
    regex = r'(?:/(?![\r\n/*])%s*(?:(?:\\[^\r\n]|%s)%s*)*/)' % (
        nospecial, charclass, nospecial
    )
    space = r'(?:%s|%s)' % (space_chars, space_comment)
    space_nobang = r'(?:%s|%s)' % (space_chars, space_comment_nobang)
    newline = r'(?:%s?[\r\n])' % line_comment

    def fix_charclass(result):
        """
        Fixup string of chars to fit into a regex char class

        :Parameters:
          `result` : ``str``
            The characters to put into the class

        :Return: Escaped and range-compressed character class content
        :Rtype: ``str``
        """
        # A literal "-" has to be the last character of the class,
        # otherwise it would be interpreted as a range operator.
        pos = result.find('-')
        if pos >= 0:
            result = r'%s%s-' % (result[:pos], result[pos + 1:])

        def sequentize(string):
            """
            Notate consecutive characters as sequence

            (1-4 instead of 1234)
            """
            first, last, result = None, None, []
            for char in map(ord, string):
                if last is None:
                    first = last = char
                elif last + 1 == char:
                    last = char
                else:
                    result.append((first, last))
                    first = last = char
            if last is not None:
                result.append((first, last))
            # Runs of two characters are simply written side by side,
            # only longer runs get the "-" range notation.
            return ''.join(['%s%s%s' % (
                chr(first),
                '-' if last > first + 1 else '',
                chr(last) if last != first else '',
            ) for first, last in result])

        return _re.sub(
            r'([\000-\040\047])',  # \047 for better portability
            lambda m: '\\%03o' % ord(m.group(1)), (
                sequentize(result)
                .replace('\\', '\\\\')
                .replace('[', '\\[')
                .replace(']', '\\]')
            )
        )

    # The original probed for ``xrange`` and fell back to ``range``. As
    # the fallback assignment made the name function-local, the probe
    # always failed and ``range`` was used on every python version. So
    # ``range`` is simply used directly here.

    def id_literal_(what):
        """ Make id_literal like char class """
        match = _re.compile(what).match
        result = ''.join([
            chr(c) for c in range(127) if not match(chr(c))
        ])
        return '[^%s]' % fix_charclass(result)

    def not_id_literal_(keep):
        """ Make negated id_literal like char class """
        match = _re.compile(id_literal_(keep)).match
        result = ''.join([
            chr(c) for c in range(127) if not match(chr(c))
        ])
        return r'[%s]' % fix_charclass(result)

    # Contexts after which a "/" starts a regex literal (not a division)
    not_id_literal = not_id_literal_(r'[a-zA-Z0-9_$]')
    preregex1 = r'[(,=:\[!&|?{};\r\n]'
    preregex2 = r'%(not_id_literal)sreturn' % locals()

    id_literal = id_literal_(r'[a-zA-Z0-9_$]')
    id_literal_open = id_literal_(r'[a-zA-Z0-9_${\[(!+-]')
    id_literal_close = id_literal_(r'[a-zA-Z0-9_$}\])"\047+-]')

    # Anything which is neither a quote, a slash nor whitespace
    dull = r'[^\047"/\000-\040]'

    space_sub_simple = _re.compile((
        # noqa pylint: disable = C0330

        r'(%(dull)s+)'
        r'|(%(strings)s%(dull)s*)'
        r'|(?<=%(preregex1)s)'
        r'%(space)s*(?:%(newline)s%(space)s*)*'
        r'(%(regex)s%(dull)s*)'
        r'|(?<=%(preregex2)s)'
        # Any amount of space may follow each newline here (as in the
        # preregex1 branch above). Demanding exactly one space item made
        # "return\n  /re/" fall through and the regex literal got
        # minified like regular code.
        r'%(space)s*(?:%(newline)s%(space)s*)*'
        r'(%(regex)s%(dull)s*)'
        r'|(?<=%(id_literal_close)s)'
        r'%(space)s*(?:(%(newline)s)%(space)s*)+'
        r'(?=%(id_literal_open)s)'
        r'|(?<=%(id_literal)s)(%(space)s)+(?=%(id_literal)s)'
        r'|(?<=\+)(%(space)s)+(?=\+)'
        r'|(?<=-)(%(space)s)+(?=-)'
        r'|%(space)s+'
        r'|(?:%(newline)s%(space)s*)+'
    ) % locals()).sub

    def space_subber_simple(match):
        """ Substitution callback (all comments are dropped) """
        # pylint: disable = R0911

        groups = match.groups()
        # Groups 0-3: dull code, strings and regex literals - verbatim
        for group in groups[:4]:
            if group:
                return group
        # Group 4: a newline which needs to be kept
        if groups[4]:
            return '\n'
        # Groups 5-7: space which needs to be kept (collapsed to one)
        if groups[5] or groups[6] or groups[7]:
            return ' '
        return ''

    space_sub_banged = _re.compile((
        # noqa pylint: disable = C0330

        r'(%(dull)s+)'
        r'|(%(strings)s%(dull)s*)'
        r'|(%(bang_comment)s%(dull)s*)'
        r'|(?<=%(preregex1)s)'
        r'%(space)s*(?:%(newline)s%(space)s*)*'
        r'(%(regex)s%(dull)s*)'
        r'|(?<=%(preregex2)s)'
        # See space_sub_simple: any amount of space after each newline.
        r'%(space)s*(?:%(newline)s%(space)s*)*'
        r'(%(regex)s%(dull)s*)'
        r'|(?<=%(id_literal_close)s)'
        r'%(space)s*(?:(%(newline)s)%(space)s*)+'
        r'(?=%(id_literal_open)s)'
        r'|(?<=%(id_literal)s)(%(space)s)+(?=%(id_literal)s)'
        r'|(?<=\+)(%(space)s)+(?=\+)'
        r'|(?<=-)(%(space)s)+(?=-)'
        r'|%(space)s+'
        r'|(?:%(newline)s%(space)s*)+'
    ) % dict(locals(), space=space_nobang)).sub

    def space_subber_banged(match):
        """ Substitution callback (bang comments are kept) """
        # pylint: disable = R0911

        groups = match.groups()
        # Groups 0-4: dull code, strings, bang comments and regex
        # literals - verbatim
        for group in groups[:5]:
            if group:
                return group
        # Group 5: a newline which needs to be kept
        if groups[5]:
            return '\n'
        # Groups 6-8: space which needs to be kept (collapsed to one)
        if groups[6] or groups[7] or groups[8]:
            return ' '
        return ''

    def jsmin(script, keep_bang_comments=False):  # pylint: disable = W0621
        r"""
        Minify javascript based on `jsmin.c by Douglas Crockford`_\.

        Instead of parsing the stream char by char, it uses a regular
        expression approach which minifies the whole script with one big
        substitution regex.

        .. _jsmin.c by Douglas Crockford:
           http://www.crockford.com/javascript/jsmin.c

        :Parameters:
          `script` : ``str``
            Script to minify

          `keep_bang_comments` : ``bool``
            Keep comments starting with an exclamation mark? (``/*!...*/``)

        :Return: Minified script
        :Rtype: ``str``
        """
        # The script is wrapped into newlines, so the lookbehind and
        # lookahead assertions also work at the very start and end.
        if keep_bang_comments:
            return space_sub_banged(
                space_subber_banged, '\n%s\n' % script
            ).strip()
        else:
            return space_sub_simple(
                space_subber_simple, '\n%s\n' % script
            ).strip()

    return jsmin

jsmin = _make_jsmin()


def jsmin_for_posers(script, keep_bang_comments=False):
    r"""
    Minify javascript based on `jsmin.c by Douglas Crockford`_\.

    Instead of parsing the stream char by char, it uses a regular
    expression approach which minifies the whole script with one big
    substitution regex.

    .. _jsmin.c by Douglas Crockford:
       http://www.crockford.com/javascript/jsmin.c

    :Warning: This function is the digest of a _make_jsmin() call. It just
              utilizes the resulting regexes. It's here for fun and may
              vanish any time. Use the `jsmin` function instead.

    :Parameters:
      `script` : ``str``
        Script to minify

      `keep_bang_comments` : ``bool``
        Keep comments starting with an exclamation mark? (``/*!...*/``)

    :Return: Minified script
    :Rtype: ``str``
    """
    # The patterns below are the expanded forms of the patterns built in
    # _make_jsmin() and need to be kept in sync with them. This includes
    # the "regex after return" alternative, which accepts any amount of
    # space after each newline.
    if not keep_bang_comments:
        rex = (
            r'([^\047"/\000-\040]+)|((?:(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^\047\\\r\n]*)*\047)|(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^"\\\r\n]*)*"))[^\047"/\000-\040]*)|(?<=[(,=:\[!&|?'
            r'{};\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*'
            r'][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\0'
            r'14\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)*((?:/(?![\r'
            r'\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:\\[^\r'
            r'\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/)[^\047"/\000-\040]*)|(?<'
            r'=[\000-#%-,./:-@\[-^`{-~-]return)(?:[\000-\011\013\014\016-\04'
            r'0]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?['
            r'\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^'
            r'*]*\*+)*/))*)*((?:/(?![\r\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:'
            r'\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/)['
            r'^\047"/\000-\040]*)|(?<=[^\000-!#%&(*,./:-@\[\\^`{|~])(?:[\000'
            r'-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?'
            r':((?:(?://[^\r\n]*)?[\r\n]))(?:[\000-\011\013\014\016-\040]|(?'
            r':/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+(?=[^\000-\040"#%-\047)*,.'
            r'/:-@\\-^`|-~])|(?<=[^\000-#%-,./:-@\[-^`{-~-])((?:[\000-\011\0'
            r'13\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=[^\00'
            r'0-#%-,./:-@\[-^`{-~-])|(?<=\+)((?:[\000-\011\013\014\016-\040]'
            r'|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=\+)|(?<=-)((?:[\000-'
            r'\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?'
            r'=-)|(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]'
            r'*\*+)*/))+|(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\0'
            r'16-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+'
        )

        def subber(match):
            """ Substitution callback """
            groups = match.groups()
            return (
                groups[0] or
                groups[1] or
                groups[2] or
                groups[3] or
                (groups[4] and '\n') or
                (groups[5] and ' ') or
                (groups[6] and ' ') or
                (groups[7] and ' ') or
                ''
            )
    else:
        rex = (
            r'([^\047"/\000-\040]+)|((?:(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^\047\\\r\n]*)*\047)|(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^"\\\r\n]*)*"))[^\047"/\000-\040]*)|((?:/\*![^*]*\*'
            r'+(?:[^/*][^*]*\*+)*/)[^\047"/\000-\040]*)|(?<=[(,=:\[!&|?{};\r'
            r'\n])(?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*'
            r'][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\0'
            r'14\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*)*((?:/('
            r'?![\r\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:'
            r'\\[^\r\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/)[^\047"/\000-\040]'
            r'*)|(?<=[\000-#%-,./:-@\[-^`{-~-]return)(?:[\000-\011\013\014\0'
            r'16-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:(?:(?://['
            r'^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*'
            r']*\*+(?:[^/*][^*]*\*+)*/))*)*((?:/(?![\r\n/*])[^/\\\[\r\n]*(?:('
            r'?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/'
            r'\\\[\r\n]*)*/)[^\047"/\000-\040]*)|(?<=[^\000-!#%&(*,./:-@\[\\'
            r'^`{|~])(?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:['
            r'^/*][^*]*\*+)*/))*(?:((?:(?://[^\r\n]*)?[\r\n]))(?:[\000-\011'
            r'\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+'
            r'(?=[^\000-\040"#%-\047)*,./:-@\\-^`|-~])|(?<=[^\000-#%-,./:-@'
            r'\[-^`{-~-])((?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*'
            r'+(?:[^/*][^*]*\*+)*/)))+(?=[^\000-#%-,./:-@\[-^`{-~-])|(?<=\+)'
            r'((?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^'
            r'*]*\*+)*/)))+(?=\+)|(?<=-)((?:[\000-\011\013\014\016-\040]|(?:'
            r'/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=-)|(?:[\000-\011\013'
            r'\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))+|(?:(?'
            r':(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*('
            r'?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+'
        )

        def subber(match):
            """ Substitution callback """
            groups = match.groups()
            return (
                groups[0] or
                groups[1] or
                groups[2] or
                groups[3] or
                groups[4] or
                (groups[5] and '\n') or
                (groups[6] and ' ') or
                (groups[7] and ' ') or
                (groups[8] and ' ') or
                ''
            )

    return _re.sub(rex, subber, '\n%s\n' % script).strip()


if __name__ == '__main__':
    def main():
        """
        Main

        Minifies stdin to stdout. ``-b`` keeps bang comments, ``-p``
        enforces the python implementation, ``-bp`` / ``-pb`` do both.
        """
        import sys as _sys
        args = _sys.argv[1:]
        keep_bang_comments = (
            '-b' in args
            or '-bp' in args
            or '-pb' in args
        )
        if '-p' in args or '-bp' in args or '-pb' in args:
            global jsmin  # pylint: disable = W0603
            jsmin = _make_jsmin(python_only=True)
        _sys.stdout.write(jsmin(
            _sys.stdin.read(), keep_bang_comments=keep_bang_comments
        ))
    main()