1#!/usr/bin/env python
2# -*- coding: ascii -*-
3r"""
4=====================
5 Javascript Minifier
6=====================
7
8rJSmin is a javascript minifier written in python.
9
10The minifier is based on the semantics of `jsmin.c by Douglas Crockford`_\\.
11
12:Copyright:
13
14 Copyright 2011 - 2014
15 Andr\xe9 Malo or his licensors, as applicable
16
17:License:
18
19 Licensed under the Apache License, Version 2.0 (the "License");
20 you may not use this file except in compliance with the License.
21 You may obtain a copy of the License at
22
23     http://www.apache.org/licenses/LICENSE-2.0
24
25 Unless required by applicable law or agreed to in writing, software
26 distributed under the License is distributed on an "AS IS" BASIS,
27 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
28 See the License for the specific language governing permissions and
29 limitations under the License.
30
31The module is a re-implementation aiming for speed, so it can be used at
32runtime (rather than during a preprocessing step). Usually it produces the
33same results as the original ``jsmin.c``. It differs in the following ways:
34
35- there is no error detection: unterminated string, regex and comment
36  literals are treated as regular javascript code and minified as such.
37- Control characters inside string and regex literals are left untouched; they
38  are not converted to spaces (nor to \\n)
39- Newline characters are not allowed inside string and regex literals, except
40  for line continuations in string literals (ECMA-5).
41- "return /regex/" is recognized correctly.
42- "+ +" and "- -" sequences are not collapsed to '++' or '--'
43- Newlines before ! operators are removed more sensibly
44- Comments starting with an exclamation mark (``!``) can be kept optionally
45- rJSmin does not handle streams, but only complete strings. (However, the
46  module provides a "streamy" interface).
47
48Since most parts of the logic are handled by the regex engine it's way faster
49than the original python port of ``jsmin.c`` by Baruch Even. The speed factor
50varies between about 6 and 55 depending on input and python version (it gets
51faster the more compressed the input already is). Compared to the
52speed-refactored python port by Dave St.Germain the performance gain is less
53dramatic but still between 3 and 50 (for huge inputs). See the docs/BENCHMARKS
54file for details.
55
56rjsmin.c is a reimplementation of rjsmin.py in C and speeds it up even more.
57
58Both python 2 and python 3 are supported.
59
60.. _jsmin.c by Douglas Crockford:
61   http://www.crockford.com/javascript/jsmin.c
62"""
# The source file is pure ASCII (see the coding line), so non-ASCII characters
# in the raw module docstring are spelled as escapes (e.g. ``\xe9``). Decode
# them once at import time. ``__doc__`` may be None (for example when
# docstrings are stripped), hence the guard.
if __doc__:
    # pylint: disable = W0622
    __doc__ = __doc__.encode('ascii').decode('unicode_escape')
# Same escape trick for the author name.
__author__ = r"Andr\xe9 Malo".encode('ascii').decode('unicode_escape')
__docformat__ = "restructuredtext en"
__license__ = "Apache License, Version 2.0"
__version__ = '1.0.10'
# Only ``jsmin`` is exported by ``from ... import *``; ``jsmin_for_posers``
# is deliberately left out.
__all__ = ['jsmin']
71
72import re as _re
73
74
75def _make_jsmin(python_only=False):
76    """
77    Generate JS minifier based on `jsmin.c by Douglas Crockford`_
78
79    .. _jsmin.c by Douglas Crockford:
80       http://www.crockford.com/javascript/jsmin.c
81
82    :Parameters:
83      `python_only` : ``bool``
84        Use only the python variant. If true, the c extension is not even
85        tried to be loaded.
86
87    :Return: Minifier
88    :Rtype: ``callable``
89    """
90    # pylint: disable = R0912, R0914, W0612
91
92    if not python_only:
93        try:
94            import _rjsmin  # pylint: disable = F0401
95        except ImportError:
96            pass
97        else:
98            return _rjsmin.jsmin
99    try:
100        xrange
101    except NameError:
102        xrange = range  # pylint: disable = W0622
103
104    space_chars = r'[\000-\011\013\014\016-\040]'
105
106    line_comment = r'(?://[^\r\n]*)'
107    space_comment = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
108    space_comment_nobang = r'(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/)'
109    bang_comment = r'(?:/\*![^*]*\*+(?:[^/*][^*]*\*+)*/)'
110
111    string1 = \
112        r'(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^\047\\\r\n]*)*\047)'
113    string2 = r'(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^"\\\r\n]*)*")'
114    strings = r'(?:%s|%s)' % (string1, string2)
115
116    charclass = r'(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\])'
117    nospecial = r'[^/\\\[\r\n]'
118    regex = r'(?:/(?![\r\n/*])%s*(?:(?:\\[^\r\n]|%s)%s*)*/)' % (
119        nospecial, charclass, nospecial
120    )
121    space = r'(?:%s|%s)' % (space_chars, space_comment)
122    space_nobang = r'(?:%s|%s)' % (space_chars, space_comment_nobang)
123    newline = r'(?:%s?[\r\n])' % line_comment
124
125    def fix_charclass(result):
126        """ Fixup string of chars to fit into a regex char class """
127        pos = result.find('-')
128        if pos >= 0:
129            result = r'%s%s-' % (result[:pos], result[pos + 1:])
130
131        def sequentize(string):
132            """
133            Notate consecutive characters as sequence
134
135            (1-4 instead of 1234)
136            """
137            first, last, result = None, None, []
138            for char in map(ord, string):
139                if last is None:
140                    first = last = char
141                elif last + 1 == char:
142                    last = char
143                else:
144                    result.append((first, last))
145                    first = last = char
146            if last is not None:
147                result.append((first, last))
148            return ''.join(['%s%s%s' % (
149                chr(first),
150                last > first + 1 and '-' or '',
151                last != first and chr(last) or ''
152            ) for first, last in result])
153
154        return _re.sub(
155            r'([\000-\040\047])',  # \047 for better portability
156            lambda m: '\\%03o' % ord(m.group(1)), (
157                sequentize(result)
158                .replace('\\', '\\\\')
159                .replace('[', '\\[')
160                .replace(']', '\\]')
161            )
162        )
163
164    def id_literal_(what):
165        """ Make id_literal like char class """
166        match = _re.compile(what).match
167        result = ''.join([
168            chr(c) for c in xrange(127) if not match(chr(c))
169        ])
170        return '[^%s]' % fix_charclass(result)
171
172    def not_id_literal_(keep):
173        """ Make negated id_literal like char class """
174        match = _re.compile(id_literal_(keep)).match
175        result = ''.join([
176            chr(c) for c in xrange(127) if not match(chr(c))
177        ])
178        return r'[%s]' % fix_charclass(result)
179
180    not_id_literal = not_id_literal_(r'[a-zA-Z0-9_$]')
181    preregex1 = r'[(,=:\[!&|?{};\r\n]'
182    preregex2 = r'%(not_id_literal)sreturn' % locals()
183
184    id_literal = id_literal_(r'[a-zA-Z0-9_$]')
185    id_literal_open = id_literal_(r'[a-zA-Z0-9_${\[(!+-]')
186    id_literal_close = id_literal_(r'[a-zA-Z0-9_$}\])"\047+-]')
187
188    dull = r'[^\047"/\000-\040]'
189
190    space_sub_simple = _re.compile((
191        # noqa pylint: disable = C0330
192
193        r'(%(dull)s+)'
194        r'|(%(strings)s%(dull)s*)'
195        r'|(?<=%(preregex1)s)'
196            r'%(space)s*(?:%(newline)s%(space)s*)*'
197            r'(%(regex)s%(dull)s*)'
198        r'|(?<=%(preregex2)s)'
199            r'%(space)s*(?:%(newline)s%(space)s)*'
200            r'(%(regex)s%(dull)s*)'
201        r'|(?<=%(id_literal_close)s)'
202            r'%(space)s*(?:(%(newline)s)%(space)s*)+'
203            r'(?=%(id_literal_open)s)'
204        r'|(?<=%(id_literal)s)(%(space)s)+(?=%(id_literal)s)'
205        r'|(?<=\+)(%(space)s)+(?=\+)'
206        r'|(?<=-)(%(space)s)+(?=-)'
207        r'|%(space)s+'
208        r'|(?:%(newline)s%(space)s*)+'
209    ) % locals()).sub
210    #print space_sub_simple.__self__.pattern
211
212    def space_subber_simple(match):
213        """ Substitution callback """
214        # pylint: disable = R0911
215
216        groups = match.groups()
217        if groups[0]:
218            return groups[0]
219        elif groups[1]:
220            return groups[1]
221        elif groups[2]:
222            return groups[2]
223        elif groups[3]:
224            return groups[3]
225        elif groups[4]:
226            return '\n'
227        elif groups[5] or groups[6] or groups[7]:
228            return ' '
229        else:
230            return ''
231
232    space_sub_banged = _re.compile((
233        # noqa pylint: disable = C0330
234
235        r'(%(dull)s+)'
236        r'|(%(strings)s%(dull)s*)'
237        r'|(%(bang_comment)s%(dull)s*)'
238        r'|(?<=%(preregex1)s)'
239            r'%(space)s*(?:%(newline)s%(space)s*)*'
240            r'(%(regex)s%(dull)s*)'
241        r'|(?<=%(preregex2)s)'
242            r'%(space)s*(?:%(newline)s%(space)s)*'
243            r'(%(regex)s%(dull)s*)'
244        r'|(?<=%(id_literal_close)s)'
245            r'%(space)s*(?:(%(newline)s)%(space)s*)+'
246            r'(?=%(id_literal_open)s)'
247        r'|(?<=%(id_literal)s)(%(space)s)+(?=%(id_literal)s)'
248        r'|(?<=\+)(%(space)s)+(?=\+)'
249        r'|(?<=-)(%(space)s)+(?=-)'
250        r'|%(space)s+'
251        r'|(?:%(newline)s%(space)s*)+'
252    ) % dict(locals(), space=space_nobang)).sub
253    #print space_sub_banged.__self__.pattern
254
255    def space_subber_banged(match):
256        """ Substitution callback """
257        # pylint: disable = R0911
258
259        groups = match.groups()
260        if groups[0]:
261            return groups[0]
262        elif groups[1]:
263            return groups[1]
264        elif groups[2]:
265            return groups[2]
266        elif groups[3]:
267            return groups[3]
268        elif groups[4]:
269            return groups[4]
270        elif groups[5]:
271            return '\n'
272        elif groups[6] or groups[7] or groups[8]:
273            return ' '
274        else:
275            return ''
276
277    def jsmin(script, keep_bang_comments=False):  # pylint: disable = W0621
278        r"""
279        Minify javascript based on `jsmin.c by Douglas Crockford`_\.
280
281        Instead of parsing the stream char by char, it uses a regular
282        expression approach which minifies the whole script with one big
283        substitution regex.
284
285        .. _jsmin.c by Douglas Crockford:
286           http://www.crockford.com/javascript/jsmin.c
287
288        :Parameters:
289          `script` : ``str``
290            Script to minify
291
292          `keep_bang_comments` : ``bool``
293            Keep comments starting with an exclamation mark? (``/*!...*/``)
294
295        :Return: Minified script
296        :Rtype: ``str``
297        """
298        if keep_bang_comments:
299            return space_sub_banged(
300                space_subber_banged, '\n%s\n' % script
301            ).strip()
302        else:
303            return space_sub_simple(
304                space_subber_simple, '\n%s\n' % script
305            ).strip()
306
307    return jsmin
308
# Build the public minifier once at import time: the C implementation if it
# can be imported, the pure python one otherwise.
jsmin = _make_jsmin()
310
311
def jsmin_for_posers(script, keep_bang_comments=False):
    r"""
    Minify javascript based on `jsmin.c by Douglas Crockford`_\.

    Instead of parsing the stream char by char, it uses a regular
    expression approach which minifies the whole script with one big
    substitution regex.

    .. _jsmin.c by Douglas Crockford:
       http://www.crockford.com/javascript/jsmin.c

    :Warning: This function is the digest of a _make_jsmin() call. It just
              utilizes the resulting regexes. It's here for fun and may
              vanish any time. Use the `jsmin` function instead.

    :Parameters:
      `script` : ``str``
        Script to minify

      `keep_bang_comments` : ``bool``
        Keep comments starting with an exclamation mark? (``/*!...*/``)

    :Return: Minified script
    :Rtype: ``str``
    """
    if not keep_bang_comments:
        # Number of leading groups which are emitted verbatim
        # (dull run, string, regex after operator, regex after return)
        verbatim = 4
        rex = (
            r'([^\047"/\000-\040]+)|((?:(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^\047\\\r\n]*)*\047)|(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^"\\\r\n]*)*"))[^\047"/\000-\040]*)|(?<=[(,=:\[!&|?'
            r'{};\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*'
            r'][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\0'
            r'14\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)*((?:/(?![\r'
            r'\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:\\[^\r'
            r'\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/)[^\047"/\000-\040]*)|(?<'
            r'=[\000-#%-,./:-@\[-^`{-~-]return)(?:[\000-\011\013\014\016-\04'
            r'0]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?['
            r'\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^'
            # The space after a newline following "return" is optional and
            # repeatable ("*" before the closing paren), consistent with the
            # operator branch above. Otherwise an indented regex literal on
            # the next line is not recognized.
            r'*]*\*+)*/))*)*'
            r'((?:/(?![\r\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:'
            r'\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/)['
            r'^\047"/\000-\040]*)|(?<=[^\000-!#%&(*,./:-@\[\\^`{|~])(?:[\000'
            r'-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?'
            r':((?:(?://[^\r\n]*)?[\r\n]))(?:[\000-\011\013\014\016-\040]|(?'
            r':/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+(?=[^\000-\040"#%-\047)*,.'
            r'/:-@\\-^`|-~])|(?<=[^\000-#%-,./:-@\[-^`{-~-])((?:[\000-\011\0'
            r'13\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=[^\00'
            r'0-#%-,./:-@\[-^`{-~-])|(?<=\+)((?:[\000-\011\013\014\016-\040]'
            r'|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=\+)|(?<=-)((?:[\000-'
            r'\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?'
            r'=-)|(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]'
            r'*\*+)*/))+|(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\0'
            r'16-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+'
        )
    else:
        # As above, plus one more verbatim group for the bang comments
        verbatim = 5
        rex = (
            r'([^\047"/\000-\040]+)|((?:(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^\047\\\r\n]*)*\047)|(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^"\\\r\n]*)*"))[^\047"/\000-\040]*)|((?:/\*![^*]*\*'
            r'+(?:[^/*][^*]*\*+)*/)[^\047"/\000-\040]*)|(?<=[(,=:\[!&|?{};\r'
            r'\n])(?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*'
            r'][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\0'
            r'14\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*)*((?:/('
            r'?![\r\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:'
            r'\\[^\r\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/)[^\047"/\000-\040]'
            r'*)|(?<=[\000-#%-,./:-@\[-^`{-~-]return)(?:[\000-\011\013\014\0'
            r'16-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:(?:(?://['
            r'^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*'
            # Same correction as in the simple pattern: optional, repeatable
            # space after the newline following "return".
            r']*\*+(?:[^/*][^*]*\*+)*/))*)*'
            r'((?:/(?![\r\n/*])[^/\\\[\r\n]*(?:('
            r'?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/'
            r'\\\[\r\n]*)*/)[^\047"/\000-\040]*)|(?<=[^\000-!#%&(*,./:-@\[\\'
            r'^`{|~])(?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:['
            r'^/*][^*]*\*+)*/))*(?:((?:(?://[^\r\n]*)?[\r\n]))(?:[\000-\011'
            r'\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+'
            r'(?=[^\000-\040"#%-\047)*,./:-@\\-^`|-~])|(?<=[^\000-#%-,./:-@'
            r'\[-^`{-~-])((?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*'
            r'+(?:[^/*][^*]*\*+)*/)))+(?=[^\000-#%-,./:-@\[-^`{-~-])|(?<=\+)'
            r'((?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^'
            r'*]*\*+)*/)))+(?=\+)|(?<=-)((?:[\000-\011\013\014\016-\040]|(?:'
            r'/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=-)|(?:[\000-\011\013'
            r'\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))+|(?:(?'
            r':(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*('
            r'?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+'
        )

    def subber(match):
        """ Substitution callback """
        groups = match.groups()
        # Tokens which have to be emitted unchanged
        for text in groups[:verbatim]:
            if text:
                return text
        # Significant newline between two tokens
        if groups[verbatim]:
            return '\n'
        # Significant space (between identifiers, "+ +" or "- -")
        if groups[verbatim + 1] or groups[verbatim + 2] \
                or groups[verbatim + 3]:
            return ' '
        # Everything else is removable space
        return ''

    # The script is framed by newlines, so the lookbehind / lookahead
    # assertions also work at the very beginning and end.
    return _re.sub(rex, subber, '\n%s\n' % script).strip()
428
429
if __name__ == '__main__':
    def main():
        """
        Command line entry point: minify stdin and write it to stdout

        Recognized options:

        ``-b``
          Keep bang comments (``/*!...*/``)
        ``-p``
          Force the pure python implementation
        ``-bp`` / ``-pb``
          Both of the above
        """
        import sys as _sys

        global jsmin  # pylint: disable = W0603

        args = _sys.argv[1:]
        combined = '-bp' in args or '-pb' in args
        keep_bang_comments = '-b' in args or combined
        if '-p' in args or combined:
            # Replace the module level minifier (possibly the C one)
            jsmin = _make_jsmin(python_only=True)

        source = _sys.stdin.read()
        _sys.stdout.write(
            jsmin(source, keep_bang_comments=keep_bang_comments)
        )
    main()
447