1# -*- coding: utf-8 -*-
2"""
3    jinja2.ext
4    ~~~~~~~~~~
5
    Jinja extensions allow adding custom tags, similar to the way Django
    custom tags work.  By default two example extensions exist: an i18n and
    a cache extension.
9
10    :copyright: (c) 2010 by the Jinja Team.
11    :license: BSD.
12"""
13from jinja2 import nodes
14from jinja2.defaults import BLOCK_START_STRING, \
15     BLOCK_END_STRING, VARIABLE_START_STRING, VARIABLE_END_STRING, \
16     COMMENT_START_STRING, COMMENT_END_STRING, LINE_STATEMENT_PREFIX, \
17     LINE_COMMENT_PREFIX, TRIM_BLOCKS, NEWLINE_SEQUENCE, \
18     KEEP_TRAILING_NEWLINE, LSTRIP_BLOCKS
19from jinja2.environment import Environment
20from jinja2.runtime import concat
21from jinja2.exceptions import TemplateAssertionError, TemplateSyntaxError
22from jinja2.utils import contextfunction, import_string, Markup
23from jinja2._compat import next, with_metaclass, string_types, iteritems
24
25
# The gettext function names that are really useful in a Jinja template.
# Note that ugettext must be assigned to gettext as Jinja doesn't support
# non-unicode strings.  This tuple is the default value of the
# `gettext_functions` parameter of the extraction helpers in this module.
GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext')
30
31
class ExtensionRegistry(type):
    """Metaclass that stamps every class it creates with a unique
    ``identifier`` attribute: the dotted module path followed by the
    class name.
    """

    def __new__(cls, name, bases, d):
        extension_class = type.__new__(cls, name, bases, d)
        # the import name doubles as the identifier of the extension
        extension_class.identifier = '.'.join(
            (extension_class.__module__, extension_class.__name__))
        return extension_class
39
40
class Extension(with_metaclass(ExtensionRegistry, object)):
    """Extensions can be used to add extra functionality to the Jinja template
    system at the parser level.  Custom extensions are bound to an environment
    but may not store environment specific data on `self`.  The reason for
    this is that an extension can be bound to another environment (for
    overlays) by creating a copy and reassigning the `environment` attribute.

    As extensions are created by the environment they cannot accept any
    arguments for configuration.  One may want to work around that by using
    a factory function, but that is not possible as extensions are identified
    by their import name.  The correct way to configure the extension is
    storing the configuration values on the environment.  Because this way the
    environment ends up acting as central configuration storage the
    attributes may clash which is why extensions have to ensure that the names
    they choose for configuration are not too generic.  ``prefix`` for example
    is a terrible name, ``fragment_cache_prefix`` on the other hand is a good
    name as it includes the name of the extension (fragment cache).
    """

    #: if this extension parses this is the list of tags it's listening to.
    tags = set()

    #: the priority of that extension.  This is especially useful for
    #: extensions that preprocess values.  A lower value means higher
    #: priority.
    #:
    #: .. versionadded:: 2.4
    priority = 100

    def __init__(self, environment):
        """Bind the extension to the `environment` that created it."""
        self.environment = environment

    def bind(self, environment):
        """Create a copy of this extension bound to another environment.

        The copy is created without calling ``__init__``; the instance
        dictionary is copied over and only the `environment` attribute is
        replaced.
        """
        rv = object.__new__(self.__class__)
        rv.__dict__.update(self.__dict__)
        rv.environment = environment
        return rv

    def preprocess(self, source, name, filename=None):
        """This method is called before the actual lexing and can be used to
        preprocess the source.  The `filename` is optional.  The return value
        must be the preprocessed source.

        The default implementation returns `source` unchanged.
        """
        return source

    def filter_stream(self, stream):
        """It's passed a :class:`~jinja2.lexer.TokenStream` that can be used
        to filter tokens returned.  This method has to return an iterable of
        :class:`~jinja2.lexer.Token`\\s, but it doesn't have to return a
        :class:`~jinja2.lexer.TokenStream`.

        In the `ext` folder of the Jinja2 source distribution there is a file
        called `inlinegettext.py` which implements a filter that utilizes this
        method.

        The default implementation returns `stream` unchanged.
        """
        return stream

    def parse(self, parser):
        """If any of the :attr:`tags` matched this method is called with the
        parser as first argument.  The token the parser stream is pointing at
        is the name token that matched.  This method has to return one or a
        list of multiple nodes.

        Subclasses that declare :attr:`tags` have to override this method;
        the base implementation raises :exc:`NotImplementedError`.
        """
        raise NotImplementedError()

    def attr(self, name, lineno=None):
        """Return an attribute node for the current extension.  This is useful
        to pass constants on extensions to generated template code.

        ::

            self.attr('_my_attribute', lineno=lineno)
        """
        # the node refers to the extension by its identifier (import name)
        return nodes.ExtensionAttribute(self.identifier, name, lineno=lineno)

    def call_method(self, name, args=None, kwargs=None, dyn_args=None,
                    dyn_kwargs=None, lineno=None):
        """Call a method of the extension.  This is a shortcut for
        :meth:`attr` + :class:`jinja2.nodes.Call`.

        `args` and `kwargs` default to empty lists when omitted; both are
        handed to the call node as lists.
        """
        if args is None:
            args = []
        if kwargs is None:
            kwargs = []
        return nodes.Call(self.attr(name, lineno=lineno), args, kwargs,
                          dyn_args, dyn_kwargs, lineno=lineno)
128
129
@contextfunction
def _gettext_alias(__context, *args, **kwargs):
    """Forward a call to whatever ``gettext`` resolves to in the current
    template context.  Installed as the ``_`` global by the i18n extension.
    """
    gettext = __context.resolve('gettext')
    return __context.call(gettext, *args, **kwargs)
133
134
def _make_new_gettext(func):
    """Wrap the gettext callable `func` into a newstyle gettext function.

    The wrapper translates the string through the context, marks the
    result as :class:`Markup` if autoescaping is active and then expands
    the keyword arguments into it with the ``%`` operator.
    """
    @contextfunction
    def gettext(__context, __string, **variables):
        translated = __context.call(func, __string)
        if __context.eval_ctx.autoescape:
            return Markup(translated) % variables
        return translated % variables
    return gettext
143
144
def _make_new_ngettext(func):
    """Wrap the ngettext callable `func` into a newstyle ngettext function.

    The count is made available to the format string as ``num`` unless the
    caller passed an explicit ``num`` keyword argument.
    """
    @contextfunction
    def ngettext(__context, __singular, __plural, __num, **variables):
        if 'num' not in variables:
            variables['num'] = __num
        translated = __context.call(func, __singular, __plural, __num)
        if __context.eval_ctx.autoescape:
            return Markup(translated) % variables
        return translated % variables
    return ngettext
154
155
class InternationalizationExtension(Extension):
    """This extension adds gettext support to Jinja2.

    It handles the ``trans`` tag (with an optional ``pluralize`` section)
    and registers helper callables on the environment for installing and
    uninstalling gettext functions and for extracting translatable strings.
    """
    tags = set(['trans'])

    # TODO: the i18n extension is currently reevaluating values in a few
    # situations.  Take this example:
    #   {% trans count=something() %}{{ count }} foo{% pluralize
    #     %}{{ count }} fooss{% endtrans %}
    # something is called twice here.  One time for the gettext value and
    # the other time for the n-parameter of the ngettext function.

    def __init__(self, environment):
        """Install the ``_`` alias as a global and extend the environment
        with the install/uninstall/extract helpers and the
        ``newstyle_gettext`` flag (initially `False`).
        """
        Extension.__init__(self, environment)
        environment.globals['_'] = _gettext_alias
        environment.extend(
            install_gettext_translations=self._install,
            install_null_translations=self._install_null,
            install_gettext_callables=self._install_callables,
            uninstall_gettext_translations=self._uninstall,
            extract_translations=self._extract,
            newstyle_gettext=False
        )

    def _install(self, translations, newstyle=None):
        """Install the gettext functions of a `translations` object.

        The ``ugettext``/``ungettext`` attributes are preferred if they
        exist; otherwise ``gettext``/``ngettext`` are used.
        """
        gettext = getattr(translations, 'ugettext', None)
        if gettext is None:
            gettext = translations.gettext
        ngettext = getattr(translations, 'ungettext', None)
        if ngettext is None:
            ngettext = translations.ngettext
        self._install_callables(gettext, ngettext, newstyle)

    def _install_null(self, newstyle=None):
        """Install no-op translation functions: gettext returns its input
        and ngettext returns the plural form unless ``n == 1``.
        """
        self._install_callables(
            lambda x: x,
            lambda s, p, n: (n != 1 and (p,) or (s,))[0],
            newstyle
        )

    def _install_callables(self, gettext, ngettext, newstyle=None):
        """Register `gettext` and `ngettext` as environment globals.

        If `newstyle` is not `None` it overrides the environment's
        ``newstyle_gettext`` flag first.  When that flag is set the
        callables are wrapped into their newstyle variants.
        """
        if newstyle is not None:
            self.environment.newstyle_gettext = newstyle
        if self.environment.newstyle_gettext:
            gettext = _make_new_gettext(gettext)
            ngettext = _make_new_ngettext(ngettext)
        self.environment.globals.update(
            gettext=gettext,
            ngettext=ngettext
        )

    def _uninstall(self, translations):
        """Remove the ``gettext`` and ``ngettext`` globals if present.

        The `translations` argument is not used by this method.
        """
        for key in 'gettext', 'ngettext':
            self.environment.globals.pop(key, None)

    def _extract(self, source, gettext_functions=GETTEXT_FUNCTIONS):
        """Extract translatable strings from `source`, which may either be
        a template source string (it is parsed first) or an already parsed
        node.
        """
        if isinstance(source, string_types):
            source = self.environment.parse(source)
        return extract_from_ast(source, gettext_functions)

    def parse(self, parser):
        """Parse a translatable tag.

        Returns the output node for the translation.  If the first variable
        of the tag is a call expression, a list of two nodes is returned
        instead: an assignment of that call to ``_trans`` followed by the
        output node.
        """
        lineno = next(parser.stream).lineno
        # tracks whether the variable used for pluralization is named `num`
        num_called_num = False

        # find all the variables referenced.  Additionally a variable can be
        # defined in the body of the trans block too, but this is checked at
        # a later state.
        plural_expr = None
        plural_expr_assignment = None
        variables = {}
        while parser.stream.current.type != 'block_end':
            # variables after the first one are separated by commas
            if variables:
                parser.stream.expect('comma')

            # skip colon for python compatibility
            if parser.stream.skip_if('colon'):
                break

            name = parser.stream.expect('name')
            if name.value in variables:
                parser.fail('translatable variable %r defined twice.' %
                            name.value, name.lineno,
                            exc=TemplateAssertionError)

            # expressions
            if parser.stream.current.type == 'assign':
                next(parser.stream)
                variables[name.value] = var = parser.parse_expression()
            else:
                variables[name.value] = var = nodes.Name(name.value, 'load')

            # the first variable of the tag is the default plural expression
            if plural_expr is None:
                if isinstance(var, nodes.Call):
                    # store the call result in `_trans` and refer to that
                    # name from both the variable and the plural expression
                    plural_expr = nodes.Name('_trans', 'load')
                    variables[name.value] = plural_expr
                    plural_expr_assignment = nodes.Assign(
                        nodes.Name('_trans', 'store'), var)
                else:
                    plural_expr = var
                num_called_num = name.value == 'num'

        parser.stream.expect('block_end')

        plural = plural_names = None
        have_plural = False
        referenced = set()

        # now parse until endtrans or pluralize
        singular_names, singular = self._parse_block(parser, True)
        if singular_names:
            referenced.update(singular_names)
            # without variables in the tag itself, fall back to the first
            # name referenced in the singular body
            if plural_expr is None:
                plural_expr = nodes.Name(singular_names[0], 'load')
                num_called_num = singular_names[0] == 'num'

        # if we have a pluralize block, we parse that too
        if parser.stream.current.test('name:pluralize'):
            have_plural = True
            next(parser.stream)
            # an explicit name after `pluralize` selects the plural variable
            if parser.stream.current.type != 'block_end':
                name = parser.stream.expect('name')
                if name.value not in variables:
                    parser.fail('unknown variable %r for pluralization' %
                                name.value, name.lineno,
                                exc=TemplateAssertionError)
                plural_expr = variables[name.value]
                num_called_num = name.value == 'num'
            parser.stream.expect('block_end')
            plural_names, plural = self._parse_block(parser, False)
            # skip the `endtrans` name token
            next(parser.stream)
            referenced.update(plural_names)
        else:
            # skip the `endtrans` name token
            next(parser.stream)

        # register free names as simple name expressions
        for var in referenced:
            if var not in variables:
                variables[var] = nodes.Name(var, 'load')

        if not have_plural:
            plural_expr = None
        elif plural_expr is None:
            parser.fail('pluralize without variables', lineno)

        node = self._make_node(singular, plural, variables, plural_expr,
                               bool(referenced),
                               num_called_num and have_plural)
        node.set_lineno(lineno)
        if plural_expr_assignment is not None:
            return [plural_expr_assignment, node]
        else:
            return node

    def _parse_block(self, parser, allow_pluralize):
        """Parse until the next block tag with a given name.

        Returns a tuple ``(referenced, text)``: the list of variable names
        used in the block and the block content as a format string in which
        literal percent signs are doubled and variables appear as
        ``%(name)s``.  Parsing stops at ``endtrans``, or at ``pluralize`` if
        `allow_pluralize` is true; that name token is left as the current
        token of the stream.
        """
        referenced = []
        buf = []
        while 1:
            if parser.stream.current.type == 'data':
                # escape percent signs so the text is a safe format string
                buf.append(parser.stream.current.value.replace('%', '%%'))
                next(parser.stream)
            elif parser.stream.current.type == 'variable_begin':
                # only plain names are accepted inside variable tags here
                next(parser.stream)
                name = parser.stream.expect('name').value
                referenced.append(name)
                buf.append('%%(%s)s' % name)
                parser.stream.expect('variable_end')
            elif parser.stream.current.type == 'block_begin':
                next(parser.stream)
                if parser.stream.current.test('name:endtrans'):
                    break
                elif parser.stream.current.test('name:pluralize'):
                    if allow_pluralize:
                        break
                    parser.fail('a translatable section can have only one '
                                'pluralize section')
                parser.fail('control structures in translatable sections are '
                            'not allowed')
            elif parser.stream.eos:
                parser.fail('unclosed translation block')
            else:
                assert False, 'internal parser error'

        return referenced, concat(buf)

    def _make_node(self, singular, plural, variables, plural_expr,
                   vars_referenced, num_called_num):
        """Generates a useful node from the data provided.

        Builds a call to ``gettext`` (no plural expression) or ``ngettext``
        and wraps it in an output node.  With newstyle gettext the variables
        are passed as keyword arguments to the call; otherwise the result is
        marked safe if autoescaping is on and the variables are applied with
        the ``%`` operator.
        """
        # old style gettext without any referenced variables never goes
        # through string formatting, so undo the percent sign escaping
        # that was applied while parsing the block.
        if not vars_referenced and not self.environment.newstyle_gettext:
            singular = singular.replace('%%', '%')
            if plural:
                plural = plural.replace('%%', '%')

        # singular only:
        if plural_expr is None:
            gettext = nodes.Name('gettext', 'load')
            node = nodes.Call(gettext, [nodes.Const(singular)],
                              [], None, None)

        # singular and plural
        else:
            ngettext = nodes.Name('ngettext', 'load')
            node = nodes.Call(ngettext, [
                nodes.Const(singular),
                nodes.Const(plural),
                plural_expr
            ], [], None, None)

        # in case newstyle gettext is used, the method is powerful
        # enough to handle the variable expansion and autoescape
        # handling itself
        if self.environment.newstyle_gettext:
            for key, value in iteritems(variables):
                # the function adds that later anyways in case num was
                # called num, so just skip it.
                if num_called_num and key == 'num':
                    continue
                node.kwargs.append(nodes.Keyword(key, value))

        # otherwise do that here
        else:
            # mark the return value as safe if we are in an
            # environment with autoescaping turned on
            node = nodes.MarkSafeIfAutoescape(node)
            if variables:
                node = nodes.Mod(node, nodes.Dict([
                    nodes.Pair(nodes.Const(key), value)
                    for key, value in variables.items()
                ]))
        return nodes.Output([node])
388
389
class ExprStmtExtension(Extension):
    """Provides the `do` tag: it evaluates an expression like a print
    statement would, but the result is not written to the output.
    """
    tags = set(['do'])

    def parse(self, parser):
        """Parse ``{% do <expression> %}`` into an expression statement."""
        tag_token = next(parser.stream)
        statement = nodes.ExprStmt(lineno=tag_token.lineno)
        statement.node = parser.parse_tuple()
        return statement
400
401
class LoopControlExtension(Extension):
    """Provides the `break` and `continue` tags for loops."""
    tags = set(['break', 'continue'])

    def parse(self, parser):
        """Turn the matched tag into a break or continue node."""
        token = next(parser.stream)
        # only the two tags above can get here, so anything but `break`
        # is treated as `continue`
        node_class = nodes.Break if token.value == 'break' else nodes.Continue
        return node_class(lineno=token.lineno)
411
412
class WithExtension(Extension):
    """Provides a django-like `with` block that opens a new scope."""
    tags = set(['with'])

    def parse(self, parser):
        """Parse ``{% with a = x, b = y %}...{% endwith %}`` into a scope
        node whose body starts with the assignments.
        """
        scope = nodes.Scope(lineno=next(parser.stream).lineno)
        body = []
        while parser.stream.current.type != 'block_end':
            lineno = parser.stream.current.lineno
            # every assignment but the first is preceded by a comma
            if body:
                parser.stream.expect('comma')
            target = parser.parse_assign_target()
            parser.stream.expect('assign')
            body.append(nodes.Assign(target, parser.parse_expression(),
                                     lineno=lineno))
        # the statements of the block follow the assignments
        body.extend(parser.parse_statements(('name:endwith',),
                                            drop_needle=True))
        scope.body = body
        return scope
432
433
class AutoEscapeExtension(Extension):
    """Provides the `autoescape` tag which overrides the autoescape
    setting for the enclosed block.
    """
    tags = set(['autoescape'])

    def parse(self, parser):
        """Parse ``{% autoescape <expr> %}...{% endautoescape %}``."""
        lineno = next(parser.stream).lineno
        modifier = nodes.ScopedEvalContextModifier(lineno=lineno)
        autoescape_expr = parser.parse_expression()
        modifier.options = [nodes.Keyword('autoescape', autoescape_expr)]
        modifier.body = parser.parse_statements(('name:endautoescape',),
                                                drop_needle=True)
        # wrap the modifier in a scope node of its own
        return nodes.Scope([modifier])
446
447
def extract_from_ast(node, gettext_functions=GETTEXT_FUNCTIONS,
                     babel_style=True):
    """Extract localizable strings from the given template node.

    Every call to a plain name listed in `gettext_functions` is inspected.
    By default the matches are returned in babel style: non-string
    parameters as well as keyword arguments are returned as `None`, which
    allows Babel to figure out what you really meant if you are using
    gettext functions that allow keyword arguments for placeholder
    expansion.  If you don't want that behavior set the `babel_style`
    parameter to `False`, which causes only strings to be returned and
    parameters to always be stored in tuples.  As a consequence gettext
    calls without a single string parameter are skipped in that mode.

    This example explains the behavior:

    >>> from jinja2 import Environment
    >>> env = Environment()
    >>> node = env.parse('{{ (_("foo"), _(), ngettext("foo", "bar", 42)) }}')
    >>> list(extract_from_ast(node))
    [(1, '_', 'foo'), (1, '_', ()), (1, 'ngettext', ('foo', 'bar', None))]
    >>> list(extract_from_ast(node, babel_style=False))
    [(1, '_', ('foo',)), (1, 'ngettext', ('foo', 'bar'))]

    For every call found this function yields a ``(lineno, function,
    message)`` tuple, where:

    * ``lineno`` is the line number of the call node,
    * ``function`` is the name of the ``gettext`` function used, and
    * ``message`` is the string itself, or a tuple of strings (and `None`
      placeholders in babel style) for calls with multiple arguments.

    This extraction function operates on the AST and is because of that unable
    to extract any comments.  For comment support you have to use the babel
    extraction interface or extract comments yourself.
    """
    for call in node.find_all(nodes.Call):
        func = call.node
        # only direct calls to one of the known function names count
        if not (isinstance(func, nodes.Name) and
                func.name in gettext_functions):
            continue

        # constant string arguments are kept, everything else becomes None
        strings = [
            arg.value if (isinstance(arg, nodes.Const) and
                          isinstance(arg.value, string_types)) else None
            for arg in call.args
        ]
        # keyword and dynamic arguments only contribute placeholders
        strings.extend(None for _ in call.kwargs)
        if call.dyn_args is not None:
            strings.append(None)
        if call.dyn_kwargs is not None:
            strings.append(None)

        if babel_style:
            if len(strings) == 1:
                message = strings[0]
            else:
                message = tuple(strings)
        else:
            message = tuple(x for x in strings if x is not None)
            if not message:
                continue
        yield call.lineno, func.name, message
514
515
516class _CommentFinder(object):
517    """Helper class to find comments in a token stream.  Can only
518    find comments for gettext calls forwards.  Once the comment
519    from line 4 is found, a comment for line 1 will not return a
520    usable value.
521    """
522
523    def __init__(self, tokens, comment_tags):
524        self.tokens = tokens
525        self.comment_tags = comment_tags
526        self.offset = 0
527        self.last_lineno = 0
528
529    def find_backwards(self, offset):
530        try:
531            for _, token_type, token_value in \
532                    reversed(self.tokens[self.offset:offset]):
533                if token_type in ('comment', 'linecomment'):
534                    try:
535                        prefix, comment = token_value.split(None, 1)
536                    except ValueError:
537                        continue
538                    if prefix in self.comment_tags:
539                        return [comment.rstrip()]
540            return []
541        finally:
542            self.offset = offset
543
544    def find_comments(self, lineno):
545        if not self.comment_tags or self.last_lineno > lineno:
546            return []
547        for idx, (token_lineno, _, _) in enumerate(self.tokens[self.offset:]):
548            if token_lineno > lineno:
549                return self.find_backwards(self.offset + idx)
550        return self.find_backwards(len(self.tokens))
551
552
def babel_extract(fileobj, keywords, comment_tags, options):
    """Babel extraction method for Jinja templates.

    .. versionchanged:: 2.3
       Basic support for translation comments was added.  If `comment_tags`
       is now set to a list of keywords for extraction, the extractor will
       try to find the best preceding comment that begins with one of the
       keywords.  For best results, make sure to not have more than one
       gettext call in one line of code and the matching comment in the
       same line or the line before.

    .. versionchanged:: 2.5.1
       The `newstyle_gettext` flag can be set to `True` to enable newstyle
       gettext calls.

    .. versionchanged:: 2.7
       A `silent` option can now be provided.  If set to `False` template
       syntax errors are propagated instead of being ignored.

    :param fileobj: the file-like object the messages should be extracted
                    from.  Its content is read as bytes and decoded with
                    the `encoding` option (``utf-8`` by default).
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results.
    :param options: a dictionary of additional options
    :return: an iterator over ``(lineno, funcname, message, comments)``
             tuples, where `comments` is a list with the matching
             translator comment or an empty list.
    """
    def getbool(options, key, default=False):
        # options are strings, so the default is converted to one as well
        value = options.get(key, str(default))
        return value.lower() in ('1', 'on', 'yes', 'true')

    # the i18n extension is always loaded, the others come from the
    # comma separated `extensions` option
    extensions = set([InternationalizationExtension])
    for import_name in options.get('extensions', '').split(','):
        import_name = import_name.strip()
        if import_name:
            extensions.add(import_string(import_name))

    silent = getbool(options, 'silent', True)
    environment = Environment(
        options.get('block_start_string', BLOCK_START_STRING),
        options.get('block_end_string', BLOCK_END_STRING),
        options.get('variable_start_string', VARIABLE_START_STRING),
        options.get('variable_end_string', VARIABLE_END_STRING),
        options.get('comment_start_string', COMMENT_START_STRING),
        options.get('comment_end_string', COMMENT_END_STRING),
        options.get('line_statement_prefix') or LINE_STATEMENT_PREFIX,
        options.get('line_comment_prefix') or LINE_COMMENT_PREFIX,
        getbool(options, 'trim_blocks', TRIM_BLOCKS),
        getbool(options, 'lstrip_blocks', LSTRIP_BLOCKS),
        NEWLINE_SEQUENCE,
        getbool(options, 'keep_trailing_newline', KEEP_TRAILING_NEWLINE),
        frozenset(extensions),
        cache_size=0,
        auto_reload=False
    )

    if getbool(options, 'newstyle_gettext'):
        environment.newstyle_gettext = True

    raw = fileobj.read()
    source = raw.decode(options.get('encoding', 'utf-8'))
    try:
        node = environment.parse(source)
        tokens = list(environment.lex(environment.preprocess(source)))
    except TemplateSyntaxError:
        if silent:
            # skip templates with syntax errors
            return
        raise

    finder = _CommentFinder(tokens, comment_tags)
    for lineno, func, message in extract_from_ast(node, keywords):
        comments = finder.find_comments(lineno)
        yield lineno, func, message, comments
629
630
#: nicer import names: short aliases for the extension classes of this
#: module (`with_` carries a trailing underscore because `with` is a
#: reserved word in Python).
i18n = InternationalizationExtension
do = ExprStmtExtension
loopcontrols = LoopControlExtension
with_ = WithExtension
autoescape = AutoEscapeExtension
637