1b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)# -*- coding: utf-8 -*-
2b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)"""
3b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)    jinja2.lexer
4b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)    ~~~~~~~~~~~~
5b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
6b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)    This module implements a Jinja / Python combination lexer. The
7b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)    `Lexer` class provided by this module is used to do some preprocessing
8b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)    for Jinja.
9b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
10b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)    On the one hand it filters out invalid operators like the bitshift
11b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)    operators we don't allow in templates. On the other hand it separates
12b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)    template code and python code in expressions.
13b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
14b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)    :copyright: (c) 2010 by the Jinja Team.
15b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)    :license: BSD, see LICENSE for more details.
16b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)"""
17b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)import re
1858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)
19b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)from operator import itemgetter
20b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)from collections import deque
21b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)from jinja2.exceptions import TemplateSyntaxError
2258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)from jinja2.utils import LRUCache
2358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)from jinja2._compat import next, iteritems, implements_iterator, text_type, \
2458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)     intern
25b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
26b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
# cache for the lexers. Exists in order to be able to have multiple
# environments with the same lexer
# NOTE(review): 50 is presumably the maximum number of cached lexers;
# confirm against jinja2.utils.LRUCache.
_lexer_cache = LRUCache(50)

# static regular expressions
# one or more whitespace characters (unicode aware because of re.U)
whitespace_re = re.compile(r'\s+', re.U)
# a single or double quoted string literal; a backslash escapes the
# following character and, because of re.S, that may be a newline too.
string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                       r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S)
# a run of decimal digits
integer_re = re.compile(r'\d+')

# we use the unicode identifier rule if this python version is able
# to handle unicode identifiers, otherwise the standard ASCII one.
# The probe below runs once at import time: compiling a non-ASCII name
# raises SyntaxError on interpreters without unicode identifiers.
try:
    compile('föö', '<unknown>', 'eval')
except SyntaxError:
    name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b')
else:
    # imported lazily so the character tables are only loaded when the
    # unicode rule is actually used.
    from jinja2 import _stringdefs
    name_re = re.compile(r'[%s][%s]*' % (_stringdefs.xid_start,
                                         _stringdefs.xid_continue))

# digits, a dot and digits again; the lookbehind rejects a match that is
# directly preceded by another dot.
float_re = re.compile(r'(?<!\.)\d+\.\d+')
# any of the three newline conventions; ``\r\n`` is listed first so that it
# is matched as one newline and not as two.
newline_re = re.compile(r'(\r\n|\r|\n)')
50b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
# intern the token type names and keep references to them.  Token types
# are interned in `Token.__new__` as well, which is what allows the
# identity checks (``token.type is TOKEN_EOF``) used further down.
TOKEN_ADD = intern('add')
TOKEN_ASSIGN = intern('assign')
TOKEN_COLON = intern('colon')
TOKEN_COMMA = intern('comma')
TOKEN_DIV = intern('div')
TOKEN_DOT = intern('dot')
TOKEN_EQ = intern('eq')
TOKEN_FLOORDIV = intern('floordiv')
TOKEN_GT = intern('gt')
TOKEN_GTEQ = intern('gteq')
TOKEN_LBRACE = intern('lbrace')
TOKEN_LBRACKET = intern('lbracket')
TOKEN_LPAREN = intern('lparen')
TOKEN_LT = intern('lt')
TOKEN_LTEQ = intern('lteq')
TOKEN_MOD = intern('mod')
TOKEN_MUL = intern('mul')
TOKEN_NE = intern('ne')
TOKEN_PIPE = intern('pipe')
TOKEN_POW = intern('pow')
TOKEN_RBRACE = intern('rbrace')
TOKEN_RBRACKET = intern('rbracket')
TOKEN_RPAREN = intern('rparen')
TOKEN_SEMICOLON = intern('semicolon')
TOKEN_SUB = intern('sub')
TOKEN_TILDE = intern('tilde')
TOKEN_WHITESPACE = intern('whitespace')
TOKEN_FLOAT = intern('float')
TOKEN_INTEGER = intern('integer')
TOKEN_NAME = intern('name')
TOKEN_STRING = intern('string')
TOKEN_OPERATOR = intern('operator')
TOKEN_BLOCK_BEGIN = intern('block_begin')
TOKEN_BLOCK_END = intern('block_end')
TOKEN_VARIABLE_BEGIN = intern('variable_begin')
TOKEN_VARIABLE_END = intern('variable_end')
TOKEN_RAW_BEGIN = intern('raw_begin')
TOKEN_RAW_END = intern('raw_end')
TOKEN_COMMENT_BEGIN = intern('comment_begin')
TOKEN_COMMENT_END = intern('comment_end')
TOKEN_COMMENT = intern('comment')
TOKEN_LINESTATEMENT_BEGIN = intern('linestatement_begin')
TOKEN_LINESTATEMENT_END = intern('linestatement_end')
TOKEN_LINECOMMENT_BEGIN = intern('linecomment_begin')
TOKEN_LINECOMMENT_END = intern('linecomment_end')
TOKEN_LINECOMMENT = intern('linecomment')
TOKEN_DATA = intern('data')
TOKEN_INITIAL = intern('initial')
TOKEN_EOF = intern('eof')
101b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
# bind operators to token types.  Keys are the operator spellings as they
# appear in a template, values are the interned token type names.  Every
# value has to be unique because the mapping is inverted right below.
operators = {
    '+':            TOKEN_ADD,
    '-':            TOKEN_SUB,
    '/':            TOKEN_DIV,
    '//':           TOKEN_FLOORDIV,
    '*':            TOKEN_MUL,
    '%':            TOKEN_MOD,
    '**':           TOKEN_POW,
    '~':            TOKEN_TILDE,
    '[':            TOKEN_LBRACKET,
    ']':            TOKEN_RBRACKET,
    '(':            TOKEN_LPAREN,
    ')':            TOKEN_RPAREN,
    '{':            TOKEN_LBRACE,
    '}':            TOKEN_RBRACE,
    '==':           TOKEN_EQ,
    '!=':           TOKEN_NE,
    '>':            TOKEN_GT,
    '>=':           TOKEN_GTEQ,
    '<':            TOKEN_LT,
    '<=':           TOKEN_LTEQ,
    '=':            TOKEN_ASSIGN,
    '.':            TOKEN_DOT,
    ':':            TOKEN_COLON,
    '|':            TOKEN_PIPE,
    ',':            TOKEN_COMMA,
    ';':            TOKEN_SEMICOLON
}
131b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
# inverse lookup: token type -> operator spelling.  If two operators shared
# a token type one of them would silently vanish here, hence the assertion.
reverse_operators = dict(
    (token_type, symbol) for symbol, token_type in iteritems(operators)
)
assert len(operators) == len(reverse_operators), 'operators dropped'

# one alternation of all operators, longest spelling first so that e.g.
# ``**`` is tried before ``*``.
operator_re = re.compile('(%s)' % '|'.join(
    map(re.escape, sorted(operators, key=len, reverse=True))))
136b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
# token types grouped as "ignored": whitespace plus everything that belongs
# to block comments and line comments.
# NOTE(review): presumably the lexer drops these before the parser sees
# them; the consumer is not part of this section.
ignored_tokens = frozenset([
    TOKEN_WHITESPACE,
    TOKEN_COMMENT_BEGIN, TOKEN_COMMENT, TOKEN_COMMENT_END,
    TOKEN_LINECOMMENT_BEGIN, TOKEN_LINECOMMENT, TOKEN_LINECOMMENT_END,
])

# token types grouped as "ignore if empty".
# NOTE(review): presumably skipped only when their value is empty; confirm
# against the code that consumes this set.
ignore_if_empty = frozenset([
    TOKEN_WHITESPACE, TOKEN_DATA, TOKEN_COMMENT, TOKEN_LINECOMMENT,
])
143b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
144b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
def _describe_token_type(token_type):
    """Return a readable description for a token type.

    Operator token types are described by the operator itself (``'add'``
    becomes ``'+'``), a handful of structural token types by a short
    phrase, and every other token type is returned unchanged.
    """
    symbol = reverse_operators.get(token_type)
    if symbol is not None:
        return symbol

    descriptions = {
        TOKEN_COMMENT_BEGIN:        'begin of comment',
        TOKEN_COMMENT_END:          'end of comment',
        TOKEN_COMMENT:              'comment',
        TOKEN_LINECOMMENT:          'comment',
        TOKEN_BLOCK_BEGIN:          'begin of statement block',
        TOKEN_BLOCK_END:            'end of statement block',
        TOKEN_VARIABLE_BEGIN:       'begin of print statement',
        TOKEN_VARIABLE_END:         'end of print statement',
        TOKEN_LINESTATEMENT_BEGIN:  'begin of line statement',
        TOKEN_LINESTATEMENT_END:    'end of line statement',
        TOKEN_DATA:                 'template data / text',
        TOKEN_EOF:                  'end of template',
    }
    # fall back to the raw token type for anything without a description
    return descriptions.get(token_type, token_type)
162b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
163b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
def describe_token(token):
    """Return a description of `token`.

    A ``name`` token is described by its value, every other token by the
    description of its type.
    """
    kind = token.type
    return token.value if kind == 'name' else _describe_token_type(kind)
169b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
170b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
def describe_token_expr(expr):
    """Like `describe_token` but for token expressions.

    `expr` is either a bare token type or ``'type:value'``.  For
    ``'name:value'`` the value part is returned, otherwise the description
    of the type part.
    """
    # only the first colon separates type and value
    kind, colon, value = expr.partition(':')
    if colon and kind == 'name':
        return value
    return _describe_token_type(kind)
180b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
181b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
def count_newlines(value):
    """Count the newlines in the string.  This is useful for extensions
    that filter a stream.

    Every match of `newline_re` counts once, so a ``\\r\\n`` pair is one
    newline and not two.
    """
    return sum(1 for _ in newline_re.finditer(value))
187b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
188b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
def compile_rules(environment):
    """Compile the delimiters of `environment` into a list of rules.

    Each rule is a ``(name, pattern)`` tuple where `pattern` is the regex
    source that matches the start delimiter.  Comment, block and variable
    delimiters are always present; line statement and line comment rules
    are only added when the respective prefix is not `None`.

    The list is ordered by delimiter length, longest first; delimiters of
    equal length are ordered by rule name in descending order.
    """
    escape = re.escape
    delimiters = [
        ('comment', environment.comment_start_string),
        ('block', environment.block_start_string),
        ('variable', environment.variable_start_string),
    ]
    # the leading length is only a sort key and is stripped again below
    rules = [(len(delimiter), name, escape(delimiter))
             for name, delimiter in delimiters]

    statement_prefix = environment.line_statement_prefix
    if statement_prefix is not None:
        rules.append((len(statement_prefix), 'linestatement',
                      r'^[ \t\v]*' + escape(statement_prefix)))

    comment_prefix = environment.line_comment_prefix
    if comment_prefix is not None:
        rules.append((len(comment_prefix), 'linecomment',
                      r'(?:^|(?<=\S))[^\S\r\n]*' + escape(comment_prefix)))

    rules.sort(reverse=True)
    return [(name, pattern) for _, name, pattern in rules]
210b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
211b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
class Failure(object):
    """Callable that raises a prepared error when it is invoked.

    The message and the exception class (`TemplateSyntaxError` unless
    another one is given) are fixed at construction time; the line number
    and filename are supplied by the caller.  Used by the `Lexer` to
    specify known errors.
    """

    def __init__(self, message, cls=TemplateSyntaxError):
        self.error_class = cls
        self.message = message

    def __call__(self, lineno, filename):
        """Raise the configured error for `lineno` in `filename`."""
        error_class = self.error_class
        raise error_class(self.message, lineno, filename)
223b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
224b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
class Token(tuple):
    """A ``(lineno, type, value)`` tuple with named, read-only fields."""
    __slots__ = ()

    lineno = property(itemgetter(0))
    type = property(itemgetter(1))
    value = property(itemgetter(2))

    def __new__(cls, lineno, type, value):
        # the type is converted to a native string and interned
        fields = (lineno, intern(str(type)), value)
        return tuple.__new__(cls, fields)

    def __str__(self):
        """Operator tokens render as the operator, name tokens as their
        value and everything else as the token type.
        """
        kind = self.type
        if kind in reverse_operators:
            return reverse_operators[kind]
        if kind == 'name':
            return self.value
        return kind

    def test(self, expr):
        """Check the token against a token expression, which is either a
        token type or ``'token_type:token_value'``.  Only string values and
        types can be tested this way.
        """
        # plain equality instead of an identity check: test_any is usually
        # passed an iterable of strings that are not interned.
        if self.type == expr:
            return True
        if ':' not in expr:
            return False
        return expr.split(':', 1) == [self.type, self.value]

    def test_any(self, *iterable):
        """Return `True` if any of the token expressions matches."""
        return any(self.test(expr) for expr in iterable)

    def __repr__(self):
        return 'Token(%r, %r, %r)' % (self.lineno, self.type, self.value)
266b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
267b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
@implements_iterator
class TokenStreamIterator(object):
    """Iterator over a token stream.  Yields the stream's tokens one by
    one and stops as soon as the eof token becomes the current token.
    """

    def __init__(self, stream):
        self.stream = stream

    def __iter__(self):
        return self

    def __next__(self):
        stream = self.stream
        token = stream.current
        if token.type is not TOKEN_EOF:
            # hand out the current token and move the stream forward
            next(stream)
            return token
        # the eof token itself is never yielded; the stream gets closed
        stream.close()
        raise StopIteration()
287b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
288b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
28958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)@implements_iterator
290b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)class TokenStream(object):
    r"""A token stream is an iterable that yields :class:`Token`\s.  The
    parser however does not iterate over it but calls :meth:`next` to go
    one token ahead.  The current active token is stored as :attr:`current`.
    """
295b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
296b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)    def __init__(self, generator, name, filename):
29758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)        self._iter = iter(generator)
298b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        self._pushed = deque()
299b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        self.name = name
300b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        self.filename = filename
301b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        self.closed = False
302b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        self.current = Token(1, TOKEN_INITIAL, '')
303b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        next(self)
304b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
    def __iter__(self):
        """Return a new :class:`TokenStreamIterator` bound to this stream."""
        return TokenStreamIterator(self)
307b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
30858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)    def __bool__(self):
309b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        return bool(self._pushed) or self.current.type is not TOKEN_EOF
31058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)    __nonzero__ = __bool__  # py2
311b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
312b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)    eos = property(lambda x: not x, doc="Are we at the end of the stream?")
313b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
314b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)    def push(self, token):
315b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        """Push a token back to the stream."""
316b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        self._pushed.append(token)
317b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
318b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)    def look(self):
319b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        """Look at the next token."""
320b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        old_token = next(self)
321b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        result = self.current
322b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        self.push(result)
323b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        self.current = old_token
324b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        return result
325b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
326b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)    def skip(self, n=1):
327b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        """Got n tokens ahead."""
32858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)        for x in range(n):
329b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            next(self)
330b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
331b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)    def next_if(self, expr):
332b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        """Perform the token test and return the token if it matched.
333b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        Otherwise the return value is `None`.
334b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        """
335b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        if self.current.test(expr):
336b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            return next(self)
337b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
338b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)    def skip_if(self, expr):
339b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        """Like :meth:`next_if` but only returns `True` or `False`."""
340b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        return self.next_if(expr) is not None
341b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
34258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)    def __next__(self):
343b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        """Go one token ahead and return the old one"""
344b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        rv = self.current
345b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        if self._pushed:
346b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            self.current = self._pushed.popleft()
347b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        elif self.current.type is not TOKEN_EOF:
348b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            try:
34958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)                self.current = next(self._iter)
350b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            except StopIteration:
351b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                self.close()
352b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        return rv
353b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
354b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)    def close(self):
355b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        """Close the stream."""
356b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        self.current = Token(self.current.lineno, TOKEN_EOF, '')
35758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)        self._iter = None
358b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        self.closed = True
359b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
360b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)    def expect(self, expr):
361b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        """Expect a given token type and return it.  This accepts the same
362b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        argument as :meth:`jinja2.lexer.Token.test`.
363b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        """
364b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        if not self.current.test(expr):
365b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            expr = describe_token_expr(expr)
366b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            if self.current.type is TOKEN_EOF:
367b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                raise TemplateSyntaxError('unexpected end of template, '
368b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                                          'expected %r.' % expr,
369b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                                          self.current.lineno,
370b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                                          self.name, self.filename)
371b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            raise TemplateSyntaxError("expected token %r, got %r" %
372b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                                      (expr, describe_token(self.current)),
373b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                                      self.current.lineno,
374b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                                      self.name, self.filename)
375b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        try:
376b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            return self.current
377b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        finally:
378b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            next(self)
379b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
380b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
def get_lexer(environment):
    """Return a lexer for *environment*, reusing a cached one if possible.

    The cache key is the tuple of every environment setting listed
    below, so environments that agree on all of them share a single
    :class:`Lexer` instance.
    """
    key_attributes = (
        'block_start_string',
        'block_end_string',
        'variable_start_string',
        'variable_end_string',
        'comment_start_string',
        'comment_end_string',
        'line_statement_prefix',
        'line_comment_prefix',
        'trim_blocks',
        'lstrip_blocks',
        'newline_sequence',
        'keep_trailing_newline',
    )
    key = tuple(getattr(environment, attr) for attr in key_attributes)

    cached = _lexer_cache.get(key)
    if cached is not None:
        return cached

    lexer = Lexer(environment)
    _lexer_cache[key] = lexer
    return lexer
400b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
401b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
402b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)class Lexer(object):
403b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)    """Class that implements a lexer for a given environment. Automatically
404b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)    created by the environment class, usually you don't have to do that.
405b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
406b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)    Note that the lexer is not automatically bound to an environment.
407b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)    Multiple environments can share the same lexer.
408b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)    """
409b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
410b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)    def __init__(self, environment):
411b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        # shortcuts
412b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        c = lambda x: re.compile(x, re.M | re.S)
413b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        e = re.escape
414b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
415b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        # lexing rules for tags
416b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        tag_rules = [
417b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            (whitespace_re, TOKEN_WHITESPACE, None),
418b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            (float_re, TOKEN_FLOAT, None),
419b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            (integer_re, TOKEN_INTEGER, None),
420b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            (name_re, TOKEN_NAME, None),
421b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            (string_re, TOKEN_STRING, None),
422b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            (operator_re, TOKEN_OPERATOR, None)
423b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        ]
424b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
42558537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)        # assemble the root lexing rule. because "|" is ungreedy
426b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        # we have to sort by length so that the lexer continues working
427b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        # as expected when we have parsing rules like <% for block and
428b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        # <%= for variables. (if someone wants asp like syntax)
429b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        # variables are just part of the rules if variable processing
430b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        # is required.
431b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        root_tag_rules = compile_rules(environment)
432b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
433b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        # block suffix if trimming is enabled
434b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        block_suffix_re = environment.trim_blocks and '\\n?' or ''
435b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
43658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)        # strip leading spaces if lstrip_blocks is enabled
43758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)        prefix_re = {}
43858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)        if environment.lstrip_blocks:
43958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)            # use '{%+' to manually disable lstrip_blocks behavior
44058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)            no_lstrip_re = e('+')
44158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)            # detect overlap between block and variable or comment strings
44258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)            block_diff = c(r'^%s(.*)' % e(environment.block_start_string))
44358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)            # make sure we don't mistake a block for a variable or a comment
44458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)            m = block_diff.match(environment.comment_start_string)
44558537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)            no_lstrip_re += m and r'|%s' % e(m.group(1)) or ''
44658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)            m = block_diff.match(environment.variable_start_string)
44758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)            no_lstrip_re += m and r'|%s' % e(m.group(1)) or ''
44858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)
44958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)            # detect overlap between comment and variable strings
45058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)            comment_diff = c(r'^%s(.*)' % e(environment.comment_start_string))
45158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)            m = comment_diff.match(environment.variable_start_string)
45258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)            no_variable_re = m and r'(?!%s)' % e(m.group(1)) or ''
45358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)
45458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)            lstrip_re = r'^[ \t]*'
45558537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)            block_prefix_re = r'%s%s(?!%s)|%s\+?' % (
45658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)                    lstrip_re,
45758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)                    e(environment.block_start_string),
45858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)                    no_lstrip_re,
45958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)                    e(environment.block_start_string),
46058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)                    )
46158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)            comment_prefix_re = r'%s%s%s|%s\+?' % (
46258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)                    lstrip_re,
46358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)                    e(environment.comment_start_string),
46458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)                    no_variable_re,
46558537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)                    e(environment.comment_start_string),
46658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)                    )
46758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)            prefix_re['block'] = block_prefix_re
46858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)            prefix_re['comment'] = comment_prefix_re
46958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)        else:
47058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)            block_prefix_re = '%s' % e(environment.block_start_string)
47158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)
472b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        self.newline_sequence = environment.newline_sequence
47358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)        self.keep_trailing_newline = environment.keep_trailing_newline
474b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
475b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        # global lexing rules
476b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        self.rules = {
477b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            'root': [
478b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                # directives
479b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                (c('(.*?)(?:%s)' % '|'.join(
480b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    [r'(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*(?:\-%s\s*|%s))' % (
481b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                        e(environment.block_start_string),
48258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)                        block_prefix_re,
483b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                        e(environment.block_end_string),
484b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                        e(environment.block_end_string)
485b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    )] + [
48658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)                        r'(?P<%s_begin>\s*%s\-|%s)' % (n, r, prefix_re.get(n,r))
487b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                        for n, r in root_tag_rules
488b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    ])), (TOKEN_DATA, '#bygroup'), '#bygroup'),
489b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                # data
490b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                (c('.+'), TOKEN_DATA, None)
491b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            ],
492b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            # comments
493b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            TOKEN_COMMENT_BEGIN: [
494b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                (c(r'(.*?)((?:\-%s\s*|%s)%s)' % (
495b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    e(environment.comment_end_string),
496b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    e(environment.comment_end_string),
497b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    block_suffix_re
498b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                )), (TOKEN_COMMENT, TOKEN_COMMENT_END), '#pop'),
499b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                (c('(.)'), (Failure('Missing end of comment tag'),), None)
500b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            ],
501b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            # blocks
502b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            TOKEN_BLOCK_BEGIN: [
503b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                (c('(?:\-%s\s*|%s)%s' % (
504b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    e(environment.block_end_string),
505b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    e(environment.block_end_string),
506b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    block_suffix_re
507b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                )), TOKEN_BLOCK_END, '#pop'),
508b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            ] + tag_rules,
509b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            # variables
510b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            TOKEN_VARIABLE_BEGIN: [
511b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                (c('\-%s\s*|%s' % (
512b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    e(environment.variable_end_string),
513b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    e(environment.variable_end_string)
514b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                )), TOKEN_VARIABLE_END, '#pop')
515b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            ] + tag_rules,
516b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            # raw block
517b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            TOKEN_RAW_BEGIN: [
518b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                (c('(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % (
519b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    e(environment.block_start_string),
52058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)                    block_prefix_re,
521b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    e(environment.block_end_string),
522b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    e(environment.block_end_string),
523b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    block_suffix_re
524b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                )), (TOKEN_DATA, TOKEN_RAW_END), '#pop'),
525b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                (c('(.)'), (Failure('Missing end of raw directive'),), None)
526b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            ],
527b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            # line statements
528b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            TOKEN_LINESTATEMENT_BEGIN: [
529b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                (c(r'\s*(\n|$)'), TOKEN_LINESTATEMENT_END, '#pop')
530b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            ] + tag_rules,
531b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            # line comments
532b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            TOKEN_LINECOMMENT_BEGIN: [
533b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                (c(r'(.*?)()(?=\n|$)'), (TOKEN_LINECOMMENT,
534b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                 TOKEN_LINECOMMENT_END), '#pop')
535b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            ]
536b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        }
537b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
538b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)    def _normalize_newlines(self, value):
53958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)        """Called for strings and template data to normalize it to unicode."""
540b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        return newline_re.sub(self.newline_sequence, value)
541b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
542b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)    def tokenize(self, source, name=None, filename=None, state=None):
543b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        """Calls tokeniter + tokenize and wraps it in a token stream.
544b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        """
545b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        stream = self.tokeniter(source, name, filename, state)
546b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        return TokenStream(self.wrap(stream, name, filename), name, filename)
547b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
548b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)    def wrap(self, stream, name=None, filename=None):
549b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        """This is called with the stream as returned by `tokenize` and wraps
550b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        every token in a :class:`Token` and converts the value.
551b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        """
552b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        for lineno, token, value in stream:
553b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            if token in ignored_tokens:
554b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                continue
555b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            elif token == 'linestatement_begin':
556b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                token = 'block_begin'
557b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            elif token == 'linestatement_end':
558b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                token = 'block_end'
559b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            # we are not interested in those tokens in the parser
560b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            elif token in ('raw_begin', 'raw_end'):
561b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                continue
562b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            elif token == 'data':
563b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                value = self._normalize_newlines(value)
564b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            elif token == 'keyword':
565b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                token = value
566b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            elif token == 'name':
567b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                value = str(value)
568b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            elif token == 'string':
569b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                # try to unescape string
570b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                try:
571b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    value = self._normalize_newlines(value[1:-1]) \
572b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                        .encode('ascii', 'backslashreplace') \
573b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                        .decode('unicode-escape')
57458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)                except Exception as e:
575b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    msg = str(e).split(':')[-1].strip()
576b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    raise TemplateSyntaxError(msg, lineno, name, filename)
577b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                # if we can express it as bytestring (ascii only)
578b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                # we do that for support of semi broken APIs
579b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                # as datetime.datetime.strftime.  On python 3 this
580b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                # call becomes a noop thanks to 2to3
581b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                try:
582b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    value = str(value)
583b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                except UnicodeError:
584b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    pass
585b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            elif token == 'integer':
586b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                value = int(value)
587b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            elif token == 'float':
588b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                value = float(value)
589b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            elif token == 'operator':
590b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                token = operators[value]
591b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            yield Token(lineno, token, value)
592b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
593b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)    def tokeniter(self, source, name, filename=None, state=None):
594b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        """This method tokenizes the text and returns the tokens in a
595b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        generator.  Use this method if you just want to tokenize a template.
596b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        """
59758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)        source = text_type(source)
59858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)        lines = source.splitlines()
59958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)        if self.keep_trailing_newline and source:
60058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)            for newline in ('\r\n', '\r', '\n'):
60158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)                if source.endswith(newline):
60258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)                    lines.append('')
60358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)                    break
60458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)        source = '\n'.join(lines)
605b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        pos = 0
606b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        lineno = 1
607b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        stack = ['root']
608b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        if state is not None and state != 'root':
609b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            assert state in ('variable', 'block'), 'invalid state'
610b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            stack.append(state + '_begin')
611b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        else:
612b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            state = 'root'
613b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        statetokens = self.rules[stack[-1]]
614b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        source_length = len(source)
615b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
616b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        balancing_stack = []
617b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
618b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)        while 1:
619b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            # tokenizer loop
620b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            for regex, tokens, new_state in statetokens:
621b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                m = regex.match(source, pos)
622b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                # if no match we try again with the next rule
623b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                if m is None:
624b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    continue
625b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
62658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)                # we only match blocks and variables if braces / parentheses
627b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                # are balanced. continue parsing with the lower rule which
628b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                # is the operator rule. do this only if the end tags look
629b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                # like operators
630b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                if balancing_stack and \
631b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                   tokens in ('variable_end', 'block_end',
632b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                              'linestatement_end'):
633b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    continue
634b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
635b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                # tuples support more options
636b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                if isinstance(tokens, tuple):
637b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    for idx, token in enumerate(tokens):
638b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                        # failure group
639b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                        if token.__class__ is Failure:
640b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                            raise token(lineno, filename)
641b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                        # bygroup is a bit more complex, in that case we
642b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                        # yield for the current token the first named
643b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                        # group that matched
644b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                        elif token == '#bygroup':
64558537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)                            for key, value in iteritems(m.groupdict()):
646b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                                if value is not None:
647b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                                    yield lineno, key, value
648b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                                    lineno += value.count('\n')
649b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                                    break
650b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                            else:
651b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                                raise RuntimeError('%r wanted to resolve '
652b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                                                   'the token dynamically'
653b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                                                   ' but no group matched'
654b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                                                   % regex)
655b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                        # normal group
656b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                        else:
657b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                            data = m.group(idx + 1)
658b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                            if data or token not in ignore_if_empty:
659b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                                yield lineno, token, data
660b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                            lineno += data.count('\n')
661b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
662b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                # strings as token just are yielded as it.
663b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                else:
664b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    data = m.group()
665b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    # update brace/parentheses balance
666b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    if tokens == 'operator':
667b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                        if data == '{':
668b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                            balancing_stack.append('}')
669b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                        elif data == '(':
670b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                            balancing_stack.append(')')
671b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                        elif data == '[':
672b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                            balancing_stack.append(']')
673b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                        elif data in ('}', ')', ']'):
674b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                            if not balancing_stack:
675b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                                raise TemplateSyntaxError('unexpected \'%s\'' %
676b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                                                          data, lineno, name,
677b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                                                          filename)
678b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                            expected_op = balancing_stack.pop()
679b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                            if expected_op != data:
680b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                                raise TemplateSyntaxError('unexpected \'%s\', '
681b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                                                          'expected \'%s\'' %
682b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                                                          (data, expected_op),
683b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                                                          lineno, name,
684b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                                                          filename)
685b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    # yield items
686b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    if data or tokens not in ignore_if_empty:
687b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                        yield lineno, tokens, data
688b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    lineno += data.count('\n')
689b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
690b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                # fetch new position into new variable so that we can check
691b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                # if there is a internal parsing error which would result
692b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                # in an infinite loop
693b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                pos2 = m.end()
694b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)
695b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                # handle state changes
696b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                if new_state is not None:
697b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    # remove the uppermost state
698b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    if new_state == '#pop':
699b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                        stack.pop()
700b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    # resolve the new state by group checking
701b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    elif new_state == '#bygroup':
70258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)                        for key, value in iteritems(m.groupdict()):
703b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                            if value is not None:
704b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                                stack.append(key)
705b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                                break
706b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                        else:
707b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                            raise RuntimeError('%r wanted to resolve the '
708b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                                               'new state dynamically but'
709b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                                               ' no group matched' %
710b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                                               regex)
711b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    # direct state name given
712b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    else:
713b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                        stack.append(new_state)
714b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    statetokens = self.rules[stack[-1]]
715b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                # we are still at the same position and no stack change.
716b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                # this means a loop without break condition, avoid that and
717b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                # raise error
718b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                elif pos2 == pos:
719b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    raise RuntimeError('%r yielded empty string without '
720b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                                       'stack change' % regex)
721b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                # publish new function and start again
722b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                pos = pos2
723b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                break
72458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)            # if loop terminated without break we haven't found a single match
725b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            # either we are at the end of the file or we have a problem
726b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)            else:
727b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                # end of text
728b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                if pos >= source_length:
729b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                    return
730b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                # something went wrong
731b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                raise TemplateSyntaxError('unexpected char %r at %d' %
732b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                                          (source[pos], pos), lineno,
733b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)                                          name, filename)
734