# -*- coding: utf-8 -*-
"""
    jinja2.lexer
    ~~~~~~~~~~~~

    This module implements a Jinja / Python combination lexer. The
    `Lexer` class provided by this module is used to do some preprocessing
    for Jinja.

    On the one hand it filters out invalid operators like the bitshift
    operators we don't allow in templates. On the other hand it separates
    template code and python code in expressions.

    :copyright: (c) 2010 by the Jinja Team.
    :license: BSD, see LICENSE for more details.
"""
import re

from operator import itemgetter
from collections import deque
from jinja2.exceptions import TemplateSyntaxError
from jinja2.utils import LRUCache
from jinja2._compat import next, iteritems, implements_iterator, text_type, \
     intern


# cache for the lexers. Exists in order to be able to have multiple
# environments with the same lexer
_lexer_cache = LRUCache(50)

# static regular expressions
whitespace_re = re.compile(r'\s+', re.U)
string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                       r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S)
integer_re = re.compile(r'\d+')

# we use the unicode identifier rule if this python version is able
# to handle unicode identifiers, otherwise the standard ASCII one.
try:
    compile('föö', '<unknown>', 'eval')
except SyntaxError:
    name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b')
else:
    from jinja2 import _stringdefs
    name_re = re.compile(r'[%s][%s]*' % (_stringdefs.xid_start,
                                         _stringdefs.xid_continue))
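
# Illustrative note (sketch only): when the unicode rule is active, a name
# such as ``müller`` in ``{{ müller }}`` is lexed as a single name token;
# with the ASCII-only rule such a name would be rejected by the lexer.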

float_re = re.compile(r'(?<!\.)\d+\.\d+')
newline_re = re.compile(r'(\r\n|\r|\n)')

# intern the tokens and keep references to them
TOKEN_ADD = intern('add')
TOKEN_ASSIGN = intern('assign')
TOKEN_COLON = intern('colon')
TOKEN_COMMA = intern('comma')
TOKEN_DIV = intern('div')
TOKEN_DOT = intern('dot')
TOKEN_EQ = intern('eq')
TOKEN_FLOORDIV = intern('floordiv')
TOKEN_GT = intern('gt')
TOKEN_GTEQ = intern('gteq')
TOKEN_LBRACE = intern('lbrace')
TOKEN_LBRACKET = intern('lbracket')
TOKEN_LPAREN = intern('lparen')
TOKEN_LT = intern('lt')
TOKEN_LTEQ = intern('lteq')
TOKEN_MOD = intern('mod')
TOKEN_MUL = intern('mul')
TOKEN_NE = intern('ne')
TOKEN_PIPE = intern('pipe')
TOKEN_POW = intern('pow')
TOKEN_RBRACE = intern('rbrace')
TOKEN_RBRACKET = intern('rbracket')
TOKEN_RPAREN = intern('rparen')
TOKEN_SEMICOLON = intern('semicolon')
TOKEN_SUB = intern('sub')
TOKEN_TILDE = intern('tilde')
TOKEN_WHITESPACE = intern('whitespace')
TOKEN_FLOAT = intern('float')
TOKEN_INTEGER = intern('integer')
TOKEN_NAME = intern('name')
TOKEN_STRING = intern('string')
TOKEN_OPERATOR = intern('operator')
TOKEN_BLOCK_BEGIN = intern('block_begin')
TOKEN_BLOCK_END = intern('block_end')
TOKEN_VARIABLE_BEGIN = intern('variable_begin')
TOKEN_VARIABLE_END = intern('variable_end')
TOKEN_RAW_BEGIN = intern('raw_begin')
TOKEN_RAW_END = intern('raw_end')
TOKEN_COMMENT_BEGIN = intern('comment_begin')
TOKEN_COMMENT_END = intern('comment_end')
TOKEN_COMMENT = intern('comment')
TOKEN_LINESTATEMENT_BEGIN = intern('linestatement_begin')
TOKEN_LINESTATEMENT_END = intern('linestatement_end')
TOKEN_LINECOMMENT_BEGIN = intern('linecomment_begin')
TOKEN_LINECOMMENT_END = intern('linecomment_end')
TOKEN_LINECOMMENT = intern('linecomment')
TOKEN_DATA = intern('data')
TOKEN_INITIAL = intern('initial')
TOKEN_EOF = intern('eof')

# bind operators to token types
operators = {
    '+':            TOKEN_ADD,
    '-':            TOKEN_SUB,
    '/':            TOKEN_DIV,
    '//':           TOKEN_FLOORDIV,
    '*':            TOKEN_MUL,
    '%':            TOKEN_MOD,
    '**':           TOKEN_POW,
    '~':            TOKEN_TILDE,
    '[':            TOKEN_LBRACKET,
    ']':            TOKEN_RBRACKET,
    '(':            TOKEN_LPAREN,
    ')':            TOKEN_RPAREN,
    '{':            TOKEN_LBRACE,
    '}':            TOKEN_RBRACE,
    '==':           TOKEN_EQ,
    '!=':           TOKEN_NE,
    '>':            TOKEN_GT,
    '>=':           TOKEN_GTEQ,
    '<':            TOKEN_LT,
    '<=':           TOKEN_LTEQ,
    '=':            TOKEN_ASSIGN,
    '.':            TOKEN_DOT,
    ':':            TOKEN_COLON,
    '|':            TOKEN_PIPE,
    ',':            TOKEN_COMMA,
    ';':            TOKEN_SEMICOLON
}

reverse_operators = dict([(v, k) for k, v in iteritems(operators)])
assert len(operators) == len(reverse_operators), 'operators dropped'
operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in
                         sorted(operators, key=lambda x: -len(x))))
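
# Illustrative note (not used at runtime): the alternation above tries its
# alternatives from left to right, so the operators are sorted by length and
# '**' and '//' appear before '*' and '/'.  Otherwise they could never match
# as a whole, e.g.:
#
#   >>> operator_re.match('**').group()
#   '**'
#   >>> operator_re.match('//').group()
#   '//'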

ignored_tokens = frozenset([TOKEN_COMMENT_BEGIN, TOKEN_COMMENT,
                            TOKEN_COMMENT_END, TOKEN_WHITESPACE,
                            TOKEN_LINECOMMENT_BEGIN, TOKEN_LINECOMMENT_END,
                            TOKEN_LINECOMMENT])
ignore_if_empty = frozenset([TOKEN_WHITESPACE, TOKEN_DATA,
                             TOKEN_COMMENT, TOKEN_LINECOMMENT])


def _describe_token_type(token_type):
    if token_type in reverse_operators:
        return reverse_operators[token_type]
    return {
        TOKEN_COMMENT_BEGIN:        'begin of comment',
        TOKEN_COMMENT_END:          'end of comment',
        TOKEN_COMMENT:              'comment',
        TOKEN_LINECOMMENT:          'comment',
        TOKEN_BLOCK_BEGIN:          'begin of statement block',
        TOKEN_BLOCK_END:            'end of statement block',
        TOKEN_VARIABLE_BEGIN:       'begin of print statement',
        TOKEN_VARIABLE_END:         'end of print statement',
        TOKEN_LINESTATEMENT_BEGIN:  'begin of line statement',
        TOKEN_LINESTATEMENT_END:    'end of line statement',
        TOKEN_DATA:                 'template data / text',
        TOKEN_EOF:                  'end of template'
    }.get(token_type, token_type)


def describe_token(token):
    """Returns a description of the token."""
    if token.type == 'name':
        return token.value
    return _describe_token_type(token.type)


def describe_token_expr(expr):
    """Like `describe_token` but for token expressions."""
    if ':' in expr:
        type, value = expr.split(':', 1)
        if type == 'name':
            return value
    else:
        type = expr
    return _describe_token_type(type)
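
# Rough sketch of how these describe helpers behave (illustrative only):
#
#   >>> describe_token_expr('name:for')
#   'for'
#   >>> describe_token_expr('block_end')
#   'end of statement block'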


def count_newlines(value):
    """Count the number of newline characters in the string.  This is
    useful for extensions that filter a stream.
    """
    return len(newline_re.findall(value))
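
# For instance (sketch): count_newlines('foo\r\nbar\nbaz') returns 2, since
# newline_re matches '\r\n' once and '\n' once.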


def compile_rules(environment):
    """Compiles all the rules from the environment into a list of rules."""
    e = re.escape
    rules = [
        (len(environment.comment_start_string), 'comment',
         e(environment.comment_start_string)),
        (len(environment.block_start_string), 'block',
         e(environment.block_start_string)),
        (len(environment.variable_start_string), 'variable',
         e(environment.variable_start_string))
    ]

    if environment.line_statement_prefix is not None:
        rules.append((len(environment.line_statement_prefix), 'linestatement',
                      r'^[ \t\v]*' + e(environment.line_statement_prefix)))
    if environment.line_comment_prefix is not None:
        rules.append((len(environment.line_comment_prefix), 'linecomment',
                      r'(?:^|(?<=\S))[^\S\r\n]*' +
                      e(environment.line_comment_prefix)))

    return [x[1:] for x in sorted(rules, reverse=True)]
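
# With the default delimiters this yields, roughly (sketch, escaping elided):
#
#   [('variable', e('{{')), ('comment', e('{#')), ('block', e('{%'))]
#
# Because the start string length is the first element of each tuple, longer
# start strings always sort before shorter ones, so custom delimiters that
# share a prefix are still matched correctly.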


class Failure(object):
    """Class that raises a `TemplateSyntaxError` if called.
    Used by the `Lexer` to specify known errors.
    """

    def __init__(self, message, cls=TemplateSyntaxError):
        self.message = message
        self.error_class = cls

    def __call__(self, lineno, filename):
        raise self.error_class(self.message, lineno, filename)


class Token(tuple):
    """Token class."""
    __slots__ = ()
    lineno, type, value = (property(itemgetter(x)) for x in range(3))

    def __new__(cls, lineno, type, value):
        return tuple.__new__(cls, (lineno, intern(str(type)), value))

    def __str__(self):
        if self.type in reverse_operators:
            return reverse_operators[self.type]
        elif self.type == 'name':
            return self.value
        return self.type

    def test(self, expr):
        """Test a token against a token expression.  This can either be a
        token type or ``'token_type:token_value'``.  This can only test
        against string values and types.
        """
        # here we do a regular string equality check as test_any is usually
        # passed an iterable of non-interned strings.
        if self.type == expr:
            return True
        elif ':' in expr:
            return expr.split(':', 1) == [self.type, self.value]
        return False

    def test_any(self, *iterable):
        """Test against multiple token expressions."""
        for expr in iterable:
            if self.test(expr):
                return True
        return False

    def __repr__(self):
        return 'Token(%r, %r, %r)' % (
            self.lineno,
            self.type,
            self.value
        )

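# A quick sketch of how token tests read (illustrative, not used at runtime):
#
#   >>> tok = Token(1, 'name', 'foo')
#   >>> tok.test('name'), tok.test('name:foo'), tok.test('name:bar')
#   (True, True, False)
#   >>> tok.test_any('integer', 'name:foo')
#   True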

@implements_iterator
class TokenStreamIterator(object):
    """The iterator for tokenstreams.  Iterate over the stream
    until the eof token is reached.
    """

    def __init__(self, stream):
        self.stream = stream

    def __iter__(self):
        return self

    def __next__(self):
        token = self.stream.current
        if token.type is TOKEN_EOF:
            self.stream.close()
            raise StopIteration()
        next(self.stream)
        return token


@implements_iterator
class TokenStream(object):
    r"""A token stream is an iterable that yields :class:`Token`\s.  The
    parser however does not iterate over it but calls :meth:`next` to go
    one token ahead.  The current active token is stored as :attr:`current`.
    """

    def __init__(self, generator, name, filename):
        self._iter = iter(generator)
        self._pushed = deque()
        self.name = name
        self.filename = filename
        self.closed = False
        self.current = Token(1, TOKEN_INITIAL, '')
        next(self)

    def __iter__(self):
        return TokenStreamIterator(self)

    def __bool__(self):
        return bool(self._pushed) or self.current.type is not TOKEN_EOF
    __nonzero__ = __bool__  # py2

    eos = property(lambda x: not x, doc="Are we at the end of the stream?")

    def push(self, token):
        """Push a token back to the stream."""
        self._pushed.append(token)

    def look(self):
        """Look at the next token."""
        old_token = next(self)
        result = self.current
        self.push(result)
        self.current = old_token
        return result

    def skip(self, n=1):
        """Go n tokens ahead."""
        for x in range(n):
            next(self)

    def next_if(self, expr):
        """Perform the token test and return the token if it matched.
        Otherwise the return value is `None`.
        """
        if self.current.test(expr):
            return next(self)

    def skip_if(self, expr):
        """Like :meth:`next_if` but only returns `True` or `False`."""
        return self.next_if(expr) is not None

    def __next__(self):
        """Go one token ahead and return the old one"""
        rv = self.current
        if self._pushed:
            self.current = self._pushed.popleft()
        elif self.current.type is not TOKEN_EOF:
            try:
                self.current = next(self._iter)
            except StopIteration:
                self.close()
        return rv

    def close(self):
        """Close the stream."""
        self.current = Token(self.current.lineno, TOKEN_EOF, '')
        self._iter = None
        self.closed = True

    def expect(self, expr):
        """Expect a given token type and return it.  This accepts the same
        argument as :meth:`jinja2.lexer.Token.test`.
        """
        if not self.current.test(expr):
            expr = describe_token_expr(expr)
            if self.current.type is TOKEN_EOF:
                raise TemplateSyntaxError('unexpected end of template, '
                                          'expected %r.' % expr,
                                          self.current.lineno,
                                          self.name, self.filename)
            raise TemplateSyntaxError("expected token %r, got %r" %
                                      (expr, describe_token(self.current)),
                                      self.current.lineno,
                                      self.name, self.filename)
        try:
            return self.current
        finally:
            next(self)


def get_lexer(environment):
    """Return a lexer which is probably cached."""
    key = (environment.block_start_string,
           environment.block_end_string,
           environment.variable_start_string,
           environment.variable_end_string,
           environment.comment_start_string,
           environment.comment_end_string,
           environment.line_statement_prefix,
           environment.line_comment_prefix,
           environment.trim_blocks,
           environment.lstrip_blocks,
           environment.newline_sequence,
           environment.keep_trailing_newline)
    lexer = _lexer_cache.get(key)
    if lexer is None:
        lexer = Lexer(environment)
        _lexer_cache[key] = lexer
    return lexer

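# Sketch of the caching behaviour (illustrative only): environments that are
# configured identically map to the same cache key and therefore share one
# Lexer instance.
#
#   >>> from jinja2 import Environment
#   >>> get_lexer(Environment()) is get_lexer(Environment())
#   True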

class Lexer(object):
    """Class that implements a lexer for a given environment.  Automatically
    created by the environment class; usually you don't have to create one
    yourself.

    Note that the lexer is not automatically bound to an environment.
    Multiple environments can share the same lexer.
    """

    def __init__(self, environment):
        # shortcuts
        c = lambda x: re.compile(x, re.M | re.S)
        e = re.escape

        # lexing rules for tags
        tag_rules = [
            (whitespace_re, TOKEN_WHITESPACE, None),
            (float_re, TOKEN_FLOAT, None),
            (integer_re, TOKEN_INTEGER, None),
            (name_re, TOKEN_NAME, None),
            (string_re, TOKEN_STRING, None),
            (operator_re, TOKEN_OPERATOR, None)
        ]

        # assemble the root lexing rule.  because regex alternation tries the
        # alternatives from left to right, we have to sort the rules by
        # length so that the lexer keeps working when delimiters share a
        # prefix, e.g. <% for blocks and <%= for variables (if someone wants
        # asp-like syntax).  variables are just part of the rules if
        # variable processing is required.
        root_tag_rules = compile_rules(environment)

        # block suffix if trimming is enabled
        block_suffix_re = environment.trim_blocks and '\\n?' or ''

        # strip leading spaces if lstrip_blocks is enabled
        prefix_re = {}
        if environment.lstrip_blocks:
            # use '{%+' to manually disable lstrip_blocks behavior
            no_lstrip_re = e('+')
            # detect overlap between block and variable or comment strings
            block_diff = c(r'^%s(.*)' % e(environment.block_start_string))
            # make sure we don't mistake a block for a variable or a comment
            m = block_diff.match(environment.comment_start_string)
            no_lstrip_re += m and r'|%s' % e(m.group(1)) or ''
            m = block_diff.match(environment.variable_start_string)
            no_lstrip_re += m and r'|%s' % e(m.group(1)) or ''

            # detect overlap between comment and variable strings
            comment_diff = c(r'^%s(.*)' % e(environment.comment_start_string))
            m = comment_diff.match(environment.variable_start_string)
            no_variable_re = m and r'(?!%s)' % e(m.group(1)) or ''

            lstrip_re = r'^[ \t]*'
            block_prefix_re = r'%s%s(?!%s)|%s\+?' % (
                    lstrip_re,
                    e(environment.block_start_string),
                    no_lstrip_re,
                    e(environment.block_start_string),
                    )
            comment_prefix_re = r'%s%s%s|%s\+?' % (
                    lstrip_re,
                    e(environment.comment_start_string),
                    no_variable_re,
                    e(environment.comment_start_string),
                    )
            prefix_re['block'] = block_prefix_re
            prefix_re['comment'] = comment_prefix_re
        else:
            block_prefix_re = e(environment.block_start_string)

        self.newline_sequence = environment.newline_sequence
        self.keep_trailing_newline = environment.keep_trailing_newline

        # global lexing rules
        self.rules = {
            'root': [
                # directives
                (c('(.*?)(?:%s)' % '|'.join(
                    [r'(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*(?:\-%s\s*|%s))' % (
                        e(environment.block_start_string),
                        block_prefix_re,
                        e(environment.block_end_string),
                        e(environment.block_end_string)
                    )] + [
                        r'(?P<%s_begin>\s*%s\-|%s)' % (n, r, prefix_re.get(n, r))
                        for n, r in root_tag_rules
                    ])), (TOKEN_DATA, '#bygroup'), '#bygroup'),
                # data
                (c('.+'), TOKEN_DATA, None)
            ],
            # comments
            TOKEN_COMMENT_BEGIN: [
                (c(r'(.*?)((?:\-%s\s*|%s)%s)' % (
                    e(environment.comment_end_string),
                    e(environment.comment_end_string),
                    block_suffix_re
                )), (TOKEN_COMMENT, TOKEN_COMMENT_END), '#pop'),
                (c('(.)'), (Failure('Missing end of comment tag'),), None)
            ],
            # blocks
            TOKEN_BLOCK_BEGIN: [
                (c(r'(?:\-%s\s*|%s)%s' % (
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), TOKEN_BLOCK_END, '#pop'),
            ] + tag_rules,
            # variables
            TOKEN_VARIABLE_BEGIN: [
                (c(r'\-%s\s*|%s' % (
                    e(environment.variable_end_string),
                    e(environment.variable_end_string)
                )), TOKEN_VARIABLE_END, '#pop')
            ] + tag_rules,
            # raw block
            TOKEN_RAW_BEGIN: [
                (c(r'(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % (
                    e(environment.block_start_string),
                    block_prefix_re,
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), (TOKEN_DATA, TOKEN_RAW_END), '#pop'),
                (c('(.)'), (Failure('Missing end of raw directive'),), None)
            ],
            # line statements
            TOKEN_LINESTATEMENT_BEGIN: [
                (c(r'\s*(\n|$)'), TOKEN_LINESTATEMENT_END, '#pop')
            ] + tag_rules,
            # line comments
            TOKEN_LINECOMMENT_BEGIN: [
                (c(r'(.*?)()(?=\n|$)'), (TOKEN_LINECOMMENT,
                 TOKEN_LINECOMMENT_END), '#pop')
            ]
        }

    def _normalize_newlines(self, value):
        """Called for strings and template data to normalize newlines to the
        configured newline sequence."""
        return newline_re.sub(self.newline_sequence, value)

    def tokenize(self, source, name=None, filename=None, state=None):
        """Calls tokeniter and wraps the resulting token iterator in a
        :class:`TokenStream`.
        """
        stream = self.tokeniter(source, name, filename, state)
        return TokenStream(self.wrap(stream, name, filename), name, filename)

    def wrap(self, stream, name=None, filename=None):
        """This is called with the stream as returned by `tokeniter` and wraps
        every token in a :class:`Token` and converts the value.
        """
        for lineno, token, value in stream:
            if token in ignored_tokens:
                continue
            elif token == 'linestatement_begin':
                token = 'block_begin'
            elif token == 'linestatement_end':
                token = 'block_end'
            # we are not interested in those tokens in the parser
            elif token in ('raw_begin', 'raw_end'):
                continue
            elif token == 'data':
                value = self._normalize_newlines(value)
            elif token == 'keyword':
                token = value
            elif token == 'name':
                value = str(value)
            elif token == 'string':
                # try to unescape the string
                try:
                    value = self._normalize_newlines(value[1:-1]) \
                        .encode('ascii', 'backslashreplace') \
                        .decode('unicode-escape')
                except Exception as e:
                    msg = str(e).split(':')[-1].strip()
                    raise TemplateSyntaxError(msg, lineno, name, filename)
                # if we can express it as a bytestring (ascii only)
                # we do that for support of semi-broken APIs such as
                # datetime.datetime.strftime.  On python 3 this call
                # is a no-op.
                try:
                    value = str(value)
                except UnicodeError:
                    pass
            elif token == 'integer':
                value = int(value)
            elif token == 'float':
                value = float(value)
            elif token == 'operator':
                token = operators[value]
            yield Token(lineno, token, value)
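
    # Sketch of what the 'string' branch above does to a template literal
    # (illustrative only): a raw token value such as '"hello\\nworld"' is
    # stripped of its quotes and unescaped, yielding 'hello\nworld'.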

    def tokeniter(self, source, name, filename=None, state=None):
        """This method tokenizes the text and returns the tokens in a
        generator.  Use this method if you just want to tokenize a template.
        """
        source = text_type(source)
        lines = source.splitlines()
        if self.keep_trailing_newline and source:
            for newline in ('\r\n', '\r', '\n'):
                if source.endswith(newline):
                    lines.append('')
                    break
        source = '\n'.join(lines)
        pos = 0
        lineno = 1
        stack = ['root']
        if state is not None and state != 'root':
            assert state in ('variable', 'block'), 'invalid state'
            stack.append(state + '_begin')
        else:
            state = 'root'
        statetokens = self.rules[stack[-1]]
        source_length = len(source)

        balancing_stack = []

        while 1:
            # tokenizer loop
            for regex, tokens, new_state in statetokens:
                m = regex.match(source, pos)
                # if no match we try again with the next rule
                if m is None:
                    continue

                # we only match blocks and variables if braces / parentheses
                # are balanced. continue parsing with the lower rule which
                # is the operator rule. do this only if the end tags look
                # like operators
                if balancing_stack and \
                   tokens in ('variable_end', 'block_end',
                              'linestatement_end'):
                    continue

                # tuples support more options
                if isinstance(tokens, tuple):
                    for idx, token in enumerate(tokens):
                        # failure group
                        if token.__class__ is Failure:
                            raise token(lineno, filename)
                        # bygroup is a bit more complex, in that case we
                        # yield for the current token the first named
                        # group that matched
                        elif token == '#bygroup':
                            for key, value in iteritems(m.groupdict()):
                                if value is not None:
                                    yield lineno, key, value
                                    lineno += value.count('\n')
                                    break
                            else:
                                raise RuntimeError('%r wanted to resolve '
                                                   'the token dynamically'
                                                   ' but no group matched'
                                                   % regex)
                        # normal group
                        else:
                            data = m.group(idx + 1)
                            if data or token not in ignore_if_empty:
                                yield lineno, token, data
                            lineno += data.count('\n')

                # strings as tokens are just yielded as-is
                else:
                    data = m.group()
                    # update brace/parentheses balance
                    if tokens == 'operator':
                        if data == '{':
                            balancing_stack.append('}')
                        elif data == '(':
                            balancing_stack.append(')')
                        elif data == '[':
                            balancing_stack.append(']')
                        elif data in ('}', ')', ']'):
                            if not balancing_stack:
                                raise TemplateSyntaxError('unexpected \'%s\'' %
                                                          data, lineno, name,
                                                          filename)
                            expected_op = balancing_stack.pop()
                            if expected_op != data:
                                raise TemplateSyntaxError('unexpected \'%s\', '
                                                          'expected \'%s\'' %
                                                          (data, expected_op),
                                                          lineno, name,
                                                          filename)
                    # yield items
                    if data or tokens not in ignore_if_empty:
                        yield lineno, tokens, data
                    lineno += data.count('\n')

                # fetch the new position into a new variable so that we can
                # check if there is an internal parsing error which would
                # result in an infinite loop
                pos2 = m.end()

                # handle state changes
                if new_state is not None:
                    # remove the uppermost state
                    if new_state == '#pop':
                        stack.pop()
                    # resolve the new state by group checking
                    elif new_state == '#bygroup':
                        for key, value in iteritems(m.groupdict()):
                            if value is not None:
                                stack.append(key)
                                break
                        else:
                            raise RuntimeError('%r wanted to resolve the '
                                               'new state dynamically but'
                                               ' no group matched' %
                                               regex)
                    # direct state name given
                    else:
                        stack.append(new_state)
                    statetokens = self.rules[stack[-1]]
                # we are still at the same position and no stack change.
                # this means a loop without break condition, avoid that and
                # raise error
                elif pos2 == pos:
                    raise RuntimeError('%r yielded empty string without '
                                       'stack change' % regex)
                # advance to the new position and start matching again
                pos = pos2
                break
            # if loop terminated without break we haven't found a single match
            # either we are at the end of the file or we have a problem
            else:
                # end of text
                if pos >= source_length:
                    return
                # something went wrong
                raise TemplateSyntaxError('unexpected char %r at %d' %
                                          (source[pos], pos), lineno,
                                          name, filename)
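
# Rough usage sketch (illustrative only, not executed at import time): a
# template is normally tokenized through an Environment, which builds and
# caches the Lexer via get_lexer() and exposes it as ``env.lexer``.
#
#   >>> from jinja2 import Environment
#   >>> stream = Environment().lexer.tokenize('Hello {{ name }}!')
#   >>> [tok.type for tok in stream]
#   ['data', 'variable_begin', 'name', 'variable_end', 'data']
#
# Whitespace inside the variable tag is filtered out by wrap(), and the
# trailing eof token ends the iteration without being yielded.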