# -*- coding: utf-8 -*-
"""
    jinja2.lexer
    ~~~~~~~~~~~~

    This module implements a Jinja / Python combination lexer. The
    `Lexer` class provided by this module is used to do some preprocessing
    for Jinja.

    On the one hand it filters out invalid operators like the bitshift
    operators we don't allow in templates. On the other hand it separates
    template code and python code in expressions.

    :copyright: (c) 2010 by the Jinja Team.
    :license: BSD, see LICENSE for more details.
"""
import re

from operator import itemgetter
from collections import deque
from jinja2.exceptions import TemplateSyntaxError
from jinja2.utils import LRUCache
from jinja2._compat import next, iteritems, implements_iterator, text_type, \
     intern


# cache for the lexers. Exists in order to be able to have multiple
# environments with the same lexer
_lexer_cache = LRUCache(50)

# static regular expressions
whitespace_re = re.compile(r'\s+', re.U)
# single- or double-quoted string literal with backslash escapes; re.S lets
# an escaped character be a newline.
string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                       r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S)
integer_re = re.compile(r'\d+')

# we use the unicode identifier rule if this python version is able
# to handle unicode identifiers, otherwise the standard ASCII one.
try:
    compile('föö', '<unknown>', 'eval')
except SyntaxError:
    name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b')
else:
    from jinja2 import _stringdefs
    name_re = re.compile(r'[%s][%s]*' % (_stringdefs.xid_start,
                                         _stringdefs.xid_continue))

# the negative lookbehind keeps "1.2" inside "0.1.2" from matching as a float
float_re = re.compile(r'(?<!\.)\d+\.\d+')
newline_re = re.compile(r'(\r\n|\r|\n)')

# intern the tokens and keep references to them
TOKEN_ADD = intern('add')
TOKEN_ASSIGN = intern('assign')
TOKEN_COLON = intern('colon')
TOKEN_COMMA = intern('comma')
TOKEN_DIV = intern('div')
TOKEN_DOT = intern('dot')
TOKEN_EQ = intern('eq')
TOKEN_FLOORDIV = intern('floordiv')
TOKEN_GT = intern('gt')
TOKEN_GTEQ = intern('gteq')
TOKEN_LBRACE = intern('lbrace')
TOKEN_LBRACKET = intern('lbracket')
TOKEN_LPAREN = intern('lparen')
TOKEN_LT = intern('lt')
TOKEN_LTEQ = intern('lteq')
TOKEN_MOD = intern('mod')
TOKEN_MUL = intern('mul')
TOKEN_NE = intern('ne')
TOKEN_PIPE = intern('pipe')
TOKEN_POW = intern('pow')
TOKEN_RBRACE = intern('rbrace')
TOKEN_RBRACKET = intern('rbracket')
TOKEN_RPAREN = intern('rparen')
TOKEN_SEMICOLON = intern('semicolon')
TOKEN_SUB = intern('sub')
TOKEN_TILDE = intern('tilde')
TOKEN_WHITESPACE = intern('whitespace')
TOKEN_FLOAT = intern('float')
TOKEN_INTEGER = intern('integer')
TOKEN_NAME = intern('name')
TOKEN_STRING = intern('string')
TOKEN_OPERATOR = intern('operator')
TOKEN_BLOCK_BEGIN = intern('block_begin')
TOKEN_BLOCK_END = intern('block_end')
TOKEN_VARIABLE_BEGIN = intern('variable_begin')
TOKEN_VARIABLE_END = intern('variable_end')
TOKEN_RAW_BEGIN = intern('raw_begin')
TOKEN_RAW_END = intern('raw_end')
TOKEN_COMMENT_BEGIN = intern('comment_begin')
TOKEN_COMMENT_END = intern('comment_end')
TOKEN_COMMENT = intern('comment')
TOKEN_LINESTATEMENT_BEGIN = intern('linestatement_begin')
TOKEN_LINESTATEMENT_END = intern('linestatement_end')
TOKEN_LINECOMMENT_BEGIN = intern('linecomment_begin')
TOKEN_LINECOMMENT_END = intern('linecomment_end')
TOKEN_LINECOMMENT = intern('linecomment')
TOKEN_DATA = intern('data')
TOKEN_INITIAL = intern('initial')
TOKEN_EOF = intern('eof')
101b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 102b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)# bind operators to token types 103b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)operators = { 104b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) '+': TOKEN_ADD, 105b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) '-': TOKEN_SUB, 106b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) '/': TOKEN_DIV, 107b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) '//': TOKEN_FLOORDIV, 108b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) '*': TOKEN_MUL, 109b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) '%': TOKEN_MOD, 110b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) '**': TOKEN_POW, 111b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) '~': TOKEN_TILDE, 112b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) '[': TOKEN_LBRACKET, 113b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) ']': TOKEN_RBRACKET, 114b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) '(': TOKEN_LPAREN, 115b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) ')': TOKEN_RPAREN, 116b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) '{': TOKEN_LBRACE, 117b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) '}': TOKEN_RBRACE, 118b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) '==': TOKEN_EQ, 119b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) '!=': TOKEN_NE, 120b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) '>': TOKEN_GT, 121b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) '>=': TOKEN_GTEQ, 122b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) '<': TOKEN_LT, 123b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) '<=': TOKEN_LTEQ, 124b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) '=': TOKEN_ASSIGN, 
125b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) '.': TOKEN_DOT, 126b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) ':': TOKEN_COLON, 127b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) '|': TOKEN_PIPE, 128b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) ',': TOKEN_COMMA, 129b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) ';': TOKEN_SEMICOLON 130b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)} 131b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 13258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)reverse_operators = dict([(v, k) for k, v in iteritems(operators)]) 133b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)assert len(operators) == len(reverse_operators), 'operators dropped' 134b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in 135b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) sorted(operators, key=lambda x: -len(x)))) 136b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 137b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)ignored_tokens = frozenset([TOKEN_COMMENT_BEGIN, TOKEN_COMMENT, 138b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) TOKEN_COMMENT_END, TOKEN_WHITESPACE, 139b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) TOKEN_WHITESPACE, TOKEN_LINECOMMENT_BEGIN, 140b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) TOKEN_LINECOMMENT_END, TOKEN_LINECOMMENT]) 141b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)ignore_if_empty = frozenset([TOKEN_WHITESPACE, TOKEN_DATA, 142b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) TOKEN_COMMENT, TOKEN_LINECOMMENT]) 143b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 144b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 145b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)def 
_describe_token_type(token_type): 146b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) if token_type in reverse_operators: 147b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) return reverse_operators[token_type] 148b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) return { 149b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) TOKEN_COMMENT_BEGIN: 'begin of comment', 150b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) TOKEN_COMMENT_END: 'end of comment', 151b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) TOKEN_COMMENT: 'comment', 152b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) TOKEN_LINECOMMENT: 'comment', 153b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) TOKEN_BLOCK_BEGIN: 'begin of statement block', 154b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) TOKEN_BLOCK_END: 'end of statement block', 155b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) TOKEN_VARIABLE_BEGIN: 'begin of print statement', 156b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) TOKEN_VARIABLE_END: 'end of print statement', 157b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) TOKEN_LINESTATEMENT_BEGIN: 'begin of line statement', 158b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) TOKEN_LINESTATEMENT_END: 'end of line statement', 159b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) TOKEN_DATA: 'template data / text', 160b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) TOKEN_EOF: 'end of template' 161b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) }.get(token_type, token_type) 162b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 163b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 164b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)def describe_token(token): 165b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """Returns a description of the 
token.""" 166b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) if token.type == 'name': 167b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) return token.value 168b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) return _describe_token_type(token.type) 169b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 170b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 171b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)def describe_token_expr(expr): 172b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """Like `describe_token` but for token expressions.""" 173b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) if ':' in expr: 174b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) type, value = expr.split(':', 1) 175b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) if type == 'name': 176b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) return value 177b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) else: 178b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) type = expr 179b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) return _describe_token_type(type) 180b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 181b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 182b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)def count_newlines(value): 183b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """Count the number of newline characters in the string. This is 184b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) useful for extensions that filter a stream. 
185b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """ 186b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) return len(newline_re.findall(value)) 187b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 188b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 189b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)def compile_rules(environment): 190b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """Compiles all the rules from the environment into a list of rules.""" 191b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) e = re.escape 192b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) rules = [ 193b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) (len(environment.comment_start_string), 'comment', 194b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) e(environment.comment_start_string)), 195b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) (len(environment.block_start_string), 'block', 196b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) e(environment.block_start_string)), 197b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) (len(environment.variable_start_string), 'variable', 198b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) e(environment.variable_start_string)) 199b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) ] 200b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 201b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) if environment.line_statement_prefix is not None: 202b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) rules.append((len(environment.line_statement_prefix), 'linestatement', 20358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) r'^[ \t\v]*' + e(environment.line_statement_prefix))) 204b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) if environment.line_comment_prefix is not None: 
205b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) rules.append((len(environment.line_comment_prefix), 'linecomment', 206b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) r'(?:^|(?<=\S))[^\S\r\n]*' + 207b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) e(environment.line_comment_prefix))) 208b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 209b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) return [x[1:] for x in sorted(rules, reverse=True)] 210b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 211b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 212b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)class Failure(object): 213b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """Class that raises a `TemplateSyntaxError` if called. 214b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) Used by the `Lexer` to specify known errors. 215b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """ 216b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 217b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) def __init__(self, message, cls=TemplateSyntaxError): 218b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) self.message = message 219b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) self.error_class = cls 220b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 221b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) def __call__(self, lineno, filename): 222b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) raise self.error_class(self.message, lineno, filename) 223b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 224b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 225b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)class Token(tuple): 226b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """Token class.""" 
227b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) __slots__ = () 228b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) lineno, type, value = (property(itemgetter(x)) for x in range(3)) 229b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 230b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) def __new__(cls, lineno, type, value): 231b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) return tuple.__new__(cls, (lineno, intern(str(type)), value)) 232b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 233b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) def __str__(self): 234b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) if self.type in reverse_operators: 235b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) return reverse_operators[self.type] 236b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) elif self.type == 'name': 237b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) return self.value 238b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) return self.type 239b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 240b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) def test(self, expr): 241b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """Test a token against a token expression. This can either be a 242b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) token type or ``'token_type:token_value'``. This can only test 243b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) against string values and types. 244b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """ 245b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # here we do a regular string equality check as test_any is usually 246b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # passed an iterable of not interned strings. 
247b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) if self.type == expr: 248b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) return True 249b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) elif ':' in expr: 250b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) return expr.split(':', 1) == [self.type, self.value] 251b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) return False 252b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 253b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) def test_any(self, *iterable): 254b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """Test against multiple token expressions.""" 255b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) for expr in iterable: 256b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) if self.test(expr): 257b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) return True 258b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) return False 259b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 260b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) def __repr__(self): 261b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) return 'Token(%r, %r, %r)' % ( 262b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) self.lineno, 263b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) self.type, 264b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) self.value 265b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) ) 266b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 267b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 26858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)@implements_iterator 269b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)class TokenStreamIterator(object): 270b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """The iterator for tokenstreams. 
Iterate over the stream 271b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) until the eof token is reached. 272b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """ 273b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 274b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) def __init__(self, stream): 275b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) self.stream = stream 276b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 277b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) def __iter__(self): 278b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) return self 279b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 28058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) def __next__(self): 281b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) token = self.stream.current 282b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) if token.type is TOKEN_EOF: 283b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) self.stream.close() 284b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) raise StopIteration() 285b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) next(self.stream) 286b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) return token 287b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 288b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 28958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)@implements_iterator 290b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)class TokenStream(object): 291b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """A token stream is an iterable that yields :class:`Token`\s. The 292b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) parser however does not iterate over it but calls :meth:`next` to go 293b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) one token ahead. 
The current active token is stored as :attr:`current`. 294b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """ 295b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 296b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) def __init__(self, generator, name, filename): 29758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) self._iter = iter(generator) 298b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) self._pushed = deque() 299b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) self.name = name 300b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) self.filename = filename 301b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) self.closed = False 302b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) self.current = Token(1, TOKEN_INITIAL, '') 303b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) next(self) 304b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 305b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) def __iter__(self): 306b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) return TokenStreamIterator(self) 307b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 30858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) def __bool__(self): 309b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) return bool(self._pushed) or self.current.type is not TOKEN_EOF 31058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) __nonzero__ = __bool__ # py2 311b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 312b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) eos = property(lambda x: not x, doc="Are we at the end of the stream?") 313b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 314b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) def push(self, token): 315b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """Push a token back to the 
stream.""" 316b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) self._pushed.append(token) 317b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 318b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) def look(self): 319b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """Look at the next token.""" 320b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) old_token = next(self) 321b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) result = self.current 322b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) self.push(result) 323b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) self.current = old_token 324b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) return result 325b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 326b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) def skip(self, n=1): 327b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """Got n tokens ahead.""" 32858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) for x in range(n): 329b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) next(self) 330b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 331b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) def next_if(self, expr): 332b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """Perform the token test and return the token if it matched. 333b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) Otherwise the return value is `None`. 
334b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """ 335b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) if self.current.test(expr): 336b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) return next(self) 337b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 338b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) def skip_if(self, expr): 339b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """Like :meth:`next_if` but only returns `True` or `False`.""" 340b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) return self.next_if(expr) is not None 341b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 34258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) def __next__(self): 343b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """Go one token ahead and return the old one""" 344b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) rv = self.current 345b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) if self._pushed: 346b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) self.current = self._pushed.popleft() 347b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) elif self.current.type is not TOKEN_EOF: 348b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) try: 34958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) self.current = next(self._iter) 350b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) except StopIteration: 351b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) self.close() 352b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) return rv 353b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 354b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) def close(self): 355b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """Close the stream.""" 356b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) self.current = 
Token(self.current.lineno, TOKEN_EOF, '') 35758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) self._iter = None 358b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) self.closed = True 359b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 360b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) def expect(self, expr): 361b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """Expect a given token type and return it. This accepts the same 362b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) argument as :meth:`jinja2.lexer.Token.test`. 363b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """ 364b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) if not self.current.test(expr): 365b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) expr = describe_token_expr(expr) 366b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) if self.current.type is TOKEN_EOF: 367b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) raise TemplateSyntaxError('unexpected end of template, ' 368b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 'expected %r.' 
% expr, 369b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) self.current.lineno, 370b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) self.name, self.filename) 371b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) raise TemplateSyntaxError("expected token %r, got %r" % 372b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) (expr, describe_token(self.current)), 373b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) self.current.lineno, 374b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) self.name, self.filename) 375b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) try: 376b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) return self.current 377b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) finally: 378b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) next(self) 379b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 380b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 381b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)def get_lexer(environment): 382b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """Return a lexer which is probably cached.""" 383b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) key = (environment.block_start_string, 384b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) environment.block_end_string, 385b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) environment.variable_start_string, 386b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) environment.variable_end_string, 387b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) environment.comment_start_string, 388b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) environment.comment_end_string, 389b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) environment.line_statement_prefix, 390b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 
environment.line_comment_prefix, 391b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) environment.trim_blocks, 39258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) environment.lstrip_blocks, 39358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) environment.newline_sequence, 39458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) environment.keep_trailing_newline) 395b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) lexer = _lexer_cache.get(key) 396b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) if lexer is None: 397b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) lexer = Lexer(environment) 398b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) _lexer_cache[key] = lexer 399b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) return lexer 400b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 401b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 402b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)class Lexer(object): 403b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """Class that implements a lexer for a given environment. Automatically 404b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) created by the environment class, usually you don't have to do that. 405b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 406b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) Note that the lexer is not automatically bound to an environment. 407b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) Multiple environments can share the same lexer. 
408b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """ 409b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 410b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) def __init__(self, environment): 411b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # shortcuts 412b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) c = lambda x: re.compile(x, re.M | re.S) 413b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) e = re.escape 414b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 415b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # lexing rules for tags 416b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) tag_rules = [ 417b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) (whitespace_re, TOKEN_WHITESPACE, None), 418b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) (float_re, TOKEN_FLOAT, None), 419b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) (integer_re, TOKEN_INTEGER, None), 420b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) (name_re, TOKEN_NAME, None), 421b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) (string_re, TOKEN_STRING, None), 422b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) (operator_re, TOKEN_OPERATOR, None) 423b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) ] 424b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 42558537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) # assemble the root lexing rule. because "|" is ungreedy 426b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # we have to sort by length so that the lexer continues working 427b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # as expected when we have parsing rules like <% for block and 428b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # <%= for variables. 
(if someone wants asp like syntax) 429b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # variables are just part of the rules if variable processing 430b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # is required. 431b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) root_tag_rules = compile_rules(environment) 432b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 433b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # block suffix if trimming is enabled 434b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) block_suffix_re = environment.trim_blocks and '\\n?' or '' 435b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 43658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) # strip leading spaces if lstrip_blocks is enabled 43758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) prefix_re = {} 43858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) if environment.lstrip_blocks: 43958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) # use '{%+' to manually disable lstrip_blocks behavior 44058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) no_lstrip_re = e('+') 44158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) # detect overlap between block and variable or comment strings 44258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) block_diff = c(r'^%s(.*)' % e(environment.block_start_string)) 44358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) # make sure we don't mistake a block for a variable or a comment 44458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) m = block_diff.match(environment.comment_start_string) 44558537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) no_lstrip_re += m and r'|%s' % e(m.group(1)) or '' 44658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) m = block_diff.match(environment.variable_start_string) 44758537e28ecd584eab876aee8be7156509866d23aTorne 
(Richard Coles) no_lstrip_re += m and r'|%s' % e(m.group(1)) or '' 44858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) 44958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) # detect overlap between comment and variable strings 45058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) comment_diff = c(r'^%s(.*)' % e(environment.comment_start_string)) 45158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) m = comment_diff.match(environment.variable_start_string) 45258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) no_variable_re = m and r'(?!%s)' % e(m.group(1)) or '' 45358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) 45458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) lstrip_re = r'^[ \t]*' 45558537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) block_prefix_re = r'%s%s(?!%s)|%s\+?' % ( 45658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) lstrip_re, 45758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) e(environment.block_start_string), 45858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) no_lstrip_re, 45958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) e(environment.block_start_string), 46058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) ) 46158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) comment_prefix_re = r'%s%s%s|%s\+?' 
% ( 46258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) lstrip_re, 46358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) e(environment.comment_start_string), 46458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) no_variable_re, 46558537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) e(environment.comment_start_string), 46658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) ) 46758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) prefix_re['block'] = block_prefix_re 46858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) prefix_re['comment'] = comment_prefix_re 46958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) else: 47058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) block_prefix_re = '%s' % e(environment.block_start_string) 47158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) 472b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) self.newline_sequence = environment.newline_sequence 47358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) self.keep_trailing_newline = environment.keep_trailing_newline 474b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 475b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # global lexing rules 476b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) self.rules = { 477b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 'root': [ 478b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # directives 479b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) (c('(.*?)(?:%s)' % '|'.join( 480b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) [r'(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*(?:\-%s\s*|%s))' % ( 481b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) e(environment.block_start_string), 48258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) block_prefix_re, 483b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 
e(environment.block_end_string), 484b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) e(environment.block_end_string) 485b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) )] + [ 48658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) r'(?P<%s_begin>\s*%s\-|%s)' % (n, r, prefix_re.get(n,r)) 487b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) for n, r in root_tag_rules 488b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) ])), (TOKEN_DATA, '#bygroup'), '#bygroup'), 489b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # data 490b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) (c('.+'), TOKEN_DATA, None) 491b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) ], 492b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # comments 493b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) TOKEN_COMMENT_BEGIN: [ 494b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) (c(r'(.*?)((?:\-%s\s*|%s)%s)' % ( 495b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) e(environment.comment_end_string), 496b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) e(environment.comment_end_string), 497b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) block_suffix_re 498b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) )), (TOKEN_COMMENT, TOKEN_COMMENT_END), '#pop'), 499b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) (c('(.)'), (Failure('Missing end of comment tag'),), None) 500b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) ], 501b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # blocks 502b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) TOKEN_BLOCK_BEGIN: [ 503b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) (c('(?:\-%s\s*|%s)%s' % ( 504b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) e(environment.block_end_string), 
505b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) e(environment.block_end_string), 506b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) block_suffix_re 507b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) )), TOKEN_BLOCK_END, '#pop'), 508b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) ] + tag_rules, 509b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # variables 510b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) TOKEN_VARIABLE_BEGIN: [ 511b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) (c('\-%s\s*|%s' % ( 512b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) e(environment.variable_end_string), 513b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) e(environment.variable_end_string) 514b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) )), TOKEN_VARIABLE_END, '#pop') 515b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) ] + tag_rules, 516b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # raw block 517b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) TOKEN_RAW_BEGIN: [ 518b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) (c('(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % ( 519b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) e(environment.block_start_string), 52058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) block_prefix_re, 521b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) e(environment.block_end_string), 522b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) e(environment.block_end_string), 523b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) block_suffix_re 524b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) )), (TOKEN_DATA, TOKEN_RAW_END), '#pop'), 525b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) (c('(.)'), (Failure('Missing end of raw directive'),), None) 
526b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) ], 527b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # line statements 528b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) TOKEN_LINESTATEMENT_BEGIN: [ 529b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) (c(r'\s*(\n|$)'), TOKEN_LINESTATEMENT_END, '#pop') 530b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) ] + tag_rules, 531b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # line comments 532b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) TOKEN_LINECOMMENT_BEGIN: [ 533b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) (c(r'(.*?)()(?=\n|$)'), (TOKEN_LINECOMMENT, 534b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) TOKEN_LINECOMMENT_END), '#pop') 535b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) ] 536b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) } 537b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 538b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) def _normalize_newlines(self, value): 53958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) """Called for strings and template data to normalize it to unicode.""" 540b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) return newline_re.sub(self.newline_sequence, value) 541b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 542b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) def tokenize(self, source, name=None, filename=None, state=None): 543b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """Calls tokeniter + tokenize and wraps it in a token stream. 
544b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """ 545b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) stream = self.tokeniter(source, name, filename, state) 546b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) return TokenStream(self.wrap(stream, name, filename), name, filename) 547b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 548b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) def wrap(self, stream, name=None, filename=None): 549b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """This is called with the stream as returned by `tokenize` and wraps 550b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) every token in a :class:`Token` and converts the value. 551b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """ 552b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) for lineno, token, value in stream: 553b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) if token in ignored_tokens: 554b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) continue 555b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) elif token == 'linestatement_begin': 556b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) token = 'block_begin' 557b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) elif token == 'linestatement_end': 558b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) token = 'block_end' 559b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # we are not interested in those tokens in the parser 560b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) elif token in ('raw_begin', 'raw_end'): 561b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) continue 562b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) elif token == 'data': 563b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) value = self._normalize_newlines(value) 
564b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) elif token == 'keyword': 565b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) token = value 566b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) elif token == 'name': 567b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) value = str(value) 568b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) elif token == 'string': 569b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # try to unescape string 570b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) try: 571b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) value = self._normalize_newlines(value[1:-1]) \ 572b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) .encode('ascii', 'backslashreplace') \ 573b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) .decode('unicode-escape') 57458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) except Exception as e: 575b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) msg = str(e).split(':')[-1].strip() 576b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) raise TemplateSyntaxError(msg, lineno, name, filename) 577b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # if we can express it as bytestring (ascii only) 578b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # we do that for support of semi broken APIs 579b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # as datetime.datetime.strftime. 
On python 3 this 580b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # call becomes a noop thanks to 2to3 581b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) try: 582b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) value = str(value) 583b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) except UnicodeError: 584b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) pass 585b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) elif token == 'integer': 586b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) value = int(value) 587b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) elif token == 'float': 588b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) value = float(value) 589b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) elif token == 'operator': 590b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) token = operators[value] 591b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) yield Token(lineno, token, value) 592b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 593b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) def tokeniter(self, source, name, filename=None, state=None): 594b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """This method tokenizes the text and returns the tokens in a 595b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) generator. Use this method if you just want to tokenize a template. 
596b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) """ 59758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) source = text_type(source) 59858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) lines = source.splitlines() 59958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) if self.keep_trailing_newline and source: 60058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) for newline in ('\r\n', '\r', '\n'): 60158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) if source.endswith(newline): 60258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) lines.append('') 60358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) break 60458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) source = '\n'.join(lines) 605b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) pos = 0 606b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) lineno = 1 607b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) stack = ['root'] 608b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) if state is not None and state != 'root': 609b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) assert state in ('variable', 'block'), 'invalid state' 610b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) stack.append(state + '_begin') 611b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) else: 612b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) state = 'root' 613b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) statetokens = self.rules[stack[-1]] 614b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) source_length = len(source) 615b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 616b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) balancing_stack = [] 617b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 618b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) while 1: 
619b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # tokenizer loop 620b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) for regex, tokens, new_state in statetokens: 621b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) m = regex.match(source, pos) 622b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # if no match we try again with the next rule 623b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) if m is None: 624b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) continue 625b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 62658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) # we only match blocks and variables if braces / parentheses 627b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # are balanced. continue parsing with the lower rule which 628b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # is the operator rule. do this only if the end tags look 629b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # like operators 630b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) if balancing_stack and \ 631b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) tokens in ('variable_end', 'block_end', 632b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 'linestatement_end'): 633b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) continue 634b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 635b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # tuples support more options 636b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) if isinstance(tokens, tuple): 637b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) for idx, token in enumerate(tokens): 638b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # failure group 639b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) if token.__class__ is Failure: 
640b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) raise token(lineno, filename) 641b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # bygroup is a bit more complex, in that case we 642b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # yield for the current token the first named 643b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # group that matched 644b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) elif token == '#bygroup': 64558537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) for key, value in iteritems(m.groupdict()): 646b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) if value is not None: 647b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) yield lineno, key, value 648b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) lineno += value.count('\n') 649b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) break 650b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) else: 651b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) raise RuntimeError('%r wanted to resolve ' 652b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 'the token dynamically' 653b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) ' but no group matched' 654b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) % regex) 655b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # normal group 656b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) else: 657b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) data = m.group(idx + 1) 658b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) if data or token not in ignore_if_empty: 659b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) yield lineno, token, data 660b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) lineno += data.count('\n') 661b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 
662b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # strings as token just are yielded as it. 663b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) else: 664b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) data = m.group() 665b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # update brace/parentheses balance 666b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) if tokens == 'operator': 667b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) if data == '{': 668b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) balancing_stack.append('}') 669b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) elif data == '(': 670b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) balancing_stack.append(')') 671b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) elif data == '[': 672b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) balancing_stack.append(']') 673b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) elif data in ('}', ')', ']'): 674b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) if not balancing_stack: 675b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) raise TemplateSyntaxError('unexpected \'%s\'' % 676b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) data, lineno, name, 677b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) filename) 678b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) expected_op = balancing_stack.pop() 679b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) if expected_op != data: 680b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) raise TemplateSyntaxError('unexpected \'%s\', ' 681b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 'expected \'%s\'' % 682b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) (data, expected_op), 683b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) lineno, name, 
684b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) filename) 685b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # yield items 686b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) if data or tokens not in ignore_if_empty: 687b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) yield lineno, tokens, data 688b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) lineno += data.count('\n') 689b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 690b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # fetch new position into new variable so that we can check 691b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # if there is a internal parsing error which would result 692b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # in an infinite loop 693b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) pos2 = m.end() 694b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 695b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # handle state changes 696b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) if new_state is not None: 697b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # remove the uppermost state 698b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) if new_state == '#pop': 699b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) stack.pop() 700b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # resolve the new state by group checking 701b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) elif new_state == '#bygroup': 70258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) for key, value in iteritems(m.groupdict()): 703b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) if value is not None: 704b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) stack.append(key) 705b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) break 
706b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) else: 707b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) raise RuntimeError('%r wanted to resolve the ' 708b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 'new state dynamically but' 709b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) ' no group matched' % 710b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) regex) 711b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # direct state name given 712b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) else: 713b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) stack.append(new_state) 714b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) statetokens = self.rules[stack[-1]] 715b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # we are still at the same position and no stack change. 716b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # this means a loop without break condition, avoid that and 717b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # raise error 718b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) elif pos2 == pos: 719b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) raise RuntimeError('%r yielded empty string without ' 720b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) 'stack change' % regex) 721b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # publish new function and start again 722b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) pos = pos2 723b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) break 72458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) # if loop terminated without break we haven't found a single match 725b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # either we are at the end of the file or we have a problem 726b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) else: 
727b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # end of text 728b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) if pos >= source_length: 729b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) return 730b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) # something went wrong 731b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) raise TemplateSyntaxError('unexpected char %r at %d' % 732b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) (source[pos], pos), lineno, 733b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) name, filename) 734