1# cython: language_level=3, py2_import=True
2#
3#   Cython Scanner - Lexical Definitions
4#
5
# Single-character prefixes that may precede the opening quote of a
# string literal, grouped by kind.
raw_prefixes = "rR"
bytes_prefixes = "bB"
char_prefixes = "cC"
string_prefixes = "".join(("uU", bytes_prefixes))

# Every prefix character accepted in front of a string literal.
any_string_prefix = "".join((raw_prefixes, string_prefixes, char_prefixes))

# Token value attached to the identifier pattern in the lexicon.
IDENT = 'IDENT'
12
def make_lexicon():
    """Build and return the Plex ``Lexicon`` holding the scanner's patterns.

    Patterns are assembled bottom-up from character classes.  The resulting
    specification lists the default-state rules first, then the 'INDENT'
    state, the four string-body states ('SQ_STRING', 'DQ_STRING',
    'TSQ_STRING', 'TDQ_STRING') and finally the end-of-file rule.  The order
    of the entries is kept exactly as specified.
    """
    from Cython.Plex import (
        Str, Any, AnyBut, AnyChar, Rep, Rep1, Opt, Bol, Eol, Eof,
        TEXT, IGNORE, State, Lexicon)
    from Scanning import Method

    # ---- character classes -------------------------------------------
    ident_start = Any("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_")
    dec_digit = Any("0123456789")
    bin_digit = Any("01")
    oct_digit = Any("01234567")
    hex_digit = Any("0123456789ABCDEFabcdef")

    # Leading blanks/tabs anchored at the beginning of a line.
    leading_ws = Bol + Rep(Any(" \t"))

    # ---- identifiers and numbers -------------------------------------
    identifier = ident_start + Rep(ident_start | dec_digit)

    digits = Rep1(dec_digit)
    point = Str(".")
    exp_part = Any("Ee") + Opt(Any("+-")) + digits
    fraction = (digits + point + Opt(digits)) | (point + digits)

    hex_tail = Any("Xx") + Rep1(hex_digit)
    oct_tail = Any("Oo") + Rep1(oct_digit)
    bin_tail = Any("Bb") + Rep1(bin_digit)
    int_const = digits | (Str("0") + (hex_tail | oct_tail | bin_tail))

    # Optional U and up to two L letters, with the U either first or last.
    opt_u = Opt(Any("Uu"))
    opt_l = Opt(Any("Ll"))
    int_suffix = (opt_u + opt_l + opt_l) | (opt_l + opt_l + opt_u)

    int_literal = int_const + int_suffix
    float_const = (fraction + Opt(exp_part)) | (digits + exp_part)
    imag_const = (int_const | float_const) + Any("jJ")

    # ---- string openers and escape sequences -------------------------
    prefix_then_raw = Any(string_prefixes) + Opt(Any(raw_prefixes))
    raw_then_bytes = Any(raw_prefixes) + Opt(Any(bytes_prefixes))
    char_only = Any(char_prefixes)
    opening_quote = Str("'") | Str('"') | Str("'''") | Str('"""')
    string_start = (
        Opt(prefix_then_raw | raw_then_bytes | char_only) + opening_quote)

    oct2 = oct_digit + oct_digit
    oct3 = oct_digit + oct_digit + oct_digit
    hex2 = hex_digit + hex_digit
    hex4 = hex2 + hex2
    named_escape = Str('N{') + Rep(AnyBut('}')) + Str('}')
    u4_escape = Str('u') + hex4
    x2_escape = Str('x') + hex2
    u8_escape = Str('U') + hex4 + hex4
    escape = Str("\\") + (oct2 | oct3 | named_escape | u4_escape |
                          x2_escape | u8_escape | AnyChar)

    # ---- punctuation, whitespace, comments ---------------------------
    at_sign = Str("@")
    opener = Any("([{")
    closer = Any(")]}")
    single_op = Any(":,;+-*/|&<>=.%`~^?!")
    multi_op = Str("==", "<>", "!=", "<=", ">=", "<<", ">>", "**", "//",
                   "+=", "-=", "*=", "/=", "%=", "|=", "^=", "&=",
                   "<<=", ">>=", "**=", "//=", "->")
    blanks = Rep1(Any(" \t\f"))
    line_continuation = Str("\\\n")
    end_of_line = Eol + Opt(Str("\n"))
    hash_comment = Str("#") + Rep(AnyBut("\n"))

    # ---- named scanner states ----------------------------------------
    indent_state = State('INDENT', [
        (hash_comment + end_of_line, Method('commentline')),
        (Opt(blanks) + Opt(hash_comment) + end_of_line, IGNORE),
        (leading_ws, Method('indentation_action')),
        (Eof, Method('eof_action'))
    ])

    sq_state = State('SQ_STRING', [
        (escape, 'ESCAPE'),
        (Rep1(AnyBut("'\"\n\\")), 'CHARS'),
        (Str('"'), 'CHARS'),
        (Str("\n"), Method('unclosed_string_action')),
        (Str("'"), Method('end_string_action')),
        (Eof, 'EOF')
    ])

    dq_state = State('DQ_STRING', [
        (escape, 'ESCAPE'),
        (Rep1(AnyBut('"\n\\')), 'CHARS'),
        (Str("'"), 'CHARS'),
        (Str("\n"), Method('unclosed_string_action')),
        (Str('"'), Method('end_string_action')),
        (Eof, 'EOF')
    ])

    tsq_state = State('TSQ_STRING', [
        (escape, 'ESCAPE'),
        (Rep1(AnyBut("'\"\n\\")), 'CHARS'),
        (Any("'\""), 'CHARS'),
        (Str("\n"), 'NEWLINE'),
        (Str("'''"), Method('end_string_action')),
        (Eof, 'EOF')
    ])

    tdq_state = State('TDQ_STRING', [
        (escape, 'ESCAPE'),
        (Rep1(AnyBut('"\'\n\\')), 'CHARS'),
        (Any("'\""), 'CHARS'),
        (Str("\n"), 'NEWLINE'),
        (Str('"""'), Method('end_string_action')),
        (Eof, 'EOF')
    ])

    # ---- full specification; entry order is kept as-is ---------------
    rules = [
        (identifier, IDENT),
        (int_literal, 'INT'),
        (float_const, 'FLOAT'),
        (imag_const, 'IMAG'),
        (at_sign, 'DECORATOR'),
        (single_op | multi_op, TEXT),

        (opener, Method('open_bracket_action')),
        (closer, Method('close_bracket_action')),
        (end_of_line, Method('newline_action')),

        (string_start, Method('begin_string_action')),

        (hash_comment, IGNORE),
        (blanks, IGNORE),
        (line_continuation, IGNORE),

        indent_state,
        sq_state,
        dq_state,
        tsq_state,
        tdq_state,

        (Eof, Method('eof_action'))
    ]

    # FIXME: Plex 1.9 needs different args here from Plex 1.1.4
    # (the debug_flags = scanner_debug_flags and
    # debug_file = scanner_dump_file arguments are therefore not passed).
    return Lexicon(rules)
134
135