1c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
2c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Tokenizer implementation */
3c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
4c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "Python.h"
5c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "pgenheaders.h"
6c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
7c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include <ctype.h>
8c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include <assert.h>
9c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
10c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "tokenizer.h"
11c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "errcode.h"
12c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
13c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifndef PGEN
14c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "unicodeobject.h"
15c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "stringobject.h"
16c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "fileobject.h"
17c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "codecs.h"
18c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "abstract.h"
19c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "pydebug.h"
20c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif /* PGEN */
21c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
22c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielextern char *PyOS_Readline(FILE *, FILE *, char *);
23c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Return malloc'ed string including trailing \n;
24c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel   empty malloc'ed string for EOF;
25c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel   NULL if interrupted */
26c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
27c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Don't ever change this -- it would break the portability of Python code */
28c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#define TABSIZE 8
29c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
30c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Forward */
31c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic struct tok_state *tok_new(void);
32c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int tok_nextc(struct tok_state *tok);
33c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic void tok_backup(struct tok_state *tok, int c);
34c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
/* Token names.
 *
 * Printable name of every token type, indexed by token number; the
 * comment at the start of each row gives the index of its first entry.
 * This table must match the #defines in token.h!
 */

char *_PyParser_TokenNames[] = {
    /*  0 */ "ENDMARKER", "NAME", "NUMBER", "STRING", "NEWLINE",
    /*  5 */ "INDENT", "DEDENT", "LPAR", "RPAR", "LSQB",
    /* 10 */ "RSQB", "COLON", "COMMA", "SEMI", "PLUS",
    /* 15 */ "MINUS", "STAR", "SLASH", "VBAR", "AMPER",
    /* 20 */ "LESS", "GREATER", "EQUAL", "DOT", "PERCENT",
    /* 25 */ "BACKQUOTE", "LBRACE", "RBRACE", "EQEQUAL", "NOTEQUAL",
    /* 30 */ "LESSEQUAL", "GREATEREQUAL", "TILDE", "CIRCUMFLEX",
             "LEFTSHIFT",
    /* 35 */ "RIGHTSHIFT", "DOUBLESTAR", "PLUSEQUAL", "MINEQUAL",
             "STAREQUAL",
    /* 40 */ "SLASHEQUAL", "PERCENTEQUAL", "AMPEREQUAL", "VBAREQUAL",
             "CIRCUMFLEXEQUAL",
    /* 45 */ "LEFTSHIFTEQUAL", "RIGHTSHIFTEQUAL", "DOUBLESTAREQUAL",
             "DOUBLESLASH", "DOUBLESLASHEQUAL",
    /* 50 */ "AT",
    /* 51 */ "OP",
    /* 52 */ "<ERRORTOKEN>",
    /* 53 */ "<N_TOKENS>"
};
94c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
95c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Create and initialize a new tok_state structure */
96c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
97c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic struct tok_state *
98c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanieltok_new(void)
99c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
100c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    struct tok_state *tok = (struct tok_state *)PyMem_MALLOC(
101c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                                            sizeof(struct tok_state));
102c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (tok == NULL)
103c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return NULL;
104c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
105c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->done = E_OK;
106c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->fp = NULL;
107c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->input = NULL;
108c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->tabsize = TABSIZE;
109c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->indent = 0;
110c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->indstack[0] = 0;
111c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->atbol = 1;
112c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->pendin = 0;
113c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->prompt = tok->nextprompt = NULL;
114c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->lineno = 0;
115c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->level = 0;
116c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->filename = NULL;
117c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->altwarning = 0;
118c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->alterror = 0;
119c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->alttabsize = 1;
120c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->altindstack[0] = 0;
121c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->decoding_state = 0;
122c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->decoding_erred = 0;
123c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->read_coding_spec = 0;
124c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->encoding = NULL;
125c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->cont_line = 0;
126c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifndef PGEN
127c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->decoding_readline = NULL;
128c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->decoding_buffer = NULL;
129c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif
130c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return tok;
131c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
132c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
133c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic char *
134c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielnew_string(const char *s, Py_ssize_t len)
135c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
136c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    char* result = (char *)PyMem_MALLOC(len + 1);
137c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (result != NULL) {
138c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        memcpy(result, s, len);
139c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        result[len] = '\0';
140c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
141c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return result;
142c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
143c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
144c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifdef PGEN
145c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
146c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic char *
147c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanieldecoding_fgets(char *s, int size, struct tok_state *tok)
148c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
149c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return fgets(s, size, tok->fp);
150c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
151c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
152c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int
153c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanieldecoding_feof(struct tok_state *tok)
154c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
155c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return feof(tok->fp);
156c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
157c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
/* PGEN build: no decoding is done; return a freshly allocated copy of
   STR (NULL if the allocation fails).  EXEC_INPUT and TOK are unused. */
static char *
decode_str(const char *str, int exec_input, struct tok_state *tok)
{
    size_t length = strlen(str);

    return new_string(str, length);
}
163c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
164c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#else /* PGEN */
165c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
166c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic char *
167c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielerror_ret(struct tok_state *tok) /* XXX */
168c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
169c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->decoding_erred = 1;
170c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */
171c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        PyMem_FREE(tok->buf);
172c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->buf = NULL;
173c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return NULL;                /* as if it were EOF */
174c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
175c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
176c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
/* Normalize the common spellings of the UTF-8 and Latin-1 codec names.

   At most the first 12 characters of S are examined: they are
   lower-cased and every '_' is replaced by '-' before comparison.

   Returns:
     "utf-8"       for "utf-8" or anything starting with "utf-8-";
     "iso-8859-1"  for "latin-1", "iso-8859-1", "iso-latin-1", or anything
                   starting with one of those followed by '-';
     S itself      for every other name.

   The two canonical names are string literals, so a caller must only
   treat the result as owned memory when it is identical to S. */
static char *
get_normal_name(char *s)        /* for utf-8 and latin-1 */
{
    char buf[13];
    int i;
    for (i = 0; i < 12; i++) {
        /* Go through unsigned char: handing tolower() a negative value
           (a byte >= 0x80 where plain char is signed) is undefined. */
        unsigned char c = (unsigned char)s[i];
        if (c == '\0')
            break;
        else if (c == '_')
            buf[i] = '-';
        else
            buf[i] = (char)tolower(c);
    }
    buf[i] = '\0';
    if (strcmp(buf, "utf-8") == 0 ||
        strncmp(buf, "utf-8-", 6) == 0)
        return "utf-8";
    else if (strcmp(buf, "latin-1") == 0 ||
             strcmp(buf, "iso-8859-1") == 0 ||
             strcmp(buf, "iso-latin-1") == 0 ||
             strncmp(buf, "latin-1-", 8) == 0 ||
             strncmp(buf, "iso-8859-1-", 11) == 0 ||
             strncmp(buf, "iso-latin-1-", 12) == 0)
        return "iso-8859-1";
    else
        return s;
}
205c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
206c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Return the coding spec in S, or NULL if none is found.  */
207c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
208c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic char *
209c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielget_coding_spec(const char *s, Py_ssize_t size)
210c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
211c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    Py_ssize_t i;
212c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    /* Coding spec must be in a comment, and that comment must be
213c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel     * the only statement on the source code line. */
214c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    for (i = 0; i < size - 6; i++) {
215c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (s[i] == '#')
216c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            break;
217c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014')
218c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            return NULL;
219c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
220c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    for (; i < size - 6; i++) { /* XXX inefficient search */
221c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        const char* t = s + i;
222c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (strncmp(t, "coding", 6) == 0) {
223c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            const char* begin = NULL;
224c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            t += 6;
225c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (t[0] != ':' && t[0] != '=')
226c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                continue;
227c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            do {
228c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                t++;
229c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            } while (t[0] == '\x20' || t[0] == '\t');
230c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
231c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            begin = t;
232c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            while (Py_ISALNUM(t[0]) ||
233c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                   t[0] == '-' || t[0] == '_' || t[0] == '.')
234c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                t++;
235c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
236c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (begin < t) {
237c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                char* r = new_string(begin, t - begin);
238c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                char* q = get_normal_name(r);
239c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (r != q) {
240c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    PyMem_FREE(r);
241c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    r = new_string(q, strlen(q));
242c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
243c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                return r;
244c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
245c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
246c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
247c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return NULL;
248c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
249c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
250c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Check whether the line contains a coding spec. If it does,
251c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel   invoke the set_readline function for the new encoding.
252c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel   This function receives the tok_state and the new encoding.
253c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel   Return 1 on success, 0 on failure.  */
254c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
255c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int
256c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielcheck_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok,
257c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                  int set_readline(struct tok_state *, const char *))
258c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
259c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    char * cs;
260c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    int r = 1;
261c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
262c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (tok->cont_line) {
263c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        /* It's a continuation line, so it can't be a coding spec. */
264c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        tok->read_coding_spec = 1;
265c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return 1;
266c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
267c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    cs = get_coding_spec(line, size);
268c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (!cs) {
269c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        Py_ssize_t i;
270c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        for (i = 0; i < size; i++) {
271c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (line[i] == '#' || line[i] == '\n' || line[i] == '\r')
272c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                break;
273c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (line[i] != ' ' && line[i] != '\t' && line[i] != '\014') {
274c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                /* Stop checking coding spec after a line containing
275c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                 * anything except a comment. */
276c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->read_coding_spec = 1;
277c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                break;
278c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
279c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
280c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    } else {
281c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        tok->read_coding_spec = 1;
282c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (tok->encoding == NULL) {
283c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            assert(tok->decoding_state == 1); /* raw */
284c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (strcmp(cs, "utf-8") == 0 ||
285c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                strcmp(cs, "iso-8859-1") == 0) {
286c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->encoding = cs;
287c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            } else {
288c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifdef Py_USING_UNICODE
289c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                r = set_readline(tok, cs);
290c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (r) {
291c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok->encoding = cs;
292c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok->decoding_state = -1;
293c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
294c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                else {
295c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    PyErr_Format(PyExc_SyntaxError,
296c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                                 "encoding problem: %s", cs);
297c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    PyMem_FREE(cs);
298c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
299c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#else
300c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                /* Without Unicode support, we cannot
301c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                   process the coding spec. Since there
302c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                   won't be any Unicode literals, that
303c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                   won't matter. */
304c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                PyMem_FREE(cs);
305c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif
306c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
307c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        } else {                /* then, compare cs with BOM */
308c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            r = (strcmp(tok->encoding, cs) == 0);
309c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (!r)
310c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                PyErr_Format(PyExc_SyntaxError,
311c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                             "encoding problem: %s with BOM", cs);
312c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            PyMem_FREE(cs);
313c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
314c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
315c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return r;
316c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
317c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
318c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* See whether the file starts with a BOM. If it does,
319c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel   invoke the set_readline function with the new encoding.
320c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel   Return 1 on success, 0 on failure.  */
321c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
322c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int
323c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielcheck_bom(int get_char(struct tok_state *),
324c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel          void unget_char(int, struct tok_state *),
325c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel          int set_readline(struct tok_state *, const char *),
326c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel          struct tok_state *tok)
327c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
328c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    int ch1, ch2, ch3;
329c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    ch1 = get_char(tok);
330c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->decoding_state = 1;
331c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (ch1 == EOF) {
332c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return 1;
333c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    } else if (ch1 == 0xEF) {
334c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        ch2 = get_char(tok);
335c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (ch2 != 0xBB) {
336c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            unget_char(ch2, tok);
337c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            unget_char(ch1, tok);
338c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            return 1;
339c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
340c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        ch3 = get_char(tok);
341c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (ch3 != 0xBF) {
342c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            unget_char(ch3, tok);
343c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            unget_char(ch2, tok);
344c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            unget_char(ch1, tok);
345c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            return 1;
346c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
347c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#if 0
348c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    /* Disable support for UTF-16 BOMs until a decision
349c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel       is made whether this needs to be supported.  */
350c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    } else if (ch1 == 0xFE) {
351c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        ch2 = get_char(tok);
352c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (ch2 != 0xFF) {
353c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            unget_char(ch2, tok);
354c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            unget_char(ch1, tok);
355c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            return 1;
356c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
357c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (!set_readline(tok, "utf-16-be"))
358c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            return 0;
359c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        tok->decoding_state = -1;
360c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    } else if (ch1 == 0xFF) {
361c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        ch2 = get_char(tok);
362c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (ch2 != 0xFE) {
363c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            unget_char(ch2, tok);
364c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            unget_char(ch1, tok);
365c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            return 1;
366c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
367c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (!set_readline(tok, "utf-16-le"))
368c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            return 0;
369c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        tok->decoding_state = -1;
370c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif
371c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    } else {
372c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        unget_char(ch1, tok);
373c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return 1;
374c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
375c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (tok->encoding != NULL)
376c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        PyMem_FREE(tok->encoding);
377c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->encoding = new_string("utf-8", 5);     /* resulting is in utf-8 */
378c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return 1;
379c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
380c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
/* Copy the next chunk of UTF-8 text for TOK into S (capacity SIZE bytes,
   including the terminating NUL).

   Returns S on success.  Returns NULL when an error occurred (error_ret
   has been called) or when zero bytes were produced, which signals EOF.

   tok->decoding_buffer on entry selects where the data comes from:
     1) NULL: nothing is pending, so tok->decoding_readline is called to
        fetch a fresh line, which must be a unicode object.
     2) a unicode object: decoding_feof already called
        tok->decoding_readline and parked the result here.
     3) an exact string object: an earlier fp_readl call could not fit the
        whole line into S and stored the remaining UTF-8 bytes here.
        Callers keep invoking fp_readl (with a larger buffer) until the
        data ends in '\n' or the file ends; see tok_nextc and its calls
        to decoding_fgets.
*/

static char *
fp_readl(char *s, int size, struct tok_state *tok)
{
#ifndef Py_USING_UNICODE
    /* In a non-Unicode build, this should never be called. */
    Py_FatalError("fp_readl should not be called in this build.");
    return NULL; /* Keep compiler happy (not reachable) */
#else
    PyObject *pending = tok->decoding_buffer;
    PyObject *encoded = NULL;
    char *bytes;
    Py_ssize_t nbytes;
    int capacity;

    /* Reserve one byte of S for the terminating NUL. */
    assert(size > 0);
    capacity = size - 1;

    if (pending != NULL) {
        /* Take over the reference that was parked in the tokenizer. */
        tok->decoding_buffer = NULL;
        if (PyString_CheckExact(pending))
            encoded = pending;          /* leftover bytes, already UTF-8 */
    }
    else {
        pending = PyObject_CallObject(tok->decoding_readline, NULL);
        if (pending == NULL)
            return error_ret(tok);
        if (!PyUnicode_Check(pending)) {
            Py_DECREF(pending);
            PyErr_SetString(PyExc_SyntaxError,
                            "codec did not return a unicode object");
            return error_ret(tok);
        }
    }

    if (encoded == NULL) {
        encoded = PyUnicode_AsUTF8String(pending);
        Py_DECREF(pending);
        if (encoded == NULL)
            return error_ret(tok);
    }

    bytes = PyString_AsString(encoded);
    nbytes = PyString_GET_SIZE(encoded);
    if (nbytes > capacity) {
        /* Too long for S: park the tail for the next call. */
        PyObject *overflow = PyString_FromStringAndSize(bytes + capacity,
                                                        nbytes - capacity);
        tok->decoding_buffer = overflow;
        if (overflow == NULL) {
            Py_DECREF(encoded);
            return error_ret(tok);
        }
        nbytes = capacity;
    }

    memcpy(s, bytes, nbytes);
    s[nbytes] = '\0';
    Py_DECREF(encoded);

    /* An empty result means the reader hit EOF. */
    return (nbytes == 0) ? NULL : s;
#endif
}
452c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
453c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Set the readline function for TOK to a StreamReader's
454c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel   readline function. The StreamReader is named ENC.
455c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
456c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel   This function is called from check_bom and check_coding_spec.
457c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
458c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel   ENC is usually identical to the future value of tok->encoding,
459c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel   except for the (currently unsupported) case of UTF-16.
460c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
461c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel   Return 1 on success, 0 on failure. */
462c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
463c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int
464c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielfp_setreadl(struct tok_state *tok, const char* enc)
465c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
466c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    PyObject *reader, *stream, *readline;
467c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
468c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    /* XXX: constify filename argument. */
469c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    stream = PyFile_FromFile(tok->fp, (char*)tok->filename, "rb", NULL);
470c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (stream == NULL)
471c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return 0;
472c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
473c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    reader = PyCodec_StreamReader(enc, stream, NULL);
474c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    Py_DECREF(stream);
475c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (reader == NULL)
476c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return 0;
477c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
478c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    readline = PyObject_GetAttrString(reader, "readline");
479c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    Py_DECREF(reader);
480c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (readline == NULL)
481c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return 0;
482c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
483c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->decoding_readline = readline;
484c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return 1;
485c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
486c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
487c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Fetch the next byte from TOK. */
488c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
489c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int fp_getc(struct tok_state *tok) {
490c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return getc(tok->fp);
491c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
492c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
493c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Unfetch the last byte back into TOK.  */
494c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
495c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic void fp_ungetc(int c, struct tok_state *tok) {
496c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    ungetc(c, tok->fp);
497c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
498c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
499c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Read a line of input from TOK. Determine encoding
500c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel   if necessary.  */
501c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
502c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic char *
503c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanieldecoding_fgets(char *s, int size, struct tok_state *tok)
504c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
505c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    char *line = NULL;
506c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    int badchar = 0;
507c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    for (;;) {
508c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (tok->decoding_state < 0) {
509c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            /* We already have a codec associated with
510c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel               this input. */
511c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            line = fp_readl(s, size, tok);
512c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            break;
513c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        } else if (tok->decoding_state > 0) {
514c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            /* We want a 'raw' read. */
515c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            line = Py_UniversalNewlineFgets(s, size,
516c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                                            tok->fp, NULL);
517c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            break;
518c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        } else {
519c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            /* We have not yet determined the encoding.
520c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel               If an encoding is found, use the file-pointer
521c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel               reader functions from now on. */
522c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (!check_bom(fp_getc, fp_ungetc, fp_setreadl, tok))
523c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                return error_ret(tok);
524c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            assert(tok->decoding_state != 0);
525c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
526c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
527c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (line != NULL && tok->lineno < 2 && !tok->read_coding_spec) {
528c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (!check_coding_spec(line, strlen(line), tok, fp_setreadl)) {
529c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            return error_ret(tok);
530c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
531c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
532c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifndef PGEN
533c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    /* The default encoding is ASCII, so make sure we don't have any
534c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel       non-ASCII bytes in it. */
535c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (line && !tok->encoding) {
536c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        unsigned char *c;
537c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        for (c = (unsigned char *)line; *c; c++)
538c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (*c > 127) {
539c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                badchar = *c;
540c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                break;
541c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
542c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
543c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (badchar) {
544c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        char buf[500];
545c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        /* Need to add 1 to the line number, since this line
546c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel           has not been counted, yet.  */
547c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        sprintf(buf,
548c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            "Non-ASCII character '\\x%.2x' "
549c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            "in file %.200s on line %i, "
550c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            "but no encoding declared; "
551c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            "see http://python.org/dev/peps/pep-0263/ for details",
552c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            badchar, tok->filename, tok->lineno + 1);
553c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        PyErr_SetString(PyExc_SyntaxError, buf);
554c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return error_ret(tok);
555c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
556c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif
557c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return line;
558c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
559c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
560c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int
561c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanieldecoding_feof(struct tok_state *tok)
562c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
563c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (tok->decoding_state >= 0) {
564c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return feof(tok->fp);
565c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    } else {
566c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        PyObject* buf = tok->decoding_buffer;
567c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (buf == NULL) {
568c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            buf = PyObject_CallObject(tok->decoding_readline, NULL);
569c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (buf == NULL) {
570c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                error_ret(tok);
571c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                return 1;
572c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            } else {
573c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->decoding_buffer = buf;
574c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
575c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
576c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return PyObject_Length(buf) == 0;
577c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
578c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
579c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
580c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Fetch a byte from TOK, using the string buffer. */
581c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
582c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int
583c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielbuf_getc(struct tok_state *tok) {
584c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return Py_CHARMASK(*tok->str++);
585c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
586c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
587c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Unfetch a byte from TOK, using the string buffer. */
588c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
589c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic void
590c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielbuf_ungetc(int c, struct tok_state *tok) {
591c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->str--;
592c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    assert(Py_CHARMASK(*tok->str) == c);        /* tok->cur may point to read-only segment */
593c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
594c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
595c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Set the readline function for TOK to ENC. For the string-based
596c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel   tokenizer, this means to just record the encoding. */
597c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
598c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int
599c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielbuf_setreadl(struct tok_state *tok, const char* enc) {
600c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->enc = enc;
601c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return 1;
602c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
603c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
/* Decode the NUL-terminated C byte string STR using codec ENC and
   re-encode it as UTF-8.  Returns a new Python string object, or NULL
   if either the decode or the encode step fails. */

#ifdef Py_USING_UNICODE
static PyObject *
translate_into_utf8(const char* str, const char* enc) {
    PyObject *encoded = NULL;
    PyObject *decoded = PyUnicode_Decode(str, strlen(str), enc, NULL);

    if (decoded != NULL) {
        encoded = PyUnicode_AsUTF8String(decoded);
        /* The intermediate unicode object is no longer needed. */
        Py_DECREF(decoded);
    }
    return encoded;
}
#endif
619c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
620c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
621c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic char *
622c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanieltranslate_newlines(const char *s, int exec_input, struct tok_state *tok) {
623c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    int skip_next_lf = 0, needed_length = strlen(s) + 2, final_length;
624c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    char *buf, *current;
625c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    char c = '\0';
626c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    buf = PyMem_MALLOC(needed_length);
627c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (buf == NULL) {
628c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        tok->done = E_NOMEM;
629c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return NULL;
630c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
631c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    for (current = buf; *s; s++, current++) {
632c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        c = *s;
633c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (skip_next_lf) {
634c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            skip_next_lf = 0;
635c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (c == '\n') {
636c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                c = *++s;
637c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (!c)
638c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    break;
639c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
640c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
641c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (c == '\r') {
642c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            skip_next_lf = 1;
643c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            c = '\n';
644c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
645c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        *current = c;
646c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
647c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    /* If this is exec input, add a newline to the end of the string if
648c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel       there isn't one already. */
649c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (exec_input && c != '\n') {
650c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        *current = '\n';
651c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        current++;
652c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
653c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    *current = '\0';
654c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    final_length = current - buf + 1;
655c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (final_length < needed_length && final_length)
656c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        /* should never fail */
657c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        buf = PyMem_REALLOC(buf, final_length);
658c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return buf;
659c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
660c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
661c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Decode a byte string STR for use as the buffer of TOK.
662c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel   Look for encoding declarations inside STR, and record them
663c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel   inside TOK.  */
664c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
665c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic const char *
666c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanieldecode_str(const char *input, int single, struct tok_state *tok)
667c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
668c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    PyObject* utf8 = NULL;
669c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    const char *str;
670c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    const char *s;
671c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    const char *newl[2] = {NULL, NULL};
672c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    int lineno = 0;
673c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->input = str = translate_newlines(input, single, tok);
674c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (str == NULL)
675c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return NULL;
676c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->enc = NULL;
677c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->str = str;
678c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok))
679c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return error_ret(tok);
680c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    str = tok->str;             /* string after BOM if any */
681c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    assert(str);
682c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifdef Py_USING_UNICODE
683c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (tok->enc != NULL) {
684c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        utf8 = translate_into_utf8(str, tok->enc);
685c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (utf8 == NULL)
686c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            return error_ret(tok);
687c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        str = PyString_AsString(utf8);
688c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
689c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif
690c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    for (s = str;; s++) {
691c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (*s == '\0') break;
692c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        else if (*s == '\n') {
693c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            assert(lineno < 2);
694c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            newl[lineno] = s;
695c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            lineno++;
696c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (lineno == 2) break;
697c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
698c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
699c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->enc = NULL;
700c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    /* need to check line 1 and 2 separately since check_coding_spec
701c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel       assumes a single line as input */
702c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (newl[0]) {
703c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (!check_coding_spec(str, newl[0] - str, tok, buf_setreadl))
704c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            return error_ret(tok);
705c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (tok->enc == NULL && !tok->read_coding_spec && newl[1]) {
706c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (!check_coding_spec(newl[0]+1, newl[1] - newl[0],
707c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                                   tok, buf_setreadl))
708c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                return error_ret(tok);
709c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
710c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
711c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifdef Py_USING_UNICODE
712c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (tok->enc != NULL) {
713c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        assert(utf8 == NULL);
714c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        utf8 = translate_into_utf8(str, tok->enc);
715c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (utf8 == NULL)
716c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            return error_ret(tok);
717c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        str = PyString_AsString(utf8);
718c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
719c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif
720c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    assert(tok->decoding_buffer == NULL);
721c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->decoding_buffer = utf8; /* CAUTION */
722c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return str;
723c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
724c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
725c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif /* PGEN */
726c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
727c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Set up tokenizer for string */
728c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
729c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstruct tok_state *
730c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielPyTokenizer_FromString(const char *str, int exec_input)
731c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
732c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    struct tok_state *tok = tok_new();
733c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (tok == NULL)
734c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return NULL;
735c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    str = (char *)decode_str(str, exec_input, tok);
736c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (str == NULL) {
737c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        PyTokenizer_Free(tok);
738c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return NULL;
739c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
740c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
741c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    /* XXX: constify members. */
742c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->buf = tok->cur = tok->end = tok->inp = (char*)str;
743c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return tok;
744c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
745c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
746c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
747c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Set up tokenizer for file */
748c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
749c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstruct tok_state *
750c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielPyTokenizer_FromFile(FILE *fp, char *ps1, char *ps2)
751c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
752c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    struct tok_state *tok = tok_new();
753c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (tok == NULL)
754c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return NULL;
755c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if ((tok->buf = (char *)PyMem_MALLOC(BUFSIZ)) == NULL) {
756c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        PyTokenizer_Free(tok);
757c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return NULL;
758c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
759c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->cur = tok->inp = tok->buf;
760c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->end = tok->buf + BUFSIZ;
761c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->fp = fp;
762c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->prompt = ps1;
763c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->nextprompt = ps2;
764c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return tok;
765c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
766c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
767c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
768c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Free a tok_state structure */
769c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
770c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielvoid
771c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielPyTokenizer_Free(struct tok_state *tok)
772c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
773c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (tok->encoding != NULL)
774c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        PyMem_FREE(tok->encoding);
775c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifndef PGEN
776c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    Py_XDECREF(tok->decoding_readline);
777c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    Py_XDECREF(tok->decoding_buffer);
778c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif
779c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (tok->fp != NULL && tok->buf != NULL)
780c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        PyMem_FREE(tok->buf);
781c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (tok->input)
782c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        PyMem_FREE((char *)tok->input);
783c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    PyMem_FREE(tok);
784c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
785c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
786c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#if !defined(PGEN) && defined(Py_USING_UNICODE)
787c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int
788c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanieltok_stdin_decode(struct tok_state *tok, char **inp)
789c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
790c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    PyObject *enc, *sysstdin, *decoded, *utf8;
791c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    const char *encoding;
792c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    char *converted;
793c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
794c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (PySys_GetFile((char *)"stdin", NULL) != stdin)
795c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return 0;
796c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    sysstdin = PySys_GetObject("stdin");
797c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (sysstdin == NULL || !PyFile_Check(sysstdin))
798c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return 0;
799c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
800c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    enc = ((PyFileObject *)sysstdin)->f_encoding;
801c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (enc == NULL || !PyString_Check(enc))
802c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return 0;
803c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    Py_INCREF(enc);
804c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
805c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    encoding = PyString_AsString(enc);
806c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    decoded = PyUnicode_Decode(*inp, strlen(*inp), encoding, NULL);
807c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (decoded == NULL)
808c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        goto error_clear;
809c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
810c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    utf8 = PyUnicode_AsEncodedString(decoded, "utf-8", NULL);
811c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    Py_DECREF(decoded);
812c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (utf8 == NULL)
813c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        goto error_clear;
814c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
815c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    assert(PyString_Check(utf8));
816c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    converted = new_string(PyString_AS_STRING(utf8),
817c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                           PyString_GET_SIZE(utf8));
818c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    Py_DECREF(utf8);
819c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (converted == NULL)
820c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        goto error_nomem;
821c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
822c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    PyMem_FREE(*inp);
823c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    *inp = converted;
824c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (tok->encoding != NULL)
825c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        PyMem_FREE(tok->encoding);
826c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->encoding = new_string(encoding, strlen(encoding));
827c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (tok->encoding == NULL)
828c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        goto error_nomem;
829c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
830c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    Py_DECREF(enc);
831c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return 0;
832c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
833c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielerror_nomem:
834c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    Py_DECREF(enc);
835c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->done = E_NOMEM;
836c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return -1;
837c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
838c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielerror_clear:
839c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    Py_DECREF(enc);
840c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (!PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
841c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        tok->done = E_ERROR;
842c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return -1;
843c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
844c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    /* Fallback to iso-8859-1: for backward compatibility */
845c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    PyErr_Clear();
846c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return 0;
847c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
848c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif
849c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
850c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Get next char, updating state; error code goes into tok->done */
851c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
852c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int
853c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanieltok_nextc(register struct tok_state *tok)
854c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
855c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    for (;;) {
856c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (tok->cur != tok->inp) {
857c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            return Py_CHARMASK(*tok->cur++); /* Fast path */
858c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
859c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (tok->done != E_OK)
860c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            return EOF;
861c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (tok->fp == NULL) {
862c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            char *end = strchr(tok->inp, '\n');
863c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (end != NULL)
864c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                end++;
865c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            else {
866c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                end = strchr(tok->inp, '\0');
867c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (end == tok->inp) {
868c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok->done = E_EOF;
869c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    return EOF;
870c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
871c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
872c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (tok->start == NULL)
873c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->buf = tok->cur;
874c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            tok->line_start = tok->cur;
875c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            tok->lineno++;
876c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            tok->inp = end;
877c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            return Py_CHARMASK(*tok->cur++);
878c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
879c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (tok->prompt != NULL) {
880c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            char *newtok = PyOS_Readline(stdin, stdout, tok->prompt);
881c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (tok->nextprompt != NULL)
882c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->prompt = tok->nextprompt;
883c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (newtok == NULL)
884c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->done = E_INTR;
885c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            else if (*newtok == '\0') {
886c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                PyMem_FREE(newtok);
887c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->done = E_EOF;
888c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
889c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#if !defined(PGEN) && defined(Py_USING_UNICODE)
890c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            else if (tok_stdin_decode(tok, &newtok) != 0)
891c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                PyMem_FREE(newtok);
892c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif
893c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            else if (tok->start != NULL) {
894c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                size_t start = tok->start - tok->buf;
895c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                size_t oldlen = tok->cur - tok->buf;
896c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                size_t newlen = oldlen + strlen(newtok);
897c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                char *buf = tok->buf;
898c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                buf = (char *)PyMem_REALLOC(buf, newlen+1);
899c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->lineno++;
900c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (buf == NULL) {
901c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    PyMem_FREE(tok->buf);
902c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok->buf = NULL;
903c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    PyMem_FREE(newtok);
904c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok->done = E_NOMEM;
905c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    return EOF;
906c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
907c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->buf = buf;
908c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->cur = tok->buf + oldlen;
909c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->line_start = tok->cur;
910c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                strcpy(tok->buf + oldlen, newtok);
911c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                PyMem_FREE(newtok);
912c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->inp = tok->buf + newlen;
913c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->end = tok->inp + 1;
914c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->start = tok->buf + start;
915c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
916c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            else {
917c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->lineno++;
918c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (tok->buf != NULL)
919c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    PyMem_FREE(tok->buf);
920c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->buf = newtok;
921c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->line_start = tok->buf;
922c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->cur = tok->buf;
923c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->line_start = tok->buf;
924c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->inp = strchr(tok->buf, '\0');
925c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->end = tok->inp + 1;
926c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
927c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
928c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        else {
929c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            int done = 0;
930c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            Py_ssize_t cur = 0;
931c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            char *pt;
932c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (tok->start == NULL) {
933c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (tok->buf == NULL) {
934c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok->buf = (char *)
935c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                        PyMem_MALLOC(BUFSIZ);
936c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    if (tok->buf == NULL) {
937c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                        tok->done = E_NOMEM;
938c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                        return EOF;
939c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    }
940c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok->end = tok->buf + BUFSIZ;
941c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
942c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf),
943c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                          tok) == NULL) {
944c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok->done = E_EOF;
945c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    done = 1;
946c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
947c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                else {
948c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok->done = E_OK;
949c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok->inp = strchr(tok->buf, '\0');
950c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    done = tok->inp[-1] == '\n';
951c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
952c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
953c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            else {
954c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                cur = tok->cur - tok->buf;
955c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (decoding_feof(tok)) {
956c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok->done = E_EOF;
957c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    done = 1;
958c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
959c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                else
960c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok->done = E_OK;
961c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
962c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            tok->lineno++;
963c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            /* Read until '\n' or EOF */
964c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            while (!done) {
965c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                Py_ssize_t curstart = tok->start == NULL ? -1 :
966c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                          tok->start - tok->buf;
967c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                Py_ssize_t curvalid = tok->inp - tok->buf;
968c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                Py_ssize_t newsize = curvalid + BUFSIZ;
969c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                char *newbuf = tok->buf;
970c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                newbuf = (char *)PyMem_REALLOC(newbuf,
971c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                                               newsize);
972c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (newbuf == NULL) {
973c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok->done = E_NOMEM;
974c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok->cur = tok->inp;
975c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    return EOF;
976c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
977c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->buf = newbuf;
978c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->inp = tok->buf + curvalid;
979c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->end = tok->buf + newsize;
980c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->start = curstart < 0 ? NULL :
981c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                         tok->buf + curstart;
982c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (decoding_fgets(tok->inp,
983c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                               (int)(tok->end - tok->inp),
984c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                               tok) == NULL) {
985c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    /* Break out early on decoding
986c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                       errors, as tok->buf will be NULL
987c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                     */
988c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    if (tok->decoding_erred)
989c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                        return EOF;
990c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    /* Last line does not end in \n,
991c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                       fake one */
992c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    strcpy(tok->inp, "\n");
993c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
994c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->inp = strchr(tok->inp, '\0');
995c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                done = tok->inp[-1] == '\n';
996c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
997c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (tok->buf != NULL) {
998c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->cur = tok->buf + cur;
999c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->line_start = tok->cur;
1000c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                /* replace "\r\n" with "\n" */
1001c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                /* For Mac leave the \r, giving a syntax error */
1002c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                pt = tok->inp - 2;
1003c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (pt >= tok->buf && *pt == '\r') {
1004c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    *pt++ = '\n';
1005c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    *pt = '\0';
1006c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok->inp = pt;
1007c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
1008c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
1009c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1010c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (tok->done != E_OK) {
1011c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (tok->prompt != NULL)
1012c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                PySys_WriteStderr("\n");
1013c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            tok->cur = tok->inp;
1014c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            return EOF;
1015c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1016c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
1017c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    /*NOTREACHED*/
1018c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
1019c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1020c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1021c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Back-up one character */
1022c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1023c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic void
1024c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanieltok_backup(register struct tok_state *tok, register int c)
1025c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
1026c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (c != EOF) {
1027c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (--tok->cur < tok->buf)
1028c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            Py_FatalError("tok_backup: beginning of buffer");
1029c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (*tok->cur != c)
1030c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            *tok->cur = c;
1031c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
1032c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
1033c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1034c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1035c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Return the token corresponding to a single character */
1036c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1037c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielint
1038c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielPyToken_OneChar(int c)
1039c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
1040c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    switch (c) {
1041c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '(':           return LPAR;
1042c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case ')':           return RPAR;
1043c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '[':           return LSQB;
1044c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case ']':           return RSQB;
1045c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case ':':           return COLON;
1046c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case ',':           return COMMA;
1047c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case ';':           return SEMI;
1048c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '+':           return PLUS;
1049c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '-':           return MINUS;
1050c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '*':           return STAR;
1051c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '/':           return SLASH;
1052c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '|':           return VBAR;
1053c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '&':           return AMPER;
1054c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '<':           return LESS;
1055c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '>':           return GREATER;
1056c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '=':           return EQUAL;
1057c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '.':           return DOT;
1058c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '%':           return PERCENT;
1059c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '`':           return BACKQUOTE;
1060c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '{':           return LBRACE;
1061c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '}':           return RBRACE;
1062c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '^':           return CIRCUMFLEX;
1063c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '~':           return TILDE;
1064c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '@':       return AT;
1065c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    default:            return OP;
1066c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
1067c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
1068c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1069c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1070c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielint
1071c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielPyToken_TwoChars(int c1, int c2)
1072c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
1073c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    switch (c1) {
1074c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '=':
1075c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        switch (c2) {
1076c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        case '=':               return EQEQUAL;
1077c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1078c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        break;
1079c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '!':
1080c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        switch (c2) {
1081c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        case '=':               return NOTEQUAL;
1082c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1083c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        break;
1084c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '<':
1085c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        switch (c2) {
1086c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        case '>':               return NOTEQUAL;
1087c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        case '=':               return LESSEQUAL;
1088c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        case '<':               return LEFTSHIFT;
1089c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1090c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        break;
1091c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '>':
1092c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        switch (c2) {
1093c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        case '=':               return GREATEREQUAL;
1094c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        case '>':               return RIGHTSHIFT;
1095c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1096c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        break;
1097c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '+':
1098c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        switch (c2) {
1099c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        case '=':               return PLUSEQUAL;
1100c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1101c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        break;
1102c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '-':
1103c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        switch (c2) {
1104c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        case '=':               return MINEQUAL;
1105c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1106c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        break;
1107c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '*':
1108c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        switch (c2) {
1109c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        case '*':               return DOUBLESTAR;
1110c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        case '=':               return STAREQUAL;
1111c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1112c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        break;
1113c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '/':
1114c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        switch (c2) {
1115c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        case '/':               return DOUBLESLASH;
1116c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        case '=':               return SLASHEQUAL;
1117c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1118c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        break;
1119c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '|':
1120c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        switch (c2) {
1121c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        case '=':               return VBAREQUAL;
1122c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1123c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        break;
1124c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '%':
1125c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        switch (c2) {
1126c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        case '=':               return PERCENTEQUAL;
1127c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1128c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        break;
1129c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '&':
1130c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        switch (c2) {
1131c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        case '=':               return AMPEREQUAL;
1132c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1133c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        break;
1134c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '^':
1135c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        switch (c2) {
1136c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        case '=':               return CIRCUMFLEXEQUAL;
1137c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1138c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        break;
1139c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
1140c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return OP;
1141c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
1142c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1143c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielint
1144c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielPyToken_ThreeChars(int c1, int c2, int c3)
1145c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
1146c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    switch (c1) {
1147c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '<':
1148c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        switch (c2) {
1149c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        case '<':
1150c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            switch (c3) {
1151c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            case '=':
1152c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                return LEFTSHIFTEQUAL;
1153c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
1154c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            break;
1155c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1156c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        break;
1157c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '>':
1158c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        switch (c2) {
1159c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        case '>':
1160c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            switch (c3) {
1161c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            case '=':
1162c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                return RIGHTSHIFTEQUAL;
1163c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
1164c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            break;
1165c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1166c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        break;
1167c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '*':
1168c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        switch (c2) {
1169c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        case '*':
1170c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            switch (c3) {
1171c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            case '=':
1172c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                return DOUBLESTAREQUAL;
1173c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
1174c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            break;
1175c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1176c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        break;
1177c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '/':
1178c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        switch (c2) {
1179c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        case '/':
1180c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            switch (c3) {
1181c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            case '=':
1182c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                return DOUBLESLASHEQUAL;
1183c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
1184c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            break;
1185c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1186c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        break;
1187c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
1188c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return OP;
1189c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
1190c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1191c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int
1192c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielindenterror(struct tok_state *tok)
1193c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
1194c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (tok->alterror) {
1195c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        tok->done = E_TABSPACE;
1196c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        tok->cur = tok->inp;
1197c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return 1;
1198c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
1199c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (tok->altwarning) {
1200c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        PySys_WriteStderr("%s: inconsistent use of tabs and spaces "
1201c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                          "in indentation\n", tok->filename);
1202c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        tok->altwarning = 0;
1203c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
1204c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return 0;
1205c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
1206c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1207c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Get next token, after space stripping etc. */
1208c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1209c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int
1210c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanieltok_get(register struct tok_state *tok, char **p_start, char **p_end)
1211c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
1212c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    register int c;
1213c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    int blankline;
1214c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1215c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    *p_start = *p_end = NULL;
1216c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel  nextline:
1217c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->start = NULL;
1218c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    blankline = 0;
1219c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1220c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    /* Get indentation level */
1221c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (tok->atbol) {
1222c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        register int col = 0;
1223c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        register int altcol = 0;
1224c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        tok->atbol = 0;
1225c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        for (;;) {
1226c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            c = tok_nextc(tok);
1227c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (c == ' ')
1228c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                col++, altcol++;
1229c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            else if (c == '\t') {
1230c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                col = (col/tok->tabsize + 1) * tok->tabsize;
1231c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                altcol = (altcol/tok->alttabsize + 1)
1232c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    * tok->alttabsize;
1233c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
1234c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            else if (c == '\014') /* Control-L (formfeed) */
1235c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                col = altcol = 0; /* For Emacs users */
1236c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            else
1237c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                break;
1238c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1239c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        tok_backup(tok, c);
1240c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (c == '#' || c == '\n') {
1241c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            /* Lines with only whitespace and/or comments
1242c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel               shouldn't affect the indentation and are
1243c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel               not passed to the parser as NEWLINE tokens,
1244c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel               except *totally* empty lines in interactive
1245c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel               mode, which signal the end of a command group. */
1246c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (col == 0 && c == '\n' && tok->prompt != NULL)
1247c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                blankline = 0; /* Let it through */
1248c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            else
1249c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                blankline = 1; /* Ignore completely */
1250c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            /* We can't jump back right here since we still
1251c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel               may need to skip to the end of a comment */
1252c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1253c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (!blankline && tok->level == 0) {
1254c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (col == tok->indstack[tok->indent]) {
1255c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                /* No change */
1256c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (altcol != tok->altindstack[tok->indent]) {
1257c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    if (indenterror(tok))
1258c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                        return ERRORTOKEN;
1259c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
1260c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
1261c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            else if (col > tok->indstack[tok->indent]) {
1262c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                /* Indent -- always one */
1263c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (tok->indent+1 >= MAXINDENT) {
1264c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok->done = E_TOODEEP;
1265c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok->cur = tok->inp;
1266c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    return ERRORTOKEN;
1267c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
1268c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (altcol <= tok->altindstack[tok->indent]) {
1269c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    if (indenterror(tok))
1270c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                        return ERRORTOKEN;
1271c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
1272c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->pendin++;
1273c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->indstack[++tok->indent] = col;
1274c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->altindstack[tok->indent] = altcol;
1275c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
1276c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            else /* col < tok->indstack[tok->indent] */ {
1277c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                /* Dedent -- any number, must be consistent */
1278c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                while (tok->indent > 0 &&
1279c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    col < tok->indstack[tok->indent]) {
1280c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok->pendin--;
1281c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok->indent--;
1282c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
1283c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (col != tok->indstack[tok->indent]) {
1284c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok->done = E_DEDENT;
1285c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok->cur = tok->inp;
1286c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    return ERRORTOKEN;
1287c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
1288c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (altcol != tok->altindstack[tok->indent]) {
1289c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    if (indenterror(tok))
1290c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                        return ERRORTOKEN;
1291c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
1292c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
1293c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1294c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
1295c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1296c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->start = tok->cur;
1297c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1298c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    /* Return pending indents/dedents */
1299c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (tok->pendin != 0) {
1300c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (tok->pendin < 0) {
1301c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            tok->pendin++;
1302c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            return DEDENT;
1303c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1304c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        else {
1305c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            tok->pendin--;
1306c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            return INDENT;
1307c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1308c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
1309c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1310c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel again:
1311c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->start = NULL;
1312c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    /* Skip spaces */
1313c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    do {
1314c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        c = tok_nextc(tok);
1315c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    } while (c == ' ' || c == '\t' || c == '\014');
1316c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1317c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    /* Set start of current token */
1318c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    tok->start = tok->cur - 1;
1319c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1320c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    /* Skip comment, while looking for tab-setting magic */
1321c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (c == '#') {
1322c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        static char *tabforms[] = {
1323c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            "tab-width:",                       /* Emacs */
1324c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            ":tabstop=",                        /* vim, full form */
1325c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            ":ts=",                             /* vim, abbreviated form */
1326c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            "set tabsize=",                     /* will vi never die? */
1327c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        /* more templates can be added here to support other editors */
1328c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        };
1329c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        char cbuf[80];
1330c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        char *tp, **cp;
1331c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        tp = cbuf;
1332c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        do {
1333c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            *tp++ = c = tok_nextc(tok);
1334c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        } while (c != EOF && c != '\n' &&
1335c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                 (size_t)(tp - cbuf + 1) < sizeof(cbuf));
1336c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        *tp = '\0';
1337c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        for (cp = tabforms;
1338c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel             cp < tabforms + sizeof(tabforms)/sizeof(tabforms[0]);
1339c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel             cp++) {
1340c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if ((tp = strstr(cbuf, *cp))) {
1341c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                int newsize = atoi(tp + strlen(*cp));
1342c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1343c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (newsize >= 1 && newsize <= 40) {
1344c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok->tabsize = newsize;
1345c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    if (Py_VerboseFlag)
1346c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                        PySys_WriteStderr(
1347c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                        "Tab size set to %d\n",
1348c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                        newsize);
1349c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
1350c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
1351c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1352c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        while (c != EOF && c != '\n')
1353c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            c = tok_nextc(tok);
1354c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
1355c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1356c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    /* Check for EOF and errors now */
1357c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (c == EOF) {
1358c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
1359c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
1360c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1361c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    /* Identifier (most frequent token!) */
1362c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (Py_ISALPHA(c) || c == '_') {
1363c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        /* Process r"", u"" and ur"" */
1364c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        switch (c) {
1365c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        case 'b':
1366c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        case 'B':
1367c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            c = tok_nextc(tok);
1368c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (c == 'r' || c == 'R')
1369c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                c = tok_nextc(tok);
1370c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (c == '"' || c == '\'')
1371c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                goto letter_quote;
1372c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            break;
1373c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        case 'r':
1374c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        case 'R':
1375c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            c = tok_nextc(tok);
1376c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (c == '"' || c == '\'')
1377c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                goto letter_quote;
1378c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            break;
1379c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        case 'u':
1380c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        case 'U':
1381c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            c = tok_nextc(tok);
1382c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (c == 'r' || c == 'R')
1383c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                c = tok_nextc(tok);
1384c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (c == '"' || c == '\'')
1385c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                goto letter_quote;
1386c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            break;
1387c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1388c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        while (c != EOF && (Py_ISALNUM(c) || c == '_')) {
1389c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            c = tok_nextc(tok);
1390c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1391c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        tok_backup(tok, c);
1392c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        *p_start = tok->start;
1393c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        *p_end = tok->cur;
1394c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return NAME;
1395c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
1396c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1397c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    /* Newline */
1398c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (c == '\n') {
1399c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        tok->atbol = 1;
1400c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (blankline || tok->level > 0)
1401c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            goto nextline;
1402c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        *p_start = tok->start;
1403c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        *p_end = tok->cur - 1; /* Leave '\n' out of the string */
1404c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        tok->cont_line = 0;
1405c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return NEWLINE;
1406c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
1407c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1408c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    /* Period or number starting with period? */
1409c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (c == '.') {
1410c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        c = tok_nextc(tok);
1411c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (isdigit(c)) {
1412c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            goto fraction;
1413c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1414c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        else {
1415c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            tok_backup(tok, c);
1416c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            *p_start = tok->start;
1417c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            *p_end = tok->cur;
1418c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            return DOT;
1419c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1420c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
1421c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1422c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    /* Number */
1423c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (isdigit(c)) {
1424c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (c == '0') {
1425c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            /* Hex, octal or binary -- maybe. */
1426c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            c = tok_nextc(tok);
1427c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (c == '.')
1428c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                goto fraction;
1429c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifndef WITHOUT_COMPLEX
1430c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (c == 'j' || c == 'J')
1431c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                goto imaginary;
1432c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif
1433c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (c == 'x' || c == 'X') {
1434c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1435c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                /* Hex */
1436c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                c = tok_nextc(tok);
1437c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (!isxdigit(c)) {
1438c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok->done = E_TOKEN;
1439c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok_backup(tok, c);
1440c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    return ERRORTOKEN;
1441c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
1442c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                do {
1443c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    c = tok_nextc(tok);
1444c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                } while (isxdigit(c));
1445c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
1446c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            else if (c == 'o' || c == 'O') {
1447c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                /* Octal */
1448c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                c = tok_nextc(tok);
1449c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (c < '0' || c >= '8') {
1450c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok->done = E_TOKEN;
1451c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok_backup(tok, c);
1452c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    return ERRORTOKEN;
1453c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
1454c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                do {
1455c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    c = tok_nextc(tok);
1456c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                } while ('0' <= c && c < '8');
1457c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
1458c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            else if (c == 'b' || c == 'B') {
1459c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                /* Binary */
1460c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                c = tok_nextc(tok);
1461c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (c != '0' && c != '1') {
1462c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok->done = E_TOKEN;
1463c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok_backup(tok, c);
1464c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    return ERRORTOKEN;
1465c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
1466c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                do {
1467c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    c = tok_nextc(tok);
1468c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                } while (c == '0' || c == '1');
1469c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
1470c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            else {
1471c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                int found_decimal = 0;
1472c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                /* Octal; c is first char of it */
1473c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                /* There's no 'isoctdigit' macro, sigh */
1474c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                while ('0' <= c && c < '8') {
1475c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    c = tok_nextc(tok);
1476c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
1477c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (isdigit(c)) {
1478c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    found_decimal = 1;
1479c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    do {
1480c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                        c = tok_nextc(tok);
1481c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    } while (isdigit(c));
1482c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
1483c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (c == '.')
1484c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    goto fraction;
1485c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                else if (c == 'e' || c == 'E')
1486c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    goto exponent;
1487c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifndef WITHOUT_COMPLEX
1488c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                else if (c == 'j' || c == 'J')
1489c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    goto imaginary;
1490c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif
1491c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                else if (found_decimal) {
1492c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok->done = E_TOKEN;
1493c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok_backup(tok, c);
1494c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    return ERRORTOKEN;
1495c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
1496c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
1497c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (c == 'l' || c == 'L')
1498c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                c = tok_nextc(tok);
1499c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1500c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        else {
1501c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            /* Decimal */
1502c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            do {
1503c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                c = tok_nextc(tok);
1504c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            } while (isdigit(c));
1505c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (c == 'l' || c == 'L')
1506c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                c = tok_nextc(tok);
1507c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            else {
1508c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                /* Accept floating point numbers. */
1509c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (c == '.') {
1510c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        fraction:
1511c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    /* Fraction */
1512c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    do {
1513c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                        c = tok_nextc(tok);
1514c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    } while (isdigit(c));
1515c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
1516c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (c == 'e' || c == 'E') {
1517c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    int e;
1518c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                  exponent:
1519c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    e = c;
1520c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    /* Exponent part */
1521c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    c = tok_nextc(tok);
1522c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    if (c == '+' || c == '-') {
1523c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                        c = tok_nextc(tok);
1524c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                        if (!isdigit(c)) {
1525c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                            tok->done = E_TOKEN;
1526c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                            tok_backup(tok, c);
1527c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                            return ERRORTOKEN;
1528c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                        }
1529c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    } else if (!isdigit(c)) {
1530c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                        tok_backup(tok, c);
1531c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                        tok_backup(tok, e);
1532c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                        *p_start = tok->start;
1533c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                        *p_end = tok->cur;
1534c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                        return NUMBER;
1535c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    }
1536c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    do {
1537c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                        c = tok_nextc(tok);
1538c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    } while (isdigit(c));
1539c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
1540c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifndef WITHOUT_COMPLEX
1541c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (c == 'j' || c == 'J')
1542c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    /* Imaginary part */
1543c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        imaginary:
1544c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    c = tok_nextc(tok);
1545c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif
1546c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
1547c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1548c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        tok_backup(tok, c);
1549c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        *p_start = tok->start;
1550c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        *p_end = tok->cur;
1551c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return NUMBER;
1552c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
1553c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1554c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel  letter_quote:
1555c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    /* String */
1556c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (c == '\'' || c == '"') {
1557c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        Py_ssize_t quote2 = tok->cur - tok->start + 1;
1558c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        int quote = c;
1559c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        int triple = 0;
1560c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        int tripcount = 0;
1561c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        for (;;) {
1562c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            c = tok_nextc(tok);
1563c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (c == '\n') {
1564c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (!triple) {
1565c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok->done = E_EOLS;
1566c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok_backup(tok, c);
1567c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    return ERRORTOKEN;
1568c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
1569c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tripcount = 0;
1570c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->cont_line = 1; /* multiline string. */
1571c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
1572c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            else if (c == EOF) {
1573c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (triple)
1574c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok->done = E_EOFS;
1575c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                else
1576c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok->done = E_EOLS;
1577c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok->cur = tok->inp;
1578c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                return ERRORTOKEN;
1579c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
1580c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            else if (c == quote) {
1581c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tripcount++;
1582c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (tok->cur - tok->start == quote2) {
1583c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    c = tok_nextc(tok);
1584c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    if (c == quote) {
1585c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                        triple = 1;
1586c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                        tripcount = 0;
1587c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                        continue;
1588c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    }
1589c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok_backup(tok, c);
1590c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
1591c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (!triple || tripcount == 3)
1592c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    break;
1593c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
1594c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            else if (c == '\\') {
1595c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tripcount = 0;
1596c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                c = tok_nextc(tok);
1597c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (c == EOF) {
1598c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok->done = E_EOLS;
1599c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    tok->cur = tok->inp;
1600c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    return ERRORTOKEN;
1601c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
1602c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
1603c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            else
1604c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tripcount = 0;
1605c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1606c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        *p_start = tok->start;
1607c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        *p_end = tok->cur;
1608c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return STRING;
1609c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
1610c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1611c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    /* Line continuation */
1612c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (c == '\\') {
1613c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        c = tok_nextc(tok);
1614c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (c != '\n') {
1615c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            tok->done = E_LINECONT;
1616c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            tok->cur = tok->inp;
1617c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            return ERRORTOKEN;
1618c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1619c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        tok->cont_line = 1;
1620c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        goto again; /* Read next line */
1621c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
1622c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1623c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    /* Check for two-character token */
1624c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    {
1625c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        int c2 = tok_nextc(tok);
1626c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        int token = PyToken_TwoChars(c, c2);
1627c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifndef PGEN
1628c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (Py_Py3kWarningFlag && token == NOTEQUAL && c == '<') {
1629c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (PyErr_WarnExplicit(PyExc_DeprecationWarning,
1630c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                                   "<> not supported in 3.x; use !=",
1631c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                                   tok->filename, tok->lineno,
1632c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                                   NULL, NULL)) {
1633c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                return ERRORTOKEN;
1634c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
1635c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1636c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif
1637c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (token != OP) {
1638c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            int c3 = tok_nextc(tok);
1639c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            int token3 = PyToken_ThreeChars(c, c2, c3);
1640c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (token3 != OP) {
1641c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                token = token3;
1642c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            } else {
1643c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                tok_backup(tok, c3);
1644c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
1645c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            *p_start = tok->start;
1646c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            *p_end = tok->cur;
1647c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            return token;
1648c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1649c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        tok_backup(tok, c2);
1650c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
1651c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1652c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    /* Keep track of parentheses nesting level */
1653c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    switch (c) {
1654c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '(':
1655c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '[':
1656c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '{':
1657c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        tok->level++;
1658c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        break;
1659c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case ')':
1660c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case ']':
1661c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    case '}':
1662c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        tok->level--;
1663c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        break;
1664c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
1665c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1666c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    /* Punctuation character */
1667c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    *p_start = tok->start;
1668c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    *p_end = tok->cur;
1669c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return PyToken_OneChar(c);
1670c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
1671c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1672c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielint
1673c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielPyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
1674c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
1675c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    int result = tok_get(tok, p_start, p_end);
1676c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (tok->decoding_erred) {
1677c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        result = ERRORTOKEN;
1678c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        tok->done = E_DECODE;
1679c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
1680c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return result;
1681c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
1682c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1683c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* This function is only called from parsetok. However, it cannot live
1684c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel   there, as it must be empty for PGEN, and we can check for PGEN only
1685c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel   in this file. */
1686c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1687c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#if defined(PGEN) || !defined(Py_USING_UNICODE)
1688c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielchar*
1689c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielPyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int* offset)
1690c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
1691c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return NULL;
1692c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
1693c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#else
1694c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifdef Py_USING_UNICODE
1695c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic PyObject *
1696c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanieldec_utf8(const char *enc, const char *text, size_t len) {
1697c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    PyObject *ret = NULL;
1698c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    PyObject *unicode_text = PyUnicode_DecodeUTF8(text, len, "replace");
1699c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (unicode_text) {
1700c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        ret = PyUnicode_AsEncodedString(unicode_text, enc, "replace");
1701c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        Py_DECREF(unicode_text);
1702c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
1703c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (!ret) {
1704c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        PyErr_Clear();
1705c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
1706c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return ret;
1707c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
1708c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielchar *
1709c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielPyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset)
1710c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
1711c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    char *text = NULL;
1712c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (tok->encoding) {
1713c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        /* convert source to original encondig */
1714c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        PyObject *lineobj = dec_utf8(tok->encoding, tok->buf, len);
1715c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (lineobj != NULL) {
1716c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            int linelen = PyString_Size(lineobj);
1717c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            const char *line = PyString_AsString(lineobj);
1718c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            text = PyObject_MALLOC(linelen + 1);
1719c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (text != NULL && line != NULL) {
1720c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (linelen)
1721c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    strncpy(text, line, linelen);
1722c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                text[linelen] = '\0';
1723c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
1724c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            Py_DECREF(lineobj);
1725c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1726c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            /* adjust error offset */
1727c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (*offset > 1) {
1728c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                PyObject *offsetobj = dec_utf8(tok->encoding,
1729c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                                               tok->buf, *offset-1);
1730c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (offsetobj) {
1731c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    *offset = PyString_Size(offsetobj) + 1;
1732c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    Py_DECREF(offsetobj);
1733c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
1734c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
1735c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1736c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
1737c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
1738c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return text;
1739c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1740c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
1741c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif /* defined(Py_USING_UNICODE) */
1742c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif
1743c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1744c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1745c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifdef Py_DEBUG
1746c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1747c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielvoid
1748c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanieltok_dump(int type, char *start, char *end)
1749c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
1750c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    printf("%s", _PyParser_TokenNames[type]);
1751c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (type == NAME || type == NUMBER || type == STRING || type == OP)
1752c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        printf("(%.*s)", (int)(end - start), start);
1753c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
1754c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
1755c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif
1756