1c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 2c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Tokenizer implementation */ 3c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 4c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "Python.h" 5c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "pgenheaders.h" 6c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 7c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include <ctype.h> 8c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include <assert.h> 9c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 10c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "tokenizer.h" 11c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "errcode.h" 12c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 13c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifndef PGEN 14c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "unicodeobject.h" 15c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "stringobject.h" 16c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "fileobject.h" 17c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "codecs.h" 18c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "abstract.h" 19c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "pydebug.h" 20c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif /* PGEN */ 21c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 22c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielextern char *PyOS_Readline(FILE *, FILE *, char *); 23c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Return malloc'ed string including trailing \n; 24c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel empty malloc'ed string for EOF; 25c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel NULL if interrupted */ 26c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 27c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Don't ever change this -- it would break the portability of Python code */ 28c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#define TABSIZE 8 29c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 30c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Forward */ 31c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic struct tok_state *tok_new(void); 32c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int tok_nextc(struct tok_state *tok); 33c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic void tok_backup(struct tok_state *tok, int c); 34c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 35c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Token names */ 36c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 37c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielchar *_PyParser_TokenNames[] = { 38c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "ENDMARKER", 39c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "NAME", 40c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "NUMBER", 41c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "STRING", 42c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "NEWLINE", 43c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "INDENT", 44c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "DEDENT", 45c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "LPAR", 46c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "RPAR", 47c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "LSQB", 48c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "RSQB", 49c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "COLON", 50c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "COMMA", 51c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "SEMI", 52c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "PLUS", 53c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "MINUS", 54c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "STAR", 55c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "SLASH", 56c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "VBAR", 57c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "AMPER", 58c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "LESS", 59c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "GREATER", 60c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "EQUAL", 61c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "DOT", 62c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "PERCENT", 63c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "BACKQUOTE", 64c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "LBRACE", 65c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "RBRACE", 66c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "EQEQUAL", 67c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "NOTEQUAL", 68c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "LESSEQUAL", 69c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "GREATEREQUAL", 70c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "TILDE", 71c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "CIRCUMFLEX", 72c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "LEFTSHIFT", 73c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "RIGHTSHIFT", 74c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "DOUBLESTAR", 75c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "PLUSEQUAL", 76c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "MINEQUAL", 77c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "STAREQUAL", 78c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "SLASHEQUAL", 79c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "PERCENTEQUAL", 80c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "AMPEREQUAL", 81c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "VBAREQUAL", 82c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "CIRCUMFLEXEQUAL", 83c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "LEFTSHIFTEQUAL", 84c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "RIGHTSHIFTEQUAL", 85c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "DOUBLESTAREQUAL", 86c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "DOUBLESLASH", 87c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "DOUBLESLASHEQUAL", 88c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "AT", 89c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* This table must match the #defines in token.h! */ 90c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "OP", 91c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "<ERRORTOKEN>", 92c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "<N_TOKENS>" 93c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}; 94c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 95c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Create and initialize a new tok_state structure */ 96c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 97c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic struct tok_state * 98c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanieltok_new(void) 99c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 100c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel struct tok_state *tok = (struct tok_state *)PyMem_MALLOC( 101c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel sizeof(struct tok_state)); 102c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok == NULL) 103c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return NULL; 104c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL; 105c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->done = E_OK; 106c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->fp = NULL; 107c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->input = NULL; 108c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->tabsize = TABSIZE; 109c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->indent = 0; 110c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->indstack[0] = 0; 111c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->atbol = 1; 112c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->pendin = 0; 113c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->prompt = tok->nextprompt = NULL; 114c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->lineno = 0; 115c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->level = 0; 116c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->filename = NULL; 117c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->altwarning = 0; 118c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->alterror = 0; 119c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->alttabsize = 1; 120c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->altindstack[0] = 0; 121c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->decoding_state = 0; 122c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->decoding_erred = 0; 123c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->read_coding_spec = 0; 124c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->encoding = NULL; 125c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->cont_line = 0; 126c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifndef PGEN 127c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->decoding_readline = NULL; 128c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->decoding_buffer = NULL; 129c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif 130c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return tok; 131c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 132c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 133c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic char * 134c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielnew_string(const char *s, Py_ssize_t len) 135c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 136c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel char* result = (char *)PyMem_MALLOC(len + 1); 137c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (result != NULL) { 138c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel memcpy(result, s, len); 139c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel result[len] = '\0'; 140c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 141c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return result; 142c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 143c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 144c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifdef PGEN 145c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 146c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic char * 147c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanieldecoding_fgets(char *s, int size, struct tok_state *tok) 148c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 149c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return fgets(s, size, tok->fp); 150c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 151c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 152c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int 153c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanieldecoding_feof(struct tok_state *tok) 154c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 155c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return feof(tok->fp); 156c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 157c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 158c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic char * 159c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanieldecode_str(const char *str, int exec_input, struct tok_state *tok) 160c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 161c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return new_string(str, strlen(str)); 162c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 163c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 164c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#else /* PGEN */ 165c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 166c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic char * 167c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielerror_ret(struct tok_state *tok) /* XXX */ 168c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 169c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->decoding_erred = 1; 170c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */ 171c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyMem_FREE(tok->buf); 172c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->buf = NULL; 173c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return NULL; /* as if it were EOF */ 174c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 175c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 176c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 177c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic char * 178c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielget_normal_name(char *s) /* for utf-8 and latin-1 */ 179c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 180c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel char buf[13]; 181c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel int i; 182c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel for (i = 0; i < 12; i++) { 183c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel int c = s[i]; 184c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c == '\0') 185c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 186c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else if (c == '_') 187c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel buf[i] = '-'; 188c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else 189c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel buf[i] = tolower(c); 190c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 191c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel buf[i] = '\0'; 192c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (strcmp(buf, "utf-8") == 0 || 193c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel strncmp(buf, "utf-8-", 6) == 0) 194c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return "utf-8"; 195c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else if (strcmp(buf, "latin-1") == 0 || 196c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel strcmp(buf, "iso-8859-1") == 0 || 197c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel strcmp(buf, "iso-latin-1") == 0 || 198c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel strncmp(buf, "latin-1-", 8) == 0 || 199c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel strncmp(buf, "iso-8859-1-", 11) == 0 || 200c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel strncmp(buf, "iso-latin-1-", 12) == 0) 201c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return "iso-8859-1"; 202c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else 203c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return s; 204c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 205c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 206c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Return the coding spec in S, or NULL if none is found. */ 207c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 208c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic char * 209c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielget_coding_spec(const char *s, Py_ssize_t size) 210c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 211c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Py_ssize_t i; 212c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Coding spec must be in a comment, and that comment must be 213c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel * the only statement on the source code line. */ 214c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel for (i = 0; i < size - 6; i++) { 215c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (s[i] == '#') 216c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 217c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014') 218c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return NULL; 219c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 220c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel for (; i < size - 6; i++) { /* XXX inefficient search */ 221c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel const char* t = s + i; 222c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (strncmp(t, "coding", 6) == 0) { 223c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel const char* begin = NULL; 224c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel t += 6; 225c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (t[0] != ':' && t[0] != '=') 226c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel continue; 227c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel do { 228c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel t++; 229c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } while (t[0] == '\x20' || t[0] == '\t'); 230c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 231c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel begin = t; 232c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel while (Py_ISALNUM(t[0]) || 233c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel t[0] == '-' || t[0] == '_' || t[0] == '.') 234c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel t++; 235c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 236c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (begin < t) { 237c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel char* r = new_string(begin, t - begin); 238c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel char* q = get_normal_name(r); 239c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (r != q) { 240c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyMem_FREE(r); 241c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel r = new_string(q, strlen(q)); 242c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 243c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return r; 244c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 245c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 246c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 247c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return NULL; 248c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 249c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 250c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Check whether the line contains a coding spec. If it does, 251c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel invoke the set_readline function for the new encoding. 252c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel This function receives the tok_state and the new encoding. 253c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Return 1 on success, 0 on failure. */ 254c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 255c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int 256c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielcheck_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok, 257c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel int set_readline(struct tok_state *, const char *)) 258c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 259c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel char * cs; 260c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel int r = 1; 261c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 262c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->cont_line) { 263c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* It's a continuation line, so it can't be a coding spec. */ 264c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->read_coding_spec = 1; 265c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return 1; 266c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 267c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel cs = get_coding_spec(line, size); 268c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (!cs) { 269c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Py_ssize_t i; 270c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel for (i = 0; i < size; i++) { 271c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (line[i] == '#' || line[i] == '\n' || line[i] == '\r') 272c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 273c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (line[i] != ' ' && line[i] != '\t' && line[i] != '\014') { 274c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Stop checking coding spec after a line containing 275c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel * anything except a comment. */ 276c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->read_coding_spec = 1; 277c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 278c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 279c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 280c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } else { 281c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->read_coding_spec = 1; 282c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->encoding == NULL) { 283c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel assert(tok->decoding_state == 1); /* raw */ 284c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (strcmp(cs, "utf-8") == 0 || 285c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel strcmp(cs, "iso-8859-1") == 0) { 286c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->encoding = cs; 287c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } else { 288c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifdef Py_USING_UNICODE 289c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel r = set_readline(tok, cs); 290c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (r) { 291c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->encoding = cs; 292c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->decoding_state = -1; 293c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 294c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else { 295c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyErr_Format(PyExc_SyntaxError, 296c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "encoding problem: %s", cs); 297c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyMem_FREE(cs); 298c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 299c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#else 300c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Without Unicode support, we cannot 301c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel process the coding spec. Since there 302c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel won't be any Unicode literals, that 303c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel won't matter. */ 304c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyMem_FREE(cs); 305c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif 306c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 307c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } else { /* then, compare cs with BOM */ 308c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel r = (strcmp(tok->encoding, cs) == 0); 309c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (!r) 310c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyErr_Format(PyExc_SyntaxError, 311c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "encoding problem: %s with BOM", cs); 312c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyMem_FREE(cs); 313c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 314c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 315c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return r; 316c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 317c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 318c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* See whether the file starts with a BOM. If it does, 319c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel invoke the set_readline function with the new encoding. 320c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Return 1 on success, 0 on failure. */ 321c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 322c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int 323c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielcheck_bom(int get_char(struct tok_state *), 324c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel void unget_char(int, struct tok_state *), 325c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel int set_readline(struct tok_state *, const char *), 326c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel struct tok_state *tok) 327c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 328c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel int ch1, ch2, ch3; 329c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel ch1 = get_char(tok); 330c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->decoding_state = 1; 331c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (ch1 == EOF) { 332c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return 1; 333c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } else if (ch1 == 0xEF) { 334c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel ch2 = get_char(tok); 335c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (ch2 != 0xBB) { 336c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel unget_char(ch2, tok); 337c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel unget_char(ch1, tok); 338c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return 1; 339c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 340c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel ch3 = get_char(tok); 341c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (ch3 != 0xBF) { 342c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel unget_char(ch3, tok); 343c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel unget_char(ch2, tok); 344c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel unget_char(ch1, tok); 345c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return 1; 346c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 347c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#if 0 348c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Disable support for UTF-16 BOMs until a decision 349c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel is made whether this needs to be supported. */ 350c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } else if (ch1 == 0xFE) { 351c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel ch2 = get_char(tok); 352c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (ch2 != 0xFF) { 353c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel unget_char(ch2, tok); 354c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel unget_char(ch1, tok); 355c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return 1; 356c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 357c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (!set_readline(tok, "utf-16-be")) 358c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return 0; 359c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->decoding_state = -1; 360c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } else if (ch1 == 0xFF) { 361c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel ch2 = get_char(tok); 362c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (ch2 != 0xFE) { 363c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel unget_char(ch2, tok); 364c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel unget_char(ch1, tok); 365c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return 1; 366c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 367c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (!set_readline(tok, "utf-16-le")) 368c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return 0; 369c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->decoding_state = -1; 370c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif 371c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } else { 372c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel unget_char(ch1, tok); 373c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return 1; 374c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 375c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->encoding != NULL) 376c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyMem_FREE(tok->encoding); 377c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->encoding = new_string("utf-8", 5); /* resulting is in utf-8 */ 378c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return 1; 379c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 380c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 381c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Read a line of text from TOK into S, using the stream in TOK. 382c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Return NULL on failure, else S. 383c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 384c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel On entry, tok->decoding_buffer will be one of: 385c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1) NULL: need to call tok->decoding_readline to get a new line 386c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 2) PyUnicodeObject *: decoding_feof has called tok->decoding_readline and 387c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel stored the result in tok->decoding_buffer 388c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 3) PyStringObject *: previous call to fp_readl did not have enough room 389c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel (in the s buffer) to copy entire contents of the line read 390c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel by tok->decoding_readline. tok->decoding_buffer has the overflow. 391c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel In this case, fp_readl is called in a loop (with an expanded buffer) 392c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel until the buffer ends with a '\n' (or until the end of the file is 393c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel reached): see tok_nextc and its calls to decoding_fgets. 394c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel*/ 395c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 396c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic char * 397c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielfp_readl(char *s, int size, struct tok_state *tok) 398c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 399c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifndef Py_USING_UNICODE 400c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* In a non-Unicode built, this should never be called. */ 401c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Py_FatalError("fp_readl should not be called in this build."); 402c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return NULL; /* Keep compiler happy (not reachable) */ 403c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#else 404c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyObject* utf8 = NULL; 405c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyObject* buf = tok->decoding_buffer; 406c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel char *str; 407c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Py_ssize_t utf8len; 408c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 409c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Ask for one less byte so we can terminate it */ 410c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel assert(size > 0); 411c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel size--; 412c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 413c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (buf == NULL) { 414c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel buf = PyObject_CallObject(tok->decoding_readline, NULL); 415c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (buf == NULL) 416c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return error_ret(tok); 417c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (!PyUnicode_Check(buf)) { 418c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Py_DECREF(buf); 419c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyErr_SetString(PyExc_SyntaxError, 420c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "codec did not return a unicode object"); 421c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return error_ret(tok); 422c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 423c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } else { 424c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->decoding_buffer = NULL; 425c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (PyString_CheckExact(buf)) 426c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel utf8 = buf; 427c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 428c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (utf8 == NULL) { 429c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel utf8 = PyUnicode_AsUTF8String(buf); 430c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Py_DECREF(buf); 431c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (utf8 == NULL) 432c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return error_ret(tok); 433c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 434c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel str = PyString_AsString(utf8); 435c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel utf8len = PyString_GET_SIZE(utf8); 436c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (utf8len > size) { 437c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->decoding_buffer = PyString_FromStringAndSize(str+size, utf8len-size); 438c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->decoding_buffer == NULL) { 439c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Py_DECREF(utf8); 440c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return error_ret(tok); 441c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 442c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel utf8len = size; 443c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 444c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel memcpy(s, str, utf8len); 445c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel s[utf8len] = '\0'; 446c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Py_DECREF(utf8); 447c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (utf8len == 0) 448c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return NULL; /* EOF */ 449c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return s; 450c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif 451c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 452c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 453c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Set the readline function for TOK to a StreamReader's 454c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel readline function. The StreamReader is named ENC. 455c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 456c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel This function is called from check_bom and check_coding_spec. 457c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 458c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel ENC is usually identical to the future value of tok->encoding, 459c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel except for the (currently unsupported) case of UTF-16. 460c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 461c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Return 1 on success, 0 on failure. */ 462c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 463c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int 464c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielfp_setreadl(struct tok_state *tok, const char* enc) 465c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 466c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyObject *reader, *stream, *readline; 467c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 468c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* XXX: constify filename argument. */ 469c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel stream = PyFile_FromFile(tok->fp, (char*)tok->filename, "rb", NULL); 470c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (stream == NULL) 471c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return 0; 472c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 473c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel reader = PyCodec_StreamReader(enc, stream, NULL); 474c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Py_DECREF(stream); 475c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (reader == NULL) 476c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return 0; 477c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 478c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel readline = PyObject_GetAttrString(reader, "readline"); 479c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Py_DECREF(reader); 480c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (readline == NULL) 481c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return 0; 482c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 483c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->decoding_readline = readline; 484c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return 1; 485c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 486c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 487c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Fetch the next byte from TOK. */ 488c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 489c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int fp_getc(struct tok_state *tok) { 490c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return getc(tok->fp); 491c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 492c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 493c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Unfetch the last byte back into TOK. */ 494c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 495c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic void fp_ungetc(int c, struct tok_state *tok) { 496c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel ungetc(c, tok->fp); 497c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 498c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 499c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Read a line of input from TOK. Determine encoding 500c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if necessary. */ 501c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 502c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic char * 503c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanieldecoding_fgets(char *s, int size, struct tok_state *tok) 504c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 505c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel char *line = NULL; 506c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel int badchar = 0; 507c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel for (;;) { 508c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->decoding_state < 0) { 509c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* We already have a codec associated with 510c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel this input. */ 511c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel line = fp_readl(s, size, tok); 512c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 513c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } else if (tok->decoding_state > 0) { 514c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* We want a 'raw' read. */ 515c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel line = Py_UniversalNewlineFgets(s, size, 516c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->fp, NULL); 517c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 518c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } else { 519c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* We have not yet determined the encoding. 520c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel If an encoding is found, use the file-pointer 521c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel reader functions from now on. */ 522c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (!check_bom(fp_getc, fp_ungetc, fp_setreadl, tok)) 523c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return error_ret(tok); 524c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel assert(tok->decoding_state != 0); 525c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 526c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 527c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (line != NULL && tok->lineno < 2 && !tok->read_coding_spec) { 528c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (!check_coding_spec(line, strlen(line), tok, fp_setreadl)) { 529c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return error_ret(tok); 530c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 531c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 532c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifndef PGEN 533c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* The default encoding is ASCII, so make sure we don't have any 534c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel non-ASCII bytes in it. */ 535c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (line && !tok->encoding) { 536c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel unsigned char *c; 537c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel for (c = (unsigned char *)line; *c; c++) 538c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (*c > 127) { 539c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel badchar = *c; 540c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 541c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 542c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 543c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (badchar) { 544c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel char buf[500]; 545c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Need to add 1 to the line number, since this line 546c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel has not been counted, yet. */ 547c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel sprintf(buf, 548c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "Non-ASCII character '\\x%.2x' " 549c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "in file %.200s on line %i, " 550c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "but no encoding declared; " 551c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "see http://python.org/dev/peps/pep-0263/ for details", 552c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel badchar, tok->filename, tok->lineno + 1); 553c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyErr_SetString(PyExc_SyntaxError, buf); 554c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return error_ret(tok); 555c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 556c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif 557c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return line; 558c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 559c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 560c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int 561c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanieldecoding_feof(struct tok_state *tok) 562c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 563c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->decoding_state >= 0) { 564c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return feof(tok->fp); 565c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } else { 566c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyObject* buf = tok->decoding_buffer; 567c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (buf == NULL) { 568c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel buf = PyObject_CallObject(tok->decoding_readline, NULL); 569c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (buf == NULL) { 570c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel error_ret(tok); 571c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return 1; 572c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } else { 573c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->decoding_buffer = buf; 574c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 575c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 576c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return PyObject_Length(buf) == 0; 577c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 578c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 579c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 580c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Fetch a byte from TOK, using the string buffer. */ 581c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 582c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int 583c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielbuf_getc(struct tok_state *tok) { 584c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return Py_CHARMASK(*tok->str++); 585c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 586c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 587c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Unfetch a byte from TOK, using the string buffer. */ 588c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 589c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic void 590c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielbuf_ungetc(int c, struct tok_state *tok) { 591c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->str--; 592c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel assert(Py_CHARMASK(*tok->str) == c); /* tok->cur may point to read-only segment */ 593c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 594c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 595c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Set the readline function for TOK to ENC. For the string-based 596c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tokenizer, this means to just record the encoding. */ 597c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 598c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int 599c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielbuf_setreadl(struct tok_state *tok, const char* enc) { 600c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->enc = enc; 601c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return 1; 602c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 603c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 604c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Return a UTF-8 encoding Python string object from the 605c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel C byte string STR, which is encoded with ENC. */ 606c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 607c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifdef Py_USING_UNICODE 608c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic PyObject * 609c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanieltranslate_into_utf8(const char* str, const char* enc) { 610c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyObject *utf8; 611c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyObject* buf = PyUnicode_Decode(str, strlen(str), enc, NULL); 612c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (buf == NULL) 613c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return NULL; 614c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel utf8 = PyUnicode_AsUTF8String(buf); 615c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Py_DECREF(buf); 616c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return utf8; 617c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 618c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif 619c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 620c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 621c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic char * 622c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanieltranslate_newlines(const char *s, int exec_input, struct tok_state *tok) { 623c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel int skip_next_lf = 0, needed_length = strlen(s) + 2, final_length; 624c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel char *buf, *current; 625c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel char c = '\0'; 626c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel buf = PyMem_MALLOC(needed_length); 627c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (buf == NULL) { 628c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->done = E_NOMEM; 629c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return NULL; 630c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 631c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel for (current = buf; *s; s++, current++) { 632c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = *s; 633c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (skip_next_lf) { 634c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel skip_next_lf = 0; 635c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c == '\n') { 636c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = *++s; 637c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (!c) 638c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 639c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 640c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 641c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c == '\r') { 642c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel skip_next_lf = 1; 643c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = '\n'; 644c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 645c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel *current = c; 646c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 647c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* If this is exec input, add a newline to the end of the string if 648c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel there isn't one already. */ 649c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (exec_input && c != '\n') { 650c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel *current = '\n'; 651c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel current++; 652c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 653c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel *current = '\0'; 654c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel final_length = current - buf + 1; 655c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (final_length < needed_length && final_length) 656c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* should never fail */ 657c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel buf = PyMem_REALLOC(buf, final_length); 658c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return buf; 659c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 660c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 661c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Decode a byte string STR for use as the buffer of TOK. 662c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Look for encoding declarations inside STR, and record them 663c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel inside TOK. */ 664c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 665c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic const char * 666c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanieldecode_str(const char *input, int single, struct tok_state *tok) 667c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 668c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyObject* utf8 = NULL; 669c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel const char *str; 670c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel const char *s; 671c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel const char *newl[2] = {NULL, NULL}; 672c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel int lineno = 0; 673c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->input = str = translate_newlines(input, single, tok); 674c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (str == NULL) 675c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return NULL; 676c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->enc = NULL; 677c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->str = str; 678c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok)) 679c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return error_ret(tok); 680c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel str = tok->str; /* string after BOM if any */ 681c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel assert(str); 682c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifdef Py_USING_UNICODE 683c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->enc != NULL) { 684c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel utf8 = translate_into_utf8(str, tok->enc); 685c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (utf8 == NULL) 686c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return error_ret(tok); 687c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel str = PyString_AsString(utf8); 688c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 689c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif 690c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel for (s = str;; s++) { 691c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (*s == '\0') break; 692c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else if (*s == '\n') { 693c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel assert(lineno < 2); 694c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel newl[lineno] = s; 695c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel lineno++; 696c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (lineno == 2) break; 697c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 698c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 699c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->enc = NULL; 700c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* need to check line 1 and 2 separately since check_coding_spec 701c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel assumes a single line as input */ 702c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (newl[0]) { 703c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (!check_coding_spec(str, newl[0] - str, tok, buf_setreadl)) 704c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return error_ret(tok); 705c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->enc == NULL && !tok->read_coding_spec && newl[1]) { 706c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (!check_coding_spec(newl[0]+1, newl[1] - newl[0], 707c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok, buf_setreadl)) 708c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return error_ret(tok); 709c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 710c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 711c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifdef Py_USING_UNICODE 712c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->enc != NULL) { 713c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel assert(utf8 == NULL); 714c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel utf8 = translate_into_utf8(str, tok->enc); 715c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (utf8 == NULL) 716c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return error_ret(tok); 717c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel str = PyString_AsString(utf8); 718c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 719c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif 720c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel assert(tok->decoding_buffer == NULL); 721c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->decoding_buffer = utf8; /* CAUTION */ 722c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return str; 723c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 724c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 725c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif /* PGEN */ 726c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 727c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Set up tokenizer for string */ 728c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 729c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstruct tok_state * 730c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielPyTokenizer_FromString(const char *str, int exec_input) 731c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 732c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel struct tok_state *tok = tok_new(); 733c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok == NULL) 734c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return NULL; 735c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel str = (char *)decode_str(str, exec_input, tok); 736c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (str == NULL) { 737c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyTokenizer_Free(tok); 738c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return NULL; 739c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 740c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 741c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* XXX: constify members. */ 742c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->buf = tok->cur = tok->end = tok->inp = (char*)str; 743c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return tok; 744c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 745c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 746c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 747c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Set up tokenizer for file */ 748c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 749c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstruct tok_state * 750c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielPyTokenizer_FromFile(FILE *fp, char *ps1, char *ps2) 751c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 752c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel struct tok_state *tok = tok_new(); 753c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok == NULL) 754c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return NULL; 755c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if ((tok->buf = (char *)PyMem_MALLOC(BUFSIZ)) == NULL) { 756c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyTokenizer_Free(tok); 757c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return NULL; 758c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 759c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->cur = tok->inp = tok->buf; 760c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->end = tok->buf + BUFSIZ; 761c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->fp = fp; 762c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->prompt = ps1; 763c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->nextprompt = ps2; 764c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return tok; 765c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 766c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 767c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 768c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Free a tok_state structure */ 769c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 770c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielvoid 771c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielPyTokenizer_Free(struct tok_state *tok) 772c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 773c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->encoding != NULL) 774c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyMem_FREE(tok->encoding); 775c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifndef PGEN 776c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Py_XDECREF(tok->decoding_readline); 777c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Py_XDECREF(tok->decoding_buffer); 778c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif 779c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->fp != NULL && tok->buf != NULL) 780c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyMem_FREE(tok->buf); 781c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->input) 782c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyMem_FREE((char *)tok->input); 783c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyMem_FREE(tok); 784c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 785c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 786c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#if !defined(PGEN) && defined(Py_USING_UNICODE) 787c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int 788c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanieltok_stdin_decode(struct tok_state *tok, char **inp) 789c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 790c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyObject *enc, *sysstdin, *decoded, *utf8; 791c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel const char *encoding; 792c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel char *converted; 793c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 794c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (PySys_GetFile((char *)"stdin", NULL) != stdin) 795c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return 0; 796c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel sysstdin = PySys_GetObject("stdin"); 797c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (sysstdin == NULL || !PyFile_Check(sysstdin)) 798c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return 0; 799c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 800c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel enc = ((PyFileObject *)sysstdin)->f_encoding; 801c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (enc == NULL || !PyString_Check(enc)) 802c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return 0; 803c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Py_INCREF(enc); 804c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 805c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel encoding = PyString_AsString(enc); 806c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel decoded = PyUnicode_Decode(*inp, strlen(*inp), encoding, NULL); 807c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (decoded == NULL) 808c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel goto error_clear; 809c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 810c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel utf8 = PyUnicode_AsEncodedString(decoded, "utf-8", NULL); 811c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Py_DECREF(decoded); 812c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (utf8 == NULL) 813c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel goto error_clear; 814c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 815c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel assert(PyString_Check(utf8)); 816c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel converted = new_string(PyString_AS_STRING(utf8), 817c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyString_GET_SIZE(utf8)); 818c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Py_DECREF(utf8); 819c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (converted == NULL) 820c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel goto error_nomem; 821c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 822c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyMem_FREE(*inp); 823c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel *inp = converted; 824c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->encoding != NULL) 825c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyMem_FREE(tok->encoding); 826c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->encoding = new_string(encoding, strlen(encoding)); 827c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->encoding == NULL) 828c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel goto error_nomem; 829c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 830c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Py_DECREF(enc); 831c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return 0; 832c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 833c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielerror_nomem: 834c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Py_DECREF(enc); 835c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->done = E_NOMEM; 836c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return -1; 837c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 838c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielerror_clear: 839c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Py_DECREF(enc); 840c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (!PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { 841c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->done = E_ERROR; 842c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return -1; 843c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 844c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Fallback to iso-8859-1: for backward compatibility */ 845c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyErr_Clear(); 846c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return 0; 847c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 848c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif 849c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 850c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Get next char, updating state; error code goes into tok->done */ 851c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 852c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int 853c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanieltok_nextc(register struct tok_state *tok) 854c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 855c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel for (;;) { 856c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->cur != tok->inp) { 857c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return Py_CHARMASK(*tok->cur++); /* Fast path */ 858c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 859c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->done != E_OK) 860c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return EOF; 861c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->fp == NULL) { 862c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel char *end = strchr(tok->inp, '\n'); 863c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (end != NULL) 864c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel end++; 865c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else { 866c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel end = strchr(tok->inp, '\0'); 867c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (end == tok->inp) { 868c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->done = E_EOF; 869c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return EOF; 870c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 871c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 872c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->start == NULL) 873c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->buf = tok->cur; 874c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->line_start = tok->cur; 875c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->lineno++; 876c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->inp = end; 877c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return Py_CHARMASK(*tok->cur++); 878c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 879c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->prompt != NULL) { 880c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel char *newtok = PyOS_Readline(stdin, stdout, tok->prompt); 881c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->nextprompt != NULL) 882c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->prompt = tok->nextprompt; 883c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (newtok == NULL) 884c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->done = E_INTR; 885c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else if (*newtok == '\0') { 886c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyMem_FREE(newtok); 887c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->done = E_EOF; 888c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 889c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#if !defined(PGEN) && defined(Py_USING_UNICODE) 890c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else if (tok_stdin_decode(tok, &newtok) != 0) 891c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyMem_FREE(newtok); 892c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif 893c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else if (tok->start != NULL) { 894c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel size_t start = tok->start - tok->buf; 895c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel size_t oldlen = tok->cur - tok->buf; 896c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel size_t newlen = oldlen + strlen(newtok); 897c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel char *buf = tok->buf; 898c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel buf = (char *)PyMem_REALLOC(buf, newlen+1); 899c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->lineno++; 900c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (buf == NULL) { 901c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyMem_FREE(tok->buf); 902c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->buf = NULL; 903c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyMem_FREE(newtok); 904c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->done = E_NOMEM; 905c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return EOF; 906c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 907c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->buf = buf; 908c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->cur = tok->buf + oldlen; 909c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->line_start = tok->cur; 910c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel strcpy(tok->buf + oldlen, newtok); 911c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyMem_FREE(newtok); 912c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->inp = tok->buf + newlen; 913c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->end = tok->inp + 1; 914c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->start = tok->buf + start; 915c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 916c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else { 917c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->lineno++; 918c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->buf != NULL) 919c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyMem_FREE(tok->buf); 920c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->buf = newtok; 921c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->line_start = tok->buf; 922c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->cur = tok->buf; 923c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->line_start = tok->buf; 924c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->inp = strchr(tok->buf, '\0'); 925c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->end = tok->inp + 1; 926c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 927c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 928c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else { 929c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel int done = 0; 930c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Py_ssize_t cur = 0; 931c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel char *pt; 932c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->start == NULL) { 933c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->buf == NULL) { 934c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->buf = (char *) 935c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyMem_MALLOC(BUFSIZ); 936c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->buf == NULL) { 937c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->done = E_NOMEM; 938c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return EOF; 939c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 940c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->end = tok->buf + BUFSIZ; 941c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 942c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf), 943c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok) == NULL) { 944c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->done = E_EOF; 945c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel done = 1; 946c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 947c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else { 948c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->done = E_OK; 949c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->inp = strchr(tok->buf, '\0'); 950c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel done = tok->inp[-1] == '\n'; 951c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 952c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 953c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else { 954c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel cur = tok->cur - tok->buf; 955c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (decoding_feof(tok)) { 956c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->done = E_EOF; 957c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel done = 1; 958c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 959c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else 960c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->done = E_OK; 961c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 962c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->lineno++; 963c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Read until '\n' or EOF */ 964c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel while (!done) { 965c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Py_ssize_t curstart = tok->start == NULL ? -1 : 966c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->start - tok->buf; 967c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Py_ssize_t curvalid = tok->inp - tok->buf; 968c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Py_ssize_t newsize = curvalid + BUFSIZ; 969c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel char *newbuf = tok->buf; 970c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel newbuf = (char *)PyMem_REALLOC(newbuf, 971c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel newsize); 972c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (newbuf == NULL) { 973c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->done = E_NOMEM; 974c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->cur = tok->inp; 975c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return EOF; 976c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 977c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->buf = newbuf; 978c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->inp = tok->buf + curvalid; 979c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->end = tok->buf + newsize; 980c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->start = curstart < 0 ? NULL : 981c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->buf + curstart; 982c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (decoding_fgets(tok->inp, 983c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel (int)(tok->end - tok->inp), 984c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok) == NULL) { 985c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Break out early on decoding 986c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel errors, as tok->buf will be NULL 987c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel */ 988c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->decoding_erred) 989c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return EOF; 990c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Last line does not end in \n, 991c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel fake one */ 992c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel strcpy(tok->inp, "\n"); 993c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 994c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->inp = strchr(tok->inp, '\0'); 995c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel done = tok->inp[-1] == '\n'; 996c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 997c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->buf != NULL) { 998c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->cur = tok->buf + cur; 999c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->line_start = tok->cur; 1000c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* replace "\r\n" with "\n" */ 1001c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* For Mac leave the \r, giving a syntax error */ 1002c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel pt = tok->inp - 2; 1003c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (pt >= tok->buf && *pt == '\r') { 1004c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel *pt++ = '\n'; 1005c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel *pt = '\0'; 1006c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->inp = pt; 1007c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1008c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1009c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1010c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->done != E_OK) { 1011c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->prompt != NULL) 1012c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PySys_WriteStderr("\n"); 1013c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->cur = tok->inp; 1014c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return EOF; 1015c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1016c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1017c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /*NOTREACHED*/ 1018c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 1019c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1020c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1021c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Back-up one character */ 1022c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1023c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic void 1024c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanieltok_backup(register struct tok_state *tok, register int c) 1025c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 1026c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c != EOF) { 1027c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (--tok->cur < tok->buf) 1028c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Py_FatalError("tok_backup: beginning of buffer"); 1029c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (*tok->cur != c) 1030c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel *tok->cur = c; 1031c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1032c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 1033c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1034c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1035c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Return the token corresponding to a single character */ 1036c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1037c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielint 1038c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielPyToken_OneChar(int c) 1039c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 1040c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel switch (c) { 1041c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '(': return LPAR; 1042c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case ')': return RPAR; 1043c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '[': return LSQB; 1044c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case ']': return RSQB; 1045c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case ':': return COLON; 1046c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case ',': return COMMA; 1047c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case ';': return SEMI; 1048c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '+': return PLUS; 1049c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '-': return MINUS; 1050c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '*': return STAR; 1051c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '/': return SLASH; 1052c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '|': return VBAR; 1053c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '&': return AMPER; 1054c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '<': return LESS; 1055c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '>': return GREATER; 1056c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '=': return EQUAL; 1057c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '.': return DOT; 1058c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '%': return PERCENT; 1059c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '`': return BACKQUOTE; 1060c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '{': return LBRACE; 1061c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '}': return RBRACE; 1062c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '^': return CIRCUMFLEX; 1063c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '~': return TILDE; 1064c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '@': return AT; 1065c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel default: return OP; 1066c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1067c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 1068c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1069c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1070c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielint 1071c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielPyToken_TwoChars(int c1, int c2) 1072c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 1073c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel switch (c1) { 1074c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '=': 1075c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel switch (c2) { 1076c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '=': return EQEQUAL; 1077c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1078c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 1079c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '!': 1080c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel switch (c2) { 1081c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '=': return NOTEQUAL; 1082c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1083c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 1084c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '<': 1085c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel switch (c2) { 1086c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '>': return NOTEQUAL; 1087c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '=': return LESSEQUAL; 1088c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '<': return LEFTSHIFT; 1089c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1090c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 1091c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '>': 1092c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel switch (c2) { 1093c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '=': return GREATEREQUAL; 1094c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '>': return RIGHTSHIFT; 1095c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1096c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 1097c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '+': 1098c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel switch (c2) { 1099c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '=': return PLUSEQUAL; 1100c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1101c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 1102c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '-': 1103c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel switch (c2) { 1104c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '=': return MINEQUAL; 1105c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1106c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 1107c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '*': 1108c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel switch (c2) { 1109c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '*': return DOUBLESTAR; 1110c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '=': return STAREQUAL; 1111c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1112c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 1113c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '/': 1114c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel switch (c2) { 1115c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '/': return DOUBLESLASH; 1116c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '=': return SLASHEQUAL; 1117c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1118c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 1119c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '|': 1120c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel switch (c2) { 1121c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '=': return VBAREQUAL; 1122c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1123c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 1124c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '%': 1125c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel switch (c2) { 1126c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '=': return PERCENTEQUAL; 1127c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1128c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 1129c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '&': 1130c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel switch (c2) { 1131c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '=': return AMPEREQUAL; 1132c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1133c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 1134c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '^': 1135c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel switch (c2) { 1136c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '=': return CIRCUMFLEXEQUAL; 1137c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1138c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 1139c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1140c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return OP; 1141c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 1142c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1143c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielint 1144c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielPyToken_ThreeChars(int c1, int c2, int c3) 1145c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 1146c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel switch (c1) { 1147c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '<': 1148c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel switch (c2) { 1149c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '<': 1150c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel switch (c3) { 1151c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '=': 1152c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return LEFTSHIFTEQUAL; 1153c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1154c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 1155c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1156c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 1157c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '>': 1158c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel switch (c2) { 1159c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '>': 1160c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel switch (c3) { 1161c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '=': 1162c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return RIGHTSHIFTEQUAL; 1163c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1164c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 1165c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1166c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 1167c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '*': 1168c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel switch (c2) { 1169c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '*': 1170c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel switch (c3) { 1171c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '=': 1172c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return DOUBLESTAREQUAL; 1173c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1174c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 1175c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1176c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 1177c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '/': 1178c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel switch (c2) { 1179c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '/': 1180c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel switch (c3) { 1181c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '=': 1182c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return DOUBLESLASHEQUAL; 1183c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1184c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 1185c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1186c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 1187c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1188c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return OP; 1189c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 1190c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1191c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int 1192c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielindenterror(struct tok_state *tok) 1193c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 1194c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->alterror) { 1195c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->done = E_TABSPACE; 1196c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->cur = tok->inp; 1197c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return 1; 1198c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1199c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->altwarning) { 1200c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PySys_WriteStderr("%s: inconsistent use of tabs and spaces " 1201c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "in indentation\n", tok->filename); 1202c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->altwarning = 0; 1203c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1204c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return 0; 1205c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 1206c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1207c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Get next token, after space stripping etc. */ 1208c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1209c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int 1210c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanieltok_get(register struct tok_state *tok, char **p_start, char **p_end) 1211c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 1212c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel register int c; 1213c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel int blankline; 1214c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1215c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel *p_start = *p_end = NULL; 1216c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel nextline: 1217c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->start = NULL; 1218c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel blankline = 0; 1219c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1220c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Get indentation level */ 1221c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->atbol) { 1222c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel register int col = 0; 1223c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel register int altcol = 0; 1224c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->atbol = 0; 1225c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel for (;;) { 1226c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = tok_nextc(tok); 1227c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c == ' ') 1228c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel col++, altcol++; 1229c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else if (c == '\t') { 1230c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel col = (col/tok->tabsize + 1) * tok->tabsize; 1231c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel altcol = (altcol/tok->alttabsize + 1) 1232c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel * tok->alttabsize; 1233c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1234c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else if (c == '\014') /* Control-L (formfeed) */ 1235c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel col = altcol = 0; /* For Emacs users */ 1236c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else 1237c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 1238c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1239c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok_backup(tok, c); 1240c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c == '#' || c == '\n') { 1241c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Lines with only whitespace and/or comments 1242c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel shouldn't affect the indentation and are 1243c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel not passed to the parser as NEWLINE tokens, 1244c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel except *totally* empty lines in interactive 1245c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel mode, which signal the end of a command group. */ 1246c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (col == 0 && c == '\n' && tok->prompt != NULL) 1247c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel blankline = 0; /* Let it through */ 1248c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else 1249c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel blankline = 1; /* Ignore completely */ 1250c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* We can't jump back right here since we still 1251c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel may need to skip to the end of a comment */ 1252c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1253c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (!blankline && tok->level == 0) { 1254c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (col == tok->indstack[tok->indent]) { 1255c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* No change */ 1256c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (altcol != tok->altindstack[tok->indent]) { 1257c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (indenterror(tok)) 1258c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return ERRORTOKEN; 1259c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1260c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1261c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else if (col > tok->indstack[tok->indent]) { 1262c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Indent -- always one */ 1263c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->indent+1 >= MAXINDENT) { 1264c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->done = E_TOODEEP; 1265c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->cur = tok->inp; 1266c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return ERRORTOKEN; 1267c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1268c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (altcol <= tok->altindstack[tok->indent]) { 1269c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (indenterror(tok)) 1270c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return ERRORTOKEN; 1271c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1272c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->pendin++; 1273c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->indstack[++tok->indent] = col; 1274c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->altindstack[tok->indent] = altcol; 1275c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1276c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else /* col < tok->indstack[tok->indent] */ { 1277c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Dedent -- any number, must be consistent */ 1278c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel while (tok->indent > 0 && 1279c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel col < tok->indstack[tok->indent]) { 1280c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->pendin--; 1281c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->indent--; 1282c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1283c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (col != tok->indstack[tok->indent]) { 1284c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->done = E_DEDENT; 1285c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->cur = tok->inp; 1286c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return ERRORTOKEN; 1287c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1288c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (altcol != tok->altindstack[tok->indent]) { 1289c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (indenterror(tok)) 1290c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return ERRORTOKEN; 1291c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1292c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1293c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1294c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1295c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1296c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->start = tok->cur; 1297c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1298c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Return pending indents/dedents */ 1299c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->pendin != 0) { 1300c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->pendin < 0) { 1301c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->pendin++; 1302c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return DEDENT; 1303c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1304c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else { 1305c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->pendin--; 1306c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return INDENT; 1307c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1308c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1309c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1310c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel again: 1311c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->start = NULL; 1312c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Skip spaces */ 1313c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel do { 1314c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = tok_nextc(tok); 1315c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } while (c == ' ' || c == '\t' || c == '\014'); 1316c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1317c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Set start of current token */ 1318c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->start = tok->cur - 1; 1319c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1320c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Skip comment, while looking for tab-setting magic */ 1321c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c == '#') { 1322c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel static char *tabforms[] = { 1323c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "tab-width:", /* Emacs */ 1324c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel ":tabstop=", /* vim, full form */ 1325c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel ":ts=", /* vim, abbreviated form */ 1326c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "set tabsize=", /* will vi never die? */ 1327c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* more templates can be added here to support other editors */ 1328c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel }; 1329c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel char cbuf[80]; 1330c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel char *tp, **cp; 1331c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tp = cbuf; 1332c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel do { 1333c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel *tp++ = c = tok_nextc(tok); 1334c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } while (c != EOF && c != '\n' && 1335c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel (size_t)(tp - cbuf + 1) < sizeof(cbuf)); 1336c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel *tp = '\0'; 1337c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel for (cp = tabforms; 1338c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel cp < tabforms + sizeof(tabforms)/sizeof(tabforms[0]); 1339c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel cp++) { 1340c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if ((tp = strstr(cbuf, *cp))) { 1341c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel int newsize = atoi(tp + strlen(*cp)); 1342c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1343c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (newsize >= 1 && newsize <= 40) { 1344c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->tabsize = newsize; 1345c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (Py_VerboseFlag) 1346c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PySys_WriteStderr( 1347c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "Tab size set to %d\n", 1348c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel newsize); 1349c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1350c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1351c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1352c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel while (c != EOF && c != '\n') 1353c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = tok_nextc(tok); 1354c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1355c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1356c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Check for EOF and errors now */ 1357c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c == EOF) { 1358c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN; 1359c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1360c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1361c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Identifier (most frequent token!) */ 1362c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (Py_ISALPHA(c) || c == '_') { 1363c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Process r"", u"" and ur"" */ 1364c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel switch (c) { 1365c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case 'b': 1366c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case 'B': 1367c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = tok_nextc(tok); 1368c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c == 'r' || c == 'R') 1369c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = tok_nextc(tok); 1370c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c == '"' || c == '\'') 1371c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel goto letter_quote; 1372c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 1373c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case 'r': 1374c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case 'R': 1375c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = tok_nextc(tok); 1376c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c == '"' || c == '\'') 1377c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel goto letter_quote; 1378c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 1379c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case 'u': 1380c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case 'U': 1381c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = tok_nextc(tok); 1382c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c == 'r' || c == 'R') 1383c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = tok_nextc(tok); 1384c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c == '"' || c == '\'') 1385c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel goto letter_quote; 1386c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 1387c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1388c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel while (c != EOF && (Py_ISALNUM(c) || c == '_')) { 1389c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = tok_nextc(tok); 1390c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1391c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok_backup(tok, c); 1392c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel *p_start = tok->start; 1393c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel *p_end = tok->cur; 1394c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return NAME; 1395c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1396c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1397c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Newline */ 1398c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c == '\n') { 1399c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->atbol = 1; 1400c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (blankline || tok->level > 0) 1401c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel goto nextline; 1402c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel *p_start = tok->start; 1403c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel *p_end = tok->cur - 1; /* Leave '\n' out of the string */ 1404c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->cont_line = 0; 1405c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return NEWLINE; 1406c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1407c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1408c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Period or number starting with period? */ 1409c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c == '.') { 1410c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = tok_nextc(tok); 1411c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (isdigit(c)) { 1412c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel goto fraction; 1413c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1414c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else { 1415c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok_backup(tok, c); 1416c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel *p_start = tok->start; 1417c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel *p_end = tok->cur; 1418c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return DOT; 1419c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1420c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1421c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1422c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Number */ 1423c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (isdigit(c)) { 1424c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c == '0') { 1425c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Hex, octal or binary -- maybe. */ 1426c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = tok_nextc(tok); 1427c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c == '.') 1428c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel goto fraction; 1429c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifndef WITHOUT_COMPLEX 1430c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c == 'j' || c == 'J') 1431c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel goto imaginary; 1432c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif 1433c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c == 'x' || c == 'X') { 1434c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1435c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Hex */ 1436c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = tok_nextc(tok); 1437c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (!isxdigit(c)) { 1438c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->done = E_TOKEN; 1439c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok_backup(tok, c); 1440c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return ERRORTOKEN; 1441c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1442c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel do { 1443c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = tok_nextc(tok); 1444c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } while (isxdigit(c)); 1445c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1446c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else if (c == 'o' || c == 'O') { 1447c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Octal */ 1448c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = tok_nextc(tok); 1449c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c < '0' || c >= '8') { 1450c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->done = E_TOKEN; 1451c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok_backup(tok, c); 1452c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return ERRORTOKEN; 1453c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1454c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel do { 1455c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = tok_nextc(tok); 1456c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } while ('0' <= c && c < '8'); 1457c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1458c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else if (c == 'b' || c == 'B') { 1459c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Binary */ 1460c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = tok_nextc(tok); 1461c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c != '0' && c != '1') { 1462c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->done = E_TOKEN; 1463c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok_backup(tok, c); 1464c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return ERRORTOKEN; 1465c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1466c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel do { 1467c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = tok_nextc(tok); 1468c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } while (c == '0' || c == '1'); 1469c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1470c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else { 1471c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel int found_decimal = 0; 1472c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Octal; c is first char of it */ 1473c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* There's no 'isoctdigit' macro, sigh */ 1474c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel while ('0' <= c && c < '8') { 1475c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = tok_nextc(tok); 1476c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1477c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (isdigit(c)) { 1478c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel found_decimal = 1; 1479c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel do { 1480c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = tok_nextc(tok); 1481c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } while (isdigit(c)); 1482c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1483c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c == '.') 1484c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel goto fraction; 1485c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else if (c == 'e' || c == 'E') 1486c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel goto exponent; 1487c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifndef WITHOUT_COMPLEX 1488c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else if (c == 'j' || c == 'J') 1489c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel goto imaginary; 1490c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif 1491c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else if (found_decimal) { 1492c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->done = E_TOKEN; 1493c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok_backup(tok, c); 1494c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return ERRORTOKEN; 1495c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1496c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1497c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c == 'l' || c == 'L') 1498c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = tok_nextc(tok); 1499c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1500c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else { 1501c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Decimal */ 1502c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel do { 1503c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = tok_nextc(tok); 1504c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } while (isdigit(c)); 1505c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c == 'l' || c == 'L') 1506c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = tok_nextc(tok); 1507c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else { 1508c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Accept floating point numbers. */ 1509c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c == '.') { 1510c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel fraction: 1511c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Fraction */ 1512c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel do { 1513c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = tok_nextc(tok); 1514c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } while (isdigit(c)); 1515c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1516c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c == 'e' || c == 'E') { 1517c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel int e; 1518c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel exponent: 1519c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel e = c; 1520c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Exponent part */ 1521c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = tok_nextc(tok); 1522c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c == '+' || c == '-') { 1523c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = tok_nextc(tok); 1524c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (!isdigit(c)) { 1525c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->done = E_TOKEN; 1526c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok_backup(tok, c); 1527c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return ERRORTOKEN; 1528c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1529c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } else if (!isdigit(c)) { 1530c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok_backup(tok, c); 1531c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok_backup(tok, e); 1532c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel *p_start = tok->start; 1533c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel *p_end = tok->cur; 1534c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return NUMBER; 1535c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1536c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel do { 1537c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = tok_nextc(tok); 1538c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } while (isdigit(c)); 1539c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1540c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifndef WITHOUT_COMPLEX 1541c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c == 'j' || c == 'J') 1542c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Imaginary part */ 1543c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel imaginary: 1544c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = tok_nextc(tok); 1545c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif 1546c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1547c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1548c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok_backup(tok, c); 1549c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel *p_start = tok->start; 1550c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel *p_end = tok->cur; 1551c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return NUMBER; 1552c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1553c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1554c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel letter_quote: 1555c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* String */ 1556c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c == '\'' || c == '"') { 1557c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Py_ssize_t quote2 = tok->cur - tok->start + 1; 1558c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel int quote = c; 1559c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel int triple = 0; 1560c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel int tripcount = 0; 1561c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel for (;;) { 1562c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = tok_nextc(tok); 1563c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c == '\n') { 1564c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (!triple) { 1565c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->done = E_EOLS; 1566c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok_backup(tok, c); 1567c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return ERRORTOKEN; 1568c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1569c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tripcount = 0; 1570c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->cont_line = 1; /* multiline string. */ 1571c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1572c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else if (c == EOF) { 1573c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (triple) 1574c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->done = E_EOFS; 1575c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else 1576c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->done = E_EOLS; 1577c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->cur = tok->inp; 1578c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return ERRORTOKEN; 1579c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1580c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else if (c == quote) { 1581c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tripcount++; 1582c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->cur - tok->start == quote2) { 1583c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = tok_nextc(tok); 1584c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c == quote) { 1585c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel triple = 1; 1586c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tripcount = 0; 1587c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel continue; 1588c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1589c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok_backup(tok, c); 1590c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1591c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (!triple || tripcount == 3) 1592c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 1593c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1594c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else if (c == '\\') { 1595c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tripcount = 0; 1596c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = tok_nextc(tok); 1597c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c == EOF) { 1598c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->done = E_EOLS; 1599c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->cur = tok->inp; 1600c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return ERRORTOKEN; 1601c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1602c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1603c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else 1604c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tripcount = 0; 1605c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1606c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel *p_start = tok->start; 1607c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel *p_end = tok->cur; 1608c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return STRING; 1609c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1610c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1611c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Line continuation */ 1612c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c == '\\') { 1613c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel c = tok_nextc(tok); 1614c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (c != '\n') { 1615c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->done = E_LINECONT; 1616c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->cur = tok->inp; 1617c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return ERRORTOKEN; 1618c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1619c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->cont_line = 1; 1620c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel goto again; /* Read next line */ 1621c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1622c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1623c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Check for two-character token */ 1624c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel { 1625c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel int c2 = tok_nextc(tok); 1626c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel int token = PyToken_TwoChars(c, c2); 1627c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifndef PGEN 1628c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (Py_Py3kWarningFlag && token == NOTEQUAL && c == '<') { 1629c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (PyErr_WarnExplicit(PyExc_DeprecationWarning, 1630c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "<> not supported in 3.x; use !=", 1631c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->filename, tok->lineno, 1632c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel NULL, NULL)) { 1633c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return ERRORTOKEN; 1634c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1635c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1636c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif 1637c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (token != OP) { 1638c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel int c3 = tok_nextc(tok); 1639c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel int token3 = PyToken_ThreeChars(c, c2, c3); 1640c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (token3 != OP) { 1641c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel token = token3; 1642c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } else { 1643c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok_backup(tok, c3); 1644c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1645c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel *p_start = tok->start; 1646c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel *p_end = tok->cur; 1647c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return token; 1648c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1649c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok_backup(tok, c2); 1650c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1651c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1652c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Keep track of parentheses nesting level */ 1653c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel switch (c) { 1654c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '(': 1655c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '[': 1656c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '{': 1657c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->level++; 1658c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 1659c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case ')': 1660c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case ']': 1661c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel case '}': 1662c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->level--; 1663c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; 1664c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1665c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1666c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Punctuation character */ 1667c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel *p_start = tok->start; 1668c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel *p_end = tok->cur; 1669c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return PyToken_OneChar(c); 1670c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 1671c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1672c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielint 1673c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielPyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end) 1674c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 1675c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel int result = tok_get(tok, p_start, p_end); 1676c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->decoding_erred) { 1677c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel result = ERRORTOKEN; 1678c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->done = E_DECODE; 1679c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1680c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return result; 1681c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 1682c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1683c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* This function is only called from parsetok. However, it cannot live 1684c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel there, as it must be empty for PGEN, and we can check for PGEN only 1685c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel in this file. */ 1686c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1687c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#if defined(PGEN) || !defined(Py_USING_UNICODE) 1688c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielchar* 1689c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielPyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int* offset) 1690c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 1691c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return NULL; 1692c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 1693c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#else 1694c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifdef Py_USING_UNICODE 1695c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic PyObject * 1696c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanieldec_utf8(const char *enc, const char *text, size_t len) { 1697c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyObject *ret = NULL; 1698c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyObject *unicode_text = PyUnicode_DecodeUTF8(text, len, "replace"); 1699c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (unicode_text) { 1700c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel ret = PyUnicode_AsEncodedString(unicode_text, enc, "replace"); 1701c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Py_DECREF(unicode_text); 1702c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1703c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (!ret) { 1704c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyErr_Clear(); 1705c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1706c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return ret; 1707c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 1708c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielchar * 1709c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielPyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset) 1710c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 1711c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel char *text = NULL; 1712c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (tok->encoding) { 1713c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* convert source to original encondig */ 1714c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyObject *lineobj = dec_utf8(tok->encoding, tok->buf, len); 1715c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (lineobj != NULL) { 1716c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel int linelen = PyString_Size(lineobj); 1717c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel const char *line = PyString_AsString(lineobj); 1718c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel text = PyObject_MALLOC(linelen + 1); 1719c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (text != NULL && line != NULL) { 1720c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (linelen) 1721c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel strncpy(text, line, linelen); 1722c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel text[linelen] = '\0'; 1723c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1724c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Py_DECREF(lineobj); 1725c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1726c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* adjust error offset */ 1727c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (*offset > 1) { 1728c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyObject *offsetobj = dec_utf8(tok->encoding, 1729c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel tok->buf, *offset-1); 1730c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (offsetobj) { 1731c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel *offset = PyString_Size(offsetobj) + 1; 1732c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Py_DECREF(offsetobj); 1733c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1734c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1735c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1736c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1737c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 1738c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return text; 1739c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1740c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 1741c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif /* defined(Py_USING_UNICODE) */ 1742c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif 1743c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1744c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1745c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifdef Py_DEBUG 1746c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1747c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielvoid 1748c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanieltok_dump(int type, char *start, char *end) 1749c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 1750c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel printf("%s", _PyParser_TokenNames[type]); 1751c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (type == NAME || type == NUMBER || type == STRING || type == OP) 1752c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel printf("(%.*s)", (int)(end - start), start); 1753c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 1754c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 1755c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif 1756