tokenizer.c revision 76b30d1688a7ba1ff1b01a3eb21bf4890f71d404
1
2/* Tokenizer implementation */
3
4#include "Python.h"
5#include "pgenheaders.h"
6
7#include <ctype.h>
8#include <assert.h>
9
10#include "tokenizer.h"
11#include "errcode.h"
12
13#ifndef PGEN
14#include "unicodeobject.h"
15#include "stringobject.h"
16#include "fileobject.h"
17#include "codecs.h"
18#include "abstract.h"
19#include "pydebug.h"
20#endif /* PGEN */
21
22extern char *PyOS_Readline(FILE *, FILE *, char *);
23/* Return malloc'ed string including trailing \n;
24   empty malloc'ed string for EOF;
25   NULL if interrupted */
26
27/* Don't ever change this -- it would break the portability of Python code */
28#define TABSIZE 8
29
30/* Convert a possibly signed character to a nonnegative int */
31/* XXX This assumes characters are 8 bits wide */
32#ifdef __CHAR_UNSIGNED__
33#define Py_CHARMASK(c)		(c)
34#else
35#define Py_CHARMASK(c)		((c) & 0xff)
36#endif
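/* Illustrative note (not part of the original source): on platforms where
   plain char is signed, a high byte such as '\xc3' has a negative value, and
   passing it straight to a ctype macro or using it as an array index would be
   undefined.  Py_CHARMASK folds it back into the range 0..255: */
#if 0
	char c = '\xc3';		/* -61 when char is signed */
	int i = Py_CHARMASK(c);		/* always 0xc3 == 195 */
#endif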
37
38/* Forward */
39static struct tok_state *tok_new(void);
40static int tok_nextc(struct tok_state *tok);
41static void tok_backup(struct tok_state *tok, int c);
42
43/* Token names */
44
45char *_PyParser_TokenNames[] = {
46	"ENDMARKER",
47	"NAME",
48	"NUMBER",
49	"STRING",
50	"NEWLINE",
51	"INDENT",
52	"DEDENT",
53	"LPAR",
54	"RPAR",
55	"LSQB",
56	"RSQB",
57	"COLON",
58	"COMMA",
59	"SEMI",
60	"PLUS",
61	"MINUS",
62	"STAR",
63	"SLASH",
64	"VBAR",
65	"AMPER",
66	"LESS",
67	"GREATER",
68	"EQUAL",
69	"DOT",
70	"PERCENT",
71	"BACKQUOTE",
72	"LBRACE",
73	"RBRACE",
74	"EQEQUAL",
75	"NOTEQUAL",
76	"LESSEQUAL",
77	"GREATEREQUAL",
78	"TILDE",
79	"CIRCUMFLEX",
80	"LEFTSHIFT",
81	"RIGHTSHIFT",
82	"DOUBLESTAR",
83	"PLUSEQUAL",
84	"MINEQUAL",
85	"STAREQUAL",
86	"SLASHEQUAL",
87	"PERCENTEQUAL",
88	"AMPEREQUAL",
89	"VBAREQUAL",
90	"CIRCUMFLEXEQUAL",
91	"LEFTSHIFTEQUAL",
92	"RIGHTSHIFTEQUAL",
93	"DOUBLESTAREQUAL",
94	"DOUBLESLASH",
95	"DOUBLESLASHEQUAL",
96	"AT",
97	/* This table must match the #defines in token.h! */
98	"OP",
99	"<ERRORTOKEN>",
100	"<N_TOKENS>"
101};
102
103
104/* Create and initialize a new tok_state structure */
105
106static struct tok_state *
107tok_new(void)
108{
109	struct tok_state *tok = (struct tok_state *)PyMem_MALLOC(
110                                                sizeof(struct tok_state));
111	if (tok == NULL)
112		return NULL;
113	tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
114	tok->done = E_OK;
115	tok->fp = NULL;
116	tok->tabsize = TABSIZE;
117	tok->indent = 0;
118	tok->indstack[0] = 0;
119	tok->atbol = 1;
120	tok->pendin = 0;
121	tok->prompt = tok->nextprompt = NULL;
122	tok->lineno = 0;
123	tok->level = 0;
124	tok->filename = NULL;
125	tok->altwarning = 0;
126	tok->alterror = 0;
127	tok->alttabsize = 1;
128	tok->altindstack[0] = 0;
129	tok->decoding_state = 0;
130	tok->decoding_erred = 0;
131	tok->read_coding_spec = 0;
132	tok->encoding = NULL;
133	tok->cont_line = 0;
134#ifndef PGEN
135	tok->decoding_readline = NULL;
136	tok->decoding_buffer = NULL;
137#endif
138	return tok;
139}
140
141#ifdef PGEN
142
143static char *
144decoding_fgets(char *s, int size, struct tok_state *tok)
145{
146	return fgets(s, size, tok->fp);
147}
148
149static int
150decoding_feof(struct tok_state *tok)
151{
152	return feof(tok->fp);
153}
154
155static const char *
156decode_str(const char *str, struct tok_state *tok)
157{
158	return str;
159}
160
161#else /* PGEN */
162
163static char *
164error_ret(struct tok_state *tok) /* XXX */
165{
166	tok->decoding_erred = 1;
167	if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */
168		PyMem_FREE(tok->buf);
169	tok->buf = NULL;
170	return NULL;		/* as if it were EOF */
171}
172
173static char *
174new_string(const char *s, Py_ssize_t len)
175{
176	char* result = (char *)PyMem_MALLOC(len + 1);
177	if (result != NULL) {
178		memcpy(result, s, len);
179		result[len] = '\0';
180	}
181	return result;
182}
183
184static char *
185get_normal_name(char *s)	/* for utf-8 and latin-1 */
186{
187	char buf[13];
188	int i;
189	for (i = 0; i < 12; i++) {
190		int c = s[i];
191		if (c == '\0') break;
192		else if (c == '_') buf[i] = '-';
193		else buf[i] = tolower(c);
194	}
195	buf[i] = '\0';
196	if (strcmp(buf, "utf-8") == 0 ||
197	    strncmp(buf, "utf-8-", 6) == 0) return "utf-8";
198	else if (strcmp(buf, "latin-1") == 0 ||
199		 strcmp(buf, "iso-8859-1") == 0 ||
200		 strcmp(buf, "iso-latin-1") == 0 ||
201		 strncmp(buf, "latin-1-", 8) == 0 ||
202		 strncmp(buf, "iso-8859-1-", 11) == 0 ||
203		 strncmp(buf, "iso-latin-1-", 12) == 0) return "iso-8859-1";
204	else return s;
205}
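/* Illustrative sketch (not part of the original source): a few inputs and
   what get_normal_name() returns for them. */
#if 0
	get_normal_name("UTF_8");	/* -> "utf-8" */
	get_normal_name("Latin-1");	/* -> "iso-8859-1" */
	get_normal_name("ISO-8859-1");	/* -> "iso-8859-1" */
	get_normal_name("cp1252");	/* not recognized: returned unchanged */
#endif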
206
207/* Return the coding spec in S, or NULL if none is found.  */
208
209static char *
210get_coding_spec(const char *s, Py_ssize_t size)
211{
212	Py_ssize_t i;
213	/* Coding spec must be in a comment, and that comment must be
214         * the only statement on the source code line. */
215	 * the only statement on the source code line. */
216	for (i = 0; i < size - 6; i++) {
217			break;
218		if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014')
219			return NULL;
220	}
221	for (; i < size - 6; i++) { /* XXX inefficient search */
222		const char* t = s + i;
223		if (strncmp(t, "coding", 6) == 0) {
224			const char* begin = NULL;
225			t += 6;
226			if (t[0] != ':' && t[0] != '=')
227				continue;
228			do {
229				t++;
230			} while (t[0] == '\x20' || t[0] == '\t');
231
232			begin = t;
233			while (isalnum(Py_CHARMASK(t[0])) ||
234			       t[0] == '-' || t[0] == '_' || t[0] == '.')
235				t++;
236
237			if (begin < t) {
238				char* r = new_string(begin, t - begin);
239				char* q = get_normal_name(r);
240				if (r != q) {
241					PyMem_FREE(r);
242					r = new_string(q, strlen(q));
243				}
244				return r;
245			}
246		}
247	}
248	return NULL;
249}
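/* Illustrative sketch (not part of the original source): typical PEP 263
   lines and what get_coding_spec() extracts from them. */
#if 0
	get_coding_spec("# -*- coding: utf-8 -*-\n", 24);
					/* -> "utf-8" */
	get_coding_spec("# vim: set fileencoding=latin-1 :\n", 34);
					/* -> "iso-8859-1" (normalized) */
	get_coding_spec("x = 1  # coding: utf-8\n", 23);
					/* -> NULL: not a comment-only line */
#endif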
250
251/* Check whether the line contains a coding spec. If it does,
252   invoke the set_readline function for the new encoding.
253   This function receives the tok_state and the new encoding.
254   Return 1 on success, 0 on failure.  */
255
256static int
257check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok,
258		  int set_readline(struct tok_state *, const char *))
259{
260	char * cs;
261	int r = 1;
262
263	if (tok->cont_line)
264		/* It's a continuation line, so it can't be a coding spec. */
265		return 1;
266	cs = get_coding_spec(line, size);
267	if (cs != NULL) {
268		tok->read_coding_spec = 1;
269		if (tok->encoding == NULL) {
270			assert(tok->decoding_state == 1); /* raw */
271			if (strcmp(cs, "utf-8") == 0 ||
272			    strcmp(cs, "iso-8859-1") == 0) {
273				tok->encoding = cs;
274			} else {
275#ifdef Py_USING_UNICODE
276				r = set_readline(tok, cs);
277				if (r) {
278					tok->encoding = cs;
279					tok->decoding_state = -1;
280				}
281				else
282					PyMem_FREE(cs);
283#else
284                                /* Without Unicode support, we cannot
285                                   process the coding spec. Since there
286                                   won't be any Unicode literals, that
287                                   won't matter. */
288				PyMem_FREE(cs);
289#endif
290			}
291		} else {	/* then, compare cs with BOM */
292			r = (strcmp(tok->encoding, cs) == 0);
293			PyMem_FREE(cs);
294		}
295	}
296	if (!r) {
297		cs = tok->encoding;
298		if (!cs)
299			cs = "with BOM";
300		PyErr_Format(PyExc_SyntaxError, "encoding problem: %s", cs);
301	}
302	return r;
303}
304
305/* See whether the file starts with a BOM. If it does,
306   invoke the set_readline function with the new encoding.
307   Return 1 on success, 0 on failure.  */
308
309static int
310check_bom(int get_char(struct tok_state *),
311	  void unget_char(int, struct tok_state *),
312	  int set_readline(struct tok_state *, const char *),
313	  struct tok_state *tok)
314{
315	int ch = get_char(tok);
316	tok->decoding_state = 1;
317	if (ch == EOF) {
318		return 1;
319	} else if (ch == 0xEF) {
320		ch = get_char(tok); if (ch != 0xBB) goto NON_BOM;
321		ch = get_char(tok); if (ch != 0xBF) goto NON_BOM;
322#if 0
323	/* Disable support for UTF-16 BOMs until a decision
324	   is made whether this needs to be supported.  */
325	} else if (ch == 0xFE) {
326		ch = get_char(tok); if (ch != 0xFF) goto NON_BOM;
327		if (!set_readline(tok, "utf-16-be")) return 0;
328		tok->decoding_state = -1;
329	} else if (ch == 0xFF) {
330		ch = get_char(tok); if (ch != 0xFE) goto NON_BOM;
331		if (!set_readline(tok, "utf-16-le")) return 0;
332		tok->decoding_state = -1;
333#endif
334	} else {
335		unget_char(ch, tok);
336		return 1;
337	}
338	if (tok->encoding != NULL)
339		PyMem_FREE(tok->encoding);
340	tok->encoding = new_string("utf-8", 5);	/* the result is in utf-8 */
341	return 1;
342  NON_BOM:
343	/* any token beginning with '\xEF', '\xFE', '\xFF' is a bad token */
344	unget_char(0xFF, tok);	/* XXX this will cause a syntax error */
345	return 1;
346}
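/* Note (not part of the original source): in practice only the UTF-8 BOM,
   the byte sequence 0xEF 0xBB 0xBF, is acted upon here: it is consumed,
   tok->encoding is set to "utf-8", and reading continues with the byte that
   follows it.  The UTF-16 branches above are compiled out. */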
347
348/* Read a line of text from TOK into S, using the stream in TOK.
349   Return NULL on failure, else S.
350
351   On entry, tok->decoding_buffer will be one of:
352     1) NULL: need to call tok->decoding_readline to get a new line
353     2) PyUnicodeObject *: decoding_feof has called tok->decoding_readline and
354           stored the result in tok->decoding_buffer
355     3) PyStringObject *: previous call to fp_readl did not have enough room
356           (in the s buffer) to copy entire contents of the line read
357           by tok->decoding_readline.  tok->decoding_buffer has the overflow.
358           In this case, fp_readl is called in a loop (with an expanded buffer)
359           until the buffer ends with a '\n' (or until the end of the file is
360           reached): see tok_nextc and its calls to decoding_fgets.
361*/
362
363static char *
364fp_readl(char *s, int size, struct tok_state *tok)
365{
366#ifndef Py_USING_UNICODE
367	/* In a non-Unicode build, this should never be called. */
368	Py_FatalError("fp_readl should not be called in this build.");
369	return NULL; /* Keep compiler happy (not reachable) */
370#else
371	PyObject* utf8 = NULL;
372	PyObject* buf = tok->decoding_buffer;
373	char *str;
374	Py_ssize_t utf8len;
375
376	/* Ask for one less byte so we can terminate it */
377	assert(size > 0);
378	size--;
379
380	if (buf == NULL) {
381		buf = PyObject_CallObject(tok->decoding_readline, NULL);
382		if (buf == NULL)
383			return error_ret(tok);
384	} else {
385		tok->decoding_buffer = NULL;
386		if (PyString_CheckExact(buf))
387			utf8 = buf;
388	}
389	if (utf8 == NULL) {
390		utf8 = PyUnicode_AsUTF8String(buf);
391		Py_DECREF(buf);
392		if (utf8 == NULL)
393			return error_ret(tok);
394	}
395	str = PyString_AsString(utf8);
396	utf8len = PyString_GET_SIZE(utf8);
397	if (utf8len > size) {
398		tok->decoding_buffer = PyString_FromStringAndSize(str+size, utf8len-size);
399		if (tok->decoding_buffer == NULL) {
400			Py_DECREF(utf8);
401			return error_ret(tok);
402		}
403		utf8len = size;
404	}
405	memcpy(s, str, utf8len);
406	s[utf8len] = '\0';
407	Py_DECREF(utf8);
408	if (utf8len == 0) return NULL; /* EOF */
409	return s;
410#endif
411}
412
413/* Set the readline function for TOK to a StreamReader's
414   readline function. The StreamReader is named ENC.
415
416   This function is called from check_bom and check_coding_spec.
417
418   ENC is usually identical to the future value of tok->encoding,
419   except for the (currently unsupported) case of UTF-16.
420
421   Return 1 on success, 0 on failure. */
422
423static int
424fp_setreadl(struct tok_state *tok, const char* enc)
425{
426	PyObject *reader, *stream, *readline;
427
428	/* XXX: constify filename argument. */
429	stream = PyFile_FromFile(tok->fp, (char*)tok->filename, "rb", NULL);
430	if (stream == NULL)
431		return 0;
432
433	reader = PyCodec_StreamReader(enc, stream, NULL);
434	Py_DECREF(stream);
435	if (reader == NULL)
436		return 0;
437
438	readline = PyObject_GetAttrString(reader, "readline");
439	Py_DECREF(reader);
440	if (readline == NULL)
441		return 0;
442
443	tok->decoding_readline = readline;
444	return 1;
445}
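/* Note (not part of the original source): the net effect is roughly the
   C-level equivalent of codecs.getreader(enc)(file).readline, i.e.
   tok->decoding_readline ends up bound to a codec StreamReader's readline
   method reading from the same underlying FILE*. */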
446
447/* Fetch the next byte from TOK. */
448
449static int fp_getc(struct tok_state *tok) {
450	return getc(tok->fp);
451}
452
453/* Unfetch the last byte back into TOK.  */
454
455static void fp_ungetc(int c, struct tok_state *tok) {
456	ungetc(c, tok->fp);
457}
458
459/* Read a line of input from TOK. Determine encoding
460   if necessary.  */
461
462static char *
463decoding_fgets(char *s, int size, struct tok_state *tok)
464{
465	char *line = NULL;
466	int badchar = 0;
467	for (;;) {
468		if (tok->decoding_state < 0) {
469			/* We already have a codec associated with
470			   this input. */
471			line = fp_readl(s, size, tok);
472			break;
473		} else if (tok->decoding_state > 0) {
474			/* We want a 'raw' read. */
475			line = Py_UniversalNewlineFgets(s, size,
476							tok->fp, NULL);
477			break;
478		} else {
479			/* We have not yet determined the encoding.
480			   If an encoding is found, use the file-pointer
481			   reader functions from now on. */
482			if (!check_bom(fp_getc, fp_ungetc, fp_setreadl, tok))
483				return error_ret(tok);
484			assert(tok->decoding_state != 0);
485		}
486	}
487	if (line != NULL && tok->lineno < 2 && !tok->read_coding_spec) {
488		if (!check_coding_spec(line, strlen(line), tok, fp_setreadl)) {
489			return error_ret(tok);
490		}
491	}
492#ifndef PGEN
493	/* The default encoding is ASCII, so make sure we don't have any
494           non-ASCII bytes in it. */
495	if (line && !tok->encoding) {
496		unsigned char *c;
497		for (c = (unsigned char *)line; *c; c++)
498			if (*c > 127) {
499				badchar = *c;
500				break;
501			}
502	}
503	if (badchar) {
504		char buf[500];
505		/* Need to add 1 to the line number, since this line
506		   has not been counted yet.  */
507		sprintf(buf,
508			"Non-ASCII character '\\x%.2x' "
509			"in file %.200s on line %i, "
510			"but no encoding declared; "
511			"see http://www.python.org/peps/pep-0263.html for details",
512			badchar, tok->filename, tok->lineno + 1);
513		PyErr_SetString(PyExc_SyntaxError, buf);
514		return error_ret(tok);
515	}
516#endif
517	return line;
518}
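/* Reminder (not part of the original source): tok->decoding_state is 0 while
   the encoding is still undetermined, 1 once plain byte-level reads have been
   chosen, and -1 once a codec's readline has been installed; decoding_fgets()
   above dispatches on exactly that. */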
519
520static int
521decoding_feof(struct tok_state *tok)
522{
523	if (tok->decoding_state >= 0) {
524		return feof(tok->fp);
525	} else {
526		PyObject* buf = tok->decoding_buffer;
527		if (buf == NULL) {
528			buf = PyObject_CallObject(tok->decoding_readline, NULL);
529			if (buf == NULL) {
530				error_ret(tok);
531				return 1;
532			} else {
533				tok->decoding_buffer = buf;
534			}
535		}
536		return PyObject_Length(buf) == 0;
537	}
538}
539
540/* Fetch a byte from TOK, using the string buffer. */
541
542static int
543buf_getc(struct tok_state *tok) {
544	return Py_CHARMASK(*tok->str++);
545}
546
547/* Unfetch a byte from TOK, using the string buffer. */
548
549static void
550buf_ungetc(int c, struct tok_state *tok) {
551	tok->str--;
552	assert(Py_CHARMASK(*tok->str) == c);	/* tok->str may point to a read-only segment */
553}
554
555/* Set the readline function for TOK to ENC. For the string-based
556   tokenizer, this means to just record the encoding. */
557
558static int
559buf_setreadl(struct tok_state *tok, const char* enc) {
560	tok->enc = enc;
561	return 1;
562}
563
564/* Return a UTF-8 encoded Python string object from the
565   C byte string STR, which is encoded with ENC. */
566
567#ifdef Py_USING_UNICODE
568static PyObject *
569translate_into_utf8(const char* str, const char* enc) {
570	PyObject *utf8;
571	PyObject* buf = PyUnicode_Decode(str, strlen(str), enc, NULL);
572	if (buf == NULL)
573		return NULL;
574	utf8 = PyUnicode_AsUTF8String(buf);
575	Py_DECREF(buf);
576	return utf8;
577}
578#endif
579
580/* Decode a byte string STR for use as the buffer of TOK.
581   Look for encoding declarations inside STR, and record them
582   inside TOK.  */
583
584static const char *
585decode_str(const char *str, struct tok_state *tok)
586{
587	PyObject* utf8 = NULL;
588	const char *s;
589	int lineno = 0;
590	tok->enc = NULL;
591	tok->str = str;
592	if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok))
593		return error_ret(tok);
594	str = tok->str;		/* string after BOM if any */
595	assert(str);
596#ifdef Py_USING_UNICODE
597	if (tok->enc != NULL) {
598		utf8 = translate_into_utf8(str, tok->enc);
599		if (utf8 == NULL)
600			return error_ret(tok);
601		str = PyString_AsString(utf8);
602	}
603#endif
604	for (s = str;; s++) {
605		if (*s == '\0') break;
606		else if (*s == '\n') {
607			lineno++;
608			if (lineno == 2) break;
609		}
610	}
611	tok->enc = NULL;
612	if (!check_coding_spec(str, s - str, tok, buf_setreadl))
613		return error_ret(tok);
614#ifdef Py_USING_UNICODE
615	if (tok->enc != NULL) {
616		assert(utf8 == NULL);
617		utf8 = translate_into_utf8(str, tok->enc);
618		if (utf8 == NULL) {
619			PyErr_Format(PyExc_SyntaxError,
620				"unknown encoding: %s", tok->enc);
621			return error_ret(tok);
622		}
623		str = PyString_AsString(utf8);
624	}
625#endif
626	assert(tok->decoding_buffer == NULL);
627	tok->decoding_buffer = utf8; /* CAUTION */
628	return str;
629}
630
631#endif /* PGEN */
632
633/* Set up tokenizer for string */
634
635struct tok_state *
636PyTokenizer_FromString(const char *str)
637{
638	struct tok_state *tok = tok_new();
639	if (tok == NULL)
640		return NULL;
641	str = (char *)decode_str(str, tok);
642	if (str == NULL) {
643		PyTokenizer_Free(tok);
644		return NULL;
645	}
646
647	/* XXX: constify members. */
648	tok->buf = tok->cur = tok->end = tok->inp = (char*)str;
649	return tok;
650}
651
652
653/* Set up tokenizer for file */
654
655struct tok_state *
656PyTokenizer_FromFile(FILE *fp, char *ps1, char *ps2)
657{
658	struct tok_state *tok = tok_new();
659	if (tok == NULL)
660		return NULL;
661	if ((tok->buf = (char *)PyMem_MALLOC(BUFSIZ)) == NULL) {
662		PyTokenizer_Free(tok);
663		return NULL;
664	}
665	tok->cur = tok->inp = tok->buf;
666	tok->end = tok->buf + BUFSIZ;
667	tok->fp = fp;
668	tok->prompt = ps1;
669	tok->nextprompt = ps2;
670	return tok;
671}
672
673
674/* Free a tok_state structure */
675
676void
677PyTokenizer_Free(struct tok_state *tok)
678{
679	if (tok->encoding != NULL)
680		PyMem_FREE(tok->encoding);
681#ifndef PGEN
682	Py_XDECREF(tok->decoding_readline);
683	Py_XDECREF(tok->decoding_buffer);
684#endif
685	if (tok->fp != NULL && tok->buf != NULL)
686		PyMem_FREE(tok->buf);
687	PyMem_FREE(tok);
688}
689
690#if !defined(PGEN) && defined(Py_USING_UNICODE)
691static int
692tok_stdin_decode(struct tok_state *tok, char **inp)
693{
694	PyObject *enc, *sysstdin, *decoded, *utf8;
695	const char *encoding;
696	char *converted;
697
698	if (PySys_GetFile((char *)"stdin", NULL) != stdin)
699		return 0;
700	sysstdin = PySys_GetObject("stdin");
701	if (sysstdin == NULL || !PyFile_Check(sysstdin))
702		return 0;
703
704	enc = ((PyFileObject *)sysstdin)->f_encoding;
705	if (enc == NULL || !PyString_Check(enc))
706		return 0;
707	Py_INCREF(enc);
708
709	encoding = PyString_AsString(enc);
710	decoded = PyUnicode_Decode(*inp, strlen(*inp), encoding, NULL);
711	if (decoded == NULL)
712		goto error_clear;
713
714	utf8 = PyUnicode_AsEncodedString(decoded, "utf-8", NULL);
715	Py_DECREF(decoded);
716	if (utf8 == NULL)
717		goto error_clear;
718
719	assert(PyString_Check(utf8));
720	converted = new_string(PyString_AS_STRING(utf8),
721			       PyString_GET_SIZE(utf8));
722	Py_DECREF(utf8);
723	if (converted == NULL)
724		goto error_nomem;
725
726	PyMem_FREE(*inp);
727	*inp = converted;
728	if (tok->encoding != NULL)
729		PyMem_FREE(tok->encoding);
730	tok->encoding = new_string(encoding, strlen(encoding));
731	if (tok->encoding == NULL)
732		goto error_nomem;
733
734	Py_DECREF(enc);
735	return 0;
736
737error_nomem:
738	Py_DECREF(enc);
739	tok->done = E_NOMEM;
740	return -1;
741
742error_clear:
743	/* Fall back to iso-8859-1 for backward compatibility */
744	Py_DECREF(enc);
745	PyErr_Clear();
746	return 0;
747}
748#endif
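/* Note (not part of the original source): interactive input obtained through
   PyOS_Readline arrives in sys.stdin's encoding; when that encoding is known,
   tok_stdin_decode() above re-encodes the line to UTF-8 so the rest of the
   tokenizer only ever sees UTF-8, and records the original encoding name in
   tok->encoding. */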
749
750/* Get next char, updating state; error code goes into tok->done */
751
752static int
753tok_nextc(register struct tok_state *tok)
754{
755	for (;;) {
756		if (tok->cur != tok->inp) {
757			return Py_CHARMASK(*tok->cur++); /* Fast path */
758		}
759		if (tok->done != E_OK)
760			return EOF;
761		if (tok->fp == NULL) {
762			char *end = strchr(tok->inp, '\n');
763			if (end != NULL)
764				end++;
765			else {
766				end = strchr(tok->inp, '\0');
767				if (end == tok->inp) {
768					tok->done = E_EOF;
769					return EOF;
770				}
771			}
772			if (tok->start == NULL)
773				tok->buf = tok->cur;
774			tok->line_start = tok->cur;
775			tok->lineno++;
776			tok->inp = end;
777			return Py_CHARMASK(*tok->cur++);
778		}
779		if (tok->prompt != NULL) {
780			char *newtok = PyOS_Readline(stdin, stdout, tok->prompt);
781			if (tok->nextprompt != NULL)
782				tok->prompt = tok->nextprompt;
783			if (newtok == NULL)
784				tok->done = E_INTR;
785			else if (*newtok == '\0') {
786				PyMem_FREE(newtok);
787				tok->done = E_EOF;
788			}
789#if !defined(PGEN) && defined(Py_USING_UNICODE)
790			else if (tok_stdin_decode(tok, &newtok) != 0)
791				PyMem_FREE(newtok);
792#endif
793			else if (tok->start != NULL) {
794				size_t start = tok->start - tok->buf;
795				size_t oldlen = tok->cur - tok->buf;
796				size_t newlen = oldlen + strlen(newtok);
797				char *buf = tok->buf;
798				buf = (char *)PyMem_REALLOC(buf, newlen+1);
799				tok->lineno++;
800				if (buf == NULL) {
801					PyMem_FREE(tok->buf);
802					tok->buf = NULL;
803					PyMem_FREE(newtok);
804					tok->done = E_NOMEM;
805					return EOF;
806				}
807				tok->buf = buf;
808				tok->cur = tok->buf + oldlen;
809				tok->line_start = tok->cur;
810				strcpy(tok->buf + oldlen, newtok);
811				PyMem_FREE(newtok);
812				tok->inp = tok->buf + newlen;
813				tok->end = tok->inp + 1;
814				tok->start = tok->buf + start;
815			}
816			else {
817				tok->lineno++;
818				if (tok->buf != NULL)
819					PyMem_FREE(tok->buf);
820				tok->buf = newtok;
821				tok->line_start = tok->buf;
822				tok->cur = tok->buf;
824				tok->inp = strchr(tok->buf, '\0');
825				tok->end = tok->inp + 1;
826			}
827		}
828		else {
829			int done = 0;
830			Py_ssize_t cur = 0;
831			char *pt;
832			if (tok->start == NULL) {
833				if (tok->buf == NULL) {
834					tok->buf = (char *)
835						PyMem_MALLOC(BUFSIZ);
836					if (tok->buf == NULL) {
837						tok->done = E_NOMEM;
838						return EOF;
839					}
840					tok->end = tok->buf + BUFSIZ;
841				}
842				if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf),
843					  tok) == NULL) {
844					tok->done = E_EOF;
845					done = 1;
846				}
847				else {
848					tok->done = E_OK;
849					tok->inp = strchr(tok->buf, '\0');
850					done = tok->inp[-1] == '\n';
851				}
852			}
853			else {
854				cur = tok->cur - tok->buf;
855				if (decoding_feof(tok)) {
856					tok->done = E_EOF;
857					done = 1;
858				}
859				else
860					tok->done = E_OK;
861			}
862			tok->lineno++;
863			/* Read until '\n' or EOF */
864			while (!done) {
865				Py_ssize_t curstart = tok->start == NULL ? -1 :
866					          tok->start - tok->buf;
867				Py_ssize_t curvalid = tok->inp - tok->buf;
868				Py_ssize_t newsize = curvalid + BUFSIZ;
869				char *newbuf = tok->buf;
870				newbuf = (char *)PyMem_REALLOC(newbuf,
871							       newsize);
872				if (newbuf == NULL) {
873					tok->done = E_NOMEM;
874					tok->cur = tok->inp;
875					return EOF;
876				}
877				tok->buf = newbuf;
878				tok->inp = tok->buf + curvalid;
879				tok->end = tok->buf + newsize;
880				tok->start = curstart < 0 ? NULL :
881					     tok->buf + curstart;
882				if (decoding_fgets(tok->inp,
883					       (int)(tok->end - tok->inp),
884					       tok) == NULL) {
885					/* Break out early on decoding
886					   errors, as tok->buf will be NULL
887					 */
888					if (tok->decoding_erred)
889						return EOF;
890					/* Last line does not end in \n,
891					   fake one */
892					strcpy(tok->inp, "\n");
893				}
894				tok->inp = strchr(tok->inp, '\0');
895				done = tok->inp[-1] == '\n';
896			}
897			if (tok->buf != NULL) {
898				tok->cur = tok->buf + cur;
899				tok->line_start = tok->cur;
900				/* replace "\r\n" with "\n" */
901				/* For Mac leave the \r, giving a syntax error */
902				pt = tok->inp - 2;
903				if (pt >= tok->buf && *pt == '\r') {
904					*pt++ = '\n';
905					*pt = '\0';
906					tok->inp = pt;
907				}
908			}
909		}
910		if (tok->done != E_OK) {
911			if (tok->prompt != NULL)
912				PySys_WriteStderr("\n");
913			tok->cur = tok->inp;
914			return EOF;
915		}
916	}
917	/*NOTREACHED*/
918}
919
920
921/* Back-up one character */
922
923static void
924tok_backup(register struct tok_state *tok, register int c)
925{
926	if (c != EOF) {
927		if (--tok->cur < tok->buf)
928			Py_FatalError("tok_backup: begin of buffer");
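		/* Write the character back only if it differs: when tokenizing
		   a string, tok->cur may point into the caller's (possibly
		   read-only) buffer -- compare the assert in buf_ungetc
		   earlier in this file. */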
929		if (*tok->cur != c)
930			*tok->cur = c;
931	}
932}
933
934
935/* Return the token corresponding to a single character */
936
937int
938PyToken_OneChar(int c)
939{
940	switch (c) {
941	case '(':	return LPAR;
942	case ')':	return RPAR;
943	case '[':	return LSQB;
944	case ']':	return RSQB;
945	case ':':	return COLON;
946	case ',':	return COMMA;
947	case ';':	return SEMI;
948	case '+':	return PLUS;
949	case '-':	return MINUS;
950	case '*':	return STAR;
951	case '/':	return SLASH;
952	case '|':	return VBAR;
953	case '&':	return AMPER;
954	case '<':	return LESS;
955	case '>':	return GREATER;
956	case '=':	return EQUAL;
957	case '.':	return DOT;
958	case '%':	return PERCENT;
959	case '`':	return BACKQUOTE;
960	case '{':	return LBRACE;
961	case '}':	return RBRACE;
962	case '^':	return CIRCUMFLEX;
963	case '~':	return TILDE;
964	case '@':       return AT;
965	default:	return OP;
966	}
967}
968
969
970int
971PyToken_TwoChars(int c1, int c2)
972{
973	switch (c1) {
974	case '=':
975		switch (c2) {
976		case '=':	return EQEQUAL;
977		}
978		break;
979	case '!':
980		switch (c2) {
981		case '=':	return NOTEQUAL;
982		}
983		break;
984	case '<':
985		switch (c2) {
986		case '>':	return NOTEQUAL;
987		case '=':	return LESSEQUAL;
988		case '<':	return LEFTSHIFT;
989		}
990		break;
991	case '>':
992		switch (c2) {
993		case '=':	return GREATEREQUAL;
994		case '>':	return RIGHTSHIFT;
995		}
996		break;
997	case '+':
998		switch (c2) {
999		case '=':	return PLUSEQUAL;
1000		}
1001		break;
1002	case '-':
1003		switch (c2) {
1004		case '=':	return MINEQUAL;
1005		}
1006		break;
1007	case '*':
1008		switch (c2) {
1009		case '*':	return DOUBLESTAR;
1010		case '=':	return STAREQUAL;
1011		}
1012		break;
1013	case '/':
1014		switch (c2) {
1015		case '/':	return DOUBLESLASH;
1016		case '=':	return SLASHEQUAL;
1017		}
1018		break;
1019	case '|':
1020		switch (c2) {
1021		case '=':	return VBAREQUAL;
1022		}
1023		break;
1024	case '%':
1025		switch (c2) {
1026		case '=':	return PERCENTEQUAL;
1027		}
1028		break;
1029	case '&':
1030		switch (c2) {
1031		case '=':	return AMPEREQUAL;
1032		}
1033		break;
1034	case '^':
1035		switch (c2) {
1036		case '=':	return CIRCUMFLEXEQUAL;
1037		}
1038		break;
1039	}
1040	return OP;
1041}
1042
1043int
1044PyToken_ThreeChars(int c1, int c2, int c3)
1045{
1046	switch (c1) {
1047	case '<':
1048		switch (c2) {
1049		case '<':
1050			switch (c3) {
1051			case '=':
1052				return LEFTSHIFTEQUAL;
1053			}
1054			break;
1055		}
1056		break;
1057	case '>':
1058		switch (c2) {
1059		case '>':
1060			switch (c3) {
1061			case '=':
1062				return RIGHTSHIFTEQUAL;
1063			}
1064			break;
1065		}
1066		break;
1067	case '*':
1068		switch (c2) {
1069		case '*':
1070			switch (c3) {
1071			case '=':
1072				return DOUBLESTAREQUAL;
1073			}
1074			break;
1075		}
1076		break;
1077	case '/':
1078		switch (c2) {
1079		case '/':
1080			switch (c3) {
1081			case '=':
1082				return DOUBLESLASHEQUAL;
1083			}
1084			break;
1085		}
1086		break;
1087	}
1088	return OP;
1089}
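/* Example (not part of the original source): PyToken_TwoChars('<', '=') is
   LESSEQUAL and PyToken_ThreeChars('*', '*', '=') is DOUBLESTAREQUAL; any
   pair or triple not listed in the tables above falls through to OP, and the
   caller backs up the extra character(s) and uses the shorter token. */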
1090
1091static int
1092indenterror(struct tok_state *tok)
1093{
1094	if (tok->alterror) {
1095		tok->done = E_TABSPACE;
1096		tok->cur = tok->inp;
1097		return 1;
1098	}
1099	if (tok->altwarning) {
1100		PySys_WriteStderr("%s: inconsistent use of tabs and spaces "
1101                                  "in indentation\n", tok->filename);
1102		tok->altwarning = 0;
1103	}
1104	return 0;
1105}
1106
1107
1108/* Get next token, after space stripping etc. */
1109
1110static int
1111tok_get(register struct tok_state *tok, char **p_start, char **p_end)
1112{
1113	register int c;
1114	int blankline;
1115
1116	*p_start = *p_end = NULL;
1117  nextline:
1118	tok->start = NULL;
1119	blankline = 0;
1120
1121	/* Get indentation level */
1122	if (tok->atbol) {
1123		register int col = 0;
1124		register int altcol = 0;
1125		tok->atbol = 0;
1126		for (;;) {
1127			c = tok_nextc(tok);
1128			if (c == ' ')
1129				col++, altcol++;
1130			else if (c == '\t') {
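				/* Advance col to the next multiple of tabsize
				   (e.g. with the default tabsize of 8,
				   col 3 -> 8 and col 8 -> 16); altcol does the
				   same with the one-column "alternate" tab used
				   for the tabs-vs-spaces consistency check. */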
1131				col = (col/tok->tabsize + 1) * tok->tabsize;
1132				altcol = (altcol/tok->alttabsize + 1)
1133					* tok->alttabsize;
1134			}
1135			else if (c == '\014') /* Control-L (formfeed) */
1136				col = altcol = 0; /* For Emacs users */
1137			else
1138				break;
1139		}
1140		tok_backup(tok, c);
1141		if (c == '#' || c == '\n') {
1142			/* Lines with only whitespace and/or comments
1143			   shouldn't affect the indentation and are
1144			   not passed to the parser as NEWLINE tokens,
1145			   except *totally* empty lines in interactive
1146			   mode, which signal the end of a command group. */
1147			if (col == 0 && c == '\n' && tok->prompt != NULL)
1148				blankline = 0; /* Let it through */
1149			else
1150				blankline = 1; /* Ignore completely */
1151			/* We can't jump back right here since we still
1152			   may need to skip to the end of a comment */
1153		}
1154		if (!blankline && tok->level == 0) {
1155			if (col == tok->indstack[tok->indent]) {
1156				/* No change */
1157				if (altcol != tok->altindstack[tok->indent]) {
1158					if (indenterror(tok))
1159						return ERRORTOKEN;
1160				}
1161			}
1162			else if (col > tok->indstack[tok->indent]) {
1163				/* Indent -- always one */
1164				if (tok->indent+1 >= MAXINDENT) {
1165					tok->done = E_TOODEEP;
1166					tok->cur = tok->inp;
1167					return ERRORTOKEN;
1168				}
1169				if (altcol <= tok->altindstack[tok->indent]) {
1170					if (indenterror(tok))
1171						return ERRORTOKEN;
1172				}
1173				tok->pendin++;
1174				tok->indstack[++tok->indent] = col;
1175				tok->altindstack[tok->indent] = altcol;
1176			}
1177			else /* col < tok->indstack[tok->indent] */ {
1178				/* Dedent -- any number, must be consistent */
1179				while (tok->indent > 0 &&
1180					col < tok->indstack[tok->indent]) {
1181					tok->pendin--;
1182					tok->indent--;
1183				}
1184				if (col != tok->indstack[tok->indent]) {
1185					tok->done = E_DEDENT;
1186					tok->cur = tok->inp;
1187					return ERRORTOKEN;
1188				}
1189				if (altcol != tok->altindstack[tok->indent]) {
1190					if (indenterror(tok))
1191						return ERRORTOKEN;
1192				}
1193			}
1194		}
1195	}
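	/* Worked example (illustrative, not in the original source): for the
	   source
	       if x:
	           y
	       z
	   the line "y" pushes column 4 onto indstack (one pending INDENT) and
	   the line "z" pops back to column 0 (one pending DEDENT); both are
	   handed out one at a time by the tok->pendin code just below. */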
1196
1197	tok->start = tok->cur;
1198
1199	/* Return pending indents/dedents */
1200	if (tok->pendin != 0) {
1201		if (tok->pendin < 0) {
1202			tok->pendin++;
1203			return DEDENT;
1204		}
1205		else {
1206			tok->pendin--;
1207			return INDENT;
1208		}
1209	}
1210
1211 again:
1212	tok->start = NULL;
1213	/* Skip spaces */
1214	do {
1215		c = tok_nextc(tok);
1216	} while (c == ' ' || c == '\t' || c == '\014');
1217
1218	/* Set start of current token */
1219	tok->start = tok->cur - 1;
1220
1221	/* Skip comment, while looking for tab-setting magic */
1222	if (c == '#') {
1223		static char *tabforms[] = {
1224			"tab-width:",		/* Emacs */
1225			":tabstop=",		/* vim, full form */
1226			":ts=",			/* vim, abbreviated form */
1227			"set tabsize=",		/* will vi never die? */
1228		/* more templates can be added here to support other editors */
1229		};
1230		char cbuf[80];
1231		char *tp, **cp;
1232		tp = cbuf;
1233		do {
1234			*tp++ = c = tok_nextc(tok);
1235		} while (c != EOF && c != '\n' &&
1236			 (size_t)(tp - cbuf + 1) < sizeof(cbuf));
1237		*tp = '\0';
1238		for (cp = tabforms;
1239		     cp < tabforms + sizeof(tabforms)/sizeof(tabforms[0]);
1240		     cp++) {
1241			if ((tp = strstr(cbuf, *cp))) {
1242				int newsize = atoi(tp + strlen(*cp));
1243
1244				if (newsize >= 1 && newsize <= 40) {
1245					tok->tabsize = newsize;
1246					if (Py_VerboseFlag)
1247					    PySys_WriteStderr(
1248						"Tab size set to %d\n",
1249						newsize);
1250				}
1251			}
1252		}
1253		while (c != EOF && c != '\n')
1254			c = tok_nextc(tok);
1255	}
1256
1257	/* Check for EOF and errors now */
1258	if (c == EOF) {
1259		return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
1260	}
1261
1262	/* Identifier (most frequent token!) */
1263	if (isalpha(c) || c == '_') {
1264		/* Process r"", u"" and ur"" */
1265		switch (c) {
1266		case 'r':
1267		case 'R':
1268			c = tok_nextc(tok);
1269			if (c == '"' || c == '\'')
1270				goto letter_quote;
1271			break;
1272		case 'u':
1273		case 'U':
1274			c = tok_nextc(tok);
1275			if (c == 'r' || c == 'R')
1276				c = tok_nextc(tok);
1277			if (c == '"' || c == '\'')
1278				goto letter_quote;
1279			break;
1280		}
1281		while (isalnum(c) || c == '_') {
1282			c = tok_nextc(tok);
1283		}
1284		tok_backup(tok, c);
1285		*p_start = tok->start;
1286		*p_end = tok->cur;
1287		return NAME;
1288	}
1289
1290	/* Newline */
1291	if (c == '\n') {
1292		tok->atbol = 1;
1293		if (blankline || tok->level > 0)
1294			goto nextline;
1295		*p_start = tok->start;
1296		*p_end = tok->cur - 1; /* Leave '\n' out of the string */
1297		tok->cont_line = 0;
1298		return NEWLINE;
1299	}
1300
1301	/* Period or number starting with period? */
1302	if (c == '.') {
1303		c = tok_nextc(tok);
1304		if (isdigit(c)) {
1305			goto fraction;
1306		}
1307		else {
1308			tok_backup(tok, c);
1309			*p_start = tok->start;
1310			*p_end = tok->cur;
1311			return DOT;
1312		}
1313	}
1314
1315	/* Number */
1316	if (isdigit(c)) {
1317		if (c == '0') {
1318			/* Hex or octal -- maybe. */
1319			c = tok_nextc(tok);
1320			if (c == '.')
1321				goto fraction;
1322#ifndef WITHOUT_COMPLEX
1323			if (c == 'j' || c == 'J')
1324				goto imaginary;
1325#endif
1326			if (c == 'x' || c == 'X') {
1327				/* Hex */
1328				do {
1329					c = tok_nextc(tok);
1330				} while (isxdigit(c));
1331			}
1332			else {
1333				int found_decimal = 0;
1334				/* Octal; c is first char of it */
1335				/* There's no 'isoctdigit' macro, sigh */
1336				while ('0' <= c && c < '8') {
1337					c = tok_nextc(tok);
1338				}
1339				if (isdigit(c)) {
1340					found_decimal = 1;
1341					do {
1342						c = tok_nextc(tok);
1343					} while (isdigit(c));
1344				}
1345				if (c == '.')
1346					goto fraction;
1347				else if (c == 'e' || c == 'E')
1348					goto exponent;
1349#ifndef WITHOUT_COMPLEX
1350				else if (c == 'j' || c == 'J')
1351					goto imaginary;
1352#endif
1353				else if (found_decimal) {
1354					tok->done = E_TOKEN;
1355					tok_backup(tok, c);
1356					return ERRORTOKEN;
1357				}
1358			}
1359			if (c == 'l' || c == 'L')
1360				c = tok_nextc(tok);
1361		}
1362		else {
1363			/* Decimal */
1364			do {
1365				c = tok_nextc(tok);
1366			} while (isdigit(c));
1367			if (c == 'l' || c == 'L')
1368				c = tok_nextc(tok);
1369			else {
1370				/* Accept floating point numbers. */
1371				if (c == '.') {
1372		fraction:
1373					/* Fraction */
1374					do {
1375						c = tok_nextc(tok);
1376					} while (isdigit(c));
1377				}
1378				if (c == 'e' || c == 'E') {
1379		exponent:
1380					/* Exponent part */
1381					c = tok_nextc(tok);
1382					if (c == '+' || c == '-')
1383						c = tok_nextc(tok);
1384					if (!isdigit(c)) {
1385						tok->done = E_TOKEN;
1386						tok_backup(tok, c);
1387						return ERRORTOKEN;
1388					}
1389					do {
1390						c = tok_nextc(tok);
1391					} while (isdigit(c));
1392				}
1393#ifndef WITHOUT_COMPLEX
1394				if (c == 'j' || c == 'J')
1395					/* Imaginary part */
1396		imaginary:
1397					c = tok_nextc(tok);
1398#endif
1399			}
1400		}
1401		tok_backup(tok, c);
1402		*p_start = tok->start;
1403		*p_end = tok->cur;
1404		return NUMBER;
1405	}
1406
1407  letter_quote:
1408	/* String */
1409	if (c == '\'' || c == '"') {
1410		Py_ssize_t quote2 = tok->cur - tok->start + 1;
1411		int quote = c;
1412		int triple = 0;
1413		int tripcount = 0;
1414		for (;;) {
1415			c = tok_nextc(tok);
1416			if (c == '\n') {
1417				if (!triple) {
1418					tok->done = E_EOLS;
1419					tok_backup(tok, c);
1420					return ERRORTOKEN;
1421				}
1422				tripcount = 0;
1423				tok->cont_line = 1; /* multiline string. */
1424			}
1425			else if (c == EOF) {
1426				if (triple)
1427					tok->done = E_EOFS;
1428				else
1429					tok->done = E_EOLS;
1430				tok->cur = tok->inp;
1431				return ERRORTOKEN;
1432			}
1433			else if (c == quote) {
1434				tripcount++;
1435				if (tok->cur - tok->start == quote2) {
1436					c = tok_nextc(tok);
1437					if (c == quote) {
1438						triple = 1;
1439						tripcount = 0;
1440						continue;
1441					}
1442					tok_backup(tok, c);
1443				}
1444				if (!triple || tripcount == 3)
1445					break;
1446			}
1447			else if (c == '\\') {
1448				tripcount = 0;
1449				c = tok_nextc(tok);
1450				if (c == EOF) {
1451					tok->done = E_EOLS;
1452					tok->cur = tok->inp;
1453					return ERRORTOKEN;
1454				}
1455			}
1456			else
1457				tripcount = 0;
1458		}
1459		*p_start = tok->start;
1460		*p_end = tok->cur;
1461		return STRING;
1462	}
1463
1464	/* Line continuation */
1465	if (c == '\\') {
1466		c = tok_nextc(tok);
1467		if (c != '\n') {
1468			tok->done = E_LINECONT;
1469			tok->cur = tok->inp;
1470			return ERRORTOKEN;
1471		}
1472		tok->cont_line = 1;
1473		goto again; /* Read next line */
1474	}
1475
1476	/* Check for two-character token */
1477	{
1478		int c2 = tok_nextc(tok);
1479		int token = PyToken_TwoChars(c, c2);
1480#ifndef PGEN
1481		if (Py_Py3kWarningFlag && token == NOTEQUAL && c == '<') {
1482			if (PyErr_WarnExplicit(PyExc_DeprecationWarning,
1483					       "<> not supported in 3.x",
1484					       tok->filename, tok->lineno,
1485					       NULL, NULL)) {
1486				return ERRORTOKEN;
1487			}
1488		}
1489#endif
1490		if (token != OP) {
1491			int c3 = tok_nextc(tok);
1492			int token3 = PyToken_ThreeChars(c, c2, c3);
1493			if (token3 != OP) {
1494				token = token3;
1495			} else {
1496				tok_backup(tok, c3);
1497			}
1498			*p_start = tok->start;
1499			*p_end = tok->cur;
1500			return token;
1501		}
1502		tok_backup(tok, c2);
1503	}
1504
1505	/* Keep track of parentheses nesting level */
1506	switch (c) {
1507	case '(':
1508	case '[':
1509	case '{':
1510		tok->level++;
1511		break;
1512	case ')':
1513	case ']':
1514	case '}':
1515		tok->level--;
1516		break;
1517	}
1518
1519	/* Punctuation character */
1520	*p_start = tok->start;
1521	*p_end = tok->cur;
1522	return PyToken_OneChar(c);
1523}
1524
1525int
1526PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
1527{
1528	int result = tok_get(tok, p_start, p_end);
1529	if (tok->decoding_erred) {
1530		result = ERRORTOKEN;
1531		tok->done = E_DECODE;
1532	}
1533	return result;
1534}
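/* Minimal usage sketch (illustrative, not part of the original file): how a
   caller typically drives the tokenizer.  Error handling is omitted, the
   helper name dump_tokens is made up for illustration, and the printed names
   come from _PyParser_TokenNames above. */
#if 0
static void
dump_tokens(const char *source)
{
	char *start, *end;
	int type;
	struct tok_state *tok = PyTokenizer_FromString(source);
	if (tok == NULL)
		return;
	for (;;) {
		type = PyTokenizer_Get(tok, &start, &end);
		if (type == ERRORTOKEN || type == ENDMARKER)
			break;
		printf("%s: %.*s\n", _PyParser_TokenNames[type],
		       (int)(end - start), start);
	}
	PyTokenizer_Free(tok);
}
#endif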
1535
1536/* This function is only called from parsetok. However, it cannot live
1537   there, as it must be empty for PGEN, and we can check for PGEN only
1538   in this file. */
1539
1540#ifdef PGEN
1541char*
1542PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int* offset)
1543{
1544	return NULL;
1545}
1546#else
1547#ifdef Py_USING_UNICODE
1548static PyObject *
1549dec_utf8(const char *enc, const char *text, size_t len) {
1550	PyObject *ret = NULL;
1551	PyObject *unicode_text = PyUnicode_DecodeUTF8(text, len, "replace");
1552	if (unicode_text) {
1553		ret = PyUnicode_AsEncodedString(unicode_text, enc, "replace");
1554		Py_DECREF(unicode_text);
1555	}
1556	if (!ret) {
1557		PyErr_Clear();
1558	}
1559	return ret;
1560}
1561
1562char *
1563PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset)
1564{
1565	char *text = NULL;
1566	if (tok->encoding) {
1567		/* convert source to original encoding */
1568		PyObject *lineobj = dec_utf8(tok->encoding, tok->buf, len);
1569		if (lineobj != NULL) {
1570			int linelen = PyString_Size(lineobj);
1571			const char *line = PyString_AsString(lineobj);
1572			text = PyObject_MALLOC(linelen + 1);
1573			if (text != NULL && line != NULL) {
1574				if (linelen)
1575					strncpy(text, line, linelen);
1576				text[linelen] = '\0';
1577			}
1578			Py_DECREF(lineobj);
1579
1580			/* adjust error offset */
1581			if (*offset > 1) {
1582				PyObject *offsetobj = dec_utf8(tok->encoding,
1583							       tok->buf, *offset-1);
1584				if (offsetobj) {
1585					*offset = PyString_Size(offsetobj) + 1;
1586					Py_DECREF(offsetobj);
1587				}
1588			}
1589
1590		}
1591	}
1592	return text;
1593
1594}
1595#endif /* defined(Py_USING_UNICODE) */
1596#endif
1597
1598
1599#ifdef Py_DEBUG
1600
1601void
1602tok_dump(int type, char *start, char *end)
1603{
1604	printf("%s", _PyParser_TokenNames[type]);
1605	if (type == NAME || type == NUMBER || type == STRING || type == OP)
1606		printf("(%.*s)", (int)(end - start), start);
1607}
1608
1609#endif
1610