1doctests = """
2Tests for the tokenize module.
3
4    >>> import glob, random, sys
5
6The tests can be really simple. Given a small fragment of source
code, print out a table with tokens. The ENDMARKER is omitted for
8brevity.
9
10    >>> dump_tokens("1 + 1")
11    NUMBER     '1'           (1, 0) (1, 1)
12    OP         '+'           (1, 2) (1, 3)
13    NUMBER     '1'           (1, 4) (1, 5)
14
15    >>> dump_tokens("if False:\\n"
16    ...             "    # NL\\n"
17    ...             "    True = False # NEWLINE\\n")
18    NAME       'if'          (1, 0) (1, 2)
19    NAME       'False'       (1, 3) (1, 8)
20    OP         ':'           (1, 8) (1, 9)
21    NEWLINE    '\\n'          (1, 9) (1, 10)
22    COMMENT    '# NL'        (2, 4) (2, 8)
23    NL         '\\n'          (2, 8) (2, 9)
24    INDENT     '    '        (3, 0) (3, 4)
25    NAME       'True'        (3, 4) (3, 8)
26    OP         '='           (3, 9) (3, 10)
27    NAME       'False'       (3, 11) (3, 16)
28    COMMENT    '# NEWLINE'   (3, 17) (3, 26)
29    NEWLINE    '\\n'          (3, 26) (3, 27)
30    DEDENT     ''            (4, 0) (4, 0)
31
32    >>> indent_error_file = \"""
33    ... def k(x):
34    ...     x += 2
35    ...   x += 5
36    ... \"""
37
38    >>> for tok in generate_tokens(StringIO(indent_error_file).readline): pass
39    Traceback (most recent call last):
40        ...
41    IndentationError: unindent does not match any outer indentation level
42
43Test roundtrip for `untokenize`. `f` is an open file or a string. The source
44code in f is tokenized, converted back to source code via tokenize.untokenize(),
45and tokenized again from the latter. The test fails if the second tokenization
46doesn't match the first.
47
48    >>> def roundtrip(f):
49    ...     if isinstance(f, str): f = StringIO(f)
50    ...     token_list = list(generate_tokens(f.readline))
51    ...     f.close()
52    ...     tokens1 = [tok[:2] for tok in token_list]
53    ...     new_text = untokenize(tokens1)
54    ...     readline = iter(new_text.splitlines(1)).next
55    ...     tokens2 = [tok[:2] for tok in generate_tokens(readline)]
56    ...     return tokens1 == tokens2
57    ...
58
59There are some standard formatting practices that are easy to get right.
60
61    >>> roundtrip("if x == 1:\\n"
62    ...           "    print x\\n")
63    True
64
65    >>> roundtrip("# This is a comment\\n# This also")
66    True
67
68Some people use different formatting conventions, which makes
69untokenize a little trickier. Note that this test involves trailing
70whitespace after the colon. Note that we use hex escapes to make the
two trailing blanks apparent in the expected output.
72
73    >>> roundtrip("if x == 1 : \\n"
74    ...           "  print x\\n")
75    True
76
77    >>> f = test_support.findfile("tokenize_tests" + os.extsep + "txt")
78    >>> roundtrip(open(f))
79    True
80
81    >>> roundtrip("if x == 1:\\n"
82    ...           "    # A comment by itself.\\n"
83    ...           "    print x # Comment here, too.\\n"
84    ...           "    # Another comment.\\n"
85    ...           "after_if = True\\n")
86    True
87
88    >>> roundtrip("if (x # The comments need to go in the right place\\n"
89    ...           "    == 1):\\n"
90    ...           "    print 'x==1'\\n")
91    True
92
93    >>> roundtrip("class Test: # A comment here\\n"
94    ...           "  # A comment with weird indent\\n"
95    ...           "  after_com = 5\\n"
96    ...           "  def x(m): return m*5 # a one liner\\n"
97    ...           "  def y(m): # A whitespace after the colon\\n"
98    ...           "     return y*4 # 3-space indent\\n")
99    True
100
101Some error-handling code
102
103    >>> roundtrip("try: import somemodule\\n"
104    ...           "except ImportError: # comment\\n"
105    ...           "    print 'Can not import' # comment2\\n"
106    ...           "else:   print 'Loaded'\\n")
107    True
108
109Balancing continuation
110
111    >>> roundtrip("a = (3,4, \\n"
112    ...           "5,6)\\n"
113    ...           "y = [3, 4,\\n"
114    ...           "5]\\n"
115    ...           "z = {'a': 5,\\n"
116    ...           "'b':15, 'c':True}\\n"
117    ...           "x = len(y) + 5 - a[\\n"
118    ...           "3] - a[2]\\n"
119    ...           "+ len(z) - z[\\n"
120    ...           "'b']\\n")
121    True
122
123Ordinary integers and binary operators
124
125    >>> dump_tokens("0xff <= 255")
126    NUMBER     '0xff'        (1, 0) (1, 4)
127    OP         '<='          (1, 5) (1, 7)
128    NUMBER     '255'         (1, 8) (1, 11)
129    >>> dump_tokens("0b10 <= 255")
130    NUMBER     '0b10'        (1, 0) (1, 4)
131    OP         '<='          (1, 5) (1, 7)
132    NUMBER     '255'         (1, 8) (1, 11)
133    >>> dump_tokens("0o123 <= 0123")
134    NUMBER     '0o123'       (1, 0) (1, 5)
135    OP         '<='          (1, 6) (1, 8)
136    NUMBER     '0123'        (1, 9) (1, 13)
137    >>> dump_tokens("01234567 > ~0x15")
138    NUMBER     '01234567'    (1, 0) (1, 8)
139    OP         '>'           (1, 9) (1, 10)
140    OP         '~'           (1, 11) (1, 12)
141    NUMBER     '0x15'        (1, 12) (1, 16)
142    >>> dump_tokens("2134568 != 01231515")
143    NUMBER     '2134568'     (1, 0) (1, 7)
144    OP         '!='          (1, 8) (1, 10)
145    NUMBER     '01231515'    (1, 11) (1, 19)
146    >>> dump_tokens("(-124561-1) & 0200000000")
147    OP         '('           (1, 0) (1, 1)
148    OP         '-'           (1, 1) (1, 2)
149    NUMBER     '124561'      (1, 2) (1, 8)
150    OP         '-'           (1, 8) (1, 9)
151    NUMBER     '1'           (1, 9) (1, 10)
152    OP         ')'           (1, 10) (1, 11)
153    OP         '&'           (1, 12) (1, 13)
154    NUMBER     '0200000000'  (1, 14) (1, 24)
155    >>> dump_tokens("0xdeadbeef != -1")
156    NUMBER     '0xdeadbeef'  (1, 0) (1, 10)
157    OP         '!='          (1, 11) (1, 13)
158    OP         '-'           (1, 14) (1, 15)
159    NUMBER     '1'           (1, 15) (1, 16)
160    >>> dump_tokens("0xdeadc0de & 012345")
161    NUMBER     '0xdeadc0de'  (1, 0) (1, 10)
162    OP         '&'           (1, 11) (1, 12)
163    NUMBER     '012345'      (1, 13) (1, 19)
164    >>> dump_tokens("0xFF & 0x15 | 1234")
165    NUMBER     '0xFF'        (1, 0) (1, 4)
166    OP         '&'           (1, 5) (1, 6)
167    NUMBER     '0x15'        (1, 7) (1, 11)
168    OP         '|'           (1, 12) (1, 13)
169    NUMBER     '1234'        (1, 14) (1, 18)
170
171Long integers
172
173    >>> dump_tokens("x = 0L")
174    NAME       'x'           (1, 0) (1, 1)
175    OP         '='           (1, 2) (1, 3)
176    NUMBER     '0L'          (1, 4) (1, 6)
177    >>> dump_tokens("x = 0xfffffffffff")
178    NAME       'x'           (1, 0) (1, 1)
179    OP         '='           (1, 2) (1, 3)
180    NUMBER     '0xffffffffff (1, 4) (1, 17)
181    >>> dump_tokens("x = 123141242151251616110l")
182    NAME       'x'           (1, 0) (1, 1)
183    OP         '='           (1, 2) (1, 3)
184    NUMBER     '123141242151 (1, 4) (1, 26)
185    >>> dump_tokens("x = -15921590215012591L")
186    NAME       'x'           (1, 0) (1, 1)
187    OP         '='           (1, 2) (1, 3)
188    OP         '-'           (1, 4) (1, 5)
189    NUMBER     '159215902150 (1, 5) (1, 23)
190
191Floating point numbers
192
193    >>> dump_tokens("x = 3.14159")
194    NAME       'x'           (1, 0) (1, 1)
195    OP         '='           (1, 2) (1, 3)
196    NUMBER     '3.14159'     (1, 4) (1, 11)
197    >>> dump_tokens("x = 314159.")
198    NAME       'x'           (1, 0) (1, 1)
199    OP         '='           (1, 2) (1, 3)
200    NUMBER     '314159.'     (1, 4) (1, 11)
201    >>> dump_tokens("x = .314159")
202    NAME       'x'           (1, 0) (1, 1)
203    OP         '='           (1, 2) (1, 3)
204    NUMBER     '.314159'     (1, 4) (1, 11)
205    >>> dump_tokens("x = 3e14159")
206    NAME       'x'           (1, 0) (1, 1)
207    OP         '='           (1, 2) (1, 3)
208    NUMBER     '3e14159'     (1, 4) (1, 11)
209    >>> dump_tokens("x = 3E123")
210    NAME       'x'           (1, 0) (1, 1)
211    OP         '='           (1, 2) (1, 3)
212    NUMBER     '3E123'       (1, 4) (1, 9)
213    >>> dump_tokens("x+y = 3e-1230")
214    NAME       'x'           (1, 0) (1, 1)
215    OP         '+'           (1, 1) (1, 2)
216    NAME       'y'           (1, 2) (1, 3)
217    OP         '='           (1, 4) (1, 5)
218    NUMBER     '3e-1230'     (1, 6) (1, 13)
219    >>> dump_tokens("x = 3.14e159")
220    NAME       'x'           (1, 0) (1, 1)
221    OP         '='           (1, 2) (1, 3)
222    NUMBER     '3.14e159'    (1, 4) (1, 12)
223
224String literals
225
226    >>> dump_tokens("x = ''; y = \\\"\\\"")
227    NAME       'x'           (1, 0) (1, 1)
228    OP         '='           (1, 2) (1, 3)
229    STRING     "''"          (1, 4) (1, 6)
230    OP         ';'           (1, 6) (1, 7)
231    NAME       'y'           (1, 8) (1, 9)
232    OP         '='           (1, 10) (1, 11)
233    STRING     '""'          (1, 12) (1, 14)
234    >>> dump_tokens("x = '\\\"'; y = \\\"'\\\"")
235    NAME       'x'           (1, 0) (1, 1)
236    OP         '='           (1, 2) (1, 3)
237    STRING     '\\'"\\''       (1, 4) (1, 7)
238    OP         ';'           (1, 7) (1, 8)
239    NAME       'y'           (1, 9) (1, 10)
240    OP         '='           (1, 11) (1, 12)
241    STRING     '"\\'"'        (1, 13) (1, 16)
242    >>> dump_tokens("x = \\\"doesn't \\\"shrink\\\", does it\\\"")
243    NAME       'x'           (1, 0) (1, 1)
244    OP         '='           (1, 2) (1, 3)
245    STRING     '"doesn\\'t "' (1, 4) (1, 14)
246    NAME       'shrink'      (1, 14) (1, 20)
247    STRING     '", does it"' (1, 20) (1, 31)
248    >>> dump_tokens("x = u'abc' + U'ABC'")
249    NAME       'x'           (1, 0) (1, 1)
250    OP         '='           (1, 2) (1, 3)
251    STRING     "u'abc'"      (1, 4) (1, 10)
252    OP         '+'           (1, 11) (1, 12)
253    STRING     "U'ABC'"      (1, 13) (1, 19)
254    >>> dump_tokens('y = u"ABC" + U"ABC"')
255    NAME       'y'           (1, 0) (1, 1)
256    OP         '='           (1, 2) (1, 3)
257    STRING     'u"ABC"'      (1, 4) (1, 10)
258    OP         '+'           (1, 11) (1, 12)
259    STRING     'U"ABC"'      (1, 13) (1, 19)
260    >>> dump_tokens("x = ur'abc' + Ur'ABC' + uR'ABC' + UR'ABC'")
261    NAME       'x'           (1, 0) (1, 1)
262    OP         '='           (1, 2) (1, 3)
263    STRING     "ur'abc'"     (1, 4) (1, 11)
264    OP         '+'           (1, 12) (1, 13)
265    STRING     "Ur'ABC'"     (1, 14) (1, 21)
266    OP         '+'           (1, 22) (1, 23)
267    STRING     "uR'ABC'"     (1, 24) (1, 31)
268    OP         '+'           (1, 32) (1, 33)
269    STRING     "UR'ABC'"     (1, 34) (1, 41)
270    >>> dump_tokens('y = ur"abc" + Ur"ABC" + uR"ABC" + UR"ABC"')
271    NAME       'y'           (1, 0) (1, 1)
272    OP         '='           (1, 2) (1, 3)
273    STRING     'ur"abc"'     (1, 4) (1, 11)
274    OP         '+'           (1, 12) (1, 13)
275    STRING     'Ur"ABC"'     (1, 14) (1, 21)
276    OP         '+'           (1, 22) (1, 23)
277    STRING     'uR"ABC"'     (1, 24) (1, 31)
278    OP         '+'           (1, 32) (1, 33)
279    STRING     'UR"ABC"'     (1, 34) (1, 41)
280
281    >>> dump_tokens("b'abc' + B'abc'")
282    STRING     "b'abc'"      (1, 0) (1, 6)
283    OP         '+'           (1, 7) (1, 8)
284    STRING     "B'abc'"      (1, 9) (1, 15)
285    >>> dump_tokens('b"abc" + B"abc"')
286    STRING     'b"abc"'      (1, 0) (1, 6)
287    OP         '+'           (1, 7) (1, 8)
288    STRING     'B"abc"'      (1, 9) (1, 15)
289    >>> dump_tokens("br'abc' + bR'abc' + Br'abc' + BR'abc'")
290    STRING     "br'abc'"     (1, 0) (1, 7)
291    OP         '+'           (1, 8) (1, 9)
292    STRING     "bR'abc'"     (1, 10) (1, 17)
293    OP         '+'           (1, 18) (1, 19)
294    STRING     "Br'abc'"     (1, 20) (1, 27)
295    OP         '+'           (1, 28) (1, 29)
296    STRING     "BR'abc'"     (1, 30) (1, 37)
297    >>> dump_tokens('br"abc" + bR"abc" + Br"abc" + BR"abc"')
298    STRING     'br"abc"'     (1, 0) (1, 7)
299    OP         '+'           (1, 8) (1, 9)
300    STRING     'bR"abc"'     (1, 10) (1, 17)
301    OP         '+'           (1, 18) (1, 19)
302    STRING     'Br"abc"'     (1, 20) (1, 27)
303    OP         '+'           (1, 28) (1, 29)
304    STRING     'BR"abc"'     (1, 30) (1, 37)
305
306Operators
307
308    >>> dump_tokens("def d22(a, b, c=2, d=2, *k): pass")
309    NAME       'def'         (1, 0) (1, 3)
310    NAME       'd22'         (1, 4) (1, 7)
311    OP         '('           (1, 7) (1, 8)
312    NAME       'a'           (1, 8) (1, 9)
313    OP         ','           (1, 9) (1, 10)
314    NAME       'b'           (1, 11) (1, 12)
315    OP         ','           (1, 12) (1, 13)
316    NAME       'c'           (1, 14) (1, 15)
317    OP         '='           (1, 15) (1, 16)
318    NUMBER     '2'           (1, 16) (1, 17)
319    OP         ','           (1, 17) (1, 18)
320    NAME       'd'           (1, 19) (1, 20)
321    OP         '='           (1, 20) (1, 21)
322    NUMBER     '2'           (1, 21) (1, 22)
323    OP         ','           (1, 22) (1, 23)
324    OP         '*'           (1, 24) (1, 25)
325    NAME       'k'           (1, 25) (1, 26)
326    OP         ')'           (1, 26) (1, 27)
327    OP         ':'           (1, 27) (1, 28)
328    NAME       'pass'        (1, 29) (1, 33)
329    >>> dump_tokens("def d01v_(a=1, *k, **w): pass")
330    NAME       'def'         (1, 0) (1, 3)
331    NAME       'd01v_'       (1, 4) (1, 9)
332    OP         '('           (1, 9) (1, 10)
333    NAME       'a'           (1, 10) (1, 11)
334    OP         '='           (1, 11) (1, 12)
335    NUMBER     '1'           (1, 12) (1, 13)
336    OP         ','           (1, 13) (1, 14)
337    OP         '*'           (1, 15) (1, 16)
338    NAME       'k'           (1, 16) (1, 17)
339    OP         ','           (1, 17) (1, 18)
340    OP         '**'          (1, 19) (1, 21)
341    NAME       'w'           (1, 21) (1, 22)
342    OP         ')'           (1, 22) (1, 23)
343    OP         ':'           (1, 23) (1, 24)
344    NAME       'pass'        (1, 25) (1, 29)
345
346Comparison
347
348    >>> dump_tokens("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != " +
349    ...             "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass")
350    NAME       'if'          (1, 0) (1, 2)
351    NUMBER     '1'           (1, 3) (1, 4)
352    OP         '<'           (1, 5) (1, 6)
353    NUMBER     '1'           (1, 7) (1, 8)
354    OP         '>'           (1, 9) (1, 10)
355    NUMBER     '1'           (1, 11) (1, 12)
356    OP         '=='          (1, 13) (1, 15)
357    NUMBER     '1'           (1, 16) (1, 17)
358    OP         '>='          (1, 18) (1, 20)
359    NUMBER     '5'           (1, 21) (1, 22)
360    OP         '<='          (1, 23) (1, 25)
361    NUMBER     '0x15'        (1, 26) (1, 30)
362    OP         '<='          (1, 31) (1, 33)
363    NUMBER     '0x12'        (1, 34) (1, 38)
364    OP         '!='          (1, 39) (1, 41)
365    NUMBER     '1'           (1, 42) (1, 43)
366    NAME       'and'         (1, 44) (1, 47)
367    NUMBER     '5'           (1, 48) (1, 49)
368    NAME       'in'          (1, 50) (1, 52)
369    NUMBER     '1'           (1, 53) (1, 54)
370    NAME       'not'         (1, 55) (1, 58)
371    NAME       'in'          (1, 59) (1, 61)
372    NUMBER     '1'           (1, 62) (1, 63)
373    NAME       'is'          (1, 64) (1, 66)
374    NUMBER     '1'           (1, 67) (1, 68)
375    NAME       'or'          (1, 69) (1, 71)
376    NUMBER     '5'           (1, 72) (1, 73)
377    NAME       'is'          (1, 74) (1, 76)
378    NAME       'not'         (1, 77) (1, 80)
379    NUMBER     '1'           (1, 81) (1, 82)
380    OP         ':'           (1, 82) (1, 83)
381    NAME       'pass'        (1, 84) (1, 88)
382
383Shift
384
385    >>> dump_tokens("x = 1 << 1 >> 5")
386    NAME       'x'           (1, 0) (1, 1)
387    OP         '='           (1, 2) (1, 3)
388    NUMBER     '1'           (1, 4) (1, 5)
389    OP         '<<'          (1, 6) (1, 8)
390    NUMBER     '1'           (1, 9) (1, 10)
391    OP         '>>'          (1, 11) (1, 13)
392    NUMBER     '5'           (1, 14) (1, 15)
393
394Additive
395
396    >>> dump_tokens("x = 1 - y + 15 - 01 + 0x124 + z + a[5]")
397    NAME       'x'           (1, 0) (1, 1)
398    OP         '='           (1, 2) (1, 3)
399    NUMBER     '1'           (1, 4) (1, 5)
400    OP         '-'           (1, 6) (1, 7)
401    NAME       'y'           (1, 8) (1, 9)
402    OP         '+'           (1, 10) (1, 11)
403    NUMBER     '15'          (1, 12) (1, 14)
404    OP         '-'           (1, 15) (1, 16)
405    NUMBER     '01'          (1, 17) (1, 19)
406    OP         '+'           (1, 20) (1, 21)
407    NUMBER     '0x124'       (1, 22) (1, 27)
408    OP         '+'           (1, 28) (1, 29)
409    NAME       'z'           (1, 30) (1, 31)
410    OP         '+'           (1, 32) (1, 33)
411    NAME       'a'           (1, 34) (1, 35)
412    OP         '['           (1, 35) (1, 36)
413    NUMBER     '5'           (1, 36) (1, 37)
414    OP         ']'           (1, 37) (1, 38)
415
416Multiplicative
417
418    >>> dump_tokens("x = 1//1*1/5*12%0x12")
419    NAME       'x'           (1, 0) (1, 1)
420    OP         '='           (1, 2) (1, 3)
421    NUMBER     '1'           (1, 4) (1, 5)
422    OP         '//'          (1, 5) (1, 7)
423    NUMBER     '1'           (1, 7) (1, 8)
424    OP         '*'           (1, 8) (1, 9)
425    NUMBER     '1'           (1, 9) (1, 10)
426    OP         '/'           (1, 10) (1, 11)
427    NUMBER     '5'           (1, 11) (1, 12)
428    OP         '*'           (1, 12) (1, 13)
429    NUMBER     '12'          (1, 13) (1, 15)
430    OP         '%'           (1, 15) (1, 16)
431    NUMBER     '0x12'        (1, 16) (1, 20)
432
433Unary
434
435    >>> dump_tokens("~1 ^ 1 & 1 |1 ^ -1")
436    OP         '~'           (1, 0) (1, 1)
437    NUMBER     '1'           (1, 1) (1, 2)
438    OP         '^'           (1, 3) (1, 4)
439    NUMBER     '1'           (1, 5) (1, 6)
440    OP         '&'           (1, 7) (1, 8)
441    NUMBER     '1'           (1, 9) (1, 10)
442    OP         '|'           (1, 11) (1, 12)
443    NUMBER     '1'           (1, 12) (1, 13)
444    OP         '^'           (1, 14) (1, 15)
445    OP         '-'           (1, 16) (1, 17)
446    NUMBER     '1'           (1, 17) (1, 18)
447    >>> dump_tokens("-1*1/1+1*1//1 - ---1**1")
448    OP         '-'           (1, 0) (1, 1)
449    NUMBER     '1'           (1, 1) (1, 2)
450    OP         '*'           (1, 2) (1, 3)
451    NUMBER     '1'           (1, 3) (1, 4)
452    OP         '/'           (1, 4) (1, 5)
453    NUMBER     '1'           (1, 5) (1, 6)
454    OP         '+'           (1, 6) (1, 7)
455    NUMBER     '1'           (1, 7) (1, 8)
456    OP         '*'           (1, 8) (1, 9)
457    NUMBER     '1'           (1, 9) (1, 10)
458    OP         '//'          (1, 10) (1, 12)
459    NUMBER     '1'           (1, 12) (1, 13)
460    OP         '-'           (1, 14) (1, 15)
461    OP         '-'           (1, 16) (1, 17)
462    OP         '-'           (1, 17) (1, 18)
463    OP         '-'           (1, 18) (1, 19)
464    NUMBER     '1'           (1, 19) (1, 20)
465    OP         '**'          (1, 20) (1, 22)
466    NUMBER     '1'           (1, 22) (1, 23)
467
468Selector
469
470    >>> dump_tokens("import sys, time\\nx = sys.modules['time'].time()")
471    NAME       'import'      (1, 0) (1, 6)
472    NAME       'sys'         (1, 7) (1, 10)
473    OP         ','           (1, 10) (1, 11)
474    NAME       'time'        (1, 12) (1, 16)
475    NEWLINE    '\\n'          (1, 16) (1, 17)
476    NAME       'x'           (2, 0) (2, 1)
477    OP         '='           (2, 2) (2, 3)
478    NAME       'sys'         (2, 4) (2, 7)
479    OP         '.'           (2, 7) (2, 8)
480    NAME       'modules'     (2, 8) (2, 15)
481    OP         '['           (2, 15) (2, 16)
482    STRING     "'time'"      (2, 16) (2, 22)
483    OP         ']'           (2, 22) (2, 23)
484    OP         '.'           (2, 23) (2, 24)
485    NAME       'time'        (2, 24) (2, 28)
486    OP         '('           (2, 28) (2, 29)
487    OP         ')'           (2, 29) (2, 30)
488
489Methods
490
491    >>> dump_tokens("@staticmethod\\ndef foo(x,y): pass")
492    OP         '@'           (1, 0) (1, 1)
493    NAME       'staticmethod (1, 1) (1, 13)
494    NEWLINE    '\\n'          (1, 13) (1, 14)
495    NAME       'def'         (2, 0) (2, 3)
496    NAME       'foo'         (2, 4) (2, 7)
497    OP         '('           (2, 7) (2, 8)
498    NAME       'x'           (2, 8) (2, 9)
499    OP         ','           (2, 9) (2, 10)
500    NAME       'y'           (2, 10) (2, 11)
501    OP         ')'           (2, 11) (2, 12)
502    OP         ':'           (2, 12) (2, 13)
503    NAME       'pass'        (2, 14) (2, 18)
504
505Backslash means line continuation, except for comments
506
507    >>> roundtrip("x=1+\\\\n"
508    ...           "1\\n"
509    ...           "# This is a comment\\\\n"
510    ...           "# This also\\n")
511    True
512    >>> roundtrip("# Comment \\\\nx = 0")
513    True
514
515Two string literals on the same line
516
517    >>> roundtrip("'' ''")
518    True
519
520Test roundtrip on random python modules.
Pass the '-ucpu' option to process the full directory.
522
523    >>>
524    >>> tempdir = os.path.dirname(f) or os.curdir
525    >>> testfiles = glob.glob(os.path.join(tempdir, "test*.py"))
526
527    >>> if not test_support.is_resource_enabled("cpu"):
528    ...     testfiles = random.sample(testfiles, 10)
529    ...
530    >>> for testfile in testfiles:
531    ...     if not roundtrip(open(testfile)):
532    ...         print "Roundtrip failed for file %s" % testfile
533    ...         break
534    ... else: True
535    True
536
537Evil tabs
538    >>> dump_tokens("def f():\\n\\tif x\\n        \\tpass")
539    NAME       'def'         (1, 0) (1, 3)
540    NAME       'f'           (1, 4) (1, 5)
541    OP         '('           (1, 5) (1, 6)
542    OP         ')'           (1, 6) (1, 7)
543    OP         ':'           (1, 7) (1, 8)
544    NEWLINE    '\\n'          (1, 8) (1, 9)
545    INDENT     '\\t'          (2, 0) (2, 1)
546    NAME       'if'          (2, 1) (2, 3)
547    NAME       'x'           (2, 4) (2, 5)
548    NEWLINE    '\\n'          (2, 5) (2, 6)
549    INDENT     '        \\t'  (3, 0) (3, 9)
550    NAME       'pass'        (3, 9) (3, 13)
551    DEDENT     ''            (4, 0) (4, 0)
552    DEDENT     ''            (4, 0) (4, 0)
553
554Pathological whitespace (http://bugs.python.org/issue16152)
555    >>> dump_tokens("@          ")
556    OP         '@'           (1, 0) (1, 1)
557"""
558
559
560from test import test_support
561from tokenize import (untokenize, generate_tokens, NUMBER, NAME, OP,
562                     STRING, ENDMARKER, tok_name)
563from StringIO import StringIO
564import os
565
def dump_tokens(s):
    """Tokenize the source string *s* and print one table row per token.

    Each row shows the token's type name (padded/truncated to 10
    characters), the repr of its text (padded/truncated to 13 characters)
    and its start and end positions.  Printing stops at the first
    ENDMARKER token, which is itself not printed.
    """
    readline = StringIO(s).readline
    row = "%-10.10s %-13.13r %s %s"
    for tok in generate_tokens(readline):
        kind, text, begin, finish = tok[:4]
        if kind == ENDMARKER:
            break
        # begin/finish are (row, col) tuples; wrapping them in the argument
        # tuple keeps %s from trying to unpack them.
        print(row % (tok_name[kind], text, begin, finish))
577
578# This is an example from the docs, set up as a doctest.
def decistmt(s):
    """Substitute Decimals for floats in a string of statements.

    Each NUMBER token whose text contains a '.' is replaced by the four
    tokens NAME 'Decimal', OP '(', STRING repr(text), OP ')'.  All other
    tokens are passed through as (type, text) pairs and the sequence is
    turned back into source text with untokenize().

    >>> from decimal import Decimal
    >>> s = 'print +21.3e-5*-.1234/81.7'
    >>> decistmt(s)
    "print +Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')"

    The format of the exponent is inherited from the platform C library.
    Known cases are "e-007" (Windows) and "e-07" (not Windows).  Since
    we're only showing 12 digits, and the 13th isn't close to 5, the
    rest of the output should be platform-independent.

    >>> exec(s) #doctest: +ELLIPSIS
    -3.21716034272e-0...7

    Output from calculations with Decimal should be identical across all
    platforms.

    >>> exec(decistmt(s))
    -3.217160342717258261933904529E-7
    """

    pieces = []
    for tok in generate_tokens(StringIO(s).readline):
        kind, text = tok[0], tok[1]
        if kind != NUMBER or '.' not in text:
            # Not a dotted number literal: keep the token as it is.
            pieces.append((kind, text))
            continue
        # Dotted number literal: emit Decimal('<literal text>') instead.
        pieces += [
            (NAME, 'Decimal'),
            (OP, '('),
            (STRING, repr(text)),
            (OP, ')'),
        ]
    return untokenize(pieces)
615
616
# Extra doctest sources for this module: the module-level `doctests` string
# and the `decistmt` function (whose docstring carries its own examples).
__test__ = {"doctests" : doctests, 'decistmt': decistmt}
618
619
def test_main():
    """Run this module's doctests via test_support.run_doctest."""
    # Import the module under its package name and hand the module object
    # to run_doctest.
    from test import test_tokenize
    # NOTE(review): the second positional argument (True) is presumably the
    # verbosity flag of test_support.run_doctest -- confirm against that helper.
    test_support.run_doctest(test_tokenize, True)
623
# Allow running this test file directly as a script.
if __name__ == "__main__":
    test_main()
626