test_textwrap.py revision ec1a0b3abe08fb9a3952e8f48231cda1f6d9b1f3
1#
2# Test suite for the textwrap module.
3#
4# Original tests written by Greg Ward <gward@python.net>.
5# Converted to PyUnit by Peter Hansen <peter@engcorp.com>.
6# Currently maintained by Greg Ward.
7#
8# $Id$
9#
10
11import unittest
12from test import test_support
13
14from textwrap import TextWrapper, wrap, fill, dedent
15
16
class BaseTestCase(unittest.TestCase):
    '''Parent class with utility methods for textwrap tests.

    check_split() reads self.wrapper, so a subclass that uses it must
    assign a TextWrapper to that attribute first (e.g. in setUp()).
    '''

    def show(self, textin):
        '''Render 'textin' for inclusion in an assertion failure message.

        A list is rendered one item per line as "  <index>: <repr>", with
        no trailing newline.  Any other value (strings included) is
        rendered as two spaces, its repr() and a trailing newline.
        '''
        if isinstance(textin, list):
            return '\n'.join(["  %d: %r" % (i, item)
                              for i, item in enumerate(textin)])
        # Strings take this path too.  Falling back to repr() for every
        # other type means an unexpected value (None, a tuple, ...) is
        # reported by check() as an ordinary assertion failure instead
        # of blowing up here with an UnboundLocalError.
        return "  %s\n" % repr(textin)


    def check(self, result, expect):
        '''Assert that 'result' equals 'expect'.

        On failure the message shows both values as rendered by show().
        '''
        self.assertEqual(result, expect,
            'expected:\n%s\nbut got:\n%s' % (
                self.show(expect), self.show(result)))

    def check_wrap(self, text, width, expect, **kwargs):
        '''Assert that wrap(text, width, **kwargs) returns 'expect'.'''
        result = wrap(text, width, **kwargs)
        self.check(result, expect)

    def check_split(self, text, expect):
        '''Assert that self.wrapper._split(text) returns 'expect'.'''
        result = self.wrapper._split(text)
        self.assertEqual(result, expect,
                         "\nexpected %r\n"
                         "but got  %r" % (expect, result))
45
46
class WrapTestCase(BaseTestCase):
    '''Tests for wrap(), fill() and TextWrapper's chunk splitting.'''

    def setUp(self):
        # Wrapper used by check_split() and by the tests that call
        # wrap()/fill()/_split() on an instance directly.
        self.wrapper = TextWrapper(width=45)

    def test_simple(self):
        # Nothing fancy: words, spaces and a little punctuation.
        text = "Hello there, how are you this fine day?  I'm glad to hear it!"

        at_12 = ["Hello there,",
                 "how are you",
                 "this fine",
                 "day?  I'm",
                 "glad to hear",
                 "it!"]
        at_42 = ["Hello there, how are you this fine day?",
                 "I'm glad to hear it!"]

        self.check_wrap(text, 12, at_12)
        self.check_wrap(text, 42, at_42)
        # At this width the whole text is expected back as one line.
        self.check_wrap(text, 80, [text])

    def test_empty_string(self):
        # Wrapping the empty string gives an empty list, with or
        # without drop_whitespace.
        for options in ({}, {'drop_whitespace': False}):
            self.check_wrap("", 6, [], **options)

    def test_empty_string_with_initial_indent(self):
        # The empty string must not pick up the initial indent.
        for options in ({}, {'drop_whitespace': False}):
            self.check_wrap("", 6, [], initial_indent="++", **options)

    def test_whitespace(self):
        # Whitespace munging plus end-of-sentence detection.
        text = """\
This is a paragraph that already has
line breaks.  But some of its lines are much longer than the others,
so it needs to be wrapped.
Some lines are \ttabbed too.
What a mess!
"""

        lines = ["This is a paragraph that already has line",
                 "breaks.  But some of its lines are much",
                 "longer than the others, so it needs to be",
                 "wrapped.  Some lines are  tabbed too.  What a",
                 "mess!"]

        wrapper = TextWrapper(45, fix_sentence_endings=True)
        self.check(wrapper.wrap(text), lines)
        # fill() is expected to be wrap() joined with newlines.
        self.check(wrapper.fill(text), '\n'.join(lines))

    def test_fix_sentence_endings(self):
        wrapper = TextWrapper(60, fix_sentence_endings=True)

        # Each entry is (width, input text, expected lines); the cases
        # run in order against the one wrapper above.
        cases = [
            # SF #847346: fix_sentence_endings=True must do the right
            # thing even on input short enough not to need wrapping.
            (60, "A short line. Note the single space.",
             ["A short line.  Note the single space."]),

            # Hairy end cases that _fix_sentence_endings() is supposed
            # to handle (the easy stuff is in test_whitespace() above).
            (60, "Well, Doctor? What do you think?",
             ["Well, Doctor?  What do you think?"]),
            (60, "Well, Doctor?\nWhat do you think?",
             ["Well, Doctor?  What do you think?"]),
            (60, 'I say, chaps! Anyone for "tennis?"\nHmmph!',
             ['I say, chaps!  Anyone for "tennis?"  Hmmph!']),
            (20, 'I say, chaps! Anyone for "tennis?"\nHmmph!',
             ['I say, chaps!', 'Anyone for "tennis?"', 'Hmmph!']),
            (20, 'And she said, "Go to hell!"\nCan you believe that?',
             ['And she said, "Go to',
              'hell!"  Can you',
              'believe that?']),
            (60, 'And she said, "Go to hell!"\nCan you believe that?',
             ['And she said, "Go to hell!"  Can you believe that?']),
            (60, 'File stdio.h is nice.',
             ['File stdio.h is nice.']),
        ]
        for width, text, lines in cases:
            wrapper.width = width
            self.check(wrapper.wrap(text), lines)

    def test_wrap_short(self):
        # Wrapping can also make short lines longer.
        text = "This is a\nshort paragraph."

        two_lines = ["This is a short",
                     "paragraph."]
        self.check_wrap(text, 20, two_lines)
        self.check_wrap(text, 40, ["This is a short paragraph."])

    def test_wrap_short_1line(self):
        # End cases: a single line that already fits.
        text = "This is a short line."

        self.check_wrap(text, 30, ["This is a short line."])
        numbered = ["(1) This is a short line."]
        self.check_wrap(text, 30, numbered, initial_indent="(1) ")

    def test_hyphenated(self):
        # Hyphenated words may be broken after a hyphen.
        text = ("this-is-a-useful-feature-for-"
                "reformatting-posts-from-tim-peters'ly")

        early_break = ["this-is-a-useful-feature-for-",
                       "reformatting-posts-from-tim-peters'ly"]
        for width in (40, 41):
            self.check_wrap(text, width, early_break)

        late_break = ["this-is-a-useful-feature-for-reformatting-",
                      "posts-from-tim-peters'ly"]
        self.check_wrap(text, 42, late_break)

    def test_hyphenated_numbers(self):
        # Hyphenated numbers (eg. dates) must not be broken like words.
        text = ("Python 1.0.0 was released on 1994-01-26.  Python 1.0.1 was\n"
                "released on 1994-02-15.")

        at_35 = ['Python 1.0.0 was released on',
                 '1994-01-26.  Python 1.0.1 was',
                 'released on 1994-02-15.']
        at_40 = ['Python 1.0.0 was released on 1994-01-26.',
                 'Python 1.0.1 was released on 1994-02-15.']
        self.check_wrap(text, 35, at_35)
        self.check_wrap(text, 40, at_40)

        text = "I do all my shopping at 7-11."
        split_before_number = ["I do all my shopping at",
                               "7-11."]
        for width in (25, 27):
            self.check_wrap(text, width, split_before_number)
        self.check_wrap(text, 29, ["I do all my shopping at 7-11."])

    def test_em_dash(self):
        # Text containing em-dashes.
        text = "Em-dashes should be written -- thus."
        self.check_wrap(text, 25,
                        ["Em-dashes should be",
                         "written -- thus."])

        # Probe the boundaries of the properly written em-dash,
        # ie. " -- ".
        self.check_wrap(text, 29,
                        ["Em-dashes should be written",
                         "-- thus."])
        dash_on_first_line = ["Em-dashes should be written --",
                              "thus."]
        for width in (30, 35):
            self.check_wrap(text, width, dash_on_first_line)
        self.check_wrap(text, 36,
                        ["Em-dashes should be written -- thus."])

        # The improperly written em-dash is handled too, because
        # it's adjacent to non-whitespace on both sides.  Each entry
        # pairs the widths to try with the lines expected at them.
        text = "You can also do--this or even---this."
        by_width = [
            ((15, 16), ["You can also do",
                        "--this or even",
                        "---this."]),
            ((17, 19), ["You can also do--",
                        "this or even---",
                        "this."]),
            ((29, 31), ["You can also do--this or even",
                        "---this."]),
            ((32, 35), ["You can also do--this or even---",
                        "this."]),
        ]
        for widths, lines in by_width:
            for width in widths:
                self.check_wrap(text, width, lines)

        # All of the above behaviour could be deduced by probing the
        # _split() method.
        text = "Here's an -- em-dash and--here's another---and another!"
        chunks = ["Here's", " ", "an", " ", "--", " ", "em-", "dash", " ",
                  "and", "--", "here's", " ", "another", "---",
                  "and", " ", "another!"]
        self.check_split(text, chunks)

        text = "and then--bam!--he was gone"
        chunks = ["and", " ", "then", "--", "bam!", "--",
                  "he", " ", "was", " ", "gone"]
        self.check_split(text, chunks)

    def test_unix_options(self):
        # Unix-style command-line options must be wrapped correctly.
        # Both Optik (OptionParser) and Docutils rely on this behaviour!
        text = "You should use the -n option, or --dry-run in its long form."

        # Each entry pairs the widths to try with the expected lines.
        by_width = [
            ((20,), ["You should use the",
                     "-n option, or --dry-",
                     "run in its long",
                     "form."]),
            ((21,), ["You should use the -n",
                     "option, or --dry-run",
                     "in its long form."]),
            ((32, 34, 35, 38), ["You should use the -n option, or",
                                "--dry-run in its long form."]),
            ((39, 41), ["You should use the -n option, or --dry-",
                        "run in its long form."]),
            ((42,), ["You should use the -n option, or --dry-run",
                     "in its long form."]),
        ]
        for widths, lines in by_width:
            for width in widths:
                self.check_wrap(text, width, lines)

        # Again, all of the above can be deduced from _split().
        text = "the -n option, or --dry-run or --dryrun"
        chunks = ["the", " ", "-n", " ", "option,", " ", "or", " ",
                  "--dry-", "run", " ", "or", " ", "--dryrun"]
        self.check_split(text, chunks)

    def test_funky_hyphens(self):
        split_cases = [
            # Screwy edge cases cooked up by David Goodger.  All
            # reported in SF bug #596434.
            ("what the--hey!", ["what", " ", "the", "--", "hey!"]),
            ("what the--", ["what", " ", "the--"]),
            ("what the--.", ["what", " ", "the--."]),
            ("--text--.", ["--text--."]),

            # When I first read bug #596434, this is what I thought
            # David was talking about.  I was wrong; these have always
            # worked fine.  The real problem is tested in
            # test_funky_parens() below...
            ("--option", ["--option"]),
            ("--option-opt", ["--option-", "opt"]),
            ("foo --option-opt bar",
             ["foo", " ", "--option-", "opt", " ", "bar"]),
        ]
        for text, chunks in split_cases:
            self.check_split(text, chunks)

    def test_punct_hyphens(self):
        # Oh bother, SF #965425 found another problem with hyphens --
        # hyphenated words in single quotes weren't handled correctly.
        # In fact, the bug is that *any* punctuation around a hyphenated
        # word was handled incorrectly, except for a leading "--", which
        # was special-cased for Optik and Docutils.  So test a variety
        # of styles of punctuation around a hyphenated word.
        # (Actually this is based on an Optik bug report, #813077).
        split_cases = [
            ("the 'wibble-wobble' widget",
             ['the', ' ', "'wibble-", "wobble'", ' ', 'widget']),
            ('the "wibble-wobble" widget',
             ['the', ' ', '"wibble-', 'wobble"', ' ', 'widget']),
            ("the (wibble-wobble) widget",
             ['the', ' ', "(wibble-", "wobble)", ' ', 'widget']),
            ("the ['wibble-wobble'] widget",
             ['the', ' ', "['wibble-", "wobble']", ' ', 'widget']),
        ]
        for text, chunks in split_cases:
            self.check_split(text, chunks)

    def test_funky_parens(self):
        split_cases = [
            # Second part of SF bug #596434: long option strings inside
            # parentheses.
            ("foo (--option) bar",
             ["foo", " ", "(--option)", " ", "bar"]),

            # Related stuff -- make sure parens work in simpler contexts.
            ("foo (bar) baz",
             ["foo", " ", "(bar)", " ", "baz"]),
            ("blah (ding dong), wubba",
             ["blah", " ", "(ding", " ", "dong),",
              " ", "wubba"]),
        ]
        for text, chunks in split_cases:
            self.check_split(text, chunks)

    def test_drop_whitespace_false(self):
        # drop_whitespace=False must preserve whitespace.
        # SF patch #1581073
        text = " This is a    sentence with     much whitespace."
        lines = [" This is a", "    ", "sentence ",
                 "with     ", "much white", "space."]
        self.check_wrap(text, 10, lines, drop_whitespace=False)

    def test_drop_whitespace_false_whitespace_only(self):
        # drop_whitespace=False must preserve a whitespace-only string.
        self.check_wrap("   ", 6, ["   "], drop_whitespace=False)

    def test_drop_whitespace_false_whitespace_only_with_indent(self):
        # A whitespace-only string gets indented when drop_whitespace
        # is False.
        self.check_wrap("   ", 6, ["     "], drop_whitespace=False,
                        initial_indent="  ")

    def test_drop_whitespace_whitespace_only(self):
        # drop_whitespace (the default) on a whitespace-only string.
        self.check_wrap("  ", 6, [])

    def test_drop_whitespace_leading_whitespace(self):
        # drop_whitespace must not drop leading whitespace that is
        # followed by non-whitespace.
        # SF bug #622849 reported inconsistent handling of leading
        # whitespace; let's test that a bit, shall we?
        text = " This is a sentence with leading whitespace."
        one_line = [" This is a sentence with leading whitespace."]
        two_lines = [" This is a sentence with", "leading whitespace."]
        self.check_wrap(text, 50, one_line)
        self.check_wrap(text, 30, two_lines)

    def test_drop_whitespace_whitespace_line(self):
        # drop_whitespace skips the whole line when a non-leading line
        # consists only of whitespace.
        text = "abcd    efgh"
        # The drop_whitespace=False result is included for comparison.
        kept = ["abcd", "    ", "efgh"]
        dropped = ["abcd", "efgh"]
        self.check_wrap(text, 6, kept, drop_whitespace=False)
        self.check_wrap(text, 6, dropped)

    def test_drop_whitespace_whitespace_only_with_indent(self):
        # initial_indent is not applied to a whitespace-only string.
        # This is a special case of the fact that dropping whitespace
        # occurs before indenting.
        self.check_wrap("  ", 6, [], initial_indent="++")

    def test_drop_whitespace_whitespace_indent(self):
        # drop_whitespace must not drop whitespace indents.  This is a
        # special case of the fact that dropping whitespace occurs
        # before indenting.
        indented = ["  abcd", "  efgh"]
        self.check_wrap("abcd efgh", 6, indented,
                        initial_indent="  ", subsequent_indent="  ")

    if test_support.have_unicode:
        def test_unicode(self):
            # *Very* simple test of wrapping Unicode strings.  I'm sure
            # there's more to it than this, but let's at least make
            # sure textwrap doesn't crash on Unicode input!
            text = u"Hello there, how are you today?"
            self.check_wrap(text, 50, [u"Hello there, how are you today?"])
            self.check_wrap(text, 20, [u"Hello there, how are", "you today?"])

            # Unicode in must mean unicode out, for wrap() and fill().
            wrapped = self.wrapper.wrap(text)
            self.assertIsInstance(wrapped, list)
            self.assertIsInstance(wrapped[0], unicode)
            self.assertIsInstance(self.wrapper.fill(text), unicode)

        def test_no_split_at_umlaut(self):
            text = u"Die Empf\xe4nger-Auswahl"
            lines = [u"Die", u"Empf\xe4nger-", u"Auswahl"]
            self.check_wrap(text, 13, lines)

        def test_umlaut_followed_by_dash(self):
            text = u"aa \xe4\xe4-\xe4\xe4"
            lines = [u"aa \xe4\xe4-", u"\xe4\xe4"]
            self.check_wrap(text, 7, lines)

    def test_split(self):
        # The standard _split() method must work as advertised in the
        # comments.
        text = "Hello there -- you goof-ball, use the -b option!"

        chunks = ["Hello", " ", "there", " ", "--", " ", "you", " ", "goof-",
                  "ball,", " ", "use", " ", "the", " ", "-b", " ",  "option!"]
        self.check(self.wrapper._split(text), chunks)

    def test_break_on_hyphens(self):
        # The break_on_hyphens option must be honoured both ways.
        text = "yaba daba-doo"
        self.check_wrap(text, 10, ["yaba daba-", "doo"],
                        break_on_hyphens=True)
        self.check_wrap(text, 10, ["yaba", "daba-doo"],
                        break_on_hyphens=False)

    def test_bad_width(self):
        # width <= 0 must be rejected.
        text = "Whatever, it doesn't matter."
        for width in (0, -1):
            self.assertRaises(ValueError, wrap, text, width)
437
438
class LongWordTestCase(BaseTestCase):
    '''Tests for words that are longer than the wrapping width.'''

    def setUp(self):
        # Default-configured wrapper plus a paragraph containing one
        # very long word.
        self.wrapper = TextWrapper()
        self.text = '''\
Did you say "supercalifragilisticexpialidocious?"
How *do* you spell that odd word, anyways?
'''

    def test_break_long(self):
        # Text with long words and lots of punctuation.
        at_30 = ['Did you say "supercalifragilis',
                 'ticexpialidocious?" How *do*',
                 'you spell that odd word,',
                 'anyways?']
        at_50 = ['Did you say "supercalifragilisticexpialidocious?"',
                 'How *do* you spell that odd word, anyways?']
        self.check_wrap(self.text, 30, at_30)
        self.check_wrap(self.text, 50, at_50)

        # SF bug 797650.  Prevent an infinite loop by making sure that at
        # least one character gets split off on every pass.  The indent
        # is wider than the width, so every later line is expected to be
        # the indent followed by a single character.
        indent = ' ' * 15
        one_char_lines = ['-' * 10] + [indent + letter for letter in 'hello']
        self.check_wrap('-' * 10 + 'hello', 10, one_char_lines,
                        subsequent_indent=indent)

        # bug 1146.  Prevent a long word to be wrongly wrapped when the
        # preceding word is exactly one character shorter than the width
        at_12 = ['Did you say ',
                 '"supercalifr',
                 'agilisticexp',
                 'ialidocious?',
                 '" How *do*',
                 'you spell',
                 'that odd',
                 'word,',
                 'anyways?']
        self.check_wrap(self.text, 12, at_12)

    def test_nobreak_long(self):
        # With break_long_words disabled the long word stays whole.
        lines = ['Did you say',
                 '"supercalifragilisticexpialidocious?"',
                 'How *do* you spell that odd',
                 'word, anyways?']

        self.wrapper.break_long_words = 0
        self.wrapper.width = 30
        self.check(self.wrapper.wrap(self.text), lines)

        # Same thing with kwargs passed to standalone wrap() function.
        self.check(wrap(self.text, width=30, break_long_words=0), lines)
498
499
class IndentTestCases(BaseTestCase):
    '''Tests for fill() and the initial/subsequent indent options.'''

    def setUp(self):
        # Runs before each test method: the paragraph to be refilled.
        self.text = '''\
This paragraph will be filled, first without any indentation,
and then with some (including a hanging indent).'''

    def test_fill(self):
        # fill() with no indentation at all.
        filled = '''\
This paragraph will be filled, first
without any indentation, and then with
some (including a hanging indent).'''

        self.check(fill(self.text, 40), filled)

    def test_initial_indent(self):
        # The initial_indent parameter, through wrap() and fill().
        lines = ["     This paragraph will be filled,",
                 "first without any indentation, and then",
                 "with some (including a hanging indent)."]

        wrapped = wrap(self.text, 40, initial_indent="     ")
        self.check(wrapped, lines)

        filled = fill(self.text, 40, initial_indent="     ")
        self.check(filled, "\n".join(lines))

    def test_subsequent_indent(self):
        # The subsequent_indent parameter, giving a hanging indent.
        hanging = '''\
  * This paragraph will be filled, first
    without any indentation, and then
    with some (including a hanging
    indent).'''

        filled = fill(self.text, 40,
                      initial_indent="  * ", subsequent_indent="    ")
        self.check(filled, hanging)
547
548
# Despite the similar names, DedentTestCase is *not* the inverse
# of IndentTestCases!
class DedentTestCase(unittest.TestCase):
    '''Tests for textwrap.dedent().'''

    def assertUnchanged(self, text):
        """assert that dedent() has no effect on 'text'"""
        dedented = dedent(text)
        self.assertEqual(text, dedented)

    def test_dedent_nomargin(self):
        unchanged = [
            # No lines indented.
            "Hello there.\nHow are you?\nOh good, I'm glad.",
            # Similar, with a blank line.
            "Hello there.\n\nBoo!",
            # Some lines indented, but overall margin is still zero.
            "Hello there.\n  This is indented.",
            # Again, add a blank line.
            "Hello there.\n\n  Boo!\n",
        ]
        for text in unchanged:
            self.assertUnchanged(text)

    def test_dedent_even(self):
        # Each entry is (indented input, expected dedent() output).
        cases = [
            # All lines indented by two spaces.
            ("  Hello there.\n  How are ya?\n  Oh good.",
             "Hello there.\nHow are ya?\nOh good."),
            # Same, with blank lines.
            ("  Hello there.\n\n  How are ya?\n  Oh good.\n",
             "Hello there.\n\nHow are ya?\nOh good.\n"),
            # Now indent one of the blank lines.
            ("  Hello there.\n  \n  How are ya?\n  Oh good.\n",
             "Hello there.\n\nHow are ya?\nOh good.\n"),
        ]
        for text, expect in cases:
            self.assertEqual(expect, dedent(text))

    def test_dedent_uneven(self):
        # Lines indented unevenly.
        indented = '''\
        def foo():
            while 1:
                return foo
        '''
        flush_left = '''\
def foo():
    while 1:
        return foo
'''
        self.assertEqual(flush_left, dedent(indented))

        # Each entry is (indented input, expected dedent() output).
        cases = [
            # Uneven indentation with a blank line.
            ("  Foo\n    Bar\n\n   Baz\n",
             "Foo\n  Bar\n\n Baz\n"),
            # Uneven indentation with a whitespace-only line.
            ("  Foo\n    Bar\n \n   Baz\n",
             "Foo\n  Bar\n\n Baz\n"),
        ]
        for text, expect in cases:
            self.assertEqual(expect, dedent(text))

    # dedent() should not mangle internal tabs
    def test_dedent_preserve_internal_tabs(self):
        indented = "  hello\tthere\n  how are\tyou?"
        flush_left = "hello\tthere\nhow are\tyou?"
        self.assertEqual(flush_left, dedent(indented))

        # make sure that it preserves tabs when it's not making any
        # changes at all
        self.assertEqual(flush_left, dedent(flush_left))

    # dedent() should not mangle tabs in the margin (i.e.
    # tabs and spaces both count as margin, but are *not*
    # considered equivalent)
    def test_dedent_preserve_margin_tabs(self):
        self.assertUnchanged("  hello there\n\thow are you?")

        # same effect even if we have 8 spaces
        self.assertUnchanged("        hello there\n\thow are you?")

        # dedent() only removes whitespace that can be uniformly removed!
        flush_left = "hello there\nhow are you?"
        uniform_margins = [
            "\thello there\n\thow are you?",
            "  \thello there\n  \thow are you?",
            "  \t  hello there\n  \t  how are you?",
        ]
        for text in uniform_margins:
            self.assertEqual(flush_left, dedent(text))

        # Only the margin common to both lines is removed here.
        text = "  \thello there\n  \t  how are you?"
        self.assertEqual("hello there\n  how are you?", dedent(text))
649
650
def test_main():
    # Hand every test case class in this module to test_support's runner.
    test_cases = [WrapTestCase, LongWordTestCase,
                  IndentTestCases, DedentTestCase]
    test_support.run_unittest(*test_cases)


# Run the whole suite when this file is executed as a script.
if __name__ == '__main__':
    test_main()
659