10c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi"""Text wrapping and filling.
20c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi"""
30c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
40c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi# Copyright (C) 1999-2001 Gregory P. Ward.
50c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi# Copyright (C) 2002, 2003 Python Software Foundation.
60c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi# Written by Greg Ward <gward@python.net>
70c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
80c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi__revision__ = "$Id$"
90c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
100c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yiimport string, re
110c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
# Bind _unicode to the built-in unicode type when the interpreter has one.
# Otherwise fall back to a private placeholder class, so that
# isinstance(text, _unicode) is always a valid check and simply never
# matches when no unicode type exists.
try:
    unicode
except NameError:
    class _unicode(object):
        pass
else:
    _unicode = unicode
190c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
200c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi# Do the right thing with boolean values for all known Python versions
210c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi# (so this module can be copied to projects that don't depend on Python
220c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi# 2.3, e.g. Optik and Docutils) by uncommenting the block of code below.
230c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi#try:
240c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi#    True, False
250c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi#except NameError:
260c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi#    (True, False) = (1, 0)
270c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
# Names exported by "from <this module> import *".
__all__ = ['TextWrapper', 'wrap', 'fill', 'dedent']
290c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
# Hardcode the recognized whitespace characters to the US-ASCII
# whitespace characters.  The main reason for doing this is that in
# ISO-8859-1, 0xa0 is non-breaking whitespace, so in certain locales
# that character winds up in string.whitespace.  Respecting
# string.whitespace in those cases would (1) make textwrap treat 0xa0
# the same as any other whitespace character, which is clearly wrong
# (it is a *non-breaking* space), and (2) possibly cause problems with
# Unicode, since 0xa0 is not in range(128).
#
# The characters are, in order: tab, newline, vertical tab, form feed,
# carriage return and space.
_whitespace = '\t\n\x0b\x0c\r '
390c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
class TextWrapper:
    """
    Object for wrapping/filling text.  The public interface consists of
    the wrap() and fill() methods; the other methods are just there for
    subclasses to override in order to tweak the default behaviour.
    If you want to completely replace the main wrapping algorithm,
    you'll probably have to override _wrap_chunks().

    Several instance attributes control various aspects of wrapping:
      width (default: 70)
        the maximum width of wrapped lines (unless break_long_words
        is false)
      initial_indent (default: "")
        string that will be prepended to the first line of wrapped
        output.  Counts towards the line's width.
      subsequent_indent (default: "")
        string that will be prepended to all lines save the first
        of wrapped output; also counts towards each line's width.
      expand_tabs (default: true)
        Expand tabs in input text to spaces before further processing.
        Each tab will become 1 .. 8 spaces, depending on its position in
        its line.  If false, each tab is treated as a single character.
      replace_whitespace (default: true)
        Replace all whitespace characters in the input text by spaces
        after tab expansion.  Note that if expand_tabs is false and
        replace_whitespace is true, every tab will be converted to a
        single space!
      fix_sentence_endings (default: false)
        Ensure that sentence-ending punctuation is always followed
        by two spaces.  Off by default because the algorithm is
        (unavoidably) imperfect.
      break_long_words (default: true)
        Break words longer than 'width'.  If false, those words will not
        be broken, and some lines might be longer than 'width'.
      break_on_hyphens (default: true)
        Allow breaking hyphenated words.  If true, wrapping will occur
        preferably on whitespace and right after hyphens that are part
        of compound words.
      drop_whitespace (default: true)
        Drop leading and trailing whitespace from lines.
    """

    # Translation table for 8-bit strings: every character listed in
    # _whitespace is mapped to a plain space.
    whitespace_trans = string.maketrans(_whitespace, ' ' * len(_whitespace))

    # The same mapping for unicode strings, keyed by code point as
    # unicode.translate() requires.
    unicode_whitespace_trans = {}
    uspace = ord(u' ')
    for x in map(ord, _whitespace):
        unicode_whitespace_trans[x] = uspace

    # This funky little regex is just the trick for splitting
    # text up into word-wrappable chunks.  E.g.
    #   "Hello there -- you goof-ball, use the -b option!"
    # splits into
    #   Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
    # (after stripping out empty strings).
    wordsep_re = re.compile(
        r'(\s+|'                                  # any whitespace
        r'[^\s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|'   # hyphenated words
        r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')   # em-dash

    # This less funky little regex just splits on recognized spaces.  E.g.
    #   "Hello there -- you goof-ball, use the -b option!"
    # splits into
    #   Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
    wordsep_simple_re = re.compile(r'(\s+)')

    # Deliberately US-ASCII only (and therefore English-only).  The
    # lowercase range is spelled out rather than taken from
    # string.lowercase, because that value changes with the process
    # locale and would make sentence detection depend on whatever locale
    # happened to be active when this module was imported.
    sentence_end_re = re.compile(r'[a-z]'             # lowercase letter
                                 r'[\.\!\?]'          # sentence-ending punct.
                                 r'[\"\']?'           # optional end-of-quote
                                 r'\Z')               # end of chunk


    def __init__(self,
                 width=70,
                 initial_indent="",
                 subsequent_indent="",
                 expand_tabs=True,
                 replace_whitespace=True,
                 fix_sentence_endings=False,
                 break_long_words=True,
                 drop_whitespace=True,
                 break_on_hyphens=True):
        """Store the wrapping options as instance attributes.

        Every parameter is saved under an attribute of the same name;
        see the class docstring for what each one controls.
        """
        self.width = width
        self.initial_indent = initial_indent
        self.subsequent_indent = subsequent_indent
        self.expand_tabs = expand_tabs
        self.replace_whitespace = replace_whitespace
        self.fix_sentence_endings = fix_sentence_endings
        self.break_long_words = break_long_words
        self.drop_whitespace = drop_whitespace
        self.break_on_hyphens = break_on_hyphens

        # recompile the regexes for Unicode mode -- done in this clumsy way for
        # backwards compatibility because it's rather common to monkey-patch
        # the TextWrapper class' wordsep_re attribute.
        # NOTE(review): with re.U, \s also matches non-ASCII whitespace
        # (e.g. U+00A0), which _whitespace deliberately leaves out --
        # confirm whether splitting unicode text there is intended.
        self.wordsep_re_uni = re.compile(self.wordsep_re.pattern, re.U)
        self.wordsep_simple_re_uni = re.compile(
            self.wordsep_simple_re.pattern, re.U)


    # -- Private methods -----------------------------------------------
    # (possibly useful for subclasses to override)

    def _munge_whitespace(self, text):
        """_munge_whitespace(text : string) -> string

        Munge whitespace in text: expand tabs and convert all other
        whitespace characters to spaces.  Eg. " foo\\tbar\\n\\nbaz"
        becomes " foo    bar  baz".

        Tab expansion only happens when 'expand_tabs' is true and the
        conversion to spaces only when 'replace_whitespace' is true.
        Text that is neither a str nor a unicode object is returned
        without whitespace replacement.
        """
        if self.expand_tabs:
            text = text.expandtabs()
        if self.replace_whitespace:
            # str.translate() takes a 256-character table while
            # unicode.translate() takes a code point mapping, hence the
            # two separate tables.
            if isinstance(text, str):
                text = text.translate(self.whitespace_trans)
            elif isinstance(text, _unicode):
                text = text.translate(self.unicode_whitespace_trans)
        return text


    def _split(self, text):
        """_split(text : string) -> [string]

        Split the text to wrap into indivisible chunks.  Chunks are
        not quite the same as words; see _wrap_chunks() for full
        details.  As an example, the text
          Look, goof-ball -- use the -b option!
        breaks into the following chunks:
          'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        if break_on_hyphens is True, or in:
          'Look,', ' ', 'goof-ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        otherwise.

        Always returns a new list, with empty chunks removed.
        """
        # Unicode text needs the patterns compiled with re.U in __init__.
        if isinstance(text, _unicode):
            if self.break_on_hyphens:
                pat = self.wordsep_re_uni
            else:
                pat = self.wordsep_simple_re_uni
        else:
            if self.break_on_hyphens:
                pat = self.wordsep_re
            else:
                pat = self.wordsep_simple_re
        # Build a real list (not whatever filter() happens to return):
        # later stages call len() on it, assign by index and reverse it
        # in place.  Empty chunks produced by the split are dropped.
        return [chunk for chunk in pat.split(text) if chunk]

    def _fix_sentence_endings(self, chunks):
        """_fix_sentence_endings(chunks : [string])

        Correct for sentence endings buried in 'chunks'.  Eg. when the
        original text contains "... foo.\\nBar ...", munge_whitespace()
        and split() will convert that to [..., "foo.", " ", "Bar", ...]
        which has one too few spaces; this method simply changes the one
        space to two.

        'chunks' is modified in place; nothing is returned.
        """
        i = 0
        patsearch = self.sentence_end_re.search
        while i < len(chunks)-1:
            if chunks[i+1] == " " and patsearch(chunks[i]):
                chunks[i+1] = "  "
                # Skip the space chunk that was just widened.
                i += 2
            else:
                i += 1

    def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
        """_handle_long_word(chunks : [string],
                             cur_line : [string],
                             cur_len : int, width : int)

        Handle a chunk of text (most likely a word, not whitespace) that
        is too long to fit in any line.

        'reversed_chunks' is the stack of remaining chunks, with the
        next chunk to place at the end.  'cur_line' holds the chunks
        already on the current line and 'cur_len' their total length;
        'width' is the room available on the line after the indent.
        Both lists are modified in place; nothing is returned.
        """
        # Figure out when indent is larger than the specified width, and make
        # sure at least one character is stripped off on every pass
        if width < 1:
            space_left = 1
        else:
            space_left = width - cur_len

        # If we're allowed to break long words, then do so: put as much
        # of the next chunk onto the current line as will fit.
        if self.break_long_words:
            cur_line.append(reversed_chunks[-1][:space_left])
            reversed_chunks[-1] = reversed_chunks[-1][space_left:]

        # Otherwise, we have to preserve the long word intact.  Only add
        # it to the current line if there's nothing already there --
        # that minimizes how much we violate the width constraint.
        elif not cur_line:
            cur_line.append(reversed_chunks.pop())

        # If we're not allowed to break long words, and there's already
        # text on the current line, do nothing.  Next time through the
        # main loop of _wrap_chunks(), we'll wind up here again, but
        # cur_len will be zero, so the next line will be entirely
        # devoted to the long word that we can't handle right now.

    def _wrap_chunks(self, chunks):
        """_wrap_chunks(chunks : [string]) -> [string]

        Wrap a sequence of text chunks and return a list of lines of
        length 'self.width' or less.  (If 'break_long_words' is false,
        some lines may be longer than this.)  Chunks correspond roughly
        to words and the whitespace between them: each chunk is
        indivisible (modulo 'break_long_words'), but a line break can
        come between any two chunks.  Chunks should not have internal
        whitespace; ie. a chunk is either all whitespace or a "word".
        Whitespace chunks will be removed from the beginning and end of
        lines, but apart from that whitespace is preserved.

        The 'chunks' list is reversed in place and consumed while
        wrapping.  Raises ValueError if 'self.width' is not positive.
        """
        lines = []
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)

        # Arrange in reverse order so items can be efficiently popped
        # from a stack of chunks.
        chunks.reverse()

        while chunks:

            # Start the list of chunks that will make up the current line.
            # cur_len is just the length of all the chunks in cur_line.
            cur_line = []
            cur_len = 0

            # Figure out which static string will prefix this line.
            if lines:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent

            # Maximum width for this line.
            width = self.width - len(indent)

            # First chunk on line is whitespace -- drop it, unless this
            # is the very beginning of the text (ie. no lines started yet).
            if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                del chunks[-1]

            while chunks:
                chunk_len = len(chunks[-1])

                # Can at least squeeze this chunk onto the current line.
                if cur_len + chunk_len <= width:
                    cur_line.append(chunks.pop())
                    cur_len += chunk_len

                # Nope, this line is full.
                else:
                    break

            # The current line is full, and the next chunk is too big to
            # fit on *any* line (not just this one).
            if chunks and len(chunks[-1]) > width:
                self._handle_long_word(chunks, cur_line, cur_len, width)

            # If the last chunk on this line is all whitespace, drop it.
            if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
                del cur_line[-1]

            # Convert current line back to a string and store it in list
            # of all lines (return value).
            if cur_line:
                lines.append(indent + ''.join(cur_line))

        return lines


    # -- Public interface ----------------------------------------------

    def wrap(self, text):
        """wrap(text : string) -> [string]

        Reformat the single paragraph in 'text' so it fits in lines of
        no more than 'self.width' columns, and return a list of wrapped
        lines.  With the default settings, tabs in 'text' are expanded
        with string.expandtabs(), and all other whitespace characters
        (including newline) are converted to space.
        """
        text = self._munge_whitespace(text)
        chunks = self._split(text)
        if self.fix_sentence_endings:
            self._fix_sentence_endings(chunks)
        return self._wrap_chunks(chunks)

    def fill(self, text):
        """fill(text : string) -> string

        Reformat the single paragraph in 'text' to fit in lines of no
        more than 'self.width' columns, and return a new string
        containing the entire wrapped paragraph, with the wrapped lines
        joined by newlines.
        """
        return "\n".join(self.wrap(text))
3390c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
3400c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
3410c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi# -- Convenience interface ---------------------------------------------
3420c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
def wrap(text, width=70, **kwargs):
    """Wrap one paragraph of text and return the wrapped lines as a list.

    The paragraph in 'text' is reformatted so that it fits in lines of
    at most 'width' columns.  By default, tabs in 'text' are expanded
    with string.expandtabs(), and every other whitespace character
    (newline included) is turned into a space.  Any extra keyword
    arguments are handed to the TextWrapper constructor; see that class
    for the options available to customize wrapping behaviour.
    """
    # A throwaway wrapper configured for this one call does all the work.
    return TextWrapper(width=width, **kwargs).wrap(text)
3550c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
def fill(text, width=70, **kwargs):
    """Fill one paragraph of text and return it as a single new string.

    The paragraph in 'text' is reformatted to fit in lines of at most
    'width' columns, and the whole wrapped paragraph is returned as one
    string.  As with wrap(), tabs are expanded and other whitespace
    characters are converted to space.  Any extra keyword arguments are
    handed to the TextWrapper constructor; see that class for the
    options available to customize wrapping behaviour.
    """
    # A throwaway wrapper configured for this one call does all the work.
    return TextWrapper(width=width, **kwargs).fill(text)
3670c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
3680c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
3690c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi# -- Loosely related functionality -------------------------------------
3700c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
# Matches a line that consists solely of spaces and/or tabs.
_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
# Captures the run of spaces/tabs that precedes the first non-whitespace
# character of each line; empty lines produce no match.
_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)

def dedent(text):
    """Remove any common leading whitespace from every line in `text`.

    This can be used to make triple-quoted strings line up with the left
    edge of the display, while still presenting them in the source code
    in indented form.

    Note that tabs and spaces are both treated as whitespace, but they
    are not equal: the lines "  hello" and "\\thello" are
    considered to have no common leading whitespace.  (This behaviour is
    new in Python 2.5; older versions of this module incorrectly
    expanded tabs before searching for common leading whitespace.)

    Lines that contain only spaces and tabs are reduced to empty lines
    and play no part in determining the common margin.  Returns the
    dedented text as a new string.
    """
    # Look for the longest leading string of spaces and tabs common to
    # all lines.
    margin = None
    # Blank out whitespace-only lines first so that they neither
    # contribute an indent nor keep stray whitespace in the result.
    text = _whitespace_only_re.sub('', text)
    indents = _leading_whitespace_re.findall(text)
    for indent in indents:
        if margin is None:
            margin = indent

        # Current line more deeply indented than previous winner:
        # no change (previous winner is still on top).
        elif indent.startswith(margin):
            pass

        # Current line consistent with and no deeper than previous winner:
        # it's the new winner.
        elif margin.startswith(indent):
            margin = indent

        # Current line and previous winner diverge somewhere: keep only
        # the whitespace they share up to the first differing character.
        # (Neither string is a prefix of the other here, so a mismatch
        # is always found inside the zipped range.)
        else:
            for i, (x, y) in enumerate(zip(margin, indent)):
                if x != y:
                    margin = margin[:i]
                    break
            # Nothing in common at all: no later line can change that.
            if not margin:
                break

    # sanity check (testing/debugging only)
    if 0 and margin:
        for line in text.split("\n"):
            assert not line or line.startswith(margin), \
                   "line = %r, margin = %r" % (line, margin)

    if margin:
        # margin holds only spaces and tabs, so it is safe to splice
        # into the pattern without escaping.
        text = re.sub(r'(?m)^' + margin, '', text)
    return text
4210c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
if __name__ == "__main__":
    # Quick manual demo of dedent() when the module is run as a script.
    # The single-argument parenthesised print form behaves the same as the
    # print statement on Python 2 and also parses on Python 3.
    # Alternative sample inputs kept for manual experimentation:
    #print(dedent("\tfoo\n\tbar"))
    #print(dedent("  \thello there\n  \t  how are you?"))
    print(dedent("Hello there.\n  This is indented."))
426