"""Text wrapping and filling.

Provides the TextWrapper class plus the convenience functions wrap(),
fill() and dedent().
"""

# Copyright (C) 1999-2001 Gregory P. Ward.
# Copyright (C) 2002, 2003 Python Software Foundation.
# Written by Greg Ward <gward@python.net>

__revision__ = "$Id$"

import re
import string

try:
    _unicode = unicode
except NameError:
    # The unicode type does not exist if Python 2 is built without
    # Unicode support, nor on Python 3 (where str is already Unicode).
    # Fake one so the isinstance() checks below are simply false.
    class _unicode(object):
        pass

try:
    # Python 2: builds a 256-character table for str.translate().
    _maketrans = string.maketrans
except AttributeError:
    # Python 3: builds a code-point mapping for str.translate().
    _maketrans = str.maketrans

# Do the right thing with boolean values for all known Python versions
# (so this module can be copied to projects that don't depend on Python
# 2.3, e.g. Optik and Docutils) by uncommenting the block of code below.
#try:
#    True, False
#except NameError:
#    (True, False) = (1, 0)

__all__ = ['TextWrapper', 'wrap', 'fill', 'dedent']

# Hardcode the recognized whitespace characters to the US-ASCII
# whitespace characters.  The main reason for doing this is that in
# ISO-8859-1, 0xa0 is non-breaking whitespace, so in certain locales
# that character winds up in string.whitespace.  Respecting
# string.whitespace in those cases would 1) make textwrap treat 0xa0 the
# same as any other whitespace char, which is clearly wrong (it's a
# *non-breaking* space), 2) possibly cause problems with Unicode,
# since 0xa0 is not in range(128).
_whitespace = '\t\n\x0b\x0c\r '


class TextWrapper:
    """
    Object for wrapping/filling text.  The public interface consists of
    the wrap() and fill() methods; the other methods are just there for
    subclasses to override in order to tweak the default behaviour.
    If you want to completely replace the main wrapping algorithm,
    you'll probably have to override _wrap_chunks().

    Several instance attributes control various aspects of wrapping:
      width (default: 70)
        the maximum width of wrapped lines (unless break_long_words
        is false)
      initial_indent (default: "")
        string that will be prepended to the first line of wrapped
        output.  Counts towards the line's width.
      subsequent_indent (default: "")
        string that will be prepended to all lines save the first
        of wrapped output; also counts towards each line's width.
      expand_tabs (default: true)
        Expand tabs in input text to spaces before further processing.
        Each tab will become 1 .. 8 spaces, depending on its position in
        its line.  If false, each tab is treated as a single character.
      replace_whitespace (default: true)
        Replace all whitespace characters in the input text by spaces
        after tab expansion.  Note that if expand_tabs is false and
        replace_whitespace is true, every tab will be converted to a
        single space!
      fix_sentence_endings (default: false)
        Ensure that sentence-ending punctuation is always followed
        by two spaces.  Off by default because the algorithm is
        (unavoidably) imperfect.
      break_long_words (default: true)
        Break words longer than 'width'.  If false, those words will not
        be broken, and some lines might be longer than 'width'.
      break_on_hyphens (default: true)
        Allow breaking hyphenated words.  If true, wrapping will occur
        preferably on whitespaces and right after hyphens part of
        compound words.
      drop_whitespace (default: true)
        Drop leading and trailing whitespace from lines.
    """

    # Translation table used for byte strings on Python 2 and for str on
    # Python 3: maps every recognized whitespace character to a space.
    whitespace_trans = _maketrans(_whitespace, ' ' * len(_whitespace))

    # Same mapping keyed by code point, for Python 2 unicode objects.
    uspace = ord(u' ')
    unicode_whitespace_trans = dict.fromkeys(map(ord, _whitespace), uspace)

    # This funky little regex is just the trick for splitting
    # text up into word-wrappable chunks.  E.g.
    #   "Hello there -- you goof-ball, use the -b option!"
    # splits into
    #   Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
    # (after stripping out empty strings).
    wordsep_re = re.compile(
        r'(\s+|'                                  # any whitespace
        r'[^\s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|'   # hyphenated words
        r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')   # em-dash

    # This less funky little regex just splits on recognized spaces.  E.g.
    #   "Hello there -- you goof-ball, use the -b option!"
    # splits into
    #   Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
    wordsep_simple_re = re.compile(r'(\s+)')

    # XXX this is not locale- or charset-aware -- only US-ASCII lowercase
    # letters are recognized (and it is therefore English-only).
    # string.ascii_lowercase is used so the pattern cannot pick up
    # locale-specific bytes, mirroring the hardcoded _whitespace above.
    sentence_end_re = re.compile(r'[%s]'              # lowercase letter
                                 r'[\.\!\?]'          # sentence-ending punct.
                                 r'[\"\']?'           # optional end-of-quote
                                 r'\Z'                # end of chunk
                                 % string.ascii_lowercase)

    def __init__(self,
                 width=70,
                 initial_indent="",
                 subsequent_indent="",
                 expand_tabs=True,
                 replace_whitespace=True,
                 fix_sentence_endings=False,
                 break_long_words=True,
                 drop_whitespace=True,
                 break_on_hyphens=True):
        """Store the wrapping options; see the class docstring for each."""
        self.width = width
        self.initial_indent = initial_indent
        self.subsequent_indent = subsequent_indent
        self.expand_tabs = expand_tabs
        self.replace_whitespace = replace_whitespace
        self.fix_sentence_endings = fix_sentence_endings
        self.break_long_words = break_long_words
        self.drop_whitespace = drop_whitespace
        self.break_on_hyphens = break_on_hyphens

        # recompile the regexes for Unicode mode -- done in this clumsy way for
        # backwards compatibility because it's rather common to monkey-patch
        # the TextWrapper class' wordsep_re attribute.
        self.wordsep_re_uni = re.compile(self.wordsep_re.pattern, re.U)
        self.wordsep_simple_re_uni = re.compile(
            self.wordsep_simple_re.pattern, re.U)

    # -- Private methods -----------------------------------------------
    # (possibly useful for subclasses to override)

    def _munge_whitespace(self, text):
        """_munge_whitespace(text : string) -> string

        Munge whitespace in text: expand tabs (if expand_tabs is set)
        and convert all other whitespace characters to spaces (if
        replace_whitespace is set).  Eg. " foo\\tbar\\n\\nbaz" becomes
        " foo    bar  baz" with both options enabled.
        """
        if self.expand_tabs:
            text = text.expandtabs()
        if self.replace_whitespace:
            if isinstance(text, str):
                text = text.translate(self.whitespace_trans)
            elif isinstance(text, _unicode):
                text = text.translate(self.unicode_whitespace_trans)
        return text

    def _split(self, text):
        """_split(text : string) -> [string]

        Split the text to wrap into indivisible chunks.  Chunks are
        not quite the same as words; see _wrap_chunks() for full
        details.  As an example, the text
          Look, goof-ball -- use the -b option!
        breaks into the following chunks:
          'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        if break_on_hyphens is True, or in:
          'Look,', ' ', 'goof-ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        otherwise.
        """
        if isinstance(text, _unicode):
            if self.break_on_hyphens:
                pat = self.wordsep_re_uni
            else:
                pat = self.wordsep_simple_re_uni
        else:
            if self.break_on_hyphens:
                pat = self.wordsep_re
            else:
                pat = self.wordsep_simple_re
        # Build a real list (not a lazy filter object) because
        # _wrap_chunks() reverses and pops from it in place.  Empty
        # strings produced by re.split() are dropped.
        return [chunk for chunk in pat.split(text) if chunk]

    def _fix_sentence_endings(self, chunks):
        """_fix_sentence_endings(chunks : [string])

        Correct for sentence endings buried in 'chunks'.  Eg. when the
        original text contains "... foo.\\nBar ...", munge_whitespace()
        and split() will convert that to [..., "foo.", " ", "Bar", ...]
        which has one too few spaces; this method simply changes the one
        space to two.  'chunks' is modified in place; nothing is returned.
        """
        i = 0
        patsearch = self.sentence_end_re.search
        while i < len(chunks) - 1:
            if chunks[i + 1] == " " and patsearch(chunks[i]):
                # Widen the single separating space to two spaces.
                chunks[i + 1] = "  "
                i += 2
            else:
                i += 1

    def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
        """_handle_long_word(chunks : [string],
                             cur_line : [string],
                             cur_len : int, width : int)

        Handle a chunk of text (most likely a word, not whitespace) that
        is too long to fit in any line.

        reversed_chunks is the stack of remaining chunks (next chunk
        last), cur_line the chunks already placed on the current line,
        cur_len their combined length and width the space available on
        this line.  Both lists are modified in place.
        """
        # Figure out when indent is larger than the specified width, and make
        # sure at least one character is stripped off on every pass
        if width < 1:
            space_left = 1
        else:
            space_left = width - cur_len

        # If we're allowed to break long words, then do so: put as much
        # of the next chunk onto the current line as will fit.
        if self.break_long_words:
            cur_line.append(reversed_chunks[-1][:space_left])
            reversed_chunks[-1] = reversed_chunks[-1][space_left:]

        # Otherwise, we have to preserve the long word intact.  Only add
        # it to the current line if there's nothing already there --
        # that minimizes how much we violate the width constraint.
        elif not cur_line:
            cur_line.append(reversed_chunks.pop())

        # If we're not allowed to break long words, and there's already
        # text on the current line, do nothing.  Next time through the
        # main loop of _wrap_chunks(), we'll wind up here again, but
        # cur_len will be zero, so the next line will be entirely
        # devoted to the long word that we can't handle right now.

    def _wrap_chunks(self, chunks):
        """_wrap_chunks(chunks : [string]) -> [string]

        Wrap a sequence of text chunks and return a list of lines of
        length 'self.width' or less.  (If 'break_long_words' is false,
        some lines may be longer than this.)  Chunks correspond roughly
        to words and the whitespace between them: each chunk is
        indivisible (modulo 'break_long_words'), but a line break can
        come between any two chunks.  Chunks should not have internal
        whitespace; ie. a chunk is either all whitespace or a "word".
        Whitespace chunks will be removed from the beginning and end of
        lines, but apart from that whitespace is preserved.

        Raises ValueError if 'self.width' is not positive.  The
        'chunks' list is consumed (reversed and emptied) in the process.
        """
        lines = []
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)

        # Arrange in reverse order so items can be efficiently popped
        # from a stack of chunks.
        chunks.reverse()

        while chunks:

            # Start the list of chunks that will make up the current line.
            # cur_len is just the length of all the chunks in cur_line.
            cur_line = []
            cur_len = 0

            # Figure out which static string will prefix this line.
            if lines:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent

            # Maximum width for this line.
            width = self.width - len(indent)

            # First chunk on line is whitespace -- drop it, unless this
            # is the very beginning of the text (ie. no lines started yet).
            if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                del chunks[-1]

            while chunks:
                chunk_len = len(chunks[-1])

                # Can at least squeeze this chunk onto the current line.
                if cur_len + chunk_len <= width:
                    cur_line.append(chunks.pop())
                    cur_len += chunk_len

                # Nope, this line is full.
                else:
                    break

            # The current line is full, and the next chunk is too big to
            # fit on *any* line (not just this one).
            if chunks and len(chunks[-1]) > width:
                self._handle_long_word(chunks, cur_line, cur_len, width)

            # If the last chunk on this line is all whitespace, drop it.
            if (self.drop_whitespace and cur_line
                    and cur_line[-1].strip() == ''):
                del cur_line[-1]

            # Convert current line back to a string and store it in list
            # of all lines (return value).
            if cur_line:
                lines.append(indent + ''.join(cur_line))

        return lines

    # -- Public interface ----------------------------------------------

    def wrap(self, text):
        """wrap(text : string) -> [string]

        Reformat the single paragraph in 'text' so it fits in lines of
        no more than 'self.width' columns, and return a list of wrapped
        lines.  Tabs in 'text' are expanded with string.expandtabs(),
        and all other whitespace characters (including newline) are
        converted to space.
        """
        text = self._munge_whitespace(text)
        chunks = self._split(text)
        if self.fix_sentence_endings:
            self._fix_sentence_endings(chunks)
        return self._wrap_chunks(chunks)

    def fill(self, text):
        """fill(text : string) -> string

        Reformat the single paragraph in 'text' to fit in lines of no
        more than 'self.width' columns, and return a new string
        containing the entire wrapped paragraph.
        """
        return "\n".join(self.wrap(text))


# -- Convenience interface ---------------------------------------------

def wrap(text, width=70, **kwargs):
    """Wrap a single paragraph of text, returning a list of wrapped lines.

    Reformat the single paragraph in 'text' so it fits in lines of no
    more than 'width' columns, and return a list of wrapped lines.  By
    default, tabs in 'text' are expanded with string.expandtabs(), and
    all other whitespace characters (including newline) are converted to
    space.  See TextWrapper class for available keyword args to customize
    wrapping behaviour.
    """
    w = TextWrapper(width=width, **kwargs)
    return w.wrap(text)


def fill(text, width=70, **kwargs):
    """Fill a single paragraph of text, returning a new string.

    Reformat the single paragraph in 'text' to fit in lines of no more
    than 'width' columns, and return a new string containing the entire
    wrapped paragraph.  As with wrap(), tabs are expanded and other
    whitespace characters converted to space.  See TextWrapper class for
    available keyword args to customize wrapping behaviour.
    """
    w = TextWrapper(width=width, **kwargs)
    return w.fill(text)


# -- Loosely related functionality -------------------------------------

_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)


def dedent(text):
    """Remove any common leading whitespace from every line in `text`.

    This can be used to make triple-quoted strings line up with the left
    edge of the display, while still presenting them in the source code
    in indented form.

    Note that tabs and spaces are both treated as whitespace, but they
    are not equal: the lines " hello" and "\\thello" are
    considered to have no common leading whitespace.  (This behaviour is
    new in Python 2.5; older versions of this module incorrectly
    expanded tabs before searching for common leading whitespace.)

    Lines consisting solely of spaces and tabs are emptied before the
    margin is computed, so they never influence it.
    """
    # Look for the longest leading string of spaces and tabs common to
    # all lines.
    margin = None
    text = _whitespace_only_re.sub('', text)
    indents = _leading_whitespace_re.findall(text)
    for indent in indents:
        if margin is None:
            margin = indent

        # Current line more deeply indented than previous winner:
        # no change (previous winner is still on top).
        elif indent.startswith(margin):
            pass

        # Current line consistent with and no deeper than previous winner:
        # it's the new winner.
        elif margin.startswith(indent):
            margin = indent

        # Current line and previous winner have no common whitespace:
        # there is no margin.
        else:
            margin = ""
            break

    # sanity check (testing/debugging only)
    if 0 and margin:
        for line in text.split("\n"):
            assert not line or line.startswith(margin), \
                   "line = %r, margin = %r" % (line, margin)

    if margin:
        # margin holds only spaces and tabs, so it needs no regex escaping.
        text = re.sub(r'(?m)^' + margin, '', text)
    return text


if __name__ == "__main__":
    #print dedent("\tfoo\n\tbar")
    #print dedent(" \thello there\n \t how are you?")
    print(dedent("Hello there.\n This is indented."))