1"""Extension to format a paragraph or selection to a max width.
2
3Does basic, standard text formatting, and also understands Python
4comment blocks. Thus, for editing Python source code, this
5extension is really only suitable for reformatting these comment
6blocks or triple-quoted strings.
7
8Known problems with comment reformatting:
9* If there is a selection marked, and the first line of the
10  selection is not complete, the block will probably not be detected
11  as comments, and will have the normal "text formatting" rules
12  applied.
13* If a comment block has leading whitespace that mixes tabs and
14  spaces, they will not be considered part of the same block.
15* Fancy comments, like this bulleted list, aren't handled :-)
16"""
17import re
18
19from idlelib.config import idleConf
20
21
22class FormatParagraph:
23
24    menudefs = [
25        ('format', [   # /s/edit/format   dscherer@cmu.edu
26            ('Format Paragraph', '<<format-paragraph>>'),
27         ])
28    ]
29
30    def __init__(self, editwin):
31        self.editwin = editwin
32
33    def close(self):
34        self.editwin = None
35
36    def format_paragraph_event(self, event, limit=None):
37        """Formats paragraph to a max width specified in idleConf.
38
39        If text is selected, format_paragraph_event will start breaking lines
40        at the max width, starting from the beginning selection.
41
42        If no text is selected, format_paragraph_event uses the current
43        cursor location to determine the paragraph (lines of text surrounded
44        by blank lines) and formats it.
45
46        The length limit parameter is for testing with a known value.
47        """
48        if limit is None:
49            # The default length limit is that defined by pep8
50            limit = idleConf.GetOption(
51                'extensions', 'FormatParagraph', 'max-width',
52                type='int', default=72)
53        text = self.editwin.text
54        first, last = self.editwin.get_selection_indices()
55        if first and last:
56            data = text.get(first, last)
57            comment_header = get_comment_header(data)
58        else:
59            first, last, comment_header, data = \
60                    find_paragraph(text, text.index("insert"))
61        if comment_header:
62            newdata = reformat_comment(data, limit, comment_header)
63        else:
64            newdata = reformat_paragraph(data, limit)
65        text.tag_remove("sel", "1.0", "end")
66
67        if newdata != data:
68            text.mark_set("insert", first)
69            text.undo_block_start()
70            text.delete(first, last)
71            text.insert(first, newdata)
72            text.undo_block_stop()
73        else:
74            text.mark_set("insert", last)
75        text.see("insert")
76        return "break"
77
78def find_paragraph(text, mark):
79    """Returns the start/stop indices enclosing the paragraph that mark is in.
80
81    Also returns the comment format string, if any, and paragraph of text
82    between the start/stop indices.
83    """
84    lineno, col = map(int, mark.split("."))
85    line = text.get("%d.0" % lineno, "%d.end" % lineno)
86
87    # Look for start of next paragraph if the index passed in is a blank line
88    while text.compare("%d.0" % lineno, "<", "end") and is_all_white(line):
89        lineno = lineno + 1
90        line = text.get("%d.0" % lineno, "%d.end" % lineno)
91    first_lineno = lineno
92    comment_header = get_comment_header(line)
93    comment_header_len = len(comment_header)
94
95    # Once start line found, search for end of paragraph (a blank line)
96    while get_comment_header(line)==comment_header and \
97              not is_all_white(line[comment_header_len:]):
98        lineno = lineno + 1
99        line = text.get("%d.0" % lineno, "%d.end" % lineno)
100    last = "%d.0" % lineno
101
102    # Search back to beginning of paragraph (first blank line before)
103    lineno = first_lineno - 1
104    line = text.get("%d.0" % lineno, "%d.end" % lineno)
105    while lineno > 0 and \
106              get_comment_header(line)==comment_header and \
107              not is_all_white(line[comment_header_len:]):
108        lineno = lineno - 1
109        line = text.get("%d.0" % lineno, "%d.end" % lineno)
110    first = "%d.0" % (lineno+1)
111
112    return first, last, comment_header, text.get(first, last)
113
114# This should perhaps be replaced with textwrap.wrap
115def reformat_paragraph(data, limit):
116    """Return data reformatted to specified width (limit)."""
117    lines = data.split("\n")
118    i = 0
119    n = len(lines)
120    while i < n and is_all_white(lines[i]):
121        i = i+1
122    if i >= n:
123        return data
124    indent1 = get_indent(lines[i])
125    if i+1 < n and not is_all_white(lines[i+1]):
126        indent2 = get_indent(lines[i+1])
127    else:
128        indent2 = indent1
129    new = lines[:i]
130    partial = indent1
131    while i < n and not is_all_white(lines[i]):
132        # XXX Should take double space after period (etc.) into account
133        words = re.split(r"(\s+)", lines[i])
134        for j in range(0, len(words), 2):
135            word = words[j]
136            if not word:
137                continue # Can happen when line ends in whitespace
138            if len((partial + word).expandtabs()) > limit and \
139                   partial != indent1:
140                new.append(partial.rstrip())
141                partial = indent2
142            partial = partial + word + " "
143            if j+1 < len(words) and words[j+1] != " ":
144                partial = partial + " "
145        i = i+1
146    new.append(partial.rstrip())
147    # XXX Should reformat remaining paragraphs as well
148    new.extend(lines[i:])
149    return "\n".join(new)
150
151def reformat_comment(data, limit, comment_header):
152    """Return data reformatted to specified width with comment header."""
153
154    # Remove header from the comment lines
155    lc = len(comment_header)
156    data = "\n".join(line[lc:] for line in data.split("\n"))
157    # Reformat to maxformatwidth chars or a 20 char width,
158    # whichever is greater.
159    format_width = max(limit - len(comment_header), 20)
160    newdata = reformat_paragraph(data, format_width)
161    # re-split and re-insert the comment header.
162    newdata = newdata.split("\n")
163    # If the block ends in a \n, we dont want the comment prefix
164    # inserted after it. (Im not sure it makes sense to reformat a
165    # comment block that is not made of complete lines, but whatever!)
166    # Can't think of a clean solution, so we hack away
167    block_suffix = ""
168    if not newdata[-1]:
169        block_suffix = "\n"
170        newdata = newdata[:-1]
171    return '\n'.join(comment_header+line for line in newdata) + block_suffix
172
173def is_all_white(line):
174    """Return True if line is empty or all whitespace."""
175
176    return re.match(r"^\s*$", line) is not None
177
178def get_indent(line):
179    """Return the initial space or tab indent of line."""
180    return re.match(r"^([ \t]*)", line).group()
181
182def get_comment_header(line):
183    """Return string with leading whitespace and '#' from line or ''.
184
185    A null return indicates that the line is not a comment line. A non-
186    null return, such as '    #', will be used to find the other lines of
187    a comment block with the same  indent.
188    """
189    m = re.match(r"^([ \t]*#*)", line)
190    if m is None: return ""
191    return m.group(1)
192
193
194if __name__ == "__main__":
195    import unittest
196    unittest.main('idlelib.idle_test.test_paragraph',
197            verbosity=2, exit=False)
198