1"""text_file
2
3provides the TextFile class, which gives an interface to text files
4that (optionally) takes care of stripping comments, ignoring blank
5lines, and joining lines with backslashes."""
6
7__revision__ = "$Id$"
8
9import sys
10
11
class TextFile:

    """Provides a file-like object that takes care of all the things you
       commonly want to do when processing a text file that has some
       line-by-line syntax: strip comments (as long as "#" is your
       comment character), skip blank lines, join adjacent lines by
       escaping the newline (ie. backslash at end of line), strip
       leading and/or trailing whitespace.  All of these are optional
       and independently controllable.

       Provides a 'warn()' method so you can generate warning messages that
       report physical line number, even if the logical line in question
       spans multiple physical lines.  Also provides 'unreadline()' for
       implementing line-at-a-time lookahead.

       Constructor is called as:

           TextFile (filename=None, file=None, **options)

       It bombs (RuntimeError) if both 'filename' and 'file' are None;
       'filename' should be a string, and 'file' a file object (or
       something that provides 'readline()' and 'close()' methods).  It is
       recommended that you supply at least 'filename', so that TextFile
       can include it in warning messages (without it, messages carry
       only the line number).  If 'file' is not supplied, TextFile
       creates its own using the 'open()' builtin.

       The options are all boolean, and affect the value returned by
       'readline()':
         strip_comments [default: true]
           strip from "#" to end-of-line, as well as any whitespace
           leading up to the "#" -- unless it is escaped by a backslash
         lstrip_ws [default: false]
           strip leading whitespace from each line before returning it
         rstrip_ws [default: true]
           strip trailing whitespace (including line terminator!) from
           each line before returning it
         skip_blanks [default: true]
           skip lines that are empty *after* stripping comments and
           whitespace.  (If both lstrip_ws and rstrip_ws are false,
           then some lines may consist of solely whitespace: these will
           *not* be skipped, even if 'skip_blanks' is true.)
         join_lines [default: false]
           if a backslash is the last non-newline character on a line
           after stripping comments and whitespace, join the following line
           to it to form one "logical line"; if N consecutive lines end
           with a backslash, then N+1 physical lines will be joined to
           form one logical line.
         collapse_join [default: false]
           strip leading whitespace from lines that are joined to their
           predecessor; only matters if (join_lines and not lstrip_ws)

       Note that since 'rstrip_ws' can strip the trailing newline, the
       semantics of 'readline()' must differ from those of the builtin file
       object's 'readline()' method!  In particular, 'readline()' returns
       None for end-of-file: an empty string might just be a blank line (or
       an all-whitespace line), if 'rstrip_ws' is true but 'skip_blanks' is
       not."""

    # Option name -> default value; also the authoritative list of
    # option names accepted by the constructor.
    default_options = { 'strip_comments': 1,
                        'skip_blanks':    1,
                        'lstrip_ws':      0,
                        'rstrip_ws':      1,
                        'join_lines':     0,
                        'collapse_join':  0,
                      }

    def __init__(self, filename=None, file=None, **options):
        """Construct a new TextFile object.  At least one of 'filename'
           (a string) and 'file' (a file-like object) must be supplied.
           The keyword argument options are described above and affect
           the values returned by 'readline()'.

           Raises RuntimeError if neither 'filename' nor 'file' is given,
           and KeyError if an unknown option name is passed."""

        if filename is None and file is None:
            raise RuntimeError(
                "you must supply either or both of 'filename' and 'file'")

        # sanity check client option hash before touching any state
        for opt in options:
            if opt not in self.default_options:
                raise KeyError("invalid TextFile option '%s'" % opt)

        # set values for all options -- either from client option hash
        # or fallback to default_options
        for opt, default in self.default_options.items():
            setattr(self, opt, options.get(opt, default))

        if file is None:
            self.open(filename)
        else:
            self.filename = filename
            self.file = file
            self.current_line = 0       # assuming that file is at BOF!

        # 'linebuf' is a stack of lines that will be emptied before we
        # actually read from the file; it's only populated by an
        # 'unreadline()' operation
        self.linebuf = []

    def open(self, filename):
        """Open a new file named 'filename'.  This overrides both the
           'filename' and 'file' arguments to the constructor, and resets
           the current line number to 0."""

        self.filename = filename
        self.file = open(self.filename, 'r')
        self.current_line = 0

    def close(self):
        """Close the current file and forget everything we know about it
           (filename, current line number)."""

        self.file.close()
        self.file = None
        self.filename = None
        self.current_line = None

    def gen_error(self, msg, line=None):
        """Return 'msg' prefixed with the filename (when one is known) and
           a physical line reference.  'line' defaults to the current line
           number; it may be an integer ("line N: ") or a two-element
           list/tuple giving a range ("lines N-M: ")."""

        if line is None:
            line = self.current_line

        outmsg = []
        # 'filename' is optional when a file object was supplied; don't
        # blow up trying to concatenate None.
        if self.filename is not None:
            outmsg.append(self.filename + ", ")
        if isinstance(line, (list, tuple)):
            outmsg.append("lines %d-%d: " % tuple(line))
        else:
            outmsg.append("line %d: " % line)
        outmsg.append(str(msg))
        return ''.join(outmsg)

    def error(self, msg, line=None):
        """Raise ValueError with 'msg' tied to the current logical line
           (or to 'line', interpreted as for 'gen_error()')."""

        raise ValueError("error: " + self.gen_error(msg, line))

    def warn(self, msg, line=None):
        """Print (to stderr) a warning message tied to the current logical
           line in the current file.  If the current logical line in the
           file spans multiple physical lines, the warning refers to the
           whole range, eg. "lines 3-5".  If 'line' supplied, it overrides
           the current line number; it may be a list or tuple to indicate a
           range of physical lines, or an integer for a single physical
           line."""
        sys.stderr.write("warning: " + self.gen_error(msg, line) + "\n")

    def _count_physical_line(self, continuing):
        """Advance 'current_line' by one physical line.  If 'continuing'
           is true the line extends the logical line being joined, so
           'current_line' becomes (or stays) a [first, last] range;
           otherwise it becomes the plain integer number of the new
           line."""

        current = self.current_line
        if continuing:
            if isinstance(current, list):
                current[1] = current[1] + 1
            else:
                self.current_line = [current, current + 1]
        elif isinstance(current, list):
            self.current_line = current[1] + 1
        else:
            self.current_line = current + 1

    def readline(self):
        """Read and return a single logical line from the current file (or
           from an internal buffer if lines have previously been "unread"
           with 'unreadline()').  If the 'join_lines' option is true, this
           may involve reading multiple physical lines concatenated into a
           single string.  Updates the current line number, so calling
           'warn()' after 'readline()' emits a warning about the physical
           line(s) just read.  Returns None on end-of-file, since the empty
           string can occur if 'rstrip_ws' is true but 'skip_blanks' is
           not."""

        # If any "unread" lines waiting in 'linebuf', return the top
        # one.  (We don't actually buffer read-ahead data -- lines only
        # get put in 'linebuf' if the client explicitly does an
        # 'unreadline()'.)
        if self.linebuf:
            return self.linebuf.pop()

        buildup_line = ''

        while 1:
            # read the line, make it None if EOF
            line = self.file.readline()
            if line == '':
                line = None

            # true if the previous physical line ended with a backslash,
            # ie. whatever we just read belongs to the same logical line
            continuing = self.join_lines and buildup_line

            if self.strip_comments and line:

                # Look for the first "#" in the line.  If none, never
                # mind.  If we find one and it's the first character, or
                # is not preceded by "\", then it starts a comment --
                # strip the comment, strip whitespace before it, and
                # carry on.  Otherwise, it's just an escaped "#", so
                # unescape it (and any other escaped "#"'s that might be
                # lurking in there) and otherwise leave the line alone.

                pos = line.find("#")
                if pos == -1:           # no "#" -- no comments
                    pass

                # It's definitely a comment -- either "#" is the first
                # character, or it's elsewhere and unescaped.
                elif pos == 0 or line[pos-1] != "\\":
                    # Have to preserve the trailing newline, because it's
                    # the job of a later step (rstrip_ws) to remove it --
                    # and if rstrip_ws is false, we'd better preserve it!
                    # (NB. this means that if the final line is all comment
                    # and has no trailing newline, we will think that it's
                    # EOF; I think that's OK.)
                    if line.endswith('\n'):
                        line = line[:pos] + '\n'
                    else:
                        line = line[:pos]

                    # If all that's left is whitespace, then skip line
                    # *now*, before we try to join it to 'buildup_line' --
                    # that way constructs like
                    #   hello \\
                    #   # comment that should be ignored
                    #   there
                    # result in "hello there".  The skipped line is still
                    # a physical line, so it has to be counted or every
                    # later warning would report the wrong line number.
                    if line.strip() == "":
                        self._count_physical_line(continuing)
                        continue

                else:                   # it's an escaped "#"
                    line = line.replace("\\#", "#")

            # did previous line end with a backslash? then accumulate
            if continuing:
                # oops: end of file
                if line is None:
                    self.warn("continuation line immediately precedes "
                              "end-of-file")
                    return buildup_line

                if self.collapse_join:
                    line = line.lstrip()
                line = buildup_line + line
                self._count_physical_line(1)

            # just an ordinary line, read it as usual
            else:
                if line is None:        # eof
                    return None
                self._count_physical_line(0)

            # strip whitespace however the client wants (leading and
            # trailing, or one or the other, or neither)
            if self.lstrip_ws and self.rstrip_ws:
                line = line.strip()
            elif self.lstrip_ws:
                line = line.lstrip()
            elif self.rstrip_ws:
                line = line.rstrip()

            # blank line (whether we rstrip'ed or not)? skip to next line
            # if appropriate
            if (line == '' or line == '\n') and self.skip_blanks:
                continue

            if self.join_lines:
                # 'endswith' rather than indexing: 'line' may be the empty
                # string here when 'skip_blanks' is false.
                if line.endswith('\\'):
                    buildup_line = line[:-1]
                    continue

                if line.endswith('\\\n'):
                    buildup_line = line[:-2] + '\n'
                    continue

            # well, I guess there's some actual content there: return it
            return line

    def readlines(self):
        """Read and return the list of all logical lines remaining in the
           current file."""

        lines = []
        while 1:
            line = self.readline()
            if line is None:
                return lines
            lines.append(line)

    def unreadline(self, line):
        """Push 'line' (a string) onto an internal buffer that will be
           checked by future 'readline()' calls.  Handy for implementing
           a parser with line-at-a-time lookahead."""

        self.linebuf.append(line)