1ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh"""HTML 2.0 parser.
2ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
3ffab958fd8d42ed7227d83007350e61555a1fa36Andrew HsiehSee the HTML 2.0 specification:
4ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehhttp://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_toc.html
5ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh"""
6ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
7ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehfrom warnings import warnpy3k
8ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehwarnpy3k("the htmllib module has been removed in Python 3.0",
9ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh         stacklevel=2)
10ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehdel warnpy3k
11ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
12ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehimport sgmllib
13ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
14ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehfrom formatter import AS_IS
15ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
16ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh__all__ = ["HTMLParser", "HTMLParseError"]
17ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
18ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
19ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehclass HTMLParseError(sgmllib.SGMLParseError):
20ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    """Error raised when an HTML document can't be parsed."""
21ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
22ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
23ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehclass HTMLParser(sgmllib.SGMLParser):
24ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    """This is the basic HTML parser class.
25ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
26ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    It supports all entity names required by the XHTML 1.0 Recommendation.
27ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    It also defines handlers for all HTML 2.0 and many HTML 3.0 and 3.2
28ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    elements.
29ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
30ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    """
31ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
32ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    from htmlentitydefs import entitydefs
33ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
34ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def __init__(self, formatter, verbose=0):
35ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """Creates an instance of the HTMLParser class.
36ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
37ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        The formatter parameter is the formatter instance associated with
38ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        the parser.
39ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
40ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
41ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        sgmllib.SGMLParser.__init__(self, verbose)
42ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter = formatter
43ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
44ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def error(self, message):
45ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        raise HTMLParseError(message)
46ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
47ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def reset(self):
48ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        sgmllib.SGMLParser.reset(self)
49ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.savedata = None
50ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.isindex = 0
51ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.title = None
52ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.base = None
53ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.anchor = None
54ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.anchorlist = []
55ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.nofill = 0
56ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.list_stack = []
57ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
58ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    # ------ Methods used internally; some may be overridden
59ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
60ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    # --- Formatter interface, taking care of 'savedata' mode;
61ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    # shouldn't need to be overridden
62ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
63ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def handle_data(self, data):
64ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if self.savedata is not None:
65ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            self.savedata = self.savedata + data
66ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        else:
67ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            if self.nofill:
68ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                self.formatter.add_literal_data(data)
69ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            else:
70ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                self.formatter.add_flowing_data(data)
71ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
72ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    # --- Hooks to save data; shouldn't need to be overridden
73ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
74ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def save_bgn(self):
75ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """Begins saving character data in a buffer instead of sending it
76ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        to the formatter object.
77ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
78ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Retrieve the stored data via the save_end() method.  Use of the
79ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        save_bgn() / save_end() pair may not be nested.
80ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
81ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
82ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.savedata = ''
83ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
84ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def save_end(self):
85ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """Ends buffering character data and returns all data saved since
86ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        the preceding call to the save_bgn() method.
87ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
88ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        If the nofill flag is false, whitespace is collapsed to single
89ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        spaces.  A call to this method without a preceding call to the
90ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        save_bgn() method will raise a TypeError exception.
91ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
92ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
93ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        data = self.savedata
94ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.savedata = None
95ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if not self.nofill:
96ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            data = ' '.join(data.split())
97ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return data
98ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
99ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    # --- Hooks for anchors; should probably be overridden
100ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
101ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def anchor_bgn(self, href, name, type):
102ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """This method is called at the start of an anchor region.
103ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
104ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        The arguments correspond to the attributes of the <A> tag with
105ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        the same names.  The default implementation maintains a list of
106ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        hyperlinks (defined by the HREF attribute for <A> tags) within
107ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        the document.  The list of hyperlinks is available as the data
108ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        attribute anchorlist.
109ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
110ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
111ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.anchor = href
112ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if self.anchor:
113ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            self.anchorlist.append(href)
114ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
115ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def anchor_end(self):
116ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """This method is called at the end of an anchor region.
117ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
118ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        The default implementation adds a textual footnote marker using an
119ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        index into the list of hyperlinks created by the anchor_bgn()method.
120ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
121ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
122ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if self.anchor:
123ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            self.handle_data("[%d]" % len(self.anchorlist))
124ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            self.anchor = None
125ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
126ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    # --- Hook for images; should probably be overridden
127ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
128ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def handle_image(self, src, alt, *args):
129ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """This method is called to handle images.
130ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
131ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        The default implementation simply passes the alt value to the
132ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        handle_data() method.
133ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
134ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
135ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.handle_data(alt)
136ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
137ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    # --------- Top level elememts
138ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
139ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def start_html(self, attrs): pass
140ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def end_html(self): pass
141ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
142ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def start_head(self, attrs): pass
143ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def end_head(self): pass
144ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
145ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def start_body(self, attrs): pass
146ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def end_body(self): pass
147ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
148ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    # ------ Head elements
149ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
150ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def start_title(self, attrs):
151ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.save_bgn()
152ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
153ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def end_title(self):
154ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.title = self.save_end()
155ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
156ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def do_base(self, attrs):
157ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        for a, v in attrs:
158ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            if a == 'href':
159ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                self.base = v
160ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
161ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def do_isindex(self, attrs):
162ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.isindex = 1
163ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
164ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def do_link(self, attrs):
165ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        pass
166ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
167ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def do_meta(self, attrs):
168ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        pass
169ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
170ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def do_nextid(self, attrs): # Deprecated
171ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        pass
172ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
173ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    # ------ Body elements
174ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
175ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    # --- Headings
176ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
177ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def start_h1(self, attrs):
178ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.end_paragraph(1)
179ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.push_font(('h1', 0, 1, 0))
180ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
181ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def end_h1(self):
182ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.end_paragraph(1)
183ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.pop_font()
184ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
185ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def start_h2(self, attrs):
186ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.end_paragraph(1)
187ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.push_font(('h2', 0, 1, 0))
188ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
189ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def end_h2(self):
190ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.end_paragraph(1)
191ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.pop_font()
192ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
193ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def start_h3(self, attrs):
194ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.end_paragraph(1)
195ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.push_font(('h3', 0, 1, 0))
196ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
197ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def end_h3(self):
198ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.end_paragraph(1)
199ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.pop_font()
200ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
201ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def start_h4(self, attrs):
202ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.end_paragraph(1)
203ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.push_font(('h4', 0, 1, 0))
204ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
205ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def end_h4(self):
206ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.end_paragraph(1)
207ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.pop_font()
208ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
209ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def start_h5(self, attrs):
210ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.end_paragraph(1)
211ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.push_font(('h5', 0, 1, 0))
212ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
213ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def end_h5(self):
214ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.end_paragraph(1)
215ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.pop_font()
216ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
217ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def start_h6(self, attrs):
218ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.end_paragraph(1)
219ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.push_font(('h6', 0, 1, 0))
220ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
221ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def end_h6(self):
222ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.end_paragraph(1)
223ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.pop_font()
224ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
225ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    # --- Block Structuring Elements
226ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
227ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def do_p(self, attrs):
228ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.end_paragraph(1)
229ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
230ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def start_pre(self, attrs):
231ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.end_paragraph(1)
232ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.push_font((AS_IS, AS_IS, AS_IS, 1))
233ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.nofill = self.nofill + 1
234ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
235ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def end_pre(self):
236ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.end_paragraph(1)
237ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.pop_font()
238ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.nofill = max(0, self.nofill - 1)
239ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
240ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def start_xmp(self, attrs):
241ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.start_pre(attrs)
242ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.setliteral('xmp') # Tell SGML parser
243ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
244ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def end_xmp(self):
245ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.end_pre()
246ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
247ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def start_listing(self, attrs):
248ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.start_pre(attrs)
249ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.setliteral('listing') # Tell SGML parser
250ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
251ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def end_listing(self):
252ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.end_pre()
253ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
254ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def start_address(self, attrs):
255ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.end_paragraph(0)
256ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.push_font((AS_IS, 1, AS_IS, AS_IS))
257ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
258ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def end_address(self):
259ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.end_paragraph(0)
260ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.pop_font()
261ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
262ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def start_blockquote(self, attrs):
263ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.end_paragraph(1)
264ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.push_margin('blockquote')
265ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
266ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def end_blockquote(self):
267ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.end_paragraph(1)
268ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.pop_margin()
269ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
270ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    # --- List Elements
271ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
272ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def start_ul(self, attrs):
273ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.end_paragraph(not self.list_stack)
274ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.push_margin('ul')
275ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.list_stack.append(['ul', '*', 0])
276ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
277ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def end_ul(self):
278ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if self.list_stack: del self.list_stack[-1]
279ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.end_paragraph(not self.list_stack)
280ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.pop_margin()
281ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
282ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def do_li(self, attrs):
283ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.end_paragraph(0)
284ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if self.list_stack:
285ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            [dummy, label, counter] = top = self.list_stack[-1]
286ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            top[2] = counter = counter+1
287ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        else:
288ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            label, counter = '*', 0
289ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.add_label_data(label, counter)
290ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
291ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def start_ol(self, attrs):
292ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.end_paragraph(not self.list_stack)
293ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.push_margin('ol')
294ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        label = '1.'
295ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        for a, v in attrs:
296ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            if a == 'type':
297ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                if len(v) == 1: v = v + '.'
298ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                label = v
299ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.list_stack.append(['ol', label, 0])
300ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
301ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def end_ol(self):
302ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if self.list_stack: del self.list_stack[-1]
303ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.end_paragraph(not self.list_stack)
304ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.pop_margin()
305ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
306ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def start_menu(self, attrs):
307ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.start_ul(attrs)
308ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
309ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def end_menu(self):
310ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.end_ul()
311ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
312ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def start_dir(self, attrs):
313ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.start_ul(attrs)
314ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
315ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def end_dir(self):
316ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.end_ul()
317ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
318ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def start_dl(self, attrs):
319ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.end_paragraph(1)
320ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.list_stack.append(['dl', '', 0])
321ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
322ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def end_dl(self):
323ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.ddpop(1)
324ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if self.list_stack: del self.list_stack[-1]
325ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
326ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def do_dt(self, attrs):
327ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.ddpop()
328ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
329ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def do_dd(self, attrs):
330ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.ddpop()
331ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.push_margin('dd')
332ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.list_stack.append(['dd', '', 0])
333ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
334ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def ddpop(self, bl=0):
335ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.end_paragraph(bl)
336ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if self.list_stack:
337ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            if self.list_stack[-1][0] == 'dd':
338ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                del self.list_stack[-1]
339ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                self.formatter.pop_margin()
340ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
341ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    # --- Phrase Markup
342ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
343ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    # Idiomatic Elements
344ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
345ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def start_cite(self, attrs): self.start_i(attrs)
346ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def end_cite(self): self.end_i()
347ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
348ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def start_code(self, attrs): self.start_tt(attrs)
349ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def end_code(self): self.end_tt()
350ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
351ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def start_em(self, attrs): self.start_i(attrs)
352ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def end_em(self): self.end_i()
353ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
354ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def start_kbd(self, attrs): self.start_tt(attrs)
355ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def end_kbd(self): self.end_tt()
356ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
357ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def start_samp(self, attrs): self.start_tt(attrs)
358ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def end_samp(self): self.end_tt()
359ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
360ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def start_strong(self, attrs): self.start_b(attrs)
361ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def end_strong(self): self.end_b()
362ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
363ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def start_var(self, attrs): self.start_i(attrs)
364ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def end_var(self): self.end_i()
365ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
366ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    # Typographic Elements
367ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
368ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def start_i(self, attrs):
369ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.push_font((AS_IS, 1, AS_IS, AS_IS))
370ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def end_i(self):
371ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.pop_font()
372ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
373ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def start_b(self, attrs):
374ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.push_font((AS_IS, AS_IS, 1, AS_IS))
375ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def end_b(self):
376ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.pop_font()
377ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
378ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def start_tt(self, attrs):
379ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.push_font((AS_IS, AS_IS, AS_IS, 1))
380ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def end_tt(self):
381ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.pop_font()
382ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
383ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def start_a(self, attrs):
384ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        href = ''
385ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        name = ''
386ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        type = ''
387ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        for attrname, value in attrs:
388ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            value = value.strip()
389ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            if attrname == 'href':
390ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                href = value
391ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            if attrname == 'name':
392ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                name = value
393ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            if attrname == 'type':
394ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                type = value.lower()
395ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.anchor_bgn(href, name, type)
396ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
397ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def end_a(self):
398ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.anchor_end()
399ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
400ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    # --- Line Break
401ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
402ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def do_br(self, attrs):
403ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.add_line_break()
404ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
405ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    # --- Horizontal Rule
406ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
407ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def do_hr(self, attrs):
408ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.formatter.add_hor_rule()
409ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
410ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    # --- Image
411ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
412ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def do_img(self, attrs):
413ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        align = ''
414ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        alt = '(image)'
415ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        ismap = ''
416ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        src = ''
417ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        width = 0
418ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        height = 0
419ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        for attrname, value in attrs:
420ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            if attrname == 'align':
421ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                align = value
422ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            if attrname == 'alt':
423ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                alt = value
424ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            if attrname == 'ismap':
425ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                ismap = value
426ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            if attrname == 'src':
427ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                src = value
428ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            if attrname == 'width':
429ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                try: width = int(value)
430ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                except ValueError: pass
431ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            if attrname == 'height':
432ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                try: height = int(value)
433ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                except ValueError: pass
434ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.handle_image(src, alt, ismap, align, width, height)
435ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
436ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    # --- Really Old Unofficial Deprecated Stuff
437ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
438ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def do_plaintext(self, attrs):
439ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.start_pre(attrs)
440ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.setnomoretags() # Tell SGML parser
441ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
442ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    # --- Unhandled tags
443ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
444ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def unknown_starttag(self, tag, attrs):
445ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        pass
446ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
447ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def unknown_endtag(self, tag):
448ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        pass
449ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
450ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
451ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehdef test(args = None):
452ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    import sys, formatter
453ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
454ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    if not args:
455ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        args = sys.argv[1:]
456ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
457ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    silent = args and args[0] == '-s'
458ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    if silent:
459ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        del args[0]
460ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
461ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    if args:
462ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        file = args[0]
463ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    else:
464ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        file = 'test.html'
465ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
466ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    if file == '-':
467ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        f = sys.stdin
468ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    else:
469ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        try:
470ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            f = open(file, 'r')
471ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        except IOError, msg:
472ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            print file, ":", msg
473ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            sys.exit(1)
474ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
475ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    data = f.read()
476ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
477ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    if f is not sys.stdin:
478ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        f.close()
479ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
480ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    if silent:
481ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        f = formatter.NullFormatter()
482ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    else:
483ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        f = formatter.AbstractFormatter(formatter.DumbWriter())
484ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
485ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    p = HTMLParser(f)
486ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    p.feed(data)
487ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    p.close()
488ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
489ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
490ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehif __name__ == '__main__':
491ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    test()
492