1edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep"""HTML 2.0 parser.
2edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
3edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander StoepSee the HTML 2.0 specification:
4edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoephttp://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_toc.html
5edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep"""
6edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
7edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepfrom warnings import warnpy3k
8edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepwarnpy3k("the htmllib module has been removed in Python 3.0",
9edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep         stacklevel=2)
10edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepdel warnpy3k
11edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
12edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepimport sgmllib
13edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
14edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepfrom formatter import AS_IS
15edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
16edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep__all__ = ["HTMLParser", "HTMLParseError"]
17edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
18edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
19edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepclass HTMLParseError(sgmllib.SGMLParseError):
20edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    """Error raised when an HTML document can't be parsed."""
21edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
22edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
23edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepclass HTMLParser(sgmllib.SGMLParser):
24edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    """This is the basic HTML parser class.
25edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
26edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    It supports all entity names required by the XHTML 1.0 Recommendation.
27edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    It also defines handlers for all HTML 2.0 and many HTML 3.0 and 3.2
28edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    elements.
29edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
30edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    """
31edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
32edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    from htmlentitydefs import entitydefs
33edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
34edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def __init__(self, formatter, verbose=0):
35edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        """Creates an instance of the HTMLParser class.
36edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
37edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        The formatter parameter is the formatter instance associated with
38edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        the parser.
39edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
40edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        """
41edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        sgmllib.SGMLParser.__init__(self, verbose)
42edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter = formatter
43edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
44edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def error(self, message):
45edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        raise HTMLParseError(message)
46edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
47edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def reset(self):
48edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        sgmllib.SGMLParser.reset(self)
49edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.savedata = None
50edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.isindex = 0
51edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.title = None
52edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.base = None
53edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.anchor = None
54edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.anchorlist = []
55edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.nofill = 0
56edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.list_stack = []
57edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
58edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    # ------ Methods used internally; some may be overridden
59edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
60edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    # --- Formatter interface, taking care of 'savedata' mode;
61edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    # shouldn't need to be overridden
62edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
63edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def handle_data(self, data):
64edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        if self.savedata is not None:
65edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            self.savedata = self.savedata + data
66edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        else:
67edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            if self.nofill:
68edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                self.formatter.add_literal_data(data)
69edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            else:
70edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                self.formatter.add_flowing_data(data)
71edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
72edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    # --- Hooks to save data; shouldn't need to be overridden
73edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
74edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def save_bgn(self):
75edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        """Begins saving character data in a buffer instead of sending it
76edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        to the formatter object.
77edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
78edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        Retrieve the stored data via the save_end() method.  Use of the
79edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        save_bgn() / save_end() pair may not be nested.
80edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
81edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        """
82edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.savedata = ''
83edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
84edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def save_end(self):
85edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        """Ends buffering character data and returns all data saved since
86edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        the preceding call to the save_bgn() method.
87edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
88edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        If the nofill flag is false, whitespace is collapsed to single
89edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        spaces.  A call to this method without a preceding call to the
90edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        save_bgn() method will raise a TypeError exception.
91edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
92edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        """
93edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        data = self.savedata
94edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.savedata = None
95edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        if not self.nofill:
96edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            data = ' '.join(data.split())
97edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        return data
98edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
99edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    # --- Hooks for anchors; should probably be overridden
100edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
101edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def anchor_bgn(self, href, name, type):
102edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        """This method is called at the start of an anchor region.
103edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
104edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        The arguments correspond to the attributes of the <A> tag with
105edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        the same names.  The default implementation maintains a list of
106edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        hyperlinks (defined by the HREF attribute for <A> tags) within
107edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        the document.  The list of hyperlinks is available as the data
108edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        attribute anchorlist.
109edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
110edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        """
111edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.anchor = href
112edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        if self.anchor:
113edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            self.anchorlist.append(href)
114edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
115edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def anchor_end(self):
116edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        """This method is called at the end of an anchor region.
117edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
118edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        The default implementation adds a textual footnote marker using an
119edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        index into the list of hyperlinks created by the anchor_bgn()method.
120edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
121edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        """
122edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        if self.anchor:
123edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            self.handle_data("[%d]" % len(self.anchorlist))
124edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            self.anchor = None
125edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
126edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    # --- Hook for images; should probably be overridden
127edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
128edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def handle_image(self, src, alt, *args):
129edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        """This method is called to handle images.
130edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
131edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        The default implementation simply passes the alt value to the
132edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        handle_data() method.
133edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
134edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        """
135edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.handle_data(alt)
136edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
    # --------- Top level elements
138edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
139edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def start_html(self, attrs): pass
140edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def end_html(self): pass
141edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
142edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def start_head(self, attrs): pass
143edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def end_head(self): pass
144edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
145edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def start_body(self, attrs): pass
146edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def end_body(self): pass
147edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
148edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    # ------ Head elements
149edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
150edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def start_title(self, attrs):
151edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.save_bgn()
152edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
153edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def end_title(self):
154edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.title = self.save_end()
155edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
156edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def do_base(self, attrs):
157edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        for a, v in attrs:
158edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            if a == 'href':
159edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                self.base = v
160edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
161edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def do_isindex(self, attrs):
162edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.isindex = 1
163edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
164edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def do_link(self, attrs):
165edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        pass
166edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
167edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def do_meta(self, attrs):
168edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        pass
169edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
170edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def do_nextid(self, attrs): # Deprecated
171edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        pass
172edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
173edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    # ------ Body elements
174edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
175edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    # --- Headings
176edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
177edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def start_h1(self, attrs):
178edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.end_paragraph(1)
179edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.push_font(('h1', 0, 1, 0))
180edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
181edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def end_h1(self):
182edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.end_paragraph(1)
183edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.pop_font()
184edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
185edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def start_h2(self, attrs):
186edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.end_paragraph(1)
187edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.push_font(('h2', 0, 1, 0))
188edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
189edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def end_h2(self):
190edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.end_paragraph(1)
191edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.pop_font()
192edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
193edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def start_h3(self, attrs):
194edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.end_paragraph(1)
195edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.push_font(('h3', 0, 1, 0))
196edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
197edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def end_h3(self):
198edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.end_paragraph(1)
199edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.pop_font()
200edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
201edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def start_h4(self, attrs):
202edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.end_paragraph(1)
203edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.push_font(('h4', 0, 1, 0))
204edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
205edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def end_h4(self):
206edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.end_paragraph(1)
207edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.pop_font()
208edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
209edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def start_h5(self, attrs):
210edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.end_paragraph(1)
211edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.push_font(('h5', 0, 1, 0))
212edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
213edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def end_h5(self):
214edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.end_paragraph(1)
215edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.pop_font()
216edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
217edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def start_h6(self, attrs):
218edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.end_paragraph(1)
219edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.push_font(('h6', 0, 1, 0))
220edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
221edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def end_h6(self):
222edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.end_paragraph(1)
223edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.pop_font()
224edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
225edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    # --- Block Structuring Elements
226edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
227edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def do_p(self, attrs):
228edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.end_paragraph(1)
229edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
230edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def start_pre(self, attrs):
231edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.end_paragraph(1)
232edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.push_font((AS_IS, AS_IS, AS_IS, 1))
233edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.nofill = self.nofill + 1
234edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
235edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def end_pre(self):
236edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.end_paragraph(1)
237edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.pop_font()
238edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.nofill = max(0, self.nofill - 1)
239edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
240edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def start_xmp(self, attrs):
241edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.start_pre(attrs)
242edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.setliteral('xmp') # Tell SGML parser
243edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
244edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def end_xmp(self):
245edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.end_pre()
246edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
247edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def start_listing(self, attrs):
248edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.start_pre(attrs)
249edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.setliteral('listing') # Tell SGML parser
250edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
251edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def end_listing(self):
252edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.end_pre()
253edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
254edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def start_address(self, attrs):
255edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.end_paragraph(0)
256edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.push_font((AS_IS, 1, AS_IS, AS_IS))
257edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
258edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def end_address(self):
259edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.end_paragraph(0)
260edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.pop_font()
261edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
262edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def start_blockquote(self, attrs):
263edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.end_paragraph(1)
264edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.push_margin('blockquote')
265edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
266edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def end_blockquote(self):
267edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.end_paragraph(1)
268edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.pop_margin()
269edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
270edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    # --- List Elements
271edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
272edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def start_ul(self, attrs):
273edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.end_paragraph(not self.list_stack)
274edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.push_margin('ul')
275edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.list_stack.append(['ul', '*', 0])
276edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
277edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def end_ul(self):
278edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        if self.list_stack: del self.list_stack[-1]
279edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.end_paragraph(not self.list_stack)
280edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.pop_margin()
281edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
282edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def do_li(self, attrs):
283edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.end_paragraph(0)
284edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        if self.list_stack:
285edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            [dummy, label, counter] = top = self.list_stack[-1]
286edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            top[2] = counter = counter+1
287edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        else:
288edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            label, counter = '*', 0
289edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.add_label_data(label, counter)
290edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
291edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def start_ol(self, attrs):
292edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.end_paragraph(not self.list_stack)
293edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.push_margin('ol')
294edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        label = '1.'
295edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        for a, v in attrs:
296edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            if a == 'type':
297edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                if len(v) == 1: v = v + '.'
298edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                label = v
299edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.list_stack.append(['ol', label, 0])
300edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
301edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def end_ol(self):
302edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        if self.list_stack: del self.list_stack[-1]
303edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.end_paragraph(not self.list_stack)
304edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.pop_margin()
305edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
306edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def start_menu(self, attrs):
307edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.start_ul(attrs)
308edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
309edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def end_menu(self):
310edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.end_ul()
311edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
312edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def start_dir(self, attrs):
313edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.start_ul(attrs)
314edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
315edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def end_dir(self):
316edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.end_ul()
317edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
318edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def start_dl(self, attrs):
319edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.end_paragraph(1)
320edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.list_stack.append(['dl', '', 0])
321edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
322edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def end_dl(self):
323edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.ddpop(1)
324edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        if self.list_stack: del self.list_stack[-1]
325edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
326edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def do_dt(self, attrs):
327edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.ddpop()
328edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
329edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def do_dd(self, attrs):
330edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.ddpop()
331edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.push_margin('dd')
332edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.list_stack.append(['dd', '', 0])
333edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
334edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def ddpop(self, bl=0):
335edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.end_paragraph(bl)
336edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        if self.list_stack:
337edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            if self.list_stack[-1][0] == 'dd':
338edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                del self.list_stack[-1]
339edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                self.formatter.pop_margin()
340edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
341edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    # --- Phrase Markup
342edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
343edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    # Idiomatic Elements
344edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
345edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def start_cite(self, attrs): self.start_i(attrs)
346edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def end_cite(self): self.end_i()
347edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
348edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def start_code(self, attrs): self.start_tt(attrs)
349edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def end_code(self): self.end_tt()
350edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
351edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def start_em(self, attrs): self.start_i(attrs)
352edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def end_em(self): self.end_i()
353edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
354edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def start_kbd(self, attrs): self.start_tt(attrs)
355edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def end_kbd(self): self.end_tt()
356edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
357edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def start_samp(self, attrs): self.start_tt(attrs)
358edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def end_samp(self): self.end_tt()
359edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
360edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def start_strong(self, attrs): self.start_b(attrs)
361edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def end_strong(self): self.end_b()
362edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
363edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def start_var(self, attrs): self.start_i(attrs)
364edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def end_var(self): self.end_i()
365edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
366edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    # Typographic Elements
367edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
368edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def start_i(self, attrs):
369edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.push_font((AS_IS, 1, AS_IS, AS_IS))
370edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def end_i(self):
371edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.pop_font()
372edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
373edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def start_b(self, attrs):
374edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.push_font((AS_IS, AS_IS, 1, AS_IS))
375edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def end_b(self):
376edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.pop_font()
377edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
378edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def start_tt(self, attrs):
379edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.push_font((AS_IS, AS_IS, AS_IS, 1))
380edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def end_tt(self):
381edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.pop_font()
382edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
383edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def start_a(self, attrs):
384edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        href = ''
385edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        name = ''
386edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        type = ''
387edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        for attrname, value in attrs:
388edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            value = value.strip()
389edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            if attrname == 'href':
390edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                href = value
391edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            if attrname == 'name':
392edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                name = value
393edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            if attrname == 'type':
394edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                type = value.lower()
395edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.anchor_bgn(href, name, type)
396edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
397edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def end_a(self):
398edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.anchor_end()
399edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
400edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    # --- Line Break
401edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
402edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def do_br(self, attrs):
403edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.add_line_break()
404edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
405edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    # --- Horizontal Rule
406edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
407edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def do_hr(self, attrs):
408edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.formatter.add_hor_rule()
409edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
410edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    # --- Image
411edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
412edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def do_img(self, attrs):
413edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        align = ''
414edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        alt = '(image)'
415edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        ismap = ''
416edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        src = ''
417edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        width = 0
418edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        height = 0
419edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        for attrname, value in attrs:
420edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            if attrname == 'align':
421edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                align = value
422edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            if attrname == 'alt':
423edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                alt = value
424edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            if attrname == 'ismap':
425edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                ismap = value
426edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            if attrname == 'src':
427edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                src = value
428edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            if attrname == 'width':
429edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                try: width = int(value)
430edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                except ValueError: pass
431edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            if attrname == 'height':
432edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                try: height = int(value)
433edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                except ValueError: pass
434edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.handle_image(src, alt, ismap, align, width, height)
435edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
436edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    # --- Really Old Unofficial Deprecated Stuff
437edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
438edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def do_plaintext(self, attrs):
439edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.start_pre(attrs)
440edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.setnomoretags() # Tell SGML parser
441edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
442edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    # --- Unhandled tags
443edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
444edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def unknown_starttag(self, tag, attrs):
445edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        pass
446edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
447edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def unknown_endtag(self, tag):
448edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        pass
449edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
450edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
def test(args=None):
    """Command-line self-test: parse one HTML document and format it.

    args is an optional list of command-line style arguments; when it
    is None or empty, sys.argv[1:] is used instead.  The list passed in
    is never modified.

    Recognised arguments:
      -s     (first position only) format with a NullFormatter instead
             of an AbstractFormatter wrapped around a DumbWriter
      FILE   document to parse; defaults to 'test.html', and '-' means
             read standard input

    If FILE cannot be opened, "<file> : <error>" is printed and the
    process exits with status 1 via sys.exit().
    """
    import sys
    import formatter

    # Copy a caller-supplied list: the '-s' flag is removed in place below.
    if args:
        args = list(args)
    else:
        args = sys.argv[1:]

    silent = bool(args) and args[0] == '-s'
    if silent:
        del args[0]

    if args:
        filename = args[0]
    else:
        filename = 'test.html'

    if filename == '-':
        data = sys.stdin.read()
    else:
        try:
            source = open(filename, 'r')
        except IOError as msg:
            # Single-argument print() gives the same output on Python 2.
            print("%s : %s" % (filename, msg))
            sys.exit(1)
        # Close the file even if reading it fails.
        with source:
            data = source.read()

    if silent:
        fmt = formatter.NullFormatter()
    else:
        fmt = formatter.AbstractFormatter(formatter.DumbWriter())

    parser = HTMLParser(fmt)
    parser.feed(data)
    parser.close()
488edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
489edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
# Run the self-test when this module is executed as a script.
if __name__ == '__main__':
    test()
492