"""HTML 2.0 parser.

See the HTML 2.0 specification:
http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_toc.html
"""

from warnings import warnpy3k
warnpy3k("the htmllib module has been removed in Python 3.0",
         stacklevel=2)
del warnpy3k

import sgmllib

from formatter import AS_IS

__all__ = ["HTMLParser", "HTMLParseError"]


class HTMLParseError(sgmllib.SGMLParseError):
    """Error raised when an HTML document can't be parsed."""


class HTMLParser(sgmllib.SGMLParser):
    """This is the basic HTML parser class.

    It supports all entity names required by the XHTML 1.0 Recommendation.
    It also defines handlers for all HTML 2.0 and many HTML 3.0 and 3.2
    elements.

    Parsed content is forwarded to the formatter object given to the
    constructor.  Font specifications pushed onto the formatter are
    4-tuples; judging by the handlers below (and the formatter module's
    documentation) the slots are (size, italic, bold, teletype), with
    AS_IS meaning "leave this slot unchanged".

    """

    from htmlentitydefs import entitydefs

    def __init__(self, formatter, verbose=0):
        """Creates an instance of the HTMLParser class.

        The formatter parameter is the formatter instance associated with
        the parser.  The verbose parameter is passed through unchanged to
        sgmllib.SGMLParser.__init__().

        """
        sgmllib.SGMLParser.__init__(self, verbose)
        self.formatter = formatter

    def error(self, message):
        """Raise HTMLParseError carrying the given message."""
        raise HTMLParseError(message)

    def reset(self):
        """Reset the base parser and reinitialize all HTML-level state."""
        sgmllib.SGMLParser.reset(self)
        self.savedata = None    # text buffer; None means "not saving"
        self.isindex = 0        # set to 1 once an <ISINDEX> tag is seen
        self.title = None       # text collected between <TITLE> tags
        self.base = None        # HREF of the last <BASE> tag seen
        self.anchor = None      # HREF of the anchor currently open
        self.anchorlist = []    # every non-empty anchor HREF, in order
        self.nofill = 0         # nesting depth of literal (<PRE>) regions
        self.list_stack = []    # [kind, label, counter] per open list

    # ------ Methods used internally; some may be overridden

    # --- Formatter interface, taking care of 'savedata' mode;
    # shouldn't need to be overridden

    def handle_data(self, data):
        """Route character data to the save buffer or to the formatter.

        While a save_bgn() / save_end() pair is active the data is
        appended to the buffer.  Otherwise it is sent to the formatter as
        literal data when nofill is set and as flowing data when not.

        """
        if self.savedata is not None:
            self.savedata = self.savedata + data
        elif self.nofill:
            self.formatter.add_literal_data(data)
        else:
            self.formatter.add_flowing_data(data)

    # --- Hooks to save data; shouldn't need to be overridden

    def save_bgn(self):
        """Begins saving character data in a buffer instead of sending it
        to the formatter object.

        Retrieve the stored data via the save_end() method.  Use of the
        save_bgn() / save_end() pair may not be nested.

        """
        self.savedata = ''

    def save_end(self):
        """Ends buffering character data and returns all data saved since
        the preceding call to the save_bgn() method.

        If the nofill flag is false, whitespace is collapsed to single
        spaces.  A call to this method without a preceding call to the
        save_bgn() method will raise a TypeError exception.

        """
        data = self.savedata
        if data is None:
            # Enforce the documented contract explicitly instead of
            # failing on None.split() (or silently returning None when
            # nofill is set).
            raise TypeError(
                "save_end() called without a preceding save_bgn()")
        self.savedata = None
        if not self.nofill:
            data = ' '.join(data.split())
        return data

    # --- Hooks for anchors; should probably be overridden

    def anchor_bgn(self, href, name, type):
        """This method is called at the start of an anchor region.

        The arguments correspond to the attributes of the <A> tag with
        the same names.  The default implementation maintains a list of
        hyperlinks (defined by the HREF attribute for <A> tags) within
        the document.  The list of hyperlinks is available as the data
        attribute anchorlist.

        """
        self.anchor = href
        if self.anchor:
            self.anchorlist.append(href)

    def anchor_end(self):
        """This method is called at the end of an anchor region.

        The default implementation adds a textual footnote marker using an
        index into the list of hyperlinks created by the anchor_bgn()
        method.

        """
        if self.anchor:
            self.handle_data("[%d]" % len(self.anchorlist))
            self.anchor = None

    # --- Hook for images; should probably be overridden

    def handle_image(self, src, alt, *args):
        """This method is called to handle images.

        The default implementation simply passes the alt value to the
        handle_data() method.  The src value and any extra positional
        arguments are ignored here.

        """
        self.handle_data(alt)

    # --------- Top level elements

    def start_html(self, attrs): pass
    def end_html(self): pass

    def start_head(self, attrs): pass
    def end_head(self): pass

    def start_body(self, attrs): pass
    def end_body(self): pass

    # ------ Head elements

    def start_title(self, attrs):
        """Start buffering the document title text."""
        self.save_bgn()

    def end_title(self):
        """Store the buffered text in the title attribute."""
        self.title = self.save_end()

    def do_base(self, attrs):
        """Record the HREF attribute of a <BASE> tag in the base attribute."""
        for a, v in attrs:
            if a == 'href':
                self.base = v

    def do_isindex(self, attrs):
        """Note that an <ISINDEX> tag was seen."""
        self.isindex = 1

    def do_link(self, attrs):
        """Ignore <LINK> tags."""
        pass

    def do_meta(self, attrs):
        """Ignore <META> tags."""
        pass

    def do_nextid(self, attrs): # Deprecated
        """Ignore <NEXTID> tags."""
        pass

    # ------ Body elements

    # --- Headings

    def start_h1(self, attrs):
        """End the current paragraph and push the 'h1' bold font."""
        self.formatter.end_paragraph(1)
        self.formatter.push_font(('h1', 0, 1, 0))

    def end_h1(self):
        """End the heading paragraph and restore the previous font."""
        self.formatter.end_paragraph(1)
        self.formatter.pop_font()

    def start_h2(self, attrs):
        """End the current paragraph and push the 'h2' bold font."""
        self.formatter.end_paragraph(1)
        self.formatter.push_font(('h2', 0, 1, 0))

    def end_h2(self):
        """End the heading paragraph and restore the previous font."""
        self.formatter.end_paragraph(1)
        self.formatter.pop_font()

    def start_h3(self, attrs):
        """End the current paragraph and push the 'h3' bold font."""
        self.formatter.end_paragraph(1)
        self.formatter.push_font(('h3', 0, 1, 0))

    def end_h3(self):
        """End the heading paragraph and restore the previous font."""
        self.formatter.end_paragraph(1)
        self.formatter.pop_font()

    def start_h4(self, attrs):
        """End the current paragraph and push the 'h4' bold font."""
        self.formatter.end_paragraph(1)
        self.formatter.push_font(('h4', 0, 1, 0))

    def end_h4(self):
        """End the heading paragraph and restore the previous font."""
        self.formatter.end_paragraph(1)
        self.formatter.pop_font()

    def start_h5(self, attrs):
        """End the current paragraph and push the 'h5' bold font."""
        self.formatter.end_paragraph(1)
        self.formatter.push_font(('h5', 0, 1, 0))

    def end_h5(self):
        """End the heading paragraph and restore the previous font."""
        self.formatter.end_paragraph(1)
        self.formatter.pop_font()

    def start_h6(self, attrs):
        """End the current paragraph and push the 'h6' bold font."""
        self.formatter.end_paragraph(1)
        self.formatter.push_font(('h6', 0, 1, 0))

    def end_h6(self):
        """End the heading paragraph and restore the previous font."""
        self.formatter.end_paragraph(1)
        self.formatter.pop_font()

    # --- Block Structuring Elements

    def do_p(self, attrs):
        """End the current paragraph."""
        self.formatter.end_paragraph(1)

    def start_pre(self, attrs):
        """Enter a literal region: new paragraph, teletype font, no fill."""
        self.formatter.end_paragraph(1)
        self.formatter.push_font((AS_IS, AS_IS, AS_IS, 1))
        # A counter rather than a flag, so nested literal regions work.
        self.nofill = self.nofill + 1

    def end_pre(self):
        """Leave a literal region, undoing what start_pre() did."""
        self.formatter.end_paragraph(1)
        self.formatter.pop_font()
        # Clamp at zero so an unmatched end tag cannot go negative.
        self.nofill = max(0, self.nofill - 1)

    def start_xmp(self, attrs):
        """Handle <XMP> like <PRE>, with literal parsing of its content."""
        self.start_pre(attrs)
        self.setliteral('xmp') # Tell SGML parser

    def end_xmp(self):
        """Handle </XMP> like </PRE>."""
        self.end_pre()

    def start_listing(self, attrs):
        """Handle <LISTING> like <PRE>, with literal parsing of its content."""
        self.start_pre(attrs)
        self.setliteral('listing') # Tell SGML parser

    def end_listing(self):
        """Handle </LISTING> like </PRE>."""
        self.end_pre()

    def start_address(self, attrs):
        """End the paragraph (no blank line) and switch to italics."""
        self.formatter.end_paragraph(0)
        self.formatter.push_font((AS_IS, 1, AS_IS, AS_IS))

    def end_address(self):
        """End the address paragraph and restore the previous font."""
        self.formatter.end_paragraph(0)
        self.formatter.pop_font()

    def start_blockquote(self, attrs):
        """End the paragraph and push a 'blockquote' margin."""
        self.formatter.end_paragraph(1)
        self.formatter.push_margin('blockquote')

    def end_blockquote(self):
        """End the paragraph and pop the margin."""
        self.formatter.end_paragraph(1)
        self.formatter.pop_margin()

    # --- List Elements

    def start_ul(self, attrs):
        """Open an unordered list with '*' as the item label."""
        # The argument is true (one blank line) only for an outermost list.
        self.formatter.end_paragraph(not self.list_stack)
        self.formatter.push_margin('ul')
        self.list_stack.append(['ul', '*', 0])

    def end_ul(self):
        """Close the innermost list and pop its margin."""
        if self.list_stack: del self.list_stack[-1]
        self.formatter.end_paragraph(not self.list_stack)
        self.formatter.pop_margin()

    def do_li(self, attrs):
        """Emit the label for the next item of the innermost list.

        The item counter of the innermost list is incremented first.
        Outside of any list the label '*' with counter 0 is used.

        """
        self.formatter.end_paragraph(0)
        if self.list_stack:
            # 'top' aliases the stack entry, so the new counter persists.
            [dummy, label, counter] = top = self.list_stack[-1]
            top[2] = counter = counter+1
        else:
            label, counter = '*', 0
        self.formatter.add_label_data(label, counter)

    def start_ol(self, attrs):
        """Open an ordered list.

        The label format defaults to '1.'; a TYPE attribute overrides it,
        and a one-character TYPE value gets a '.' appended.

        """
        self.formatter.end_paragraph(not self.list_stack)
        self.formatter.push_margin('ol')
        label = '1.'
        for a, v in attrs:
            if a == 'type':
                if len(v) == 1: v = v + '.'
                label = v
        self.list_stack.append(['ol', label, 0])

    def end_ol(self):
        """Close the innermost list and pop its margin."""
        if self.list_stack: del self.list_stack[-1]
        self.formatter.end_paragraph(not self.list_stack)
        self.formatter.pop_margin()

    def start_menu(self, attrs):
        """Handle <MENU> like <UL>."""
        self.start_ul(attrs)

    def end_menu(self):
        """Handle </MENU> like </UL>."""
        self.end_ul()

    def start_dir(self, attrs):
        """Handle <DIR> like <UL>."""
        self.start_ul(attrs)

    def end_dir(self):
        """Handle </DIR> like </UL>."""
        self.end_ul()

    def start_dl(self, attrs):
        """Open a definition list."""
        self.formatter.end_paragraph(1)
        self.list_stack.append(['dl', '', 0])

    def end_dl(self):
        """Close any open <DD> region, then the definition list itself."""
        self.ddpop(1)
        if self.list_stack: del self.list_stack[-1]

    def do_dt(self, attrs):
        """Start a definition term, closing any open <DD> region."""
        self.ddpop()

    def do_dd(self, attrs):
        """Start a definition body with its own 'dd' margin."""
        self.ddpop()
        self.formatter.push_margin('dd')
        self.list_stack.append(['dd', '', 0])

    def ddpop(self, bl=0):
        """End the paragraph and close the innermost <DD> region, if any.

        The bl parameter is passed to the formatter's end_paragraph().
        If the top of the list stack is a 'dd' entry it is removed and
        its margin is popped.

        """
        self.formatter.end_paragraph(bl)
        if self.list_stack:
            if self.list_stack[-1][0] == 'dd':
                del self.list_stack[-1]
                self.formatter.pop_margin()

    # --- Phrase Markup

    # Idiomatic Elements

    def start_cite(self, attrs): self.start_i(attrs)
    def end_cite(self): self.end_i()

    def start_code(self, attrs): self.start_tt(attrs)
    def end_code(self): self.end_tt()

    def start_em(self, attrs): self.start_i(attrs)
    def end_em(self): self.end_i()

    def start_kbd(self, attrs): self.start_tt(attrs)
    def end_kbd(self): self.end_tt()

    def start_samp(self, attrs): self.start_tt(attrs)
    def end_samp(self): self.end_tt()

    def start_strong(self, attrs): self.start_b(attrs)
    def end_strong(self): self.end_b()

    def start_var(self, attrs): self.start_i(attrs)
    def end_var(self): self.end_i()

    # Typographic Elements

    def start_i(self, attrs):
        """Push a font with the second (italic) slot set."""
        self.formatter.push_font((AS_IS, 1, AS_IS, AS_IS))
    def end_i(self):
        """Restore the previous font."""
        self.formatter.pop_font()

    def start_b(self, attrs):
        """Push a font with the third (bold) slot set."""
        self.formatter.push_font((AS_IS, AS_IS, 1, AS_IS))
    def end_b(self):
        """Restore the previous font."""
        self.formatter.pop_font()

    def start_tt(self, attrs):
        """Push a font with the fourth (teletype) slot set."""
        self.formatter.push_font((AS_IS, AS_IS, AS_IS, 1))
    def end_tt(self):
        """Restore the previous font."""
        self.formatter.pop_font()

    def start_a(self, attrs):
        """Collect the HREF, NAME and TYPE attributes and call anchor_bgn().

        Attribute values are stripped of surrounding whitespace; the TYPE
        value is also lowercased.  Missing attributes default to ''.

        """
        href = ''
        name = ''
        type_ = ''      # trailing underscore avoids shadowing the builtin
        for attrname, value in attrs:
            value = value.strip()
            if attrname == 'href':
                href = value
            elif attrname == 'name':
                name = value
            elif attrname == 'type':
                type_ = value.lower()
        self.anchor_bgn(href, name, type_)

    def end_a(self):
        """Call anchor_end() for a closing </A> tag."""
        self.anchor_end()

    # --- Line Break

    def do_br(self, attrs):
        """Add a line break to the formatter."""
        self.formatter.add_line_break()

    # --- Horizontal Rule

    def do_hr(self, attrs):
        """Add a horizontal rule to the formatter."""
        self.formatter.add_hor_rule()

    # --- Image

    def do_img(self, attrs):
        """Collect the <IMG> attributes and call handle_image().

        ALT defaults to '(image)'; ALIGN, ISMAP and SRC default to ''.
        WIDTH and HEIGHT default to 0 and keep that value when the
        attribute is not a valid integer.

        """
        align = ''
        alt = '(image)'
        ismap = ''
        src = ''
        width = 0
        height = 0
        for attrname, value in attrs:
            if attrname == 'align':
                align = value
            elif attrname == 'alt':
                alt = value
            elif attrname == 'ismap':
                ismap = value
            elif attrname == 'src':
                src = value
            elif attrname == 'width':
                try: width = int(value)
                except ValueError: pass
            elif attrname == 'height':
                try: height = int(value)
                except ValueError: pass
        self.handle_image(src, alt, ismap, align, width, height)

    # --- Really Old Unofficial Deprecated Stuff

    def do_plaintext(self, attrs):
        """Handle <PLAINTEXT>: start a literal region that is never closed."""
        self.start_pre(attrs)
        self.setnomoretags() # Tell SGML parser

    # --- Unhandled tags

    def unknown_starttag(self, tag, attrs):
        """Ignore start tags that have no handler."""
        pass

447edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def unknown_endtag(self, tag): 448edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep pass 449edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 450edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 451edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepdef test(args = None): 452edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep import sys, formatter 453edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 454edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if not args: 455edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep args = sys.argv[1:] 456edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 457edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep silent = args and args[0] == '-s' 458edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if silent: 459edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep del args[0] 460edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 461edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if args: 462edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep file = args[0] 463edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep else: 464edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep file = 'test.html' 465edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 466edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if file == '-': 467edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep f = sys.stdin 468edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep else: 469edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep try: 470edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep f = open(file, 'r') 471edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep except IOError, msg: 472edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep print file, ":", msg 473edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep sys.exit(1) 
474edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 475edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep data = f.read() 476edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 477edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if f is not sys.stdin: 478edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep f.close() 479edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 480edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if silent: 481edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep f = formatter.NullFormatter() 482edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep else: 483edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep f = formatter.AbstractFormatter(formatter.DumbWriter()) 484edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 485edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep p = HTMLParser(f) 486edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep p.feed(data) 487edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep p.close() 488edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 489edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 490edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepif __name__ == '__main__': 491edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep test() 492