"""Tests for the deprecated htmllib module's HTMLParser hooks."""

import formatter
import unittest

from test import test_support
# htmllib is deprecated; import it through test_support so the deprecation
# warning is handled by the helper instead of leaking into the test run.
htmllib = test_support.import_module('htmllib', deprecated=True)


class AnchorCollector(htmllib.HTMLParser):
    """HTMLParser subclass that records every anchor_bgn() call it receives."""

    def __init__(self, *args, **kw):
        # Create the list before delegating to the base initializer so it
        # exists by the time any parser callback could run.
        self.__anchors = []
        htmllib.HTMLParser.__init__(self, *args, **kw)

    def get_anchor_info(self):
        """Return the list of argument tuples recorded by anchor_bgn()."""
        return self.__anchors

    def anchor_bgn(self, *args):
        """Record the positional arguments of one anchor start as a tuple."""
        self.__anchors.append(args)


class DeclCollector(htmllib.HTMLParser):
    """HTMLParser subclass that records every unknown_decl() call it receives."""

    def __init__(self, *args, **kw):
        # Create the list before delegating to the base initializer so it
        # exists by the time any parser callback could run.
        self.__decls = []
        htmllib.HTMLParser.__init__(self, *args, **kw)

    def get_decl_info(self):
        """Return the list of declaration strings recorded by unknown_decl()."""
        return self.__decls

    def unknown_decl(self, data):
        """Record the text of one unrecognized declaration."""
        self.__decls.append(data)


class HTMLParserTestCase(unittest.TestCase):
    """Checks that htmllib.HTMLParser invokes its anchor and decl hooks."""

    def test_anchor_collection(self):
        """Each <a> tag yields one tuple; missing attributes come back as ''."""
        # See SF bug #467059.
        parser = AnchorCollector(formatter.NullFormatter(), verbose=1)
        parser.feed(
            """<a href='http://foo.org/' name='splat'> </a>
            <a href='http://www.python.org/'> </a>
            <a name='frob'> </a>
            """)
        parser.close()
        self.assertEqual(parser.get_anchor_info(),
                         [('http://foo.org/', 'splat', ''),
                          ('http://www.python.org/', '', ''),
                          ('', 'frob', ''),
                          ])

    def test_decl_collection(self):
        """Marked sections such as <![if ...]> are passed to unknown_decl()."""
        # See SF patch #545300
        parser = DeclCollector(formatter.NullFormatter(), verbose=1)
        parser.feed(
            """<html>
            <body>
            hallo
            <![if !supportEmptyParas]> <![endif]>
            </body>
            </html>
            """)
        parser.close()
        self.assertEqual(parser.get_decl_info(),
                         ["if !supportEmptyParas",
                          "endif"
                          ])


def test_main():
    """Run this module's test case through the regression-test harness."""
    test_support.run_unittest(HTMLParserTestCase)


if __name__ == "__main__":
    test_main()