14adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoimport formatter
24adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoimport unittest
34adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
44adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaofrom test import test_support
54adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaohtmllib = test_support.import_module('htmllib', deprecated=True)
64adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
74adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
class AnchorCollector(htmllib.HTMLParser):
    """HTMLParser subclass that records every call made to anchor_bgn.

    Each call contributes one tuple holding the positional arguments it
    received; the accumulated list is exposed via get_anchor_info().
    """

    def __init__(self, *args, **kw):
        # The record list is created before the base initializer runs; all
        # constructor arguments are forwarded to htmllib.HTMLParser untouched.
        self.__anchors = []
        htmllib.HTMLParser.__init__(self, *args, **kw)

    def get_anchor_info(self):
        """Return the list of recorded argument tuples (the list itself, not a copy)."""
        return self.__anchors

    def anchor_bgn(self, *args):
        """Append the positional arguments of this call, as one tuple, to the record."""
        self.__anchors.append(args)
184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
class DeclCollector(htmllib.HTMLParser):
    """HTMLParser subclass that records every value passed to unknown_decl.

    The collected values are exposed, in call order, via get_decl_info().
    """

    def __init__(self, *args, **kw):
        # The record list is created before the base initializer runs; all
        # constructor arguments are forwarded to htmllib.HTMLParser untouched.
        self.__decls = []
        htmllib.HTMLParser.__init__(self, *args, **kw)

    def get_decl_info(self):
        """Return the list of recorded declarations (the list itself, not a copy)."""
        return self.__decls

    def unknown_decl(self, data):
        """Append *data* to the record of declarations seen so far."""
        self.__decls.append(data)
294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
class HTMLParserTestCase(unittest.TestCase):
    """Checks the anchor and declaration hooks of htmllib.HTMLParser."""

    def test_anchor_collection(self):
        # See SF bug #467059.
        # Three anchors: one with href and name, one with only href, one with
        # only name.  Each must produce a 3-tuple whose missing fields are ''.
        markup = """<a href='http://foo.org/' name='splat'> </a>
            <a href='http://www.python.org/'> </a>
            <a name='frob'> </a>
            """
        expected = [
            ('http://foo.org/', 'splat', ''),
            ('http://www.python.org/', '', ''),
            ('', 'frob', ''),
        ]

        collector = AnchorCollector(formatter.NullFormatter(), verbose=1)
        collector.feed(markup)
        collector.close()

        self.assertEqual(collector.get_anchor_info(), expected)

    def test_decl_collection(self):
        # See SF patch #545300
        # The two "<![...]>" constructs must each be reported with the text
        # between "<![" and "]>".
        markup = """<html>
            <body>
            hallo
            <![if !supportEmptyParas]>&nbsp;<![endif]>
            </body>
            </html>
            """
        expected = ["if !supportEmptyParas", "endif"]

        collector = DeclCollector(formatter.NullFormatter(), verbose=1)
        collector.feed(markup)
        collector.close()

        self.assertEqual(collector.get_decl_info(), expected)
634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
def test_main():
    """Run HTMLParserTestCase through test_support.run_unittest."""
    test_support.run_unittest(HTMLParserTestCase)
664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
# Run the tests only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test_main()
70