# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

try:
    from HTMLParser import HTMLParser  # Python 2
except ImportError:
    from html.parser import HTMLParser  # Python 3
try:
    from StringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO  # Python 3

# Template section openers, keyed by the exact value of the `class` attribute.
_SECTION_OPENERS = {
    'doc-family extensions': '{{^is_apps}}\n',
    'doc-family apps': '{{?is_apps}}\n',
}

# Elements that never have an end tag. They must not be tracked on the tag
# stack, otherwise every later end tag stops matching the top of the stack.
_VOID_ELEMENTS = frozenset([
    'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link',
    'meta', 'param', 'source', 'track', 'wbr',
])


class _ConverterHTMLParser(HTMLParser):
    """Re-emits the HTML it parses, wrapping doc-family elements in sections.

    An element whose `class` attribute is exactly 'doc-family extensions' is
    wrapped in '{{^is_apps}}' ... '{{/is_apps}}'; one whose class is exactly
    'doc-family apps' is wrapped in '{{?is_apps}}' ... '{{/is_apps}}'.
    Everything else is written back to the output stream as parsed.
    """

    def __init__(self, io):
        """Creates a parser that writes its output to the file-like `io`."""
        try:
            # Newer Python 3 parsers decode character references by default,
            # which would bypass handle_entityref/handle_charref and change
            # the emitted text. Keep the references verbatim instead.
            HTMLParser.__init__(self, convert_charrefs=False)
        except TypeError:
            # Older parsers do not know the keyword and never decode.
            HTMLParser.__init__(self)
        self._io = io
        # One entry per open (non-void) element, innermost last. 'close'
        # records whether a section was opened in front of the element.
        self._tag_stack = []

    def handle_starttag(self, tag, attrs):
        """Writes the start tag, preceded by a section opener if needed."""
        opener = _SECTION_OPENERS.get(dict(attrs).get('class'))
        if opener is not None:
            self._io.write(opener)
        self._io.write(self.get_starttag_text())
        if tag in _VOID_ELEMENTS:
            # No end tag will ever arrive for this element, so a section
            # opened for it has to be closed right away.
            if opener is not None:
                self._io.write('\n{{/is_apps}}')
            return
        self._tag_stack.append({'tag': tag, 'close': opener is not None})

    def handle_startendtag(self, tag, attrs):
        """Writes a self-closing tag (<tag ... />) through unchanged."""
        self._io.write(self.get_starttag_text())

    def handle_endtag(self, tag):
        """Writes the end tag and closes any section tied to its element."""
        # Find the innermost open element that this end tag closes.
        match_index = None
        for index in range(len(self._tag_stack) - 1, -1, -1):
            if self._tag_stack[index]['tag'] == tag:
                match_index = index
                break
        if match_index is None:
            # Stray end tag: pass it through and leave the stack alone.
            self._io.write('</' + tag + '>')
            return
        matched = self._tag_stack[match_index]
        # Elements opened after the match were never explicitly closed
        # (e.g. <p> or <li> without an end tag); this end tag ends them too.
        implicitly_closed = self._tag_stack[match_index + 1:]
        del self._tag_stack[match_index:]
        for entry in reversed(implicitly_closed):
            if entry['close']:
                self._io.write('\n{{/is_apps}}\n')
        self._io.write('</' + tag + '>')
        if matched['close']:
            self._io.write('\n{{/is_apps}}')

    def handle_data(self, data):
        """Writes text content through unchanged."""
        self._io.write(data)

    def handle_comment(self, data):
        """Re-wraps a comment body in its <!-- --> delimiters."""
        self._io.write('<!--' + data + '-->')

    def handle_entityref(self, name):
        """Re-emits a named character reference such as &amp;."""
        self._io.write('&' + name + ';')

    def handle_charref(self, name):
        """Re-emits a numeric character reference such as &#38;."""
        self._io.write('&#' + name + ';')

    def handle_decl(self, data):
        """Re-emits a declaration such as <!DOCTYPE html>."""
        self._io.write('<!' + data + '>')


def HandleDocFamily(html):
    """Returns `html` with doc-family elements wrapped in template sections.

    Args:
      html: the HTML source, as a string.

    Returns:
      The converted markup, as a string.
    """
    output = StringIO()
    parser = _ConverterHTMLParser(output)
    # NOTE(review): the parser is fed but never close()d, so an incomplete
    # construct at the very end of the input may stay buffered - confirm
    # whether callers can pass such input before changing this.
    parser.feed(html)
    return output.getvalue()