# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

try:
  from HTMLParser import HTMLParser  # Python 2
except ImportError:
  from html.parser import HTMLParser  # Python 3

try:
  from StringIO import StringIO  # Python 2
except ImportError:
  from io import StringIO  # Python 3

# Maps the exact value of a "class" attribute to the template section opener
# that is written immediately before the element's start tag.
_SECTION_OPENERS = {
  'doc-family extensions': '{{^is_apps}}\n',
  'doc-family apps': '{{?is_apps}}\n',
}

# Written immediately after the end tag of an element that opened a section.
_SECTION_CLOSER = '\n{{/is_apps}}'

# HTML elements that never have an end tag. They must not be tracked on the
# open-element stack, otherwise they would sit on top of it forever and hide
# the enclosing elements from handle_endtag().
_VOID_ELEMENTS = frozenset([
  'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta',
  'param', 'source', 'track', 'wbr',
])


class _ConverterHTMLParser(HTMLParser):
  """Echoes the parsed HTML to |io|, wrapping doc-family elements.

  Every construct the parser reports (tags, text, comments, entity and
  character references, declarations) is written back to |io|. An element
  whose "class" attribute is exactly 'doc-family extensions' or
  'doc-family apps' is additionally surrounded by an is_apps template section:
  the opener goes right before its start tag and '{{/is_apps}}' right after
  its end tag.
  """

  def __init__(self, io):
    """|io| is a file-like object; only its write() method is used."""
    HTMLParser.__init__(self)
    # Python 3's parser decodes character references into text by default,
    # which would bypass handle_entityref()/handle_charref() and change the
    # output. Keep them reported individually so they are echoed verbatim.
    # The attribute is simply unused by Python 2's parser.
    self.convert_charrefs = False
    self._io = io
    # Stack of (tag, closes_section) pairs for the currently open elements.
    self._tag_stack = []

  def handle_starttag(self, tag, attrs):
    """Writes the start tag, preceded by a section opener when applicable."""
    opener = _SECTION_OPENERS.get(dict(attrs).get('class'))
    if opener is not None:
      self._io.write(opener)
    self._io.write(self.get_starttag_text())
    if tag in _VOID_ELEMENTS:
      # No end tag will ever arrive for this element, so a section opened for
      # it has to be closed right away to keep the template balanced.
      if opener is not None:
        self._io.write(_SECTION_CLOSER)
      return
    self._tag_stack.append((tag, opener is not None))

  def handle_startendtag(self, tag, attrs):
    """Writes a self-closing tag (e.g. <br/>) unchanged."""
    self._io.write(self.get_starttag_text())

  def handle_endtag(self, tag):
    """Writes the end tag, followed by the closer of any section it ends."""
    self._io.write('</' + tag + '>')
    # Find the innermost open element with this name. Only looking at the top
    # of the stack is not enough: elements such as <p> or <li> are routinely
    # left unclosed and would otherwise block every enclosing element.
    for index in range(len(self._tag_stack) - 1, -1, -1):
      if self._tag_stack[index][0] == tag:
        break
    else:
      # Stray end tag without a matching open element; nothing to unwind.
      return
    ended = self._tag_stack[index:]
    del self._tag_stack[index:]
    # Elements above the match were implicitly ended by this tag; close any
    # section they opened as well, innermost first.
    for _, closes_section in reversed(ended):
      if closes_section:
        self._io.write(_SECTION_CLOSER)

  def handle_data(self, data):
    """Writes text content unchanged."""
    self._io.write(data)

  def handle_comment(self, data):
    """Writes the comment back with its <!-- --> delimiters."""
    self._io.write('<!--' + data + '-->')

  def handle_entityref(self, name):
    """Writes a named character reference such as &amp; back out."""
    self._io.write('&' + name + ';')

  def handle_charref(self, name):
    """Writes a numeric character reference such as &#169; back out."""
    self._io.write('&#' + name + ';')

  def handle_decl(self, data):
    """Writes a declaration such as <!DOCTYPE html> back out."""
    self._io.write('<!' + data + '>')


def HandleDocFamily(html):
  """Returns |html| with doc-family elements wrapped in is_apps sections.

  Elements whose "class" attribute is exactly 'doc-family extensions' are
  wrapped in '{{^is_apps}}' ... '{{/is_apps}}' and those with
  'doc-family apps' in '{{?is_apps}}' ... '{{/is_apps}}'. All other markup is
  passed through.
  """
  output = StringIO()
  parser = _ConverterHTMLParser(output)
  parser.feed(html)
  # Flush whatever the parser is still buffering (for example an unterminated
  # entity at the very end of the input); without this it is silently dropped.
  parser.close()
  return output.getvalue()