15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# Copyright (c) 2012 The Chromium Authors. All rights reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# Use of this source code is governed by a BSD-style license that can be
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)from HTMLParser import HTMLParser
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)from StringIO import StringIO
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
class _ConverterHTMLParser(HTMLParser):
  """HTMLParser that echoes its input and wraps doc-family elements.

  Every parsed construct (tags, text, comments, entity/char references,
  declarations and processing instructions) is written back to |io|. An element
  opened by a start tag whose class attribute is exactly
  'doc-family extensions' is wrapped in '{{^is_apps}}' ... '{{/is_apps}}', and
  one whose class attribute is exactly 'doc-family apps' is wrapped in
  '{{?is_apps}}' ... '{{/is_apps}}'.
  """

  # Elements that never have an end tag in HTML. They must not be pushed on the
  # tag stack: nothing would ever pop them, so the end tag of every enclosing
  # element would stop matching the top of the stack and its section closer
  # would be silently dropped.
  _VOID_ELEMENTS = frozenset([
      'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link',
      'meta', 'param', 'source', 'track', 'wbr',
  ])

  # Exact class attribute value -> marker written before the start tag.
  _SECTION_OPENERS = {
      'doc-family extensions': '{{^is_apps}}\n',
      'doc-family apps': '{{?is_apps}}\n',
  }

  # Marker written after the end of a wrapped element.
  _SECTION_CLOSER = '\n{{/is_apps}}'

  def __init__(self, io):
    """Creates a parser that writes its output to |io|.

    Args:
      io: object with a write(str) method that receives the converted markup.
    """
    HTMLParser.__init__(self)
    self._io = io
    # Stack of (tag, needs_closer) pairs for the currently open elements.
    self._tag_stack = []

  def handle_starttag(self, tag, attrs):
    """Writes the start tag, preceded by a section opener when applicable."""
    opener = self._SECTION_OPENERS.get(dict(attrs).get('class'))
    if opener is not None:
      self._io.write(opener)
    self._io.write(self.get_starttag_text())
    if tag in self._VOID_ELEMENTS:
      # A void element is complete as soon as its start tag is written, so
      # its section (if any) is closed right away instead of being tracked.
      if opener is not None:
        self._io.write(self._SECTION_CLOSER)
      return
    self._tag_stack.append((tag, opener is not None))

  def handle_startendtag(self, tag, attrs):
    """Writes a self-closing tag (e.g. <br/>) through unchanged."""
    # NOTE(review): self-closing tags never get doc-family section markers,
    # whatever their class attribute is - confirm that this is intended.
    self._io.write(self.get_starttag_text())

  def handle_endtag(self, tag):
    """Writes the end tag, followed by the section closer when applicable."""
    self._io.write('</' + tag + '>')
    if not self._tag_stack:
      return
    open_tag, needs_closer = self._tag_stack[-1]
    # Only an end tag matching the innermost open element pops the stack;
    # stray or mis-nested end tags are echoed and otherwise ignored.
    if open_tag != tag:
      return
    self._tag_stack.pop()
    if needs_closer:
      self._io.write(self._SECTION_CLOSER)

  def handle_data(self, data):
    """Writes text content through unchanged."""
    self._io.write(data)

  def handle_comment(self, data):
    """Writes the comment back with its <!-- --> delimiters."""
    self._io.write('<!--' + data + '-->')

  def handle_entityref(self, name):
    """Writes a named character reference back as &name;."""
    self._io.write('&' + name + ';')

  def handle_charref(self, name):
    """Writes a numeric character reference back as &#name;."""
    self._io.write('&#' + name + ';')

  def handle_decl(self, data):
    """Writes a declaration (e.g. a doctype) back as <!data>."""
    self._io.write('<!' + data + '>')

  def handle_pi(self, data):
    """Writes a processing instruction back as <?data> instead of dropping it."""
    self._io.write('<?' + data + '>')
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
def HandleDocFamily(html):
  """Returns |html| as re-serialized by _ConverterHTMLParser.

  The markup is fed through the parser in a single call; elements whose class
  attribute is 'doc-family extensions' or 'doc-family apps' come back wrapped
  in is_apps template section markers.

  Args:
    html: the HTML source text to convert.

  Returns:
    The converted markup as a string.
  """
  buf = StringIO()
  _ConverterHTMLParser(buf).feed(html)
  return buf.getvalue()
59