# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
4f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
5f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)from HTMLParser import HTMLParser
6f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
7f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
class ParseResult(object):
  '''What |ParseDocument| hands back to its caller:
  |title|             Page title, taken from the first <h1>.
  |title_attributes|  Attributes of the <h1> tag that supplied the title.
  |sections|          The Sections found in the document, as a list.
  |warnings|          Warnings collected while the document was parsed.
  '''

  def __init__(self, title, title_attributes, sections, warnings):
    # Plain value holder: every argument is stored as-is, without copying.
    self.title, self.title_attributes = title, title_attributes
    self.sections, self.warnings = sections, warnings
21f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
22f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
class DocumentSection(object):
  '''One section of the document, delimited by <section>...</section>. Content
  that sits outside of any section tags forms an implicit section of its own,
  which means "Foo <section>Bar</section> Baz" counts as 3 sections.
  |structure|  The top-level headings of this section, as a list of
               DocumentStructureEntry.
  '''

  def __init__(self):
    # Starts out empty; a fresh list per instance.
    self.structure = list()
32f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
33f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
class DocumentStructureEntry(object):
  '''A single heading in the document structure.
  |attributes| Attributes of the header tag that produced this entry.
  |name|       Name of the entry, taken from the header tag that produced this
               entry.
  |entries|    Child DocumentStructureEntry items, as a list.
  '''

  def __init__(self, tag, attributes):
    self.attributes, self.name, self.entries = attributes, '', []
    # The tag is kept private: callers have no use for it, but it is needed for
    # sanity checking. Notably, every h1 after the first is treated as though
    # it were an h2, which would be a strange thing to expose.
    self._tag = tag
    # Set when the document overrides the entry's name via title="".
    self._has_explicit_name = False

  def __repr__(self):
    # Renders as the heading element itself, e.g. <h2>Name</h2>.
    return '<%(tag)s>%(name)s</%(tag)s>' % {'tag': self._tag,
                                            'name': self.name}

  def __str__(self):
    return self.__repr__()
58f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
59f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
def ParseDocument(document, expect_title=False):
  '''Extracts the title and the document structure from |document| and returns
  them as a ParseResult.

  |expect_title| is handed to the parser unchanged.
  '''
  doc_parser = _DocumentParser(expect_title)
  doc_parser.feed(document)
  # close() is what populates |parse_result|, so it has to run before the read.
  doc_parser.close()
  return doc_parser.parse_result
68f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
69f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
def RemoveTitle(document):
  '''Strips the first <h1>..</h1> element out of |document|.

  Returns a (result, warning) tuple. On success, result is |document| without
  the title and warning is None. If there is no title, or |document| is
  malformed in some way, result is the untouched |document| and warning is a
  message describing the problem.
  '''

  def earliest(*needles):
    # Position of whichever needle occurs first in the document, or -1 when
    # none of them occur at all.
    hits = [pos for pos in (document.find(needle) for needle in needles)
            if pos != -1]
    return min(hits) if hits else -1

  closing_tag_length = len('/h1>')

  opening = earliest('<h1', '<H1')
  if opening == -1:
    return document, 'No opening <h1> was found'

  closing = earliest('/h1>', '/H1>')
  if closing == -1:
    return document, 'No closing </h1> was found'

  if closing < opening:
    return document, 'The </h1> appeared before the <h1>'

  before_title = document[:opening]
  after_title = document[closing + closing_tag_length:]
  return (before_title + after_title, None)
95f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
96f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
# Heading tags that form the document structure, ordered from outermost to
# innermost. _DocumentParser.handle_starttag uses a tag's position in this
# list to decide how deeply to nest its entry, so the order matters. <h1> is
# not listed because the parser checks for it separately.
_HEADER_TAGS = ['h2', 'h3', 'h4']
98f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
99f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
class _DocumentParser(HTMLParser):
  '''HTMLParser for ParseDocument.

  Records the first <h1> as the document title and collects the <h2>, <h3> and
  <h4> headings into DocumentSections, nesting each heading under the most
  recent heading of the level above it. Once close() has been called the
  outcome is available as |parse_result|.
  '''

  def __init__(self, expect_title):
    HTMLParser.__init__(self)
    # Public.
    # The ParseResult; stays None until close() has run.
    self.parse_result = None
    # Private.
    self._expect_title = expect_title
    # The entry for the first <h1> that was seen, if any.
    self._title_entry = None
    # The non-empty sections completed so far, in document order.
    self._sections = []
    # The section that headings are currently being added to.
    self._processing_section = DocumentSection()
    # The heading whose start tag has been seen but whose end tag hasn't.
    self._processing_entry = None
    self._warnings = []

  def handle_starttag(self, tag, attrs):
    '''Starts a new entry for a heading tag, or ends the current section on
    <section>. Every other tag is ignored.
    '''
    if tag == 'section':
      self._OnSectionBoundary()
      return

    if tag != 'h1' and tag not in _HEADER_TAGS:
      return

    if self._processing_entry is not None:
      self._WarnWithPosition('Found <%s> in the middle of processing a <%s>' %
                             (tag, self._processing_entry._tag))
      return

    attrs_dict = dict(attrs)
    self._processing_entry = DocumentStructureEntry(tag, attrs_dict)

    # The entry holds on to |attrs_dict| itself, so popping the title here also
    # removes it from the entry's attributes.
    explicit_name = attrs_dict.pop('title', None)
    if explicit_name == '':
      # Don't create a TOC entry at all if the tag has specified title="".
      # |_processing_entry| is left set so that the matching end tag is still
      # recognised rather than reported as unopened.
      return
    if explicit_name is not None:
      self._processing_entry.name = explicit_name
      self._processing_entry._has_explicit_name = True

    if tag == 'h1' and self._title_entry is not None:
      self._WarnWithPosition('Found multiple <h1> tags. Subsequent <h1> tags '
                             'will be classified as <h2> for the purpose of '
                             'the structure')
      # Only the local |tag| changes; the entry keeps its real tag so that the
      # closing </h1> still matches it.
      tag = 'h2'

    if tag == 'h1':
      self._title_entry = self._processing_entry
      return

    # Walk down one level per header that outranks |tag|, each time descending
    # into the most recent entry, to find the list this entry belongs in.
    belongs_to = self._processing_section.structure
    for header in _HEADER_TAGS[:_HEADER_TAGS.index(tag)]:
      if not belongs_to:
        # There is no parent heading at this level, so the entry is attached
        # at the deepest level that was reached.
        # TODO(kalman): Re-enable this warning once the reference pages have
        # their references fixed.
        #self._WarnWithPosition('Found <%s> without any preceding <%s>' %
        #                       (tag, header))
        break
      belongs_to = belongs_to[-1].entries
    belongs_to.append(self._processing_entry)

  def handle_endtag(self, tag):
    '''Finishes the heading that is being processed, or ends the current
    section on </section>. Every other tag is ignored.
    '''
    if tag == 'section':
      self._OnSectionBoundary()
      return

    if tag != 'h1' and tag not in _HEADER_TAGS:
      return

    if self._processing_entry is None:
      self._WarnWithPosition('Found closing </%s> without an opening <%s>' %
                             (tag, tag))
      return

    if self._processing_entry._tag != tag:
      self._WarnWithPosition('Found closing </%s> while processing a <%s>' %
                             (tag, self._processing_entry._tag))
      # Note: no early return, it's more likely that the mismatched header was
      # a typo rather than a misplaced closing header tag.

    self._processing_entry = None

  def handle_data(self, data):
    '''Appends text found inside a heading to that heading's name, unless the
    name was given explicitly via the title attribute.
    '''
    if (self._processing_entry is not None and
        not self._processing_entry._has_explicit_name):
      # += is inefficient, but probably fine here because the chances of a
      # large number of nested tags within header tags is pretty low.
      self._processing_entry.name += data

  def close(self):
    '''Finishes parsing and stores the outcome in |parse_result|.'''
    HTMLParser.close(self)

    # Flush the section that was still being built.
    self._OnSectionBoundary()

    if self._processing_entry is not None:
      self._warnings.append('Finished parsing while still processing a <%s>' %
                            self._processing_entry._tag)

    if self._expect_title:
      if self._title_entry is None:
        self._warnings.append('Expected a title')
        title, title_attributes = '', {}
      else:
        title, title_attributes = (
            self._title_entry.name, self._title_entry.attributes)
    else:
      if self._title_entry is not None:
        self._warnings.append('Found unexpected title "%s"' %
                              self._title_entry.name)
      # A title that wasn't asked for is reported above but never returned.
      title, title_attributes = None, None

    self.parse_result = ParseResult(
        title, title_attributes, self._sections, self._warnings)

  def _OnSectionBoundary(self):
    '''Files away the current section and starts a new one.'''
    # Only start a new section if the previous section was non-empty.
    if self._processing_section.structure:
      self._sections.append(self._processing_section)
      self._processing_section = DocumentSection()

  def _WarnWithPosition(self, message):
    '''Records |message| as a warning, tagged with the parser's position.'''
    line, col = self.getpos()
    # The column is reported one higher than the offset that getpos() returns.
    self._warnings.append('%s (line %s, column %s)' % (message, line, col + 1))
222