1868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#!/usr/bin/env python
2c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)# Copyright 2013 The Chromium Authors. All rights reserved.
3c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)# Use of this source code is governed by a BSD-style license that can be
4c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)# found in the LICENSE file.
5c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
6c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)"""Pretty-prints the histograms.xml file, alphabetizing tags, wrapping text
7c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)at 80 chars, enforcing standard attribute ordering, and standardizing
8c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)indentation.
9c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
10c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)This is quite a bit more complicated than just calling tree.toprettyxml();
11c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)we need additional customization, like special attribute ordering in tags
12c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)and wrapping text nodes, so we implement our own full custom XML pretty-printer.
13c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)"""
14c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
15c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)from __future__ import with_statement
16c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
17c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)import diffutil
18c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)import json
19c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)import logging
20c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)import shutil
21c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)import sys
22c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)import textwrap
23c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)import xml.dom.minidom
24c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
25c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
26c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)WRAP_COLUMN = 80
27c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
28c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)# Desired order for tag attributes; attributes listed here will appear first,
29c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)# and in the same order as in these lists.
30c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)# { tag_name: [attribute_name, ...] }
31c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)ATTRIBUTE_ORDER = {
32c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  'enum': ['name', 'type'],
33c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  'histogram': ['name', 'enum', 'units'],
34c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  'int': ['value', 'label'],
35c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  'fieldtrial': ['name', 'separator', 'ordering'],
36c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  'group': ['name', 'label'],
37c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  'affected-histogram': ['name'],
38c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  'with-group': ['name'],
39c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
40c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
41c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)# Tag names for top-level nodes whose children we don't want to indent.
42c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)TAGS_THAT_DONT_INDENT = [
43c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  'histogram-configuration',
44c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  'histograms',
45c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  'fieldtrials',
46c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  'enums'
47c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)]
48c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
49c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)# Extra vertical spacing rules for special tag names.
50c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)# {tag_name: (newlines_after_open, newlines_before_close, newlines_after_close)}
51c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)TAGS_THAT_HAVE_EXTRA_NEWLINE = {
52c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  'histogram-configuration': (2, 1, 1),
53c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  'histograms': (2, 1, 1),
54c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  'fieldtrials': (2, 1, 1),
55c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  'enums': (2, 1, 1),
56c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  'histogram': (1, 1, 1),
57c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  'enum': (1, 1, 1),
58c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  'fieldtrial': (1, 1, 1),
59c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
60c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
61c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)# Tags that we allow to be squished into a single line for brevity.
62c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)TAGS_THAT_ALLOW_SINGLE_LINE = [
63c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  'summary',
64c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  'int',
65c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)]
66c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
67c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)# Tags whose children we want to alphabetize. The key is the parent tag name,
68c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)# and the value is a pair of the tag name of the children we want to sort,
69c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)# and a key function that maps each child node to the desired sort key.
70c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)ALPHABETIZATION_RULES = {
71c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  'histograms': ('histogram', lambda n: n.attributes['name'].value.lower()),
72c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  'enums': ('enum', lambda n: n.attributes['name'].value.lower()),
73c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  'enum': ('int', lambda n: int(n.attributes['value'].value)),
74c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  'fieldtrials': ('fieldtrial', lambda n: n.attributes['name'].value.lower()),
75c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  'fieldtrial': ('affected-histogram',
76c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                 lambda n: n.attributes['name'].value.lower()),
77c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
78c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
79c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
80c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)class Error(Exception):
81c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  pass
82c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
83c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
84c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)def LastLineLength(s):
85c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  """Returns the length of the last line in s.
86c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
87c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  Args:
88c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    s: A multi-line string, including newlines.
89c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
90c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  Returns:
91c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    The length of the last line in s, in characters.
92c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  """
93c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if s.rfind('\n') == -1: return len(s)
94c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  return len(s) - s.rfind('\n') - len('\n')
95c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
96c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
97c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)def XmlEscape(s):
98c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  """XML-escapes the given string, replacing magic characters (&<>") with their
99c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  escaped equivalents."""
100c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  s = s.replace("&", "&amp;").replace("<", "&lt;")
101c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  s = s.replace("\"", "&quot;").replace(">", "&gt;")
102c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  return s
103c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
104c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
105c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)def PrettyPrintNode(node, indent=0):
106c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  """Pretty-prints the given XML node at the given indent level.
107c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
108c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  Args:
109c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    node: The minidom node to pretty-print.
110c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    indent: The current indent level.
111c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
112c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  Returns:
113c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    The pretty-printed string (including embedded newlines).
114c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
115c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  Raises:
116c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    Error if the XML has unknown tags or attributes.
117c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  """
118c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  # Handle the top-level document node.
119c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if node.nodeType == xml.dom.minidom.Node.DOCUMENT_NODE:
120c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return '\n'.join([PrettyPrintNode(n) for n in node.childNodes])
121c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
122c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  # Handle text nodes.
123c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if node.nodeType == xml.dom.minidom.Node.TEXT_NODE:
124c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    # Wrap each paragraph in the text to fit in the 80 column limit.
125c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    wrapper = textwrap.TextWrapper()
126c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    wrapper.initial_indent = ' ' * indent
127c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    wrapper.subsequent_indent = ' ' * indent
128c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    wrapper.break_on_hyphens = False
129c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    wrapper.break_long_words = False
130c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    wrapper.width = WRAP_COLUMN
131c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    text = XmlEscape(node.data)
132c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    # Remove any common indent.
133c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    text = textwrap.dedent(text.strip('\n'))
134c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    lines = text.split('\n')
135c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    # Split the text into paragraphs at blank line boundaries.
136c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    paragraphs = [[]]
137c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    for l in lines:
138c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      if len(l.strip()) == 0 and len(paragraphs[-1]) > 0:
139c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        paragraphs.append([])
140c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      else:
141c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        paragraphs[-1].append(l)
142c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    # Remove trailing empty paragraph if present.
143c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    if len(paragraphs) > 0 and len(paragraphs[-1]) == 0:
144c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      paragraphs = paragraphs[:-1]
145c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    # Wrap each paragraph and separate with two newlines.
146c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return '\n\n'.join([wrapper.fill('\n'.join(p)) for p in paragraphs])
147c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
148c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  # Handle element nodes.
149c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE:
150c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    newlines_after_open, newlines_before_close, newlines_after_close = (
151c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      TAGS_THAT_HAVE_EXTRA_NEWLINE.get(node.tagName, (1, 1, 0)))
152c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    # Open the tag.
153c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    s = ' ' * indent + '<' + node.tagName
154c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
155c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    # Calculate how much space to allow for the '>' or '/>'.
156c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    closing_chars = 1
157c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    if not node.childNodes:
158c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      closing_chars = 2
159c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
160c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    # Pretty-print the attributes.
161c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    attributes = node.attributes.keys()
162c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    if attributes:
163c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      # Reorder the attributes.
164c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      if not node.tagName in ATTRIBUTE_ORDER:
165c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        unrecognized_attributes = attributes;
166c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      else:
167c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        unrecognized_attributes = (
168c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)          [a for a in attributes if not a in ATTRIBUTE_ORDER[node.tagName]])
169c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        attributes = (
170c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)          [a for a in ATTRIBUTE_ORDER[node.tagName] if a in attributes])
171c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
172c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      for a in unrecognized_attributes:
173c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        logging.error(
174c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            'Unrecognized attribute "%s" in tag "%s"' % (a, node.tagName))
175c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      if unrecognized_attributes:
176c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        raise Error()
177c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
178c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      for a in attributes:
179c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        value = XmlEscape(node.attributes[a].value)
180c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        # Replace sequences of whitespace with single spaces.
181c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        words = value.split()
182c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        a_str = ' %s="%s"' % (a, ' '.join(words))
183c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        # Start a new line if the attribute will make this line too long.
184c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        if LastLineLength(s) + len(a_str) + closing_chars > WRAP_COLUMN:
185c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)          s += '\n' + ' ' * (indent + 3)
186c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        # Output everything up to the first quote.
187c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        s += ' %s="' % (a)
188c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        value_indent_level = LastLineLength(s)
189c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        # Output one word at a time, splitting to the next line where necessary.
190c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        column = value_indent_level
191c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        for i, word in enumerate(words):
192c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)          # This is slightly too conservative since not every word will be
193c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)          # followed by the closing characters...
194c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)          if i > 0 and (column + len(word) + 1 + closing_chars > WRAP_COLUMN):
195c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            s = s.rstrip()  # remove any trailing whitespace
196c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            s += '\n' + ' ' * value_indent_level
197c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            column = value_indent_level
198c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)          s += word + ' '
199c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)          column += len(word) + 1
200c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        s = s.rstrip()  # remove any trailing whitespace
201c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        s += '"'
202c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      s = s.rstrip()  # remove any trailing whitespace
203c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
204c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    # Pretty-print the child nodes.
205c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    if node.childNodes:
206c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      s += '>'
207c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      # Calculate the new indent level for child nodes.
208c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      new_indent = indent
209c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      if node.tagName not in TAGS_THAT_DONT_INDENT:
210c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        new_indent += 2
211c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      child_nodes = node.childNodes
212c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
213c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      # Recursively pretty-print the child nodes.
214c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      child_nodes = [PrettyPrintNode(n, indent=new_indent) for n in child_nodes]
215c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      child_nodes = [c for c in child_nodes if len(c.strip()) > 0]
216c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
217c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      # Determine whether we can fit the entire node on a single line.
218c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      close_tag = '</%s>' % node.tagName
219c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      space_left = WRAP_COLUMN - LastLineLength(s) - len(close_tag)
220c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      if (node.tagName in TAGS_THAT_ALLOW_SINGLE_LINE and
221c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)          len(child_nodes) == 1 and len(child_nodes[0].strip()) <= space_left):
222c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        s += child_nodes[0].strip()
223c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      else:
224c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        s += '\n' * newlines_after_open + '\n'.join(child_nodes)
225c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        s += '\n' * newlines_before_close + ' ' * indent
226c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      s += close_tag
227c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    else:
228c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      s += '/>'
229c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    s += '\n' * newlines_after_close
230c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return s
231c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
232c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  # Handle comment nodes.
233c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if node.nodeType == xml.dom.minidom.Node.COMMENT_NODE:
234c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return '<!--%s-->\n' % node.data
235c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
236c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  # Ignore other node types. This could be a processing instruction (<? ... ?>)
237c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  # or cdata section (<![CDATA[...]]!>), neither of which are legal in the
238c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  # histograms XML at present.
239c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  logging.error('Ignoring unrecognized node data: %s' % node.toxml())
240c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  raise Error()
241c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
242c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
243c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)def unsafeAppendChild(parent, child):
244c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  """Append child to parent's list of children, ignoring the possibility that it
245c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  is already in another node's childNodes list.  Requires that the previous
246c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  parent of child is discarded (to avoid non-tree DOM graphs).
247c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  This can provide a significant speedup as O(n^2) operations are removed (in
248c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  particular, each child insertion avoids the need to traverse the old parent's
249c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  entire list of children)."""
250c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  child.parentNode = None
251c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  parent.appendChild(child)
252c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  child.parentNode = parent
253c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
254c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
255c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)def TransformByAlphabetizing(node):
256c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  """Transform the given XML by alphabetizing specific node types according to
257c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  the rules in ALPHABETIZATION_RULES.
258c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
259c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  Args:
260c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    node: The minidom node to transform.
261c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
262c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  Returns:
263c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    The minidom node, with children appropriately alphabetized. Note that the
264c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    transformation is done in-place, i.e. the original minidom tree is modified
265c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    directly.
266c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  """
267c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if node.nodeType != xml.dom.minidom.Node.ELEMENT_NODE:
268c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    for c in node.childNodes: TransformByAlphabetizing(c)
269c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return node
270c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
271c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  # Element node with a tag name that we alphabetize the children of?
272c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if node.tagName in ALPHABETIZATION_RULES:
273868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    # Put subnodes in a list of node,key pairs to allow for custom sorting.
274c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    subtag, key_function = ALPHABETIZATION_RULES[node.tagName]
275c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    subnodes = []
276868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    last_key = -1
277c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    for c in node.childNodes:
278c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      if (c.nodeType == xml.dom.minidom.Node.ELEMENT_NODE and
279c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)          c.tagName == subtag):
280868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)        last_key = key_function(c)
281868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)      # Subnodes that we don't want to rearrange use the last node's key,
282868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)      # so they stay in the same relative position.
283868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)      subnodes.append( (c, last_key) )
284868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)
285868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    # Sort the subnode list.
286868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    subnodes.sort(key=lambda pair: pair[1])
287868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)
288868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    # Re-add the subnodes, transforming each recursively.
289868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    while node.firstChild:
290868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)      node.removeChild(node.firstChild)
291868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    for (c, _) in subnodes:
292868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)      unsafeAppendChild(node, TransformByAlphabetizing(c))
293c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return node
294c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
295c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  # Recursively handle other element nodes and other node types.
296c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  for c in node.childNodes: TransformByAlphabetizing(c)
297c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  return node
298c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
299c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
300c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)def PrettyPrint(raw_xml):
301c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  """Pretty-print the given XML.
302c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
303c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  Args:
304c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    xml: The contents of the histograms XML file, as a string.
305c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
306c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  Returns:
307c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    The pretty-printed version.
308c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  """
309c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  tree = xml.dom.minidom.parseString(raw_xml)
310c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  tree = TransformByAlphabetizing(tree)
311c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  return PrettyPrintNode(tree)
312c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
313c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
314c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)def main():
315c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  logging.basicConfig(level=logging.INFO)
316c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
317c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  presubmit = ('--presubmit' in sys.argv)
318c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
319c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  logging.info('Loading histograms.xml...')
320c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  with open('histograms.xml', 'rb') as f:
321c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    xml = f.read()
322c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
323c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  # Check there are no CR ('\r') characters in the file.
324c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if '\r' in xml:
325c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    logging.info('DOS-style line endings (CR characters) detected - these are '
326c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                 'not allowed. Please run dos2unix histograms.xml')
327c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    sys.exit(1)
328c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
329c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  logging.info('Pretty-printing...')
330c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  try:
331c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    pretty = PrettyPrint(xml)
332c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  except Error:
333c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    logging.error('Aborting parsing due to fatal errors.')
334c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    sys.exit(1)
335c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
336c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if xml == pretty:
337c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    logging.info('histograms.xml is correctly pretty-printed.')
338c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    sys.exit(0)
339c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if presubmit:
340c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    logging.info('histograms.xml is not formatted correctly; run '
341c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                 'pretty_print.py to fix.')
342c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    sys.exit(1)
343c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if not diffutil.PromptUserToAcceptDiff(
344c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      xml, pretty,
345c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      'Is the prettified version acceptable?'):
346c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    logging.error('Aborting')
347c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return
348c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
349c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  logging.info('Creating backup file histograms.before.pretty-print.xml')
350c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  shutil.move('histograms.xml', 'histograms.before.pretty-print.xml')
351c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
352c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  logging.info('Writing new histograms.xml file')
353c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  with open('histograms.xml', 'wb') as f:
354c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    f.write(pretty)
355c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
356c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
357c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)if __name__ == '__main__':
358c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  main()
359