1#!/usr/bin/python
2# Copyright 2014 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6import collections
7import hashlib
8import operator
9import os
10import re
11import sys
12
13
# Matches a whole line of the form "#define NAME INDEX": group 1 captures the
# name and group 2 the run of digits. The pattern is a raw string so that \S
# and \d reach the regex engine instead of being parsed as (invalid) string
# escape sequences.
RESOURCE_EXTRACT_REGEX = re.compile(r'^#define (\S*) (\d*)$', re.MULTILINE)
15
class Error(Exception):
  """Common base class for the exceptions this script raises itself."""
18
19
class HashCollisionError(Error):
  """Raised when two or more resource names produce the same hash."""
22
23
# One resource entry: the hash of its name, the name itself, and its index.
# Being a namedtuple, instances order by hash first, then name, then index.
Resource = collections.namedtuple('Resource', ('hash', 'name', 'index'))
25
26
def _HashName(name):
  """Returns the hash id for a name.

  The id is built from the first eight hex digits (the leading 32 bits) of the
  MD5 digest of the name.

  Args:
    name: The name to hash. Either a byte string or a text string; text is
        encoded as UTF-8 before hashing.

  Returns:
    An int that is at most 32 bits.
  """
  # hashlib only accepts bytes on Python 3. On Python 2 a plain str already is
  # a byte string and is hashed unchanged.
  if not isinstance(name, bytes):
    name = name.encode('utf-8')
  return int(hashlib.md5(name).hexdigest()[:8], 16)
39
40
def _GetNameIndexPairsIter(string_to_scan):
  """Iterates over the "#define NAME INDEX" lines found in a string.

  Every match of RESOURCE_EXTRACT_REGEX in the input contributes one pair made
  of its two captured groups.

  Args:
    string_to_scan: The input string to scan.

  Yields:
    A (name, index) tuple of strings for each matching line.
  """
  for define_match in RESOURCE_EXTRACT_REGEX.finditer(string_to_scan):
    name = define_match.group(1)
    index = define_match.group(2)
    yield name, index
55
56
def _GetResourceListFromString(resources_content):
  """Builds the sorted list of |Resource| objects described by a string.

  Each "#define NAME INDEX" line of the input becomes one |Resource| whose hash
  is computed from NAME.

  Args:
    resources_content: The input string to process, contains lines of the form
        "#define NAME INDEX".

  Returns:
    A list of |Resource| objects sorted by hash, then name, then index.
  """
  resources = []
  for name, index in _GetNameIndexPairsIter(resources_content):
    resources.append(Resource(hash=_HashName(name), name=name, index=index))

  # |Resource| is a namedtuple, so the natural tuple ordering already compares
  # the hash first, then the name, then the index.
  return sorted(resources)
78
79
def _CheckForHashCollisions(sorted_resource_list):
  """Checks a sorted list of |Resource| objects for hash collisions.

  Because the input is sorted by hash, entries sharing a hash are adjacent, so
  comparing each entry with the one before it finds every collision.

  Args:
    sorted_resource_list: A sorted list of |Resource| objects.

  Returns:
    A set of all |Resource| objects with collisions. The set is empty when the
    input has fewer than two entries or no two neighbours share a hash.
  """
  collisions = set()
  # Walk the list once, remembering the previous entry. This avoids the
  # Python 2-only xrange() and works unchanged on Python 2 and Python 3.
  previous = None
  for resource in sorted_resource_list:
    if previous is not None and previous.hash == resource.hash:
      collisions.add(previous)
      collisions.add(resource)
    previous = resource

  return collisions
98
99
def _GenDataArray(
    resources, entry_pattern, array_name, array_type, data_getter):
  """Generates a C++ statement defining a literal array with one entry per
  resource.

  Args:
    resources: A sorted list of |Resource| objects.
    entry_pattern: A %-format pattern used to render each array entry. It is
        given the entry data first and the resource name second; the callers
        in this file use the name as a trailing comment.
    array_name: The name of the array being generated.
    array_type: The element type of the array being generated.
    data_getter: A function that gets the array data from a |Resource| object.

  Returns:
    A string containing a C++ statement defining an array.
  """
  entries = []
  for resource in resources:
    entries.append(entry_pattern % (data_getter(resource), resource.name))

  array_template = "const %(type)s %(name)s[] = {\n%(content)s\n};\n"
  return array_template % dict(
      type=array_type, name=array_name, content='\n'.join(entries))
124
125
def _GenerateFileContent(resources_content):
  """Produces the .cc file text for the given generated_resources.h content.

  Args:
    resources_content: The input string to process, contains lines of the form
        "#define NAME INDEX".

  Returns:
    .cc file content defining kNumResources and the kResourceHashes and
    kResourceIndices arrays.

  Raises:
    HashCollisionError: If two or more resource names hash to the same value.
  """
  resources = _GetResourceListFromString(resources_content)

  colliding = _CheckForHashCollisions(resources)
  if colliding:
    # Report every colliding entry, ordered by hash, so the offending names
    # are easy to spot.
    collision_lines = []
    for resource in sorted(colliding):
      collision_lines.append(
          "hash: %i, name: %s" % (resource.hash, resource.name))
    collision_report = "\n".join(collision_lines)
    raise HashCollisionError(
        "\nThe following names had hash collisions "
        "(sorted by the hash value):\n%s\n" % collision_report)

  # Both arrays are generated from the same sorted list, so entry N of one
  # array corresponds to entry N of the other.
  get_hash = operator.attrgetter('hash')
  get_index = operator.attrgetter('index')
  hashes_array = _GenDataArray(
      resources, "    %iU,  // %s", 'kResourceHashes', 'uint32_t', get_hash)
  indices_array = _GenDataArray(
      resources, "    %s,  // %s", 'kResourceIndices', 'int', get_index)

  file_template = (
      "// This file was generated by generate_resources_map.py. Do not edit.\n"
      "\n\n"
      "#include "
      "\"chrome/browser/metrics/variations/generated_resources_map.h\"\n\n"
      "namespace chrome_variations {\n\n"
      "const size_t kNumResources = %i;\n\n"
      "%s"
      "\n"
      "%s"
      "\n"
      "}  // namespace chrome_variations\n")
  return file_template % (len(resources), hashes_array, indices_array)
166
167
def main(resources_file, map_file):
  """Reads a resources header and writes the generated map file.

  Args:
    resources_file: Path of the input file; its content is expected to hold
        lines of the form "#define NAME INDEX".
    map_file: Path of the output file that receives the generated content.

  Raises:
    Error: If the input file is empty.
  """
  with open(resources_file, "r") as header_file:
    header_text = header_file.read()

  if not header_text:
    raise Error("No content loaded for %s." % (resources_file))

  generated_source = _GenerateFileContent(header_text)

  with open(map_file, "w") as output_file:
    output_file.write(generated_source)
180
181
# Script entry point: expects the input header path and the output file path
# as the first two command-line arguments.
if __name__ == '__main__':
  # Fail with a readable usage line instead of an IndexError traceback when an
  # argument is missing. Extra arguments are ignored, as before.
  if len(sys.argv) < 3:
    sys.stderr.write(
        'Usage: %s <resources_file> <map_file>\n' % sys.argv[0])
    sys.exit(1)
  sys.exit(main(sys.argv[1], sys.argv[2]))
184