1#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Support for formatting a data pack file used for platform agnostic resource
7files.
8"""
9
10import collections
11import exceptions
12import os
13import struct
14import sys
15if __name__ == '__main__':
16  sys.path.append(os.path.join(os.path.dirname(__file__), '../..'))
17
18from grit import util
19from grit.node import include
20from grit.node import message
21from grit.node import structure
22
23
# Version number expected in the first header field of a pack file.
PACK_FILE_VERSION = 4

# The header is two uint32s (file version, number of entries) followed by one
# uint8 (encoding of text resources).
HEADER_LENGTH = 4 + 4 + 1

# Identifiers for the encoding of text resources.
BINARY = 0
UTF8 = 1
UTF16 = 2
28
29
class WrongFileVersion(Exception):
  """Raised when a pack file's version field is not PACK_FILE_VERSION."""
32
33
# Pairs a resources mapping with the encoding identifier that goes with it.
DataPackContents = collections.namedtuple(
    'DataPackContents', ['resources', 'encoding'])
36
37
def Format(root, lang='en', output_dir='.'):
  """Builds the data pack string for the active resources below root.

  Args:
      root: node whose ActiveDescendants() are scanned for resources.
      lang: language passed to each node's GetDataPackPair().
      output_dir: unused by this function.

  Returns:
      The result of WriteDataPackToString() for the collected id => value
      map, with UTF8 as the encoding.
  """
  packable_types = (include.IncludeNode, message.MessageNode,
                    structure.StructureNode)
  resources = {}
  for node in root.ActiveDescendants():
    # Every descendant is entered as a context manager, even those that end
    # up contributing nothing to the pack.
    with node:
      if not isinstance(node, packable_types):
        continue
      resource_id, payload = node.GetDataPackPair(lang, UTF8)
      if payload is None:
        continue
      resources[resource_id] = payload
  return WriteDataPackToString(resources, UTF8)
49
50
def ReadDataPack(input_file):
  """Reads a data pack file and returns its resources and encoding.

  The file is parsed as little-endian data: a header of two uint32s (version,
  number of entries) and one uint8 (encoding), followed by number-of-entries
  + 1 index entries, each a uint16 id and a uint32 offset. A resource's data
  spans from its own offset up to the offset stored in the next index entry.

  Args:
      input_file: path of the data pack file to read.

  Returns:
      DataPackContents holding a dict of resource id => data and the encoding
      value found in the header.

  Raises:
      WrongFileVersion: if the header version is not PACK_FILE_VERSION.
      struct.error: if the file is too short to hold the header or the index.
  """
  file_data = util.ReadFile(input_file, util.BINARY)

  # Read the header.
  version, num_entries, encoding = struct.unpack_from('<IIB', file_data, 0)
  if version != PACK_FILE_VERSION:
    error_text = '%s %s' % ('Wrong file version in ', input_file)
    print(error_text)
    raise WrongFileVersion(error_text)

  # Read the index and data. Entries are decoded in place at a running
  # position rather than by re-slicing the buffer, which would copy the
  # remaining file contents once per entry.
  resources = {}
  index_entry = struct.Struct('<HI')  # Each entry is a uint16 and a uint32.
  position = HEADER_LENGTH
  for _ in range(num_entries):
    resource_id, start = index_entry.unpack_from(file_data, position)
    position += index_entry.size
    # The following entry (the extra trailing entry for the last resource)
    # supplies the offset at which this resource's data ends.
    _, end = index_entry.unpack_from(file_data, position)
    resources[resource_id] = file_data[start:end]

  return DataPackContents(resources, encoding)
76
77
def WriteDataPackToString(resources, encoding):
  """Returns a string with a map of id=>data in the data pack format.

  The output is, in order: the header (file version, number of entries,
  encoding), one index entry per resource in ascending id order plus one
  extra trailing entry, and then the resource data in the same order.

  Args:
      resources: dict mapping resource id to that resource's data string.
      encoding: value written into the header's uint8 encoding field.

  Returns:
      The packed data as a single byte string.
  """
  ids = sorted(resources.keys())

  # Write file header.
  pieces = [struct.pack('<IIB', PACK_FILE_VERSION, len(ids), encoding)]

  # Each index entry is a uint16 + a uint32. There is one extra entry after
  # the last item; its offset marks where the last resource's data ends.
  index_length = (len(ids) + 1) * (2 + 4)

  # Write index. Offsets are absolute positions within the output.
  data_offset = HEADER_LENGTH + index_length
  for resource_id in ids:
    pieces.append(struct.pack('<HI', resource_id, data_offset))
    data_offset += len(resources[resource_id])

  pieces.append(struct.pack('<HI', 0, data_offset))

  # Write data.
  pieces.extend(resources[resource_id] for resource_id in ids)
  return b''.join(pieces)
103
104
def WriteDataPack(resources, output_file, encoding):
  """Packs a map of id=>data and saves the result to output_file.

  Args:
      resources: dict mapping resource id to data.
      output_file: path of the file to write; opened in binary mode.
      encoding: encoding identifier passed on to WriteDataPackToString().
  """
  # Serialize before opening so a packing failure leaves output_file untouched.
  packed = WriteDataPackToString(resources, encoding)
  with open(output_file, 'wb') as pack_file:
    pack_file.write(packed)
110
111
def RePack(output_file, input_files, whitelist_file=None):
  """Combines several data pack files into one new data pack file.

  Args:
      output_file: path to the new data pack file.
      input_files: a list of paths to the data pack files to combine.
      whitelist_file: path to a file listing, one per line, the resource IDs
                      to keep in the output file, or None to include all
                      resources.

  Raises:
      KeyError: if there are duplicate keys or resource encoding is
      inconsistent.
  """
  # All inputs are read before the whitelist file is touched.
  packs = []
  for path in input_files:
    packs.append(ReadDataPack(path))

  allowed_ids = None
  if whitelist_file:
    whitelist_text = util.ReadFile(whitelist_file, util.RAW_TEXT)
    id_lines = whitelist_text.strip().split('\n')
    allowed_ids = set(int(line) for line in id_lines)

  merged_resources, merged_encoding = RePackFromDataPackStrings(
      packs, allowed_ids)
  WriteDataPack(merged_resources, output_file, merged_encoding)
133
134
def RePackFromDataPackStrings(inputs, whitelist):
  """Combines the resources of several data packs into one.

  Args:
      inputs: a list of DataPackContents (objects with `resources` and
              `encoding` attributes) that need to be combined.
      whitelist: a collection of resource IDs that should be kept in the
                 output, or None to include all resources. An empty
                 whitelist is treated the same as None.

  Returns:
      DataPackContents: a tuple containing the combined id => data
                        dictionary and its encoding.

  Raises:
      KeyError: if there are duplicate keys or resource encoding is
      inconsistent.
  """
  resources = {}
  encoding = None
  for content in inputs:
    # Make sure we have no dups. The check covers every key of this input,
    # including keys the whitelist would drop below.
    duplicate_keys = set(content.resources) & set(resources)
    if duplicate_keys:
      raise KeyError('Duplicate keys: ' + str(list(duplicate_keys)))

    # Make sure encoding is consistent. BINARY inputs are accepted alongside
    # any encoding, and a BINARY running value is replaced by the next
    # input's encoding.
    if encoding in (None, BINARY):
      encoding = content.encoding
    elif content.encoding not in (BINARY, encoding):
      raise KeyError('Inconsistent encodings: ' + str(encoding) +
                     ' vs ' + str(content.encoding))

    if whitelist:
      # Single pass: keep whitelisted resources and report the rest.
      for key, value in content.resources.items():
        if key in whitelist:
          resources[key] = value
        else:
          print('RePackFromDataPackStrings Removed Key: %s' % (key,))
    else:
      resources.update(content.resources)

  # Encoding is 0 for BINARY, 1 for UTF8 and 2 for UTF16
  if encoding is None:
    encoding = BINARY
  return DataPackContents(resources, encoding)
182
183
184# Temporary hack for external programs that import data_pack.
185# TODO(benrg): Remove this.
class DataPack(object):
  """Exposes the module-level pack functions as static methods."""
  # Each right-hand side resolves to the module-level function of that name,
  # because the class namespace does not contain the name yet.
  ReadDataPack = staticmethod(ReadDataPack)
  WriteDataPackToString = staticmethod(WriteDataPackToString)
  WriteDataPack = staticmethod(WriteDataPack)
  RePack = staticmethod(RePack)
192
193
def main():
  """Dumps a pack file as text, or writes two sample pack files.

  With a command-line argument, the named pack file is read and printed: the
  encoding first, then one "id: data" line per resource in ascending id
  order. Without arguments, datapack1.pak and datapack2.pak are written to
  the current directory.
  """
  if len(sys.argv) > 1:
    # When an argument is given, read and explode the file to text
    # format, for easier diffing.
    data = ReadDataPack(sys.argv[1])
    print(data.encoding)
    # Sort by id so that two dumps of the same pack list resources in the
    # same order; plain dict iteration order is not guaranteed.
    for resource_id in sorted(data.resources):
      print('%s: %s' % (resource_id, data.resources[resource_id]))
  else:
    # Just write a simple file.
    data = {1: '', 4: 'this is id 4', 6: 'this is id 6', 10: ''}
    WriteDataPack(data, 'datapack1.pak', UTF8)
    data2 = {1000: 'test', 5: 'five'}
    WriteDataPack(data2, 'datapack2.pak', UTF8)
    print('wrote datapack1 and datapack2 to current directory.')
209
210
# Run main() only when this file is executed directly as a script.
if __name__ == '__main__':
  main()
213