create_string_rc.py revision 5d1f7b1de12d16ceb2c938c56701a3e8bfa558f7
1#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Generates .h and .rc files for installer strings. Run "python
7create_string_rc.py" for usage details.
8
9This script generates an rc file and header (NAME.{rc,h}) to be included in
10setup.exe. The rc file includes translations for strings pulled from the given
11.grd file(s) and their corresponding localized .xtb files.
12
13The header file includes IDs for each string, but also has values to allow
14getting a string based on a language offset.  For example, the header file
15looks like this:
16
17#define IDS_L10N_OFFSET_AR 0
18#define IDS_L10N_OFFSET_BG 1
19#define IDS_L10N_OFFSET_CA 2
20...
21#define IDS_L10N_OFFSET_ZH_TW 41
22
23#define IDS_MY_STRING_AR 1600
24#define IDS_MY_STRING_BG 1601
25...
26#define IDS_MY_STRING_BASE IDS_MY_STRING_AR
27
This allows us to look up the ID for a string by adding IDS_MY_STRING_BASE and
the IDS_L10N_OFFSET_* value for the language we are interested in.
30"""
31
32import argparse
33import glob
34import io
35import os
36import sys
37from xml import sax
38
39BASEDIR = os.path.dirname(os.path.abspath(__file__))
40sys.path.append(os.path.join(BASEDIR, '../../../../tools/grit'))
41sys.path.append(os.path.join(BASEDIR, '../../../../tools/python'))
42
43from grit.extern import tclib
44
# The IDs of strings we want to import from the .grd files and include in
# setup.exe's resources. Only <message> elements whose name appears in this
# list are extracted from the .grd files, and one NAME_BASE define is emitted
# in the generated header for every entry listed here.
STRING_IDS = [
  'IDS_PRODUCT_NAME',
  'IDS_SXS_SHORTCUT_NAME',
  'IDS_PRODUCT_APP_LAUNCHER_NAME',
  'IDS_PRODUCT_BINARIES_NAME',
  'IDS_PRODUCT_DESCRIPTION',
  'IDS_UNINSTALL_CHROME',
  'IDS_ABOUT_VERSION_COMPANY_NAME',
  'IDS_INSTALL_HIGHER_VERSION',
  'IDS_INSTALL_HIGHER_VERSION_APP_LAUNCHER',
  'IDS_INSTALL_FAILED',
  'IDS_SAME_VERSION_REPAIR_FAILED',
  'IDS_SETUP_PATCH_FAILED',
  'IDS_INSTALL_OS_NOT_SUPPORTED',
  'IDS_INSTALL_OS_ERROR',
  'IDS_INSTALL_TEMP_DIR_FAILED',
  'IDS_INSTALL_UNCOMPRESSION_FAILED',
  'IDS_INSTALL_INVALID_ARCHIVE',
  'IDS_INSTALL_INSUFFICIENT_RIGHTS',
  'IDS_INSTALL_NO_PRODUCTS_TO_UPDATE',
  'IDS_UNINSTALL_COMPLETE',
  'IDS_INSTALL_DIR_IN_USE',
  'IDS_INSTALL_MULTI_INSTALLATION_EXISTS',
  'IDS_INSTALL_INCONSISTENT_UPDATE_POLICY',
  'IDS_OEM_MAIN_SHORTCUT_NAME',
  'IDS_SHORTCUT_TOOLTIP',
  'IDS_SHORTCUT_NEW_WINDOW',
  'IDS_APP_LAUNCHER_PRODUCT_DESCRIPTION',
  'IDS_APP_LAUNCHER_SHORTCUT_TOOLTIP',
  'IDS_UNINSTALL_APP_LAUNCHER',
  'IDS_APP_LIST_SHORTCUT_NAME',
  'IDS_APP_LIST_SHORTCUT_NAME_CANARY',
  'IDS_APP_SHORTCUTS_SUBDIR_NAME',
  'IDS_APP_SHORTCUTS_SUBDIR_NAME_CANARY',
]

# The ID of the first resource string. Each (string, language) pair written to
# the generated header receives the next consecutive value starting from here.
FIRST_RESOURCE_ID = 1600
85
86
class GrdHandler(sax.handler.ContentHandler):
  """SAX content handler that collects chosen messages from a .grd file.

  Attributes:
    messages: A dict mapping each extracted message name to its text, with
      leading and trailing whitespace removed.
  """
  def __init__(self, string_ids):
    """Creates a handler that extracts only the named messages.

    Args:
      string_ids: An iterable of message names to extract; results are stored
        in |messages| as the document is parsed.
    """
    sax.handler.ContentHandler.__init__(self)
    self.messages = {}
    self.__wanted_ids = set(string_ids)
    # Names of the elements currently open, innermost last.
    self.__open_elements = []
    # Name of the wanted message being read, or None when not inside one.
    self.__current_name = None
    # Pieces of text gathered so far for the current message.
    self.__fragments = []
    # Callable that receives character data while a wanted message is open.
    self.__text_sink = None

  def startElement(self, name, attrs):
    self.__open_elements.append(name)
    if name != 'message':
      return
    message_name = attrs.getValue('name')
    # A <message> must not begin while another one is being extracted.
    assert self.__current_name is None
    self.__current_name = None
    if message_name in self.__wanted_ids:
      self.__current_name = message_name
    if self.__current_name:
      self.__text_sink = self.__CollectText

  def endElement(self, name):
    innermost = self.__open_elements.pop()
    assert innermost == name
    if name != 'message' or self.__current_name is None:
      return
    # The message is complete: record its text and reset the per-message state.
    self.messages[self.__current_name] = ''.join(self.__fragments).strip()
    self.__current_name = None
    self.__fragments = []
    self.__text_sink = None

  def characters(self, content):
    sink = self.__text_sink
    if sink:
      sink(self.__open_elements[-1], content)

  def __CollectText(self, containing_element, message_text):
    """Keeps text found directly inside a <message> or a <ph> element."""
    if not message_text:
      return
    if containing_element in ('message', 'ph'):
      self.__fragments.append(message_text)
149
150
class XtbHandler(sax.handler.ContentHandler):
  """SAX content handler that collects chosen translations from an .xtb file.

  Each parsed document resets and then fills in |lang| and |translations|, so
  a single instance can be used to read the same set of translations from
  several .xtb files in turn.

  Attributes:
    translations: A dict mapping string identifiers to translated text, with
      leading and trailing whitespace removed.
    lang: The language of the .xtb file, upper-cased and with '-' replaced by
      '_'.
  """
  def __init__(self, translation_ids):
    """Creates a handler that extracts the given translations.

    Args:
      translation_ids: A mapping of translation ids (as found in the .xtb
        file) to the string identifiers under which to store the text.
    """
    sax.handler.ContentHandler.__init__(self)
    self.lang = None
    self.translations = None
    self.__id_map = translation_ids
    # Names of the elements currently open, innermost last.
    self.__open_elements = []
    # String identifier of the translation being read, or None.
    self.__current_id = None
    # Pieces of text gathered so far for the current translation.
    self.__fragments = []
    # Callable that receives character data while a wanted translation is open.
    self.__text_sink = None

  def startDocument(self):
    # A new document is starting; discard results from any previous one.
    self.translations = {}
    self.lang = ''

  def startElement(self, name, attrs):
    self.__open_elements.append(name)
    if len(self.__open_elements) == 1:
      # The document element is <translationbundle>; it carries the language.
      assert name == 'translationbundle'
      self.lang = attrs.getValue('lang').replace('-', '_').upper()
    if name != 'translation':
      return
    assert self.__current_id is None
    self.__current_id = self.__id_map.get(attrs.getValue('id'))
    if self.__current_id is not None:
      self.__text_sink = self.__CollectText

  def endElement(self, name):
    innermost = self.__open_elements.pop()
    assert innermost == name
    if name != 'translation' or self.__current_id is None:
      return
    # The translation is complete: record it and reset the per-item state.
    self.translations[self.__current_id] = ''.join(self.__fragments).strip()
    self.__current_id = None
    self.__fragments = []
    self.__text_sink = None

  def characters(self, content):
    sink = self.__text_sink
    if sink:
      sink(self.__open_elements[-1], content)

  def __CollectText(self, containing_element, message_text):
    """Keeps text found directly inside a <translation> element."""
    if not message_text:
      return
    if containing_element == 'translation':
      self.__fragments.append(message_text)
221
222
class StringRcMaker(object):
  """Makes .h and .rc files containing strings and translations.

  Attributes:
    name: The base name of the generated files.
    inputs: A list of (grd_file, xtb_dir) pairs containing the source data.
    outdir: The directory into which the files are generated.
  """
  def __init__(self, name, inputs, outdir):
    """Constructs a maker.

    Args:
      name: The base name of the generated files (e.g.,
        'installer_util_strings').
      inputs: A list of (grd_file, xtb_dir) pairs containing the source data.
      outdir: The directory into which the files will be generated.
    """
    self.name = name
    self.inputs = inputs
    self.outdir = outdir

  def MakeFiles(self):
    """Reads every input, then writes NAME.rc and NAME.h into |outdir|."""
    translated_strings = self.__ReadSourceAndTranslatedStrings()
    self.__WriteRCFile(translated_strings)
    self.__WriteHeaderFile(translated_strings)

  class __TranslationData(object):
    """A container of information about a single translation."""
    def __init__(self, resource_id_str, language, translation):
      self.resource_id_str = resource_id_str
      self.language = language
      self.translation = translation

    def __lt__(self, other):
      """Allow __TranslationDatas to be sorted by id then by language.

      list.sort() only needs __lt__, and unlike __cmp__/cmp() it is honoured
      by both Python 2 and Python 3.
      """
      return ((self.resource_id_str, self.language) <
              (other.resource_id_str, other.language))

  def __ReadSourceAndTranslatedStrings(self):
    """Reads the source strings and translations from all inputs.

    Returns:
      A list of __TranslationData instances sorted by id then by language.
    """
    translated_strings = []
    for grd_file, xtb_dir in self.inputs:
      # Get the name of the grd file sans extension.
      source_name = os.path.splitext(os.path.basename(grd_file))[0]
      # Compute a glob for the translation files; |xtb_dir| is taken relative
      # to the directory holding the .grd file.
      xtb_pattern = os.path.join(os.path.dirname(grd_file), xtb_dir,
                                 '%s*.xtb' % source_name)
      translated_strings.extend(
        self.__ReadSourceAndTranslationsFrom(grd_file, glob.glob(xtb_pattern)))
    translated_strings.sort()
    return translated_strings

  def __ReadSourceAndTranslationsFrom(self, grd_file, xtb_files):
    """Reads source strings and translations for a .grd file.

    Reads the source strings and all available translations for the messages
    identified by STRING_IDS. The source string is used where translations are
    missing.

    Args:
      grd_file: Path to a .grd file.
      xtb_files: List of paths to .xtb files.

    Returns:
      An unsorted list of __TranslationData instances.
    """
    sax_parser = sax.make_parser()

    # Read the source (en-US) string from the .grd file.
    grd_handler = GrdHandler(STRING_IDS)
    sax_parser.setContentHandler(grd_handler)
    sax_parser.parse(grd_file)
    source_strings = grd_handler.messages

    # Manually put the source strings as en-US in the list of translated
    # strings.
    translated_strings = [
      self.__TranslationData(string_id, 'EN_US', message_text)
      for string_id, message_text in source_strings.items()
    ]

    # Generate the message ID for each source string to correlate it with its
    # translations in the .xtb files.
    translation_ids = {
      tclib.GenerateMessageId(message_text): string_id
      for (string_id, message_text) in source_strings.items()
    }

    # Gather the translated strings from the .xtb files. Use the en-US string
    # for any message lacking a translation.
    xtb_handler = XtbHandler(translation_ids)
    sax_parser.setContentHandler(xtb_handler)
    for xtb_filename in xtb_files:
      sax_parser.parse(xtb_filename)
      for string_id, message_text in source_strings.items():
        translated_string = xtb_handler.translations.get(string_id,
                                                         message_text)
        translated_strings.append(self.__TranslationData(string_id,
                                                         xtb_handler.lang,
                                                         translated_string))
    return translated_strings

  def __WriteRCFile(self, translated_strings):
    """Writes a resource file with the strings provided in |translated_strings|.

    The file is written to |outdir|/|name|.rc, encoded as UTF-16 with '\n'
    line endings.
    """
    HEADER_TEXT = (
      u'#include "%s.h"\n\n'
      u'STRINGTABLE\n'
      u'BEGIN\n'
      ) % self.name

    FOOTER_TEXT = (
      u'END\n'
    )

    with io.open(os.path.join(self.outdir, self.name + '.rc'),
                 mode='w',
                 encoding='utf-16',
                 newline='\n') as outfile:
      outfile.write(HEADER_TEXT)
      for translation in translated_strings:
        # Escape special characters for the rc file.
        escaped_text = (translation.translation.replace('"', '""')
                       .replace('\t', '\\t')
                       .replace('\n', '\\n'))
        outfile.write(u'  %s "%s"\n' %
                      (translation.resource_id_str + '_' + translation.language,
                       escaped_text))
      outfile.write(FOOTER_TEXT)

  def __WriteHeaderFile(self, translated_strings):
    """Writes a .h file with resource ids.

    The file is written to |outdir|/|name|.h. |translated_strings| must be
    sorted by id then by language and must not be empty: the languages of the
    first string id define the IDS_L10N_OFFSET_* values, and the first entry's
    language is used for the *_BASE defines.
    """
    # TODO(grt): Stream the lines to the file rather than building this giant
    # list of lines first.
    lines = []
    do_languages_lines = ['\n#define DO_LANGUAGES']
    installer_string_mapping_lines = ['\n#define DO_INSTALLER_STRING_MAPPING']

    # Write the values for how the languages ids are offset. The input is
    # sorted by id, so the first repeated language marks the start of the
    # second string id and every language has been seen by then.
    seen_languages = set()
    offset_id = 0
    for translation_data in translated_strings:
      lang = translation_data.language
      if lang not in seen_languages:
        seen_languages.add(lang)
        lines.append('#define IDS_L10N_OFFSET_%s %s' % (lang, offset_id))
        do_languages_lines.append('  HANDLE_LANGUAGE(%s, IDS_L10N_OFFSET_%s)'
                                  % (lang.replace('_', '-').lower(), lang))
        offset_id += 1
      else:
        break

    # Write the resource ids themselves.
    resource_id = FIRST_RESOURCE_ID
    for translation_data in translated_strings:
      lines.append('#define %s %s' % (translation_data.resource_id_str + '_' +
                                      translation_data.language,
                                      resource_id))
      resource_id += 1

    # Write out base ID values.
    for string_id in STRING_IDS:
      lines.append('#define %s_BASE %s_%s' % (string_id,
                                              string_id,
                                              translated_strings[0].language))
      installer_string_mapping_lines.append('  HANDLE_STRING(%s_BASE, %s)'
                                            % (string_id, string_id))

    # Assemble the text with unicode separators so that the result is a text
    # (unicode) string on both Python 2 and Python 3, as io.open requires.
    header_text = u''.join([
      u'\n'.join(lines),
      u'\n#ifndef RC_INVOKED',
      u' \\\n'.join(do_languages_lines),
      u' \\\n'.join(installer_string_mapping_lines),
      # .rc files must end in a new line
      u'\n#endif  // ndef RC_INVOKED\n',
    ])

    # newline='\n' disables newline translation, matching what binary-mode
    # writing produced.
    with io.open(os.path.join(self.outdir, self.name + '.h'),
                 mode='w',
                 encoding='utf-8',
                 newline='\n') as outfile:
      outfile.write(header_text)
393
394
def ParseCommandLine():
  """Parses the process's command line arguments.

  Returns:
    The argparse namespace, with |inputs| holding a list of
    (grd_path, xtb_dir) tuples (one per -i option), |outdir| holding the -o
    value, and |name| holding the -n value. All three options are required.
  """
  def GrdPathAndXtbDirPair(string):
    """Returns (grd_path, xtb_dir) given a colon-separated string of the same.

    Raises:
      argparse.ArgumentTypeError: |string| does not consist of exactly two
        colon-separated parts.
    """
    parts = string.split(':')
    # Compare by value; identity comparison against an int literal is not
    # guaranteed to work.
    if len(parts) != 2:
      raise argparse.ArgumentTypeError('%r is not grd_path:xtb_dir' % string)
    return (parts[0], parts[1])

  parser = argparse.ArgumentParser(
    description='Generate .h and .rc files for installer strings.')
  parser.add_argument('-i', action='append',
                      type=GrdPathAndXtbDirPair,
                      required=True,
                      help='path to .grd file:relative path to .xtb dir',
                      metavar='GRDFILE:XTBDIR',
                      dest='inputs')
  parser.add_argument('-o',
                      required=True,
                      help='output directory for generated .rc and .h files',
                      dest='outdir')
  parser.add_argument('-n',
                      required=True,
                      help='base name of generated .rc and .h files',
                      dest='name')
  return parser.parse_args()
421
422
def main():
  """Generates the .rc and .h files requested on the command line.

  Returns:
    0, for use as the process exit code.
  """
  options = ParseCommandLine()
  maker = StringRcMaker(options.name, options.inputs, options.outdir)
  maker.MakeFiles()
  return 0
427
428
# Run main() only when executed as a script, and exit with its return value.
if __name__ == '__main__':
  sys.exit(main())
431