1#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6'''Fast and efficient parser for XTB files.
7'''
8
9
10import sys
11import xml.sax
12import xml.sax.handler
13
14import grit.node.base
15
16
class XtbContentHandler(xml.sax.handler.ContentHandler):
  '''A content handler that calls a given callback function for each
  translation in the XTB file.

  The callback is invoked as callback(msg_id, parts) each time a
  <translation> element closes.  msg_id is the value of the element's 'id'
  attribute and parts is a list of (is_placeholder, text) tuples in document
  order.  Translations inside an <if expr="..."> element are only reported
  when the expression evaluates to a true value.
  '''

  def __init__(self, callback, defs=None, debug=False, target_platform=None):
    '''Sets up the parsing state.

    Args:
      callback:         Callable taking (msg_id, parts); see the class docs.
      defs:             None, or a dictionary of defines passed to the <if>
                        expression evaluator.
      debug:            Stored on the handler as self.debug.
      target_platform:  None, or a platform identifier passed to the <if>
                        expression evaluator.  Defaults to sys.platform.
    '''
    # Initialize the base class so its own state (e.g. the document locator)
    # exists even before a parser is attached.  The base initializer is called
    # directly rather than through super() so this also works when
    # ContentHandler is an old-style class (Python 2).
    xml.sax.handler.ContentHandler.__init__(self)

    self.callback = callback
    self.debug = debug
    # 0 if we are not currently parsing a translation, otherwise the message
    # ID of that translation.
    self.current_id = 0
    # Empty if we are not currently parsing a translation, otherwise the
    # parts we have for that translation - a list of tuples
    # (is_placeholder, text)
    self.current_structure = []
    # Set to the language ID when we see the <translationbundle> node.
    self.language = ''
    # Keep track of the if block we're inside.  We can't nest ifs.
    self.if_expr = None
    # Root defines to be used with if expr.  A falsy value (None or an empty
    # dictionary) is replaced with a fresh empty dictionary.
    self.defines = defs if defs else {}
    # Target platform for build.
    self.target_platform = target_platform if target_platform else sys.platform

  def startElement(self, name, attrs):
    '''Records the start of a <translation>, <ph>, <translationbundle> or
    <if> element.  Other elements are ignored.
    '''
    if name == 'translation':
      assert self.current_id == 0 and len(self.current_structure) == 0, (
              "Didn't expect a <translation> element here.")
      self.current_id = attrs.getValue('id')
    elif name == 'ph':
      assert self.current_id != 0, "Didn't expect a <ph> element here."
      self.current_structure.append((True, attrs.getValue('name')))
    elif name == 'translationbundle':
      self.language = attrs.getValue('lang')
    elif name in ('if', 'then', 'else'):
      # <then> and <else> share this branch so that, inside an <if>, they trip
      # the same assertion as a nested <if> would.
      assert self.if_expr is None, "Can't nest <if> or use <else> in xtb files"
      self.if_expr = attrs.getValue('expr')

  def endElement(self, name):
    '''Reports a finished <translation> through the callback, or leaves the
    current <if> block.
    '''
    if name == 'translation':
      assert self.current_id != 0

      # If we're in an if block, only call the callback (add the translation)
      # if the expression is True.
      should_run_callback = True
      if self.if_expr:
        should_run_callback = grit.node.base.Node.EvaluateExpression(
            self.if_expr, self.defines, self.target_platform)
      if should_run_callback:
        self.callback(self.current_id, self.current_structure)

      # Reset the per-translation state.  A new list is created (rather than
      # clearing in place) because the callback was handed the old one.
      self.current_id = 0
      self.current_structure = []
    elif name == 'if':
      assert self.if_expr is not None
      self.if_expr = None

  def characters(self, content):
    '''Appends character data to the translation currently being parsed.'''
    if self.current_id != 0:
      # We are inside a <translation> node so just add the characters to our
      # structure.
      #
      # This naive way of handling characters is OK because in the XTB format,
      # <ph> nodes are always empty (always <ph name="XXX"/>) and whitespace
      # inside the <translation> node should be preserved.
      self.current_structure.append((False, content))
95
96
class XtbErrorHandler(xml.sax.handler.ErrorHandler):
  '''An error handler that silently ignores warnings and non-fatal errors
  and re-raises fatal errors.
  '''

  def warning(self, exception):
    # Warnings are deliberately dropped.
    return None

  def error(self, exception):
    # Non-fatal errors are deliberately dropped.
    return None

  def fatalError(self, exception):
    # Fatal errors are propagated to whoever started the parse.
    raise exception
106
107
def Parse(xtb_file, callback_function, defs=None, debug=False,
          target_platform=None):
  '''Parse xtb_file, making a call to callback_function for every translation
  in the XTB file.

  The callback function must have the signature as described below.  The 'parts'
  parameter is a list of tuples (is_placeholder, text).  The 'text' part is
  either the raw text (if is_placeholder is False) or the name of the placeholder
  (if is_placeholder is True).

  Args:
    xtb_file:           A seekable file object positioned at the start of the
                        XTB data, e.g. open('fr.xtb').  Its position is
                        changed by this call.
    callback_function:  def Callback(msg_id, parts): pass
    defs:               None, or a dictionary of preprocessor definitions.
    debug:              Default False. Set True for verbose debug output.
    target_platform:    None, or a sys.platform-like identifier of the build
                        target platform.

  Return:
    The language of the XTB, e.g. 'fr'

  Raises:
    AssertionError: if no <translationbundle> language was found.
    Any exception raised by xml.sax.parse for malformed input.
  '''
  # Start by advancing the file pointer past the DOCTYPE thing, as the TC
  # uses a path to the DTD that only works in Unix.
  # TODO(joi) Remove this ugly hack by getting the TC gang to change the
  # XTB files somehow?
  front_of_file = xtb_file.read(1024)

  # Use a marker of the same type as the data the file object returned, so
  # that files opened in binary mode work as well as files opened in text
  # mode.
  if isinstance(front_of_file, bytes):
    marker = b'<translationbundle'
  else:
    marker = '<translationbundle'

  # find() returns -1 when the marker is not within the first 1024 units, and
  # seeking to a negative offset is an error.  Rewind to the start in that
  # case and let the XML parser handle (or reject) the file as a whole.
  xtb_file.seek(max(front_of_file.find(marker), 0))

  handler = XtbContentHandler(callback=callback_function, defs=defs,
                              debug=debug, target_platform=target_platform)
  xml.sax.parse(xtb_file, handler)
  assert handler.language != ''
  return handler.language
141
142