1#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6'''Base types for nodes in a GRIT resource tree.
7'''
8
9import collections
10import os
11import sys
12import types
13from xml.sax import saxutils
14
15from grit import clique
16from grit import exception
17from grit import util
18
19
class Node(object):
  '''An item in the tree that has children.

  Base type for nodes in a GRIT resource tree.  Subclasses customize a node
  by overriding hooks such as _IsValidChild(), _IsValidAttribute(),
  _ContentType(), MandatoryAttributes() and DefaultAttributes().
  '''

  # Valid content types that can be returned by _ContentType()
  _CONTENT_TYPE_NONE = 0   # No CDATA content but may have children
  _CONTENT_TYPE_CDATA = 1  # Only CDATA, no children.
  _CONTENT_TYPE_MIXED = 2  # CDATA and children, possibly intermingled

  # By default a node is not marked as skipped by a whitelist.  This is the
  # class-level default; SetWhitelistMarkedAsSkip() stores a per-instance value.
  _whitelist_marked_as_skip = False

  # A class-static cache to memoize EvaluateExpression().
  # It has a 2 level nested dict structure.  The outer dict has keys
  # of tuples which define the environment in which the expression
  # will be evaluated (the defines, the target platform and any extra
  # variables). The inner dict is a map of expr->result.
  eval_expr_cache = collections.defaultdict(dict)
36
37  def __init__(self):
38    self.children = []        # A list of child elements
39    self.mixed_content = []   # A list of u'' and/or child elements (this
40                              # duplicates 'children' but
41                              # is needed to preserve markup-type content).
42    self.name = u''           # The name of this element
43    self.attrs = {}           # The set of attributes (keys to values)
44    self.parent = None        # Our parent unless we are the root element.
45    self.uberclique = None    # Allows overriding uberclique for parts of tree
46
47  # This context handler allows you to write "with node:" and get a
48  # line identifying the offending node if an exception escapes from the body
49  # of the with statement.
50  def __enter__(self):
51    return self
52
53  def __exit__(self, exc_type, exc_value, traceback):
54    if exc_type is not None:
55      print u'Error processing node %s' % unicode(self)
56
57  def __iter__(self):
58    '''A preorder iteration through the tree that this node is the root of.'''
59    return self.Preorder()
60
61  def Preorder(self):
62    '''Generator that generates first this node, then the same generator for
63    any child nodes.'''
64    yield self
65    for child in self.children:
66      for iterchild in child.Preorder():
67        yield iterchild
68
69  def ActiveChildren(self):
70    '''Returns the children of this node that should be included in the current
71    configuration. Overridden by <if>.'''
72    return [node for node in self.children if not node.WhitelistMarkedAsSkip()]
73
74  def ActiveDescendants(self):
75    '''Yields the current node and all descendants that should be included in
76    the current configuration, in preorder.'''
77    yield self
78    for child in self.ActiveChildren():
79      for descendant in child.ActiveDescendants():
80        yield descendant
81
82  def GetRoot(self):
83    '''Returns the root Node in the tree this Node belongs to.'''
84    curr = self
85    while curr.parent:
86      curr = curr.parent
87    return curr
88
89    # TODO(joi) Use this (currently untested) optimization?:
90    #if hasattr(self, '_root'):
91    #  return self._root
92    #curr = self
93    #while curr.parent and not hasattr(curr, '_root'):
94    #  curr = curr.parent
95    #if curr.parent:
96    #  self._root = curr._root
97    #else:
98    #  self._root = curr
99    #return self._root
100
101  def StartParsing(self, name, parent):
102    '''Called at the start of parsing.
103
104    Args:
105      name: u'elementname'
106      parent: grit.node.base.Node or subclass or None
107    '''
108    assert isinstance(name, types.StringTypes)
109    assert not parent or isinstance(parent, Node)
110    self.name = name
111    self.parent = parent
112
113  def AddChild(self, child):
114    '''Adds a child to the list of children of this node, if it is a valid
115    child for the node.'''
116    assert isinstance(child, Node)
117    if (not self._IsValidChild(child) or
118        self._ContentType() == self._CONTENT_TYPE_CDATA):
119      explanation = 'invalid child %s for parent %s' % (str(child), self.name)
120      raise exception.UnexpectedChild(explanation)
121    self.children.append(child)
122    self.mixed_content.append(child)
123
124  def RemoveChild(self, child_id):
125    '''Removes the first node that has a "name" attribute which
126    matches "child_id" in the list of immediate children of
127    this node.
128
129    Args:
130      child_id: String identifying the child to be removed
131    '''
132    index = 0
133    # Safe not to copy since we only remove the first element found
134    for child in self.children:
135      name_attr = child.attrs['name']
136      if name_attr == child_id:
137        self.children.pop(index)
138        self.mixed_content.pop(index)
139        break
140      index += 1
141
142  def AppendContent(self, content):
143    '''Appends a chunk of text as content of this node.
144
145    Args:
146      content: u'hello'
147
148    Return:
149      None
150    '''
151    assert isinstance(content, types.StringTypes)
152    if self._ContentType() != self._CONTENT_TYPE_NONE:
153      self.mixed_content.append(content)
154    elif content.strip() != '':
155      raise exception.UnexpectedContent()
156
157  def HandleAttribute(self, attrib, value):
158    '''Informs the node of an attribute that was parsed out of the GRD file
159    for it.
160
161    Args:
162      attrib: 'name'
163      value: 'fooblat'
164
165    Return:
166      None
167    '''
168    assert isinstance(attrib, types.StringTypes)
169    assert isinstance(value, types.StringTypes)
170    if self._IsValidAttribute(attrib, value):
171      self.attrs[attrib] = value
172    else:
173      raise exception.UnexpectedAttribute(attrib)
174
175  def EndParsing(self):
176    '''Called at the end of parsing.'''
177
178    # TODO(joi) Rewrite this, it's extremely ugly!
179    if len(self.mixed_content):
180      if isinstance(self.mixed_content[0], types.StringTypes):
181        # Remove leading and trailing chunks of pure whitespace.
182        while (len(self.mixed_content) and
183               isinstance(self.mixed_content[0], types.StringTypes) and
184               self.mixed_content[0].strip() == ''):
185          self.mixed_content = self.mixed_content[1:]
186        # Strip leading and trailing whitespace from mixed content chunks
187        # at front and back.
188        if (len(self.mixed_content) and
189            isinstance(self.mixed_content[0], types.StringTypes)):
190          self.mixed_content[0] = self.mixed_content[0].lstrip()
191        # Remove leading and trailing ''' (used to demarcate whitespace)
192        if (len(self.mixed_content) and
193            isinstance(self.mixed_content[0], types.StringTypes)):
194          if self.mixed_content[0].startswith("'''"):
195            self.mixed_content[0] = self.mixed_content[0][3:]
196    if len(self.mixed_content):
197      if isinstance(self.mixed_content[-1], types.StringTypes):
198        # Same stuff all over again for the tail end.
199        while (len(self.mixed_content) and
200               isinstance(self.mixed_content[-1], types.StringTypes) and
201               self.mixed_content[-1].strip() == ''):
202          self.mixed_content = self.mixed_content[:-1]
203        if (len(self.mixed_content) and
204            isinstance(self.mixed_content[-1], types.StringTypes)):
205          self.mixed_content[-1] = self.mixed_content[-1].rstrip()
206        if (len(self.mixed_content) and
207            isinstance(self.mixed_content[-1], types.StringTypes)):
208          if self.mixed_content[-1].endswith("'''"):
209            self.mixed_content[-1] = self.mixed_content[-1][:-3]
210
211    # Check that all mandatory attributes are there.
212    for node_mandatt in self.MandatoryAttributes():
213      mandatt_list = []
214      if node_mandatt.find('|') >= 0:
215        mandatt_list = node_mandatt.split('|')
216      else:
217        mandatt_list.append(node_mandatt)
218
219      mandatt_option_found = False
220      for mandatt in mandatt_list:
221        assert mandatt not in self.DefaultAttributes().keys()
222        if mandatt in self.attrs:
223          if not mandatt_option_found:
224            mandatt_option_found = True
225          else:
226            raise exception.MutuallyExclusiveMandatoryAttribute(mandatt)
227
228      if not mandatt_option_found:
229        raise exception.MissingMandatoryAttribute(mandatt)
230
231    # Add default attributes if not specified in input file.
232    for defattr in self.DefaultAttributes():
233      if not defattr in self.attrs:
234        self.attrs[defattr] = self.DefaultAttributes()[defattr]
235
236  def GetCdata(self):
237    '''Returns all CDATA of this element, concatenated into a single
238    string.  Note that this ignores any elements embedded in CDATA.'''
239    return ''.join([c for c in self.mixed_content
240                    if isinstance(c, types.StringTypes)])
241
242  def __unicode__(self):
243    '''Returns this node and all nodes below it as an XML document in a Unicode
244    string.'''
245    header = u'<?xml version="1.0" encoding="UTF-8"?>\n'
246    return header + self.FormatXml()
247
248  def FormatXml(self, indent = u'', one_line = False):
249    '''Returns this node and all nodes below it as an XML
250    element in a Unicode string.  This differs from __unicode__ in that it does
251    not include the <?xml> stuff at the top of the string.  If one_line is true,
252    children and CDATA are layed out in a way that preserves internal
253    whitespace.
254    '''
255    assert isinstance(indent, types.StringTypes)
256
257    content_one_line = (one_line or
258                        self._ContentType() == self._CONTENT_TYPE_MIXED)
259    inside_content = self.ContentsAsXml(indent, content_one_line)
260
261    # Then the attributes for this node.
262    attribs = u''
263    default_attribs = self.DefaultAttributes()
264    for attrib, value in sorted(self.attrs.items()):
265      # Only print an attribute if it is other than the default value.
266      if attrib not in default_attribs or value != default_attribs[attrib]:
267        attribs += u' %s=%s' % (attrib, saxutils.quoteattr(value))
268
269    # Finally build the XML for our node and return it
270    if len(inside_content) > 0:
271      if one_line:
272        return u'<%s%s>%s</%s>' % (self.name, attribs, inside_content, self.name)
273      elif content_one_line:
274        return u'%s<%s%s>\n%s  %s\n%s</%s>' % (
275          indent, self.name, attribs,
276          indent, inside_content,
277          indent, self.name)
278      else:
279        return u'%s<%s%s>\n%s\n%s</%s>' % (
280          indent, self.name, attribs,
281          inside_content,
282          indent, self.name)
283    else:
284      return u'%s<%s%s />' % (indent, self.name, attribs)
285
286  def ContentsAsXml(self, indent, one_line):
287    '''Returns the contents of this node (CDATA and child elements) in XML
288    format.  If 'one_line' is true, the content will be laid out on one line.'''
289    assert isinstance(indent, types.StringTypes)
290
291    # Build the contents of the element.
292    inside_parts = []
293    last_item = None
294    for mixed_item in self.mixed_content:
295      if isinstance(mixed_item, Node):
296        inside_parts.append(mixed_item.FormatXml(indent + u'  ', one_line))
297        if not one_line:
298          inside_parts.append(u'\n')
299      else:
300        message = mixed_item
301        # If this is the first item and it starts with whitespace, we add
302        # the ''' delimiter.
303        if not last_item and message.lstrip() != message:
304          message = u"'''" + message
305        inside_parts.append(util.EncodeCdata(message))
306      last_item = mixed_item
307
308    # If there are only child nodes and no cdata, there will be a spurious
309    # trailing \n
310    if len(inside_parts) and inside_parts[-1] == '\n':
311      inside_parts = inside_parts[:-1]
312
313    # If the last item is a string (not a node) and ends with whitespace,
314    # we need to add the ''' delimiter.
315    if (isinstance(last_item, types.StringTypes) and
316        last_item.rstrip() != last_item):
317      inside_parts[-1] = inside_parts[-1] + u"'''"
318
319    return u''.join(inside_parts)
320
321  def SubstituteMessages(self, substituter):
322    '''Applies substitutions to all messages in the tree.
323
324    Called as a final step of RunGatherers.
325
326    Args:
327      substituter: a grit.util.Substituter object.
328    '''
329    for child in self.children:
330      child.SubstituteMessages(substituter)
331
332  def _IsValidChild(self, child):
333    '''Returns true if 'child' is a valid child of this node.
334    Overridden by subclasses.'''
335    return False
336
337  def _IsValidAttribute(self, name, value):
338    '''Returns true if 'name' is the name of a valid attribute of this element
339    and 'value' is a valid value for that attribute.  Overriden by
340    subclasses unless they have only mandatory attributes.'''
341    return (name in self.MandatoryAttributes() or
342            name in self.DefaultAttributes())
343
344  def _ContentType(self):
345    '''Returns the type of content this element can have.  Overridden by
346    subclasses.  The content type can be one of the _CONTENT_TYPE_XXX constants
347    above.'''
348    return self._CONTENT_TYPE_NONE
349
350  def MandatoryAttributes(self):
351    '''Returns a list of attribute names that are mandatory (non-optional)
352    on the current element. One can specify a list of
353    "mutually exclusive mandatory" attributes by specifying them as one
354    element in the list, separated by a "|" character.
355    '''
356    return []
357
358  def DefaultAttributes(self):
359    '''Returns a dictionary of attribute names that have defaults, mapped to
360    the default value.  Overridden by subclasses.'''
361    return {}
362
363  def GetCliques(self):
364    '''Returns all MessageClique objects belonging to this node.  Overridden
365    by subclasses.
366
367    Return:
368      [clique1, clique2] or []
369    '''
370    return []
371
372  def ToRealPath(self, path_from_basedir):
373    '''Returns a real path (which can be absolute or relative to the current
374    working directory), given a path that is relative to the base directory
375    set for the GRIT input file.
376
377    Args:
378      path_from_basedir: '..'
379
380    Return:
381      'resource'
382    '''
383    return util.normpath(os.path.join(self.GetRoot().GetBaseDir(),
384                                      os.path.expandvars(path_from_basedir)))
385
386  def GetInputPath(self):
387    '''Returns a path, relative to the base directory set for the grd file,
388    that points to the file the node refers to.
389    '''
390    # This implementation works for most nodes that have an input file.
391    return self.attrs['file']
392
393  def UberClique(self):
394    '''Returns the uberclique that should be used for messages originating in
395    a given node.  If the node itself has its uberclique set, that is what we
396    use, otherwise we search upwards until we find one.  If we do not find one
397    even at the root node, we set the root node's uberclique to a new
398    uberclique instance.
399    '''
400    node = self
401    while not node.uberclique and node.parent:
402      node = node.parent
403    if not node.uberclique:
404      node.uberclique = clique.UberClique()
405    return node.uberclique
406
407  def IsTranslateable(self):
408    '''Returns false if the node has contents that should not be translated,
409    otherwise returns false (even if the node has no contents).
410    '''
411    if not 'translateable' in self.attrs:
412      return True
413    else:
414      return self.attrs['translateable'] == 'true'
415
416  def GetNodeById(self, id):
417    '''Returns the node in the subtree parented by this node that has a 'name'
418    attribute matching 'id'.  Returns None if no such node is found.
419    '''
420    for node in self:
421      if 'name' in node.attrs and node.attrs['name'] == id:
422        return node
423    return None
424
425  def GetChildrenOfType(self, type):
426    '''Returns a list of all subnodes (recursing to all leaves) of this node
427    that are of the indicated type (or tuple of types).
428
429    Args:
430      type: A type you could use with isinstance().
431
432    Return:
433      A list, possibly empty.
434    '''
435    return [child for child in self if isinstance(child, type)]
436
437  def GetTextualIds(self):
438    '''Returns a list of the textual ids of this node.
439    '''
440    if 'name' in self.attrs:
441      return [self.attrs['name']]
442    return []
443
444  @classmethod
445  def EvaluateExpression(cls, expr, defs, target_platform, extra_variables=None):
446    '''Worker for EvaluateCondition (below) and conditions in XTB files.'''
447    cache_dict = cls.eval_expr_cache[
448        (tuple(defs.iteritems()), target_platform, extra_variables)]
449    if expr in cache_dict:
450      return cache_dict[expr]
451    def pp_ifdef(symbol):
452      return symbol in defs
453    def pp_if(symbol):
454      return defs.get(symbol, False)
455    variable_map = {
456        'defs' : defs,
457        'os': target_platform,
458        'is_linux': target_platform.startswith('linux'),
459        'is_macosx': target_platform == 'darwin',
460        'is_win': target_platform in ('cygwin', 'win32'),
461        'is_android': target_platform == 'android',
462        'is_ios': target_platform == 'ios',
463        'is_posix': (target_platform in ('darwin', 'linux2', 'linux3', 'sunos5',
464                                         'android', 'ios')
465                    or 'bsd' in target_platform),
466        'pp_ifdef' : pp_ifdef,
467        'pp_if' : pp_if,
468    }
469    if extra_variables:
470      variable_map.update(extra_variables)
471    eval_result = cache_dict[expr] = eval(expr, {}, variable_map)
472    return eval_result
473
474  def EvaluateCondition(self, expr):
475    '''Returns true if and only if the Python expression 'expr' evaluates
476    to true.
477
478    The expression is given a few local variables:
479      - 'lang' is the language currently being output
480           (the 'lang' attribute of the <output> element).
481      - 'context' is the current output context
482           (the 'context' attribute of the <output> element).
483      - 'defs' is a map of C preprocessor-style symbol names to their values.
484      - 'os' is the current platform (likely 'linux2', 'win32' or 'darwin').
485      - 'pp_ifdef(symbol)' is a shorthand for "symbol in defs".
486      - 'pp_if(symbol)' is a shorthand for "symbol in defs and defs[symbol]".
487      - 'is_linux', 'is_macosx', 'is_win', 'is_posix' are true if 'os'
488           matches the given platform.
489    '''
490    root = self.GetRoot()
491    lang = getattr(root, 'output_language', '')
492    context = getattr(root, 'output_context', '')
493    defs = getattr(root, 'defines', {})
494    target_platform = getattr(root, 'target_platform', '')
495    extra_variables = (
496        ('lang', lang),
497        ('context', context),
498    )
499    return Node.EvaluateExpression(
500        expr, defs, target_platform, extra_variables)
501
502  def OnlyTheseTranslations(self, languages):
503    '''Turns off loading of translations for languages not in the provided list.
504
505    Attrs:
506      languages: ['fr', 'zh_cn']
507    '''
508    for node in self:
509      if (hasattr(node, 'IsTranslation') and
510          node.IsTranslation() and
511          node.GetLang() not in languages):
512        node.DisableLoading()
513
514  def FindBooleanAttribute(self, attr, default, skip_self):
515    '''Searches all ancestors of the current node for the nearest enclosing
516    definition of the given boolean attribute.
517
518    Args:
519      attr: 'fallback_to_english'
520      default: What to return if no node defines the attribute.
521      skip_self: Don't check the current node, only its parents.
522    '''
523    p = self.parent if skip_self else self
524    while p:
525      value = p.attrs.get(attr, 'default').lower()
526      if value != 'default':
527        return (value == 'true')
528      p = p.parent
529    return default
530
531  def PseudoIsAllowed(self):
532    '''Returns true if this node is allowed to use pseudo-translations.  This
533    is true by default, unless this node is within a <release> node that has
534    the allow_pseudo attribute set to false.
535    '''
536    return self.FindBooleanAttribute('allow_pseudo',
537                                     default=True, skip_self=True)
538
539  def ShouldFallbackToEnglish(self):
540    '''Returns true iff this node should fall back to English when
541    pseudotranslations are disabled and no translation is available for a
542    given message.
543    '''
544    return self.FindBooleanAttribute('fallback_to_english',
545                                     default=False, skip_self=True)
546
547  def WhitelistMarkedAsSkip(self):
548    '''Returns true if the node is marked to be skipped in the output by a
549    whitelist.
550    '''
551    return self._whitelist_marked_as_skip
552
553  def SetWhitelistMarkedAsSkip(self, mark_skipped):
554    '''Sets WhitelistMarkedAsSkip.
555    '''
556    self._whitelist_marked_as_skip = mark_skipped
557
558  def ExpandVariables(self):
559    '''Whether we need to expand variables on a given node.'''
560    return False
561
562
class ContentNode(Node):
  '''Convenience base class for nodes that can have content.'''

  def _ContentType(self):
    '''Reports mixed content: CDATA and children, possibly intermingled.'''
    content_type = self._CONTENT_TYPE_MIXED
    return content_type
567
568