1#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6'''Base types for nodes in a GRIT resource tree.
7'''
8
9import ast
10import os
11import types
12from xml.sax import saxutils
13
14from grit import clique
15from grit import exception
16from grit import util
17
18
19class Node(object):
20  '''An item in the tree that has children.'''
21
22  # Valid content types that can be returned by _ContentType()
23  _CONTENT_TYPE_NONE = 0   # No CDATA content but may have children
24  _CONTENT_TYPE_CDATA = 1  # Only CDATA, no children.
25  _CONTENT_TYPE_MIXED = 2  # CDATA and children, possibly intermingled
26
  # By default, nodes are not marked as skipped by a whitelist.
28  _whitelist_marked_as_skip = False
29
30  # A class-static cache to speed up EvaluateExpression().
31  # Keys are expressions (e.g. 'is_ios and lang == "fr"'). Values are tuples
32  # (code, variables_in_expr) where code is the compiled expression and can be
33  # directly eval'd, and variables_in_expr is the list of variable and method
34  # names used in the expression (e.g. ['is_ios', 'lang']).
35  eval_expr_cache = {}
36
37  def __init__(self):
38    self.children = []        # A list of child elements
39    self.mixed_content = []   # A list of u'' and/or child elements (this
40                              # duplicates 'children' but
41                              # is needed to preserve markup-type content).
42    self.name = u''           # The name of this element
43    self.attrs = {}           # The set of attributes (keys to values)
44    self.parent = None        # Our parent unless we are the root element.
45    self.uberclique = None    # Allows overriding uberclique for parts of tree
46
47  # This context handler allows you to write "with node:" and get a
48  # line identifying the offending node if an exception escapes from the body
49  # of the with statement.
50  def __enter__(self):
51    return self
52
53  def __exit__(self, exc_type, exc_value, traceback):
54    if exc_type is not None:
55      print u'Error processing node %s' % unicode(self)
56
57  def __iter__(self):
58    '''A preorder iteration through the tree that this node is the root of.'''
59    return self.Preorder()
60
61  def Preorder(self):
62    '''Generator that generates first this node, then the same generator for
63    any child nodes.'''
64    yield self
65    for child in self.children:
66      for iterchild in child.Preorder():
67        yield iterchild
68
69  def ActiveChildren(self):
70    '''Returns the children of this node that should be included in the current
71    configuration. Overridden by <if>.'''
72    return [node for node in self.children if not node.WhitelistMarkedAsSkip()]
73
74  def ActiveDescendants(self):
75    '''Yields the current node and all descendants that should be included in
76    the current configuration, in preorder.'''
77    yield self
78    for child in self.ActiveChildren():
79      for descendant in child.ActiveDescendants():
80        yield descendant
81
82  def GetRoot(self):
83    '''Returns the root Node in the tree this Node belongs to.'''
84    curr = self
85    while curr.parent:
86      curr = curr.parent
87    return curr
88
89    # TODO(joi) Use this (currently untested) optimization?:
90    #if hasattr(self, '_root'):
91    #  return self._root
92    #curr = self
93    #while curr.parent and not hasattr(curr, '_root'):
94    #  curr = curr.parent
95    #if curr.parent:
96    #  self._root = curr._root
97    #else:
98    #  self._root = curr
99    #return self._root
100
101  def StartParsing(self, name, parent):
102    '''Called at the start of parsing.
103
104    Args:
105      name: u'elementname'
106      parent: grit.node.base.Node or subclass or None
107    '''
108    assert isinstance(name, types.StringTypes)
109    assert not parent or isinstance(parent, Node)
110    self.name = name
111    self.parent = parent
112
113  def AddChild(self, child):
114    '''Adds a child to the list of children of this node, if it is a valid
115    child for the node.'''
116    assert isinstance(child, Node)
117    if (not self._IsValidChild(child) or
118        self._ContentType() == self._CONTENT_TYPE_CDATA):
119      explanation = 'invalid child %s for parent %s' % (str(child), self.name)
120      raise exception.UnexpectedChild(explanation)
121    self.children.append(child)
122    self.mixed_content.append(child)
123
124  def RemoveChild(self, child_id):
125    '''Removes the first node that has a "name" attribute which
126    matches "child_id" in the list of immediate children of
127    this node.
128
129    Args:
130      child_id: String identifying the child to be removed
131    '''
132    index = 0
133    # Safe not to copy since we only remove the first element found
134    for child in self.children:
135      name_attr = child.attrs['name']
136      if name_attr == child_id:
137        self.children.pop(index)
138        self.mixed_content.pop(index)
139        break
140      index += 1
141
142  def AppendContent(self, content):
143    '''Appends a chunk of text as content of this node.
144
145    Args:
146      content: u'hello'
147
148    Return:
149      None
150    '''
151    assert isinstance(content, types.StringTypes)
152    if self._ContentType() != self._CONTENT_TYPE_NONE:
153      self.mixed_content.append(content)
154    elif content.strip() != '':
155      raise exception.UnexpectedContent()
156
157  def HandleAttribute(self, attrib, value):
158    '''Informs the node of an attribute that was parsed out of the GRD file
159    for it.
160
161    Args:
162      attrib: 'name'
163      value: 'fooblat'
164
165    Return:
166      None
167    '''
168    assert isinstance(attrib, types.StringTypes)
169    assert isinstance(value, types.StringTypes)
170    if self._IsValidAttribute(attrib, value):
171      self.attrs[attrib] = value
172    else:
173      raise exception.UnexpectedAttribute(attrib)
174
175  def EndParsing(self):
176    '''Called at the end of parsing.'''
177
178    # TODO(joi) Rewrite this, it's extremely ugly!
179    if len(self.mixed_content):
180      if isinstance(self.mixed_content[0], types.StringTypes):
181        # Remove leading and trailing chunks of pure whitespace.
182        while (len(self.mixed_content) and
183               isinstance(self.mixed_content[0], types.StringTypes) and
184               self.mixed_content[0].strip() == ''):
185          self.mixed_content = self.mixed_content[1:]
186        # Strip leading and trailing whitespace from mixed content chunks
187        # at front and back.
188        if (len(self.mixed_content) and
189            isinstance(self.mixed_content[0], types.StringTypes)):
190          self.mixed_content[0] = self.mixed_content[0].lstrip()
191        # Remove leading and trailing ''' (used to demarcate whitespace)
192        if (len(self.mixed_content) and
193            isinstance(self.mixed_content[0], types.StringTypes)):
194          if self.mixed_content[0].startswith("'''"):
195            self.mixed_content[0] = self.mixed_content[0][3:]
196    if len(self.mixed_content):
197      if isinstance(self.mixed_content[-1], types.StringTypes):
198        # Same stuff all over again for the tail end.
199        while (len(self.mixed_content) and
200               isinstance(self.mixed_content[-1], types.StringTypes) and
201               self.mixed_content[-1].strip() == ''):
202          self.mixed_content = self.mixed_content[:-1]
203        if (len(self.mixed_content) and
204            isinstance(self.mixed_content[-1], types.StringTypes)):
205          self.mixed_content[-1] = self.mixed_content[-1].rstrip()
206        if (len(self.mixed_content) and
207            isinstance(self.mixed_content[-1], types.StringTypes)):
208          if self.mixed_content[-1].endswith("'''"):
209            self.mixed_content[-1] = self.mixed_content[-1][:-3]
210
211    # Check that all mandatory attributes are there.
212    for node_mandatt in self.MandatoryAttributes():
213      mandatt_list = []
214      if node_mandatt.find('|') >= 0:
215        mandatt_list = node_mandatt.split('|')
216      else:
217        mandatt_list.append(node_mandatt)
218
219      mandatt_option_found = False
220      for mandatt in mandatt_list:
221        assert mandatt not in self.DefaultAttributes().keys()
222        if mandatt in self.attrs:
223          if not mandatt_option_found:
224            mandatt_option_found = True
225          else:
226            raise exception.MutuallyExclusiveMandatoryAttribute(mandatt)
227
228      if not mandatt_option_found:
229        raise exception.MissingMandatoryAttribute(mandatt)
230
231    # Add default attributes if not specified in input file.
232    for defattr in self.DefaultAttributes():
233      if not defattr in self.attrs:
234        self.attrs[defattr] = self.DefaultAttributes()[defattr]
235
236  def GetCdata(self):
237    '''Returns all CDATA of this element, concatenated into a single
238    string.  Note that this ignores any elements embedded in CDATA.'''
239    return ''.join([c for c in self.mixed_content
240                    if isinstance(c, types.StringTypes)])
241
242  def __unicode__(self):
243    '''Returns this node and all nodes below it as an XML document in a Unicode
244    string.'''
245    header = u'<?xml version="1.0" encoding="UTF-8"?>\n'
246    return header + self.FormatXml()
247
248  def FormatXml(self, indent = u'', one_line = False):
249    '''Returns this node and all nodes below it as an XML
250    element in a Unicode string.  This differs from __unicode__ in that it does
251    not include the <?xml> stuff at the top of the string.  If one_line is true,
252    children and CDATA are layed out in a way that preserves internal
253    whitespace.
254    '''
255    assert isinstance(indent, types.StringTypes)
256
257    content_one_line = (one_line or
258                        self._ContentType() == self._CONTENT_TYPE_MIXED)
259    inside_content = self.ContentsAsXml(indent, content_one_line)
260
261    # Then the attributes for this node.
262    attribs = u''
263    default_attribs = self.DefaultAttributes()
264    for attrib, value in sorted(self.attrs.items()):
265      # Only print an attribute if it is other than the default value.
266      if attrib not in default_attribs or value != default_attribs[attrib]:
267        attribs += u' %s=%s' % (attrib, saxutils.quoteattr(value))
268
269    # Finally build the XML for our node and return it
270    if len(inside_content) > 0:
271      if one_line:
272        return u'<%s%s>%s</%s>' % (self.name, attribs, inside_content, self.name)
273      elif content_one_line:
274        return u'%s<%s%s>\n%s  %s\n%s</%s>' % (
275          indent, self.name, attribs,
276          indent, inside_content,
277          indent, self.name)
278      else:
279        return u'%s<%s%s>\n%s\n%s</%s>' % (
280          indent, self.name, attribs,
281          inside_content,
282          indent, self.name)
283    else:
284      return u'%s<%s%s />' % (indent, self.name, attribs)
285
286  def ContentsAsXml(self, indent, one_line):
287    '''Returns the contents of this node (CDATA and child elements) in XML
288    format.  If 'one_line' is true, the content will be laid out on one line.'''
289    assert isinstance(indent, types.StringTypes)
290
291    # Build the contents of the element.
292    inside_parts = []
293    last_item = None
294    for mixed_item in self.mixed_content:
295      if isinstance(mixed_item, Node):
296        inside_parts.append(mixed_item.FormatXml(indent + u'  ', one_line))
297        if not one_line:
298          inside_parts.append(u'\n')
299      else:
300        message = mixed_item
301        # If this is the first item and it starts with whitespace, we add
302        # the ''' delimiter.
303        if not last_item and message.lstrip() != message:
304          message = u"'''" + message
305        inside_parts.append(util.EncodeCdata(message))
306      last_item = mixed_item
307
308    # If there are only child nodes and no cdata, there will be a spurious
309    # trailing \n
310    if len(inside_parts) and inside_parts[-1] == '\n':
311      inside_parts = inside_parts[:-1]
312
313    # If the last item is a string (not a node) and ends with whitespace,
314    # we need to add the ''' delimiter.
315    if (isinstance(last_item, types.StringTypes) and
316        last_item.rstrip() != last_item):
317      inside_parts[-1] = inside_parts[-1] + u"'''"
318
319    return u''.join(inside_parts)
320
321  def SubstituteMessages(self, substituter):
322    '''Applies substitutions to all messages in the tree.
323
324    Called as a final step of RunGatherers.
325
326    Args:
327      substituter: a grit.util.Substituter object.
328    '''
329    for child in self.children:
330      child.SubstituteMessages(substituter)
331
332  def _IsValidChild(self, child):
333    '''Returns true if 'child' is a valid child of this node.
334    Overridden by subclasses.'''
335    return False
336
337  def _IsValidAttribute(self, name, value):
338    '''Returns true if 'name' is the name of a valid attribute of this element
339    and 'value' is a valid value for that attribute.  Overriden by
340    subclasses unless they have only mandatory attributes.'''
341    return (name in self.MandatoryAttributes() or
342            name in self.DefaultAttributes())
343
344  def _ContentType(self):
345    '''Returns the type of content this element can have.  Overridden by
346    subclasses.  The content type can be one of the _CONTENT_TYPE_XXX constants
347    above.'''
348    return self._CONTENT_TYPE_NONE
349
350  def MandatoryAttributes(self):
351    '''Returns a list of attribute names that are mandatory (non-optional)
352    on the current element. One can specify a list of
353    "mutually exclusive mandatory" attributes by specifying them as one
354    element in the list, separated by a "|" character.
355    '''
356    return []
357
358  def DefaultAttributes(self):
359    '''Returns a dictionary of attribute names that have defaults, mapped to
360    the default value.  Overridden by subclasses.'''
361    return {}
362
363  def GetCliques(self):
364    '''Returns all MessageClique objects belonging to this node.  Overridden
365    by subclasses.
366
367    Return:
368      [clique1, clique2] or []
369    '''
370    return []
371
372  def ToRealPath(self, path_from_basedir):
373    '''Returns a real path (which can be absolute or relative to the current
374    working directory), given a path that is relative to the base directory
375    set for the GRIT input file.
376
377    Args:
378      path_from_basedir: '..'
379
380    Return:
381      'resource'
382    '''
383    return util.normpath(os.path.join(self.GetRoot().GetBaseDir(),
384                                      os.path.expandvars(path_from_basedir)))
385
386  def GetInputPath(self):
387    '''Returns a path, relative to the base directory set for the grd file,
388    that points to the file the node refers to.
389    '''
390    # This implementation works for most nodes that have an input file.
391    return self.attrs['file']
392
393  def UberClique(self):
394    '''Returns the uberclique that should be used for messages originating in
395    a given node.  If the node itself has its uberclique set, that is what we
396    use, otherwise we search upwards until we find one.  If we do not find one
397    even at the root node, we set the root node's uberclique to a new
398    uberclique instance.
399    '''
400    node = self
401    while not node.uberclique and node.parent:
402      node = node.parent
403    if not node.uberclique:
404      node.uberclique = clique.UberClique()
405    return node.uberclique
406
407  def IsTranslateable(self):
408    '''Returns false if the node has contents that should not be translated,
409    otherwise returns false (even if the node has no contents).
410    '''
411    if not 'translateable' in self.attrs:
412      return True
413    else:
414      return self.attrs['translateable'] == 'true'
415
416  def GetNodeById(self, id):
417    '''Returns the node in the subtree parented by this node that has a 'name'
418    attribute matching 'id'.  Returns None if no such node is found.
419    '''
420    for node in self:
421      if 'name' in node.attrs and node.attrs['name'] == id:
422        return node
423    return None
424
425  def GetChildrenOfType(self, type):
426    '''Returns a list of all subnodes (recursing to all leaves) of this node
427    that are of the indicated type (or tuple of types).
428
429    Args:
430      type: A type you could use with isinstance().
431
432    Return:
433      A list, possibly empty.
434    '''
435    return [child for child in self if isinstance(child, type)]
436
437  def GetTextualIds(self):
438    '''Returns a list of the textual ids of this node.
439    '''
440    if 'name' in self.attrs:
441      return [self.attrs['name']]
442    return []
443
444  @classmethod
445  def EvaluateExpression(cls, expr, defs, target_platform, extra_variables={}):
446    '''Worker for EvaluateCondition (below) and conditions in XTB files.'''
447    if expr in cls.eval_expr_cache:
448      code, variables_in_expr = cls.eval_expr_cache[expr]
449    else:
450      # Get a list of all variable and method names used in the expression.
451      syntax_tree = ast.parse(expr, mode='eval')
452      variables_in_expr = [node.id for node in ast.walk(syntax_tree) if
453          isinstance(node, ast.Name) and node.id not in ('True', 'False')]
454      code = compile(syntax_tree, filename='<string>', mode='eval')
455      cls.eval_expr_cache[expr] = code, variables_in_expr
456
457    # Set values only for variables that are needed to eval the expression.
458    variable_map = {}
459    for name in variables_in_expr:
460      if name == 'os':
461        value = target_platform
462      elif name == 'defs':
463        value = defs
464
465      elif name == 'is_linux':
466        value = target_platform.startswith('linux')
467      elif name == 'is_macosx':
468        value = target_platform == 'darwin'
469      elif name == 'is_win':
470        value = target_platform in ('cygwin', 'win32')
471      elif name == 'is_android':
472        value = target_platform == 'android'
473      elif name == 'is_ios':
474        value = target_platform == 'ios'
475      elif name == 'is_bsd':
476        value = 'bsd' in target_platform
477      elif name == 'is_posix':
478        value = (target_platform in ('darwin', 'linux2', 'linux3', 'sunos5',
479                                     'android', 'ios')
480                 or 'bsd' in target_platform)
481
482      elif name == 'pp_ifdef':
483        def pp_ifdef(symbol):
484          return symbol in defs
485        value = pp_ifdef
486      elif name == 'pp_if':
487        def pp_if(symbol):
488          return defs.get(symbol, False)
489        value = pp_if
490
491      elif name in defs:
492        value = defs[name]
493      elif name in extra_variables:
494        value = extra_variables[name]
495      else:
496        # Undefined variables default to False.
497        value = False
498
499      variable_map[name] = value
500
501    eval_result = eval(code, {}, variable_map)
502    assert isinstance(eval_result, bool)
503    return eval_result
504
505  def EvaluateCondition(self, expr):
506    '''Returns true if and only if the Python expression 'expr' evaluates
507    to true.
508
509    The expression is given a few local variables:
510      - 'lang' is the language currently being output
511           (the 'lang' attribute of the <output> element).
512      - 'context' is the current output context
513           (the 'context' attribute of the <output> element).
514      - 'defs' is a map of C preprocessor-style symbol names to their values.
515      - 'os' is the current platform (likely 'linux2', 'win32' or 'darwin').
516      - 'pp_ifdef(symbol)' is a shorthand for "symbol in defs".
517      - 'pp_if(symbol)' is a shorthand for "symbol in defs and defs[symbol]".
518      - 'is_linux', 'is_macosx', 'is_win', 'is_posix' are true if 'os'
519           matches the given platform.
520    '''
521    root = self.GetRoot()
522    lang = getattr(root, 'output_language', '')
523    context = getattr(root, 'output_context', '')
524    defs = getattr(root, 'defines', {})
525    target_platform = getattr(root, 'target_platform', '')
526    extra_variables = {
527        'lang': lang,
528        'context': context,
529    }
530    return Node.EvaluateExpression(
531        expr, defs, target_platform, extra_variables)
532
533  def OnlyTheseTranslations(self, languages):
534    '''Turns off loading of translations for languages not in the provided list.
535
536    Attrs:
537      languages: ['fr', 'zh_cn']
538    '''
539    for node in self:
540      if (hasattr(node, 'IsTranslation') and
541          node.IsTranslation() and
542          node.GetLang() not in languages):
543        node.DisableLoading()
544
545  def FindBooleanAttribute(self, attr, default, skip_self):
546    '''Searches all ancestors of the current node for the nearest enclosing
547    definition of the given boolean attribute.
548
549    Args:
550      attr: 'fallback_to_english'
551      default: What to return if no node defines the attribute.
552      skip_self: Don't check the current node, only its parents.
553    '''
554    p = self.parent if skip_self else self
555    while p:
556      value = p.attrs.get(attr, 'default').lower()
557      if value != 'default':
558        return (value == 'true')
559      p = p.parent
560    return default
561
562  def PseudoIsAllowed(self):
563    '''Returns true if this node is allowed to use pseudo-translations.  This
564    is true by default, unless this node is within a <release> node that has
565    the allow_pseudo attribute set to false.
566    '''
567    return self.FindBooleanAttribute('allow_pseudo',
568                                     default=True, skip_self=True)
569
570  def ShouldFallbackToEnglish(self):
571    '''Returns true iff this node should fall back to English when
572    pseudotranslations are disabled and no translation is available for a
573    given message.
574    '''
575    return self.FindBooleanAttribute('fallback_to_english',
576                                     default=False, skip_self=True)
577
578  def WhitelistMarkedAsSkip(self):
579    '''Returns true if the node is marked to be skipped in the output by a
580    whitelist.
581    '''
582    return self._whitelist_marked_as_skip
583
584  def SetWhitelistMarkedAsSkip(self, mark_skipped):
585    '''Sets WhitelistMarkedAsSkip.
586    '''
587    self._whitelist_marked_as_skip = mark_skipped
588
589  def ExpandVariables(self):
590    '''Whether we need to expand variables on a given node.'''
591    return False
592
593  def IsResourceMapSource(self):
594    '''Whether this node is a resource map source.'''
595    return False
596
597  def GeneratesResourceMapEntry(self, output_all_resource_defines,
598                                is_active_descendant):
599    '''Whether this node should output a resource map entry.
600
601    Args:
602      output_all_resource_defines: The value of output_all_resource_defines for
603                                   the root node.
604      is_active_descendant: Whether the current node is an active descendant
605                            from the root node.'''
606    return False
607
608
class ContentNode(Node):
  '''Convenience base class for nodes that can have content, i.e. CDATA and
  child elements, possibly intermingled.'''

  def _ContentType(self):
    '''Reports mixed content for this kind of node.'''
    return self._CONTENT_TYPE_MIXED
613
614