1# Copyright 2014 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""This module defines the core structure of the classification rules.
6
7This module does NOT specify how the rules filter the data: this responsibility
8is of to the concrete classifiers, which have to override the Rule class herein
9defined and know how to do the math.
10
11This module, instead, defines the format of the rules and the way they are
12encoded and loaded (in a python-style dictionary file).
13Rules are organized in a tree, where the root is always represented by a 'Total'
14node, and the leaves are arbitrarily defined by the user, according to the
15following principles:
16- Order of siblings rules matter: what is caught by a rule will not be caught
17  by the next ones, but it is propagated to its children rules if any.
18- Every non-leaf node X gets an implicit extra-children named X-other. This
19  catch-all child catches everything (within the parent rule scope) that is
20  not caught by the other siblings. This is to guarantee that, when doing the
21  math (the aggregation), at any level, the sum of the values in the leaves
22  match the value of their parent.
23
24The format of a rule dictionary is the following:
25[
26{
27  'name':       'Name of the rule',
28  'filter-X':   'The embedder will know how to interpret this value and will use
29                 it to filter the data'
30  'filter-Y':   'Idem'
31  children: [
32    {
33      'name':   'Name of the sub-rule 1'
34      ... and so on recursively ,
35    },
36  ]
37},
38]
39
40And a typical resulting rule tree looks like this:
41                          +----------------------+
42                          |        Total         |
43                          |----------------------|
44       +------------------+      Match all.      +--------------------+
45       |                  +----------+-----------+                    |
46       |                             |                                |
47 +-----v-----+                 +-----v-----+                   +------v----+
48 |    Foo    |                 |    Bar    |                   |Total-other|
49 |-----------|                 |-----------|                   |-----------|
50 |File: foo* |             +---+File: bar* +-----+             | Match all |
51 +-----------+             |   +-----------+     |             +-----------+
52                           |                     |
53                    +------v------+       +------v----+
54                    | Bar::Coffee |       | Bar-other |
55                    |-------------|       |-----------|
56                    |File: bar*cof|       | Match all |
57                    +-------------+       +-----------+
58"""
59
60import ast
61
62
63def Load(content, rule_builder):
64  """Construct a rule tree from a python-style dict representation.
65
66  Args:
67    content: a string containing the dict (i.e. content of the rule file).
68    rule_builder: a method which takes two arguments (rule_name, filters_dict)
69        and returns a subclass of |Rule|. |filters_dict| is a dict of the keys
70        (filter-foo, filter-bar in the example above) for the rule node.
71  """
72  rules_dict = ast.literal_eval(content)
73  root = Rule('Total')
74  _MakeRuleNodeFromDictNode(root, rules_dict, rule_builder)
75  return root
76
77
78class Rule(object):
79  """ An abstract class representing a rule node in the rules tree.
80
81  Embedders must override the Match method when deriving this class.
82  """
83
84  def __init__(self, name):
85    self.name = name
86    self.children = []
87
88  def Match(self, _):  # pylint: disable=R0201
89    """ The rationale of this default implementation is modeling the root
90    ('Total') and the catch-all (*-other) rules that every |RuleTree| must have,
91    regardless of the embedder-specific children rules. This is to guarantee
92    that the totals match at any level of the tree.
93    """
94    return True
95
96  def AppendChild(self, child_rule):
97    assert(isinstance(child_rule, Rule))
98    duplicates = filter(lambda x: x.name == child_rule.name, self.children)
99    assert(not duplicates), 'Duplicate rule ' + child_rule.name
100    self.children.append(child_rule)
101
102
103def _MakeRuleNodeFromDictNode(rule_node, dict_nodes, rule_builder):
104  """Recursive rule tree builder for traversing the rule dict."""
105  for dict_node in dict_nodes:
106    assert('name' in dict_node)
107    # Extract the filter keys (e.g., mmap-file, mmap-prot) that will be passed
108    # to the |rule_builder|
109    filter_keys = set(dict_node.keys()) - set(('name', 'children'))
110    filters = dict((k, dict_node[k]) for k in filter_keys)
111    child_rule = rule_builder(dict_node['name'], filters)
112    rule_node.AppendChild(child_rule)
113    dict_children = dict_node.get('children', {})
114    _MakeRuleNodeFromDictNode(child_rule, dict_children, rule_builder)
115
116    # If the rule_node isn't a leaf, add the 'name-other' catch-all sibling to
117    # catch all the entries that matched this node but none of its children.
118  if len(rule_node.children):
119    rule_node.AppendChild(Rule(rule_node.name + '-other'))