rules.py revision a1401311d1ab56c4ed0a474bd38c108f75cb0cd9
1# Copyright 2014 The Chromium Authors. All rights reserved. 2# Use of this source code is governed by a BSD-style license that can be 3# found in the LICENSE file. 4 5"""This module defines the core structure of the classification rules. 6 7This module does NOT specify how the rules filter the data: this responsibility 8is of to the concrete classifiers, which have to override the Rule class herein 9defined and know how to do the math. 10 11This module, instead, defines the format of the rules and the way they are 12encoded and loaded (in a python-style dictionary file). 13Rules are organized in a tree, where the root is always represented by a 'Total' 14node, and the leaves are arbitrarily defined by the user, according to the 15following principles: 16- Order of siblings rules matter: what is caught by a rule will not be caught 17 by the next ones, but it is propagated to its children rules if any. 18- Every non-leaf node X gets an implicit extra-children named X-other. This 19 catch-all child catches everything (within the parent rule scope) that is 20 not caught by the other siblings. This is to guarantee that, when doing the 21 math (the aggregation), at any level, the sum of the values in the leaves 22 match the value of their parent. 23 24The format of a rule dictionary is the following: 25[ 26{ 27 'name': 'Name of the rule', 28 'filter-X': 'The embedder will know how to interpret this value and will use 29 it to filter the data' 30 'filter-Y': 'Idem' 31 children: [ 32 { 33 'name': 'Name of the sub-rule 1' 34 ... and so on recursively , 35 }, 36 ] 37}, 38] 39 40And a typical resulting rule tree looks like this: 41 +----------------------+ 42 | Total | 43 |----------------------| 44 +------------------+ Match all. +--------------------+ 45 | +----------+-----------+ | 46 | | | 47 +-----v-----+ +-----v-----+ +------v----+ 48 | Foo | | Bar | |Total-other| 49 |-----------| |-----------| |-----------| 50 |File: foo* | +---+File: bar* +-----+ | Match all | 51 +-----------+ | +-----------+ | +-----------+ 52 | | 53 +------v------+ +------v----+ 54 | Bar::Coffee | | Bar-other | 55 |-------------| |-----------| 56 |File: bar*cof| | Match all | 57 +-------------+ +-----------+ 58""" 59 60import ast 61 62 63def Load(content, rule_builder): 64 """Construct a rule tree from a python-style dict representation. 65 66 Args: 67 content: a string containing the dict (i.e. content of the rule file). 68 rule_builder: a method which takes two arguments (rule_name, filters_dict) 69 and returns a subclass of |Rule|. |filters_dict| is a dict of the keys 70 (filter-foo, filter-bar in the example above) for the rule node. 71 """ 72 rules_dict = ast.literal_eval(content) 73 root = Rule('Total') 74 _MakeRuleNodeFromDictNode(root, rules_dict, rule_builder) 75 return root 76 77 78class Rule(object): 79 """ An abstract class representing a rule node in the rules tree. 80 81 Embedders must override the Match method when deriving this class. 82 """ 83 84 def __init__(self, name): 85 self.name = name 86 self.children = [] 87 88 def Match(self, _): # pylint: disable=R0201 89 """ The rationale of this default implementation is modeling the root 90 ('Total') and the catch-all (*-other) rules that every |RuleTree| must have, 91 regardless of the embedder-specific children rules. This is to guarantee 92 that the totals match at any level of the tree. 93 """ 94 return True 95 96 def AppendChild(self, child_rule): 97 assert(isinstance(child_rule, Rule)) 98 duplicates = filter(lambda x: x.name == child_rule.name, self.children) 99 assert(not duplicates), 'Duplicate rule ' + child_rule.name 100 self.children.append(child_rule) 101 102 103def _MakeRuleNodeFromDictNode(rule_node, dict_nodes, rule_builder): 104 """Recursive rule tree builder for traversing the rule dict.""" 105 for dict_node in dict_nodes: 106 assert('name' in dict_node) 107 # Extract the filter keys (e.g., mmap-file, mmap-prot) that will be passed 108 # to the |rule_builder| 109 filter_keys = set(dict_node.keys()) - set(('name', 'children')) 110 filters = dict((k, dict_node[k]) for k in filter_keys) 111 child_rule = rule_builder(dict_node['name'], filters) 112 rule_node.AppendChild(child_rule) 113 dict_children = dict_node.get('children', {}) 114 _MakeRuleNodeFromDictNode(child_rule, dict_children, rule_builder) 115 116 # If the rule_node isn't a leaf, add the 'name-other' catch-all sibling to 117 # catch all the entries that matched this node but none of its children. 118 if len(rule_node.children): 119 rule_node.AppendChild(Rule(rule_node.name + '-other'))