# Copyright (c) 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Query objects for filtering trace handles by their metadata.

The query classes in this module are designed to be either evaluable directly
from python, or be convertible to an Appengine datastore query. As a result,
exercise discretion when adding features to them.
"""
import ast
import datetime
import operator
import re


def _InOp(a, b):
  """Implements the ' IN ' operator: returns True if a is contained in b."""
  return a in b


class _ReadField(object):
  """Value node that reads a named field out of a metadata mapping."""

  def __init__(self, fieldName):
    self.fieldName = fieldName

  def AsQueryString(self):
    """Returns the bare field name, as it appears in a query string."""
    return self.fieldName

  def Eval(self, metadata):
    """Returns metadata[fieldName]; a missing key propagates as KeyError."""
    return metadata[self.fieldName]


class _Constant(object):
  """Value node that wraps a literal value.

  The wrapped value is kept untouched in self.constant. It may be a scalar, a
  date/datetime, or a list. List items may be either raw python values (from a
  list literal) or value nodes (from the tuple syntax in _StringToValue); both
  forms are accepted by AsQueryString and Eval.
  """

  def __init__(self, constant):
    self.constant = constant

  def AsQueryString(self):
    """Renders the constant so that _StringToValue can parse it back."""
    if isinstance(self.constant, list):
      # Tuples. Raw items are wrapped on the fly so they can render
      # themselves.
      return '(%s)' % ','.join(
          [_AsValue(c).AsQueryString() for c in self.constant])
    elif isinstance(self.constant, (datetime.datetime, datetime.date)):
      # Dates.
      return self.constant.strftime("Date(%Y-%m-%d %H:%M:%S.%f)")
    elif isinstance(self.constant, str):
      # Strings need quotes. repr() also escapes embedded quotes and
      # backslashes, so the result parses back to the same string.
      return repr(self.constant)
    # Everything else that the literal parser in _StringToValue produced.
    return str(self.constant)

  def Eval(self, metadata):
    """Returns the wrapped value; list items are evaluated to plain values."""
    if isinstance(self.constant, list):
      return [_AsValue(c).Eval(metadata) for c in self.constant]
    return self.constant


def _AsValue(item):
  """Returns item if it is already a value node, else wraps it as a constant."""
  if isinstance(item, (_ReadField, _Constant)):
    return item
  return _Constant(item)


def _StringToValue(s):
  """Parses one operand of a filter expression into a value node.

  Tried in order: python literal, bareword (field name), parenthesized
  comma-separated tuple, Date(YYYY-MM-DD HH:MM:SS.ffffff).

  Literals are parsed with ast.literal_eval rather than eval: query strings
  may come from outside, and eval additionally resolved builtin names, so a
  field called e.g. 'id' or 'type' was mistaken for a constant. Arbitrary
  expressions (such as '1+2') are intentionally no longer evaluated.

  Args:
    s: the operand text, already stripped of surrounding whitespace.

  Returns:
    A _Constant or _ReadField.

  Raises:
    NotImplementedError: if s matches none of the supported forms.
    ValueError: if a Date(...) payload does not match the expected format.
  """
  try:
    return _Constant(ast.literal_eval(s))
  except (ValueError, SyntaxError, TypeError):
    # Not a literal; fall through to the other syntaxes.
    pass

  # Barewords are assumed to be fields.
  m = re.match(r'([a-zA-Z0-9_]+)$', s)
  if m:
    return _ReadField(m.group(1))

  # Tuples. Split the text *inside* the parentheses; splitting the whole
  # match would hand the same string back to this function forever.
  m = re.match(r'\((.+)\)$', s)
  if m:
    items = re.split(r',\s*', m.group(1))
    return _Constant([_StringToValue(x.strip()) for x in items])

  # Dates.
  m = re.match(r'Date\((.+)\)$', s)
  if m:
    d = datetime.datetime.strptime(m.group(1), "%Y-%m-%d %H:%M:%S.%f")
    return _Constant(d)

  # Dunno!
  raise NotImplementedError()


_OPERATORS = {
    '=': operator.eq,
    '<': operator.lt,
    '<=': operator.le,
    '>': operator.gt,
    '>=': operator.ge,
    '!=': operator.ne,
    ' IN ': _InOp  # Spaces matter for proper parsing.
}


def _OperatorToString(op):
  """Returns the query token for an operator function from _OPERATORS.

  Raises:
    NotImplementedError: if op is not one of the known operator functions.
  """
  for k, v in _OPERATORS.items():
    if op == v:
      return k
  raise NotImplementedError()


# Since we use find(token) in our actual tokenizing function,
# we need to search for the longest tokens first so that '<=' is searched
# for first, before '<', for instance.
_TOKEN_SEARCH_ORDER = sorted(_OPERATORS, key=len, reverse=True)


class Filter(object):
  """A single '<field> <operator> <constant>' comparison."""

  def __init__(self, a, op, b):
    self.a = a
    self.op = op
    self.b = b

  def Eval(self, metadata):
    """Applies the operator to both operands evaluated against metadata."""
    return self.op(self.a.Eval(metadata),
                   self.b.Eval(metadata))

  def AsQueryString(self):
    """Renders the filter in the syntax accepted by FromString."""
    # strip() the operator in case we get " IN ", just so that it's not
    # double spaced.
    return '%s %s %s' % (self.a.AsQueryString(),
                         _OperatorToString(self.op).strip(),
                         self.b.AsQueryString())

  @staticmethod
  def FromString(s):
    """Parses a string such as "name = 'foo'" into a Filter.

    The operator is the one that occurs earliest in s. Because the left side
    must be a bare field name, the earliest operator is the real one even when
    the right-hand literal contains operator characters.

    Raises:
      Exception: if no operator is found, or the operands are not a field on
        the left and a constant on the right.
    """
    found_op_key = None
    found_op_key_idx = -1
    for op_key in _TOKEN_SEARCH_ORDER:
      i = s.find(op_key)
      # Tokens are visited longest first and only a strictly earlier
      # position wins, so '<=' beats '<' when both start at the same index.
      if i != -1 and (found_op_key_idx == -1 or i < found_op_key_idx):
        found_op_key_idx = i
        found_op_key = op_key

    if found_op_key_idx == -1:
      raise Exception('Expected: operator')

    lvalue = s[:found_op_key_idx].strip()
    rvalue = s[found_op_key_idx + len(found_op_key):].strip()

    lvalue = _StringToValue(lvalue)
    rvalue = _StringToValue(rvalue)

    if not isinstance(lvalue, _ReadField) or not isinstance(rvalue, _Constant):
      # GQL Syntax needs the property on the left and the value on the right.
      # https://cloud.google.com/appengine/docs/python/datastore/gqlreference
      raise Exception('Expected lvalue field and rvalue constant')

    return Filter(lvalue,
                  _OPERATORS[found_op_key],
                  rvalue)


class CorpusQuery(object):
  """A conjunction of Filters plus an optional cap on trace handles.

  Attributes are public: `filters` is the list of Filter objects that must all
  match, and `max_trace_handles` is None for "no limit" or an integer cap.
  """

  def __init__(self):
    self.max_trace_handles = None
    self.filters = []

  def AsGQLWhereClause(self):
    """Builds a GQL 'WHERE ... LIMIT n' fragment and its positional arguments.

    Returns:
      A (gql, args) tuple. Constants are referenced from gql as :1, :2, ...
      and their plain python values are returned in args, in that order.

    Raises:
      KeyError: if a tuple constant contains a field reference, since there is
        no metadata here to resolve it against.
    """
    args = []

    def Operand(value):
      # Constants need to be passed back as positional arguments to avoid
      # potential gql injection problems.
      if isinstance(value, _Constant):
        # Eval unwraps list items into plain values; constants ignore the
        # metadata argument.
        args.append(value.Eval({}))
        return ':%d' % len(args)
      return value.fieldName

    filter_strings = []
    for f in self.filters:
      # Left operand first, so that placeholders are numbered left to right.
      a_string = Operand(f.a)
      b_string = Operand(f.b)
      filter_strings.append('%s %s %s' % (a_string,
                                          _OperatorToString(f.op).strip(),
                                          b_string))

    gql = ''
    if filter_strings:
      gql = 'WHERE ' + ' AND '.join(filter_strings)
    # Compare against None so that a limit of 0 is kept, matching Eval.
    if self.max_trace_handles is not None:
      gql += ' LIMIT %d' % self.max_trace_handles
    # strip() the final GQL in case it's just " LIMIT 1", just so that it looks
    # a bit nicer.
    return (gql.strip(), args)

  def AsQueryString(self):
    """Renders the query in the syntax accepted by FromString."""
    filter_strings = [f.AsQueryString() for f in self.filters]
    # Compare against None so that a limit of 0 survives a round trip.
    if self.max_trace_handles is not None:
      filter_strings.append('MAX_TRACE_HANDLES=%d' % self.max_trace_handles)
    return ' AND '.join(filter_strings)

  @staticmethod
  def FromString(filterString):
    """Parses ' AND '-joined filter expressions into a CorpusQuery.

    This follows the same filter rules as GQL. The strings 'True' and ''
    yield a query that matches everything. An expression of the form
    MAX_TRACE_HANDLES=<n> sets the handle cap instead of adding a filter.
    """
    if filterString == 'True' or filterString == '':
      return CorpusQuery()

    q = CorpusQuery()
    for expr in filterString.split(' AND '):
      m = re.match(r'MAX_TRACE_HANDLES\s*=\s*(\d+)', expr)
      if m:
        q.max_trace_handles = int(m.group(1))
        continue

      q.filters.append(Filter.FromString(expr))

    return q

  def Eval(self, metadata, num_trace_handles_so_far=0):
    """Returns True if metadata passes every filter and the cap is not hit.

    Args:
      metadata: mapping from field name to value for one trace handle.
      num_trace_handles_so_far: number of handles already accepted; once it
        reaches max_trace_handles nothing further matches.
    """
    if self.max_trace_handles is not None:
      if num_trace_handles_so_far >= self.max_trace_handles:
        return False

    for flt in self.filters:
      if not flt.Eval(metadata):
        return False

    return True