1# Copyright (c) 2015 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
"""GetTraceHandlesQuery is designed to be either evaluable directly
from Python, or be convertible to an Appengine datastore query. As a result,
exercise discretion when adding features to this class.
"""
import ast
import datetime
import operator
import re
11
12
13def _InOp(a, b):
14  return a in b
15
16
17class _ReadField(object):
18
19  def __init__(self, fieldName):
20    self.fieldName = fieldName
21
22  def AsQueryString(self):
23    return self.fieldName
24
25  def Eval(self, metadata):
26    return metadata[self.fieldName]
27
28
29class _Constant(object):
30
31  def __init__(self, constant):
32    self.constant = constant
33
34  def AsQueryString(self):
35    if isinstance(self.constant, list):
36      # Tuples
37      return '(%s)' % ','.join([c.AsQueryString() for c in self.constant])
38    elif isinstance(self.constant, (datetime.datetime, datetime.date)):
39      # Dates.
40      return self.constant.strftime("Date(%Y-%m-%d %H:%M:%S.%f)")
41    elif isinstance(self.constant, str):
42      # Strings need quotes.
43      return "'%s'" % self.constant
44    # Everything else from the eval() statement below.
45    return str(self.constant)
46
47  def Eval(self, metadata):
48    # pylint: disable=unused-argument
49    return self.constant
50
51
def _StringToValue(s):
  """Parses one operand of a filter expression into an expression node.

  Args:
    s: The operand text.

  Returns:
    A _Constant for Python literals, "(a, b, ...)" tuples and "Date(...)"
    values, or a _ReadField for a bare identifier.

  Raises:
    NotImplementedError: If the operand matches none of the known forms.
    ValueError: If a "Date(...)" operand does not match the expected
        "%Y-%m-%d %H:%M:%S.%f" layout.
  """
  # Only literals are accepted here. eval() used to be called instead, which
  # is unsafe on query strings and also resolved barewords that happen to be
  # builtin names (e.g. "type" or "id") to the builtin object, so those
  # fields were misparsed as constants.
  try:
    return _Constant(ast.literal_eval(s))
  except (ValueError, SyntaxError, TypeError):
    pass

  # Barewords are assumed to be fields.
  m = re.match(r'([a-zA-Z0-9_]+)$', s)
  if m:
    return _ReadField(m.group(1))

  # Tuples whose items are not all literals. The parentheses are dropped
  # before splitting so each item is parsed on its own; nested tuples are not
  # supported by this simple comma split.
  m = re.match(r'\((.+)\)$', s)
  if m:
    items = re.split(r',\s*', m.group(1))
    return _Constant([_StringToValue(x.strip()) for x in items])

  # Dates.
  m = re.match(r'Date\((.+)\)$', s)
  if m:
    d = datetime.datetime.strptime(m.group(1), "%Y-%m-%d %H:%M:%S.%f")
    return _Constant(d)

  # Dunno!
  raise NotImplementedError()
78
# Maps every operator token of the filter language to the two-argument
# callable that implements it.
_OPERATORS = dict((
    ('=', operator.eq),
    ('<', operator.lt),
    ('<=', operator.le),
    ('>', operator.gt),
    ('>=', operator.ge),
    ('!=', operator.ne),
    (' IN ', _InOp),  # Spaces matter for proper parsing.
))
88
89
def _OperatorToString(op):
  """Returns the token in _OPERATORS whose implementation is |op|.

  Args:
    op: One of the callables stored as a value in _OPERATORS.

  Raises:
    NotImplementedError: If |op| is not a value of _OPERATORS.
  """
  # items() instead of iteritems(): the latter does not exist on Python 3,
  # and the table is tiny so the extra list on Python 2 is irrelevant.
  for token, impl in _OPERATORS.items():
    if op == impl:
      return token
  raise NotImplementedError()
95
96
# Since we use find(token) in our actual tokenizing function,
# we need to search for the longest tokens first so that '<=' is searched
# for first, before '<', for instance.
# A key function is used rather than a cmp function because list.sort() no
# longer accepts a comparator on Python 3; both forms are stable, so tokens
# of equal length keep their relative order.
_TOKEN_SEARCH_ORDER = sorted(_OPERATORS.keys(), key=len, reverse=True)
102
103
class Filter(object):
  """A single "<field> <operator> <constant>" comparison."""

  def __init__(self, a, op, b):
    # a and b are expression nodes exposing Eval() and AsQueryString(); op is
    # the two-argument callable applied to their evaluated values.
    self.a = a
    self.op = op
    self.b = b

  def Eval(self, metadata):
    """Returns op(a, b) with both operands evaluated against |metadata|."""
    return self.op(self.a.Eval(metadata),
                   self.b.Eval(metadata))

  def AsQueryString(self):
    """Returns the filter spelled as "<a> <operator> <b>"."""
    # strip() the operator in case we get " IN ", just so that it's not
    # double spaced.
    return '%s %s %s' % (self.a.AsQueryString(),
                         _OperatorToString(self.op).strip(),
                         self.b.AsQueryString())

  @staticmethod
  def FromString(s):
    """Parses "<field> <operator> <constant>" into a Filter.

    Args:
      s: Text of one filter expression.

    Returns:
      A Filter whose left side is a _ReadField and right side a _Constant.

    Raises:
      Exception: If no operator is present, or the operands are not a field
          on the left and a constant on the right.
    """
    # The field name on the left cannot contain operator characters, so the
    # real operator is the leftmost one in the string. Picking the leftmost
    # match (longest token on ties, so '<=' beats '<') keeps operator-like
    # text inside the constant, e.g. name = 'a<=b', from splitting the
    # expression in the wrong place.
    best = None  # (index, -token length, token)
    for op_key in _TOKEN_SEARCH_ORDER:
      i = s.find(op_key)
      if i == -1:
        continue
      candidate = (i, -len(op_key), op_key)
      if best is None or candidate < best:
        best = candidate

    if best is None:
      raise Exception('Expected: operator')

    found_op_key_idx, _, found_op_key = best
    lvalue = _StringToValue(s[:found_op_key_idx].strip())
    rvalue = _StringToValue(s[found_op_key_idx + len(found_op_key):].strip())

    if not isinstance(lvalue, _ReadField) or not isinstance(rvalue, _Constant):
      # GQL Syntax needs the property on the left and the value on the right.
      # https://cloud.google.com/appengine/docs/python/datastore/gqlreference
      raise Exception('Expected lvalue field and rvalue constant')

    return Filter(lvalue,
                  _OPERATORS[found_op_key],
                  rvalue)
153
154
class CorpusQuery(object):
  """A conjunction of Filters plus an optional cap on trace handles.

  The query can be evaluated directly against a metadata mapping with Eval(),
  rendered as a GQL WHERE/LIMIT clause, or round-tripped through its own
  string form with AsQueryString()/FromString().
  """

  def __init__(self):
    # None means "no limit". 0 is a real limit and is treated as such
    # everywhere, so all three renderings of the query agree.
    self.max_trace_handles = None
    self.filters = []

  @staticmethod
  def _GQLOperand(value, args):
    """Returns the GQL text for one side of a filter.

    Constants need to be passed back as positional arguments to avoid
    potential gql injection problems, so they are appended to |args| and
    referenced by their 1-based position. Fields are emitted by name.
    """
    if isinstance(value, _Constant):
      args.append(value.constant)
      return ':%d' % len(args)
    return value.fieldName

  def AsGQLWhereClause(self):
    """Returns (gql, args): the WHERE/LIMIT text and its positional args."""
    args = []
    clauses = []
    for f in self.filters:
      # Left operand first so positional argument numbering is stable.
      a_string = self._GQLOperand(f.a, args)
      b_string = self._GQLOperand(f.b, args)
      clauses.append('%s %s %s' % (a_string,
                                   _OperatorToString(f.op).strip(),
                                   b_string))
    gql = ''
    if clauses:
      gql = 'WHERE ' + ' AND '.join(clauses)
    if self.max_trace_handles is not None:
      gql += ' LIMIT %d' % self.max_trace_handles
    # strip() the final GQL in case it's just " LIMIT 1", just so that it looks
    # a bit nicer.
    return (gql.strip(), args)

  def AsQueryString(self):
    """Returns the query in the form accepted by FromString()."""
    filter_strings = [f.AsQueryString() for f in self.filters]
    if self.max_trace_handles is not None:
      filter_strings.append('MAX_TRACE_HANDLES=%d' % self.max_trace_handles)
    return ' AND '.join(filter_strings)

  @staticmethod
  def FromString(filterString):
    """Parses ' AND '-joined expressions; this follows GQL's filter rules.

    'True' and the empty string both produce a query with no filters and no
    limit. A "MAX_TRACE_HANDLES=<n>" expression sets the limit; every other
    expression is parsed with Filter.FromString.
    """
    if filterString == 'True' or filterString == '':
      return CorpusQuery()

    q = CorpusQuery()
    for expr in filterString.split(' AND '):
      # Leading whitespace is tolerated so that a doubled space after AND
      # does not turn the limit into a filter on a field of the same name.
      m = re.match(r'\s*MAX_TRACE_HANDLES\s*=\s*(\d+)', expr)
      if m:
        q.max_trace_handles = int(m.group(1))
        continue

      q.filters.append(Filter.FromString(expr))

    return q

  def Eval(self, metadata, num_trace_handles_so_far=0):
    """Returns True if |metadata| passes every filter and the limit allows it.

    Args:
      metadata: Mapping handed to each filter's Eval().
      num_trace_handles_so_far: Number of handles already accepted; once it
          reaches max_trace_handles nothing further matches.
    """
    if (self.max_trace_handles is not None and
        num_trace_handles_so_far >= self.max_trace_handles):
      return False
    return all(flt.Eval(metadata) for flt in self.filters)
229