1#!/usr/bin/env python
2# Copyright 2010 Google Inc. All Rights Reserved.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#     http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16"""Represents a lexographic range of namespaces."""
17
18
19
20# pylint: disable=g-bad-name
21
22__all__ = [
23    'NAMESPACE_CHARACTERS',
24    'MAX_NAMESPACE_LENGTH',
25    'MAX_NAMESPACE',
26    'MIN_NAMESPACE',
27    'NAMESPACE_BATCH_SIZE',
28    'NamespaceRange',
29    'get_namespace_keys',
30]
31
32import itertools
33import string
34
35from google.appengine.api import datastore
36from google.appengine.ext import db
37from google.appengine.ext.db import metadata
38
39NAMESPACE_CHARACTERS = ''.join(sorted(string.digits +
40                                      string.lowercase +
41                                      string.uppercase +
42                                      '._-'))
43MAX_NAMESPACE_LENGTH = 100
44MIN_NAMESPACE = ''
45NAMESPACE_BATCH_SIZE = 50
46
47
48def _setup_constants(alphabet=NAMESPACE_CHARACTERS,
49                     max_length=MAX_NAMESPACE_LENGTH,
50                     batch_size=NAMESPACE_BATCH_SIZE):
51  """Calculate derived constant values. Only useful for testing."""
52
53  global NAMESPACE_CHARACTERS
54  global MAX_NAMESPACE_LENGTH
55  # pylint: disable=global-variable-undefined
56  global MAX_NAMESPACE
57  global _LEX_DISTANCE
58  global NAMESPACE_BATCH_SIZE
59
60  NAMESPACE_CHARACTERS = alphabet
61  MAX_NAMESPACE_LENGTH = max_length
62  MAX_NAMESPACE = NAMESPACE_CHARACTERS[-1] * MAX_NAMESPACE_LENGTH
63  NAMESPACE_BATCH_SIZE = batch_size
64
65  # _LEX_DISTANCE will contain the lexical distance between two adjacent
66  # characters in NAMESPACE_CHARACTERS at each character index. This is used
67  # to calculate the ordinal for each string. Example:
68  # NAMESPACE_CHARACTERS = 'ab'
69  # MAX_NAMESPACE_LENGTH = 3
70  # _LEX_DISTANCE = [1, 3, 7]
71  # ''    => 0
72  # 'a'   => 1
73  # 'aa'  => 2
74  # 'aaa' => 3
75  # 'aab' => 4 - Distance between 'aaa' and 'aab' is 1.
76  # 'ab'  => 5 - Distance between 'aa' and 'ab' is 3.
77  # 'aba' => 6
78  # 'abb' => 7
79  # 'b'   => 8 - Distance between 'a' and 'b' is 7.
80  # 'ba'  => 9
81  # 'baa' => 10
82  # 'bab' => 11
83  # ...
84  # _namespace_to_ord('bab') = (1 * 7 + 1) + (0 * 3 + 1) + (1 * 1 + 1) = 11
85  _LEX_DISTANCE = [1]
86  for i in range(1, MAX_NAMESPACE_LENGTH):
87    _LEX_DISTANCE.append(
88        _LEX_DISTANCE[i-1] * len(NAMESPACE_CHARACTERS) + 1)
89  # pylint: disable=undefined-loop-variable
90  del i
91_setup_constants()
92
93
94def _ord_to_namespace(n, _max_length=None):
95  """Convert a namespace ordinal to a namespace string.
96
97  Converts an int, representing the sequence number of a namespace ordered
98  lexographically, into a namespace string.
99
100  >>> _ord_to_namespace(0)
101  ''
102  >>> _ord_to_namespace(1)
103  '-'
104  >>> _ord_to_namespace(2)
105  '--'
106  >>> _ord_to_namespace(3)
107  '---'
108
109  Args:
110    n: A number representing the lexographical ordering of a namespace.
111    _max_length: The maximum namespace length.
112  Returns:
113    A string representing the nth namespace in lexographical order.
114  """
115  if _max_length is None:
116    _max_length = MAX_NAMESPACE_LENGTH
117
118  length = _LEX_DISTANCE[_max_length - 1]
119  if n == 0:
120    return ''
121  n -= 1
122  return (NAMESPACE_CHARACTERS[n / length] +
123          _ord_to_namespace(n % length, _max_length - 1))
124
125
126def _namespace_to_ord(namespace):
127  """Converts a namespace string into an int representing its lexographic order.
128
129  >>> _namespace_to_ord('')
130  ''
131  >>> _namespace_to_ord('_')
132  1
133  >>> _namespace_to_ord('__')
134  2
135
136  Args:
137    namespace: A namespace string.
138
139  Returns:
140    An int representing the lexographical order of the given namespace string.
141  """
142  n = 0
143  for i, c in enumerate(namespace):
144    n += (_LEX_DISTANCE[MAX_NAMESPACE_LENGTH - i- 1] *
145          NAMESPACE_CHARACTERS.index(c)
146          + 1)
147  return n
148
149
150def _key_for_namespace(namespace, app):
151  """Return the __namespace__ key for a namespace.
152
153  Args:
154    namespace: The namespace whose key is requested.
155    app: The id of the application that the key belongs to.
156
157  Returns:
158    A db.Key representing the namespace.
159  """
160  if namespace:
161    return db.Key.from_path(metadata.Namespace.KIND_NAME,
162                            namespace,
163                            _app=app)
164  else:
165    return db.Key.from_path(metadata.Namespace.KIND_NAME,
166                            metadata.Namespace.EMPTY_NAMESPACE_ID,
167                            _app=app)
168
169
170class NamespaceRange(object):
171  """An inclusive lexographical range of namespaces.
172
173  This class is immutable.
174  """
175
176  def __init__(self,
177               namespace_start=None,
178               namespace_end=None,
179               _app=None):
180    # pylint: disable=g-doc-args
181    """Initializes a NamespaceRange instance.
182
183    Args:
184      namespace_start: A string representing the start of the namespace range.
185          namespace_start is included in the range. If namespace_start is None
186          then the lexographically first namespace is used.
187      namespace_end: A string representing the end of the namespace range.
188          namespace_end is included in the range and must be >= namespace_start.
189          If namespace_end is None then the lexographically last namespace is
190          used.
191
192    Raises:
193      ValueError: if namespace_start > namespace_end.
194    """
195    if namespace_start is None:
196      namespace_start = MIN_NAMESPACE
197
198    if namespace_end is None:
199      namespace_end = MAX_NAMESPACE
200
201    if namespace_start > namespace_end:
202      raise ValueError('namespace_start (%r) > namespace_end (%r)' % (
203          namespace_start, namespace_end))
204    self.__namespace_start = namespace_start
205    self.__namespace_end = namespace_end
206    self.__app = _app
207
208  @property
209  def app(self):
210    return self.__app
211
212  @property
213  def namespace_start(self):
214    return self.__namespace_start
215
216  @property
217  def namespace_end(self):
218    return self.__namespace_end
219
220  @property
221  def is_single_namespace(self):
222    """True if the namespace range only includes a single namespace."""
223    return self.namespace_start == self.namespace_end
224
225  def split_range(self):
226    """Splits the NamespaceRange into two nearly equal-sized ranges.
227
228    Returns:
229      If this NamespaceRange contains a single namespace then a list containing
230      this NamespaceRange is returned. Otherwise a two-element list containing
231      two NamespaceRanges whose total range is identical to this
232      NamespaceRange's is returned.
233    """
234    if self.is_single_namespace:
235      return [self]
236
237    mid_point = (_namespace_to_ord(self.namespace_start) +
238                 _namespace_to_ord(self.namespace_end)) // 2
239
240    return [NamespaceRange(self.namespace_start,
241                           _ord_to_namespace(mid_point),
242                           _app=self.app),
243            NamespaceRange(_ord_to_namespace(mid_point+1),
244                           self.namespace_end,
245                           _app=self.app)]
246
247  def __copy__(self):
248    return self.__class__(self.__namespace_start,
249                          self.__namespace_end,
250                          self.__app)
251
252  def __eq__(self, o):
253    return (self.namespace_start == o.namespace_start and
254            self.namespace_end == o.namespace_end)
255
256  def __hash__(self):
257    return hash((self.namespace_start, self.namespace_end, self.app))
258
259  def __repr__(self):
260    if self.app is None:
261      return 'NamespaceRange(namespace_start=%r, namespace_end=%r)' % (
262          self.namespace_start, self.namespace_end)
263    else:
264      return 'NamespaceRange(namespace_start=%r, namespace_end=%r, _app=%r)' % (
265          self.namespace_start, self.namespace_end, self.app)
266
267  def with_start_after(self, after_namespace):
268    """Returns a copy of this NamespaceName with a new namespace_start.
269
270    Args:
271      after_namespace: A namespace string.
272
273    Returns:
274      A NamespaceRange object whose namespace_start is the lexographically next
275      namespace after the given namespace string.
276
277    Raises:
278      ValueError: if the NamespaceRange includes only a single namespace.
279    """
280    namespace_start = _ord_to_namespace(_namespace_to_ord(after_namespace) + 1)
281    return NamespaceRange(namespace_start, self.namespace_end, _app=self.app)
282
283  def make_datastore_query(self, cursor=None):
284    """Returns a datastore.Query that generates all namespaces in the range.
285
286    Args:
287      cursor: start cursor for the query.
288
289    Returns:
290      A datastore.Query instance that generates db.Keys for each namespace in
291      the NamespaceRange.
292    """
293    filters = {}
294    filters['__key__ >= '] = _key_for_namespace(
295        self.namespace_start, self.app)
296    filters['__key__ <= '] = _key_for_namespace(
297        self.namespace_end, self.app)
298
299    return datastore.Query('__namespace__',
300                           filters=filters,
301                           keys_only=True,
302                           cursor=cursor,
303                           _app=self.app)
304
305  def normalized_start(self):
306    """Returns a NamespaceRange with leading non-existant namespaces removed.
307
308    Returns:
309      A copy of this NamespaceRange whose namespace_start is adjusted to exclude
310      the portion of the range that contains no actual namespaces in the
311      datastore. None is returned if the NamespaceRange contains no actual
312      namespaces in the datastore.
313    """
314    namespaces_after_key = list(self.make_datastore_query().Run(limit=1))
315
316    if not namespaces_after_key:
317      return None
318
319    namespace_after_key = namespaces_after_key[0].name() or ''
320    return NamespaceRange(namespace_after_key,
321                          self.namespace_end,
322                          _app=self.app)
323
324  def to_json_object(self):
325    """Returns a dict representation that can be serialized to JSON."""
326    obj_dict = dict(namespace_start=self.namespace_start,
327                    namespace_end=self.namespace_end)
328    if self.app is not None:
329      obj_dict['app'] = self.app
330    return obj_dict
331
332  @classmethod
333  def from_json_object(cls, json):
334    """Returns a NamespaceRange from an object deserialized from JSON."""
335    return cls(json['namespace_start'],
336               json['namespace_end'],
337               _app=json.get('app'))
338
339  # TODO(user): Implement an option where the returned namespace range is
340  # not normalized using with_start_after to support consistent namespace
341  # queries.
342  @classmethod
343  def split(cls,
344            n,
345            contiguous,
346            can_query=itertools.chain(itertools.repeat(True, 50),
347                                      itertools.repeat(False)).next,
348            _app=None):
349    # pylint: disable=g-doc-args
350    """Splits the complete NamespaceRange into n equally-sized NamespaceRanges.
351
352    Args:
353      n: The maximum number of NamespaceRanges to return. Fewer than n
354          namespaces may be returned.
355      contiguous: If True then the returned NamespaceRanges will cover the
356          entire space of possible namespaces (i.e. from MIN_NAMESPACE to
357          MAX_NAMESPACE) without gaps. If False then the returned
358          NamespaceRanges may exclude namespaces that don't appear in the
359          datastore.
360      can_query: A function that returns True if split() can query the datastore
361          to generate more fair namespace range splits, and False otherwise.
362          If not set then split() is allowed to make 50 datastore queries.
363
364    Returns:
365      A list of at most n NamespaceRanges representing a near-equal distribution
366      of actual existant datastore namespaces. The returned list will be sorted
367      lexographically.
368
369    Raises:
370      ValueError: if n is < 1.
371    """
372    if n < 1:
373      raise ValueError('n must be >= 1')
374
375    ranges = None
376    if can_query():
377      if not contiguous:
378        ns_keys = get_namespace_keys(_app, n + 1)
379        if not ns_keys:
380          return []
381        else:
382          if len(ns_keys) <= n:
383            # If you have less actual namespaces than number of NamespaceRanges
384            # to return, then just return the list of those namespaces.
385            ns_range = []
386            for ns_key in ns_keys:
387              ns_range.append(NamespaceRange(ns_key.name() or '',
388                                             ns_key.name() or '',
389                                             _app=_app))
390            return sorted(ns_range,
391                          key=lambda ns_range: ns_range.namespace_start)
392          # Use the first key and save the initial normalized_start() call.
393          ranges = [NamespaceRange(ns_keys[0].name() or '', _app=_app)]
394      else:
395        ns_range = NamespaceRange(_app=_app).normalized_start()
396        if ns_range is None:
397          return [NamespaceRange(_app=_app)]
398        ranges = [ns_range]
399    else:
400      ranges = [NamespaceRange(_app=_app)]
401
402    singles = []
403    while ranges and (len(ranges) + len(singles)) < n:
404      namespace_range = ranges.pop(0)
405      if namespace_range.is_single_namespace:
406        singles.append(namespace_range)
407      else:
408        left, right = namespace_range.split_range()
409        if can_query():
410          right = right.normalized_start()
411        if right is not None:
412          ranges.append(right)
413        ranges.append(left)
414    ns_ranges = sorted(singles + ranges,
415                       key=lambda ns_range: ns_range.namespace_start)
416
417    if contiguous:
418      if not ns_ranges:
419        # This condition is possible if every namespace was deleted after the
420        # first call to ns_range.normalized_start().
421        return [NamespaceRange(_app=_app)]
422
423      continuous_ns_ranges = []
424      for i in range(len(ns_ranges)):
425        if i == 0:
426          namespace_start = MIN_NAMESPACE
427        else:
428          namespace_start = ns_ranges[i].namespace_start
429
430        if i == len(ns_ranges) - 1:
431          namespace_end = MAX_NAMESPACE
432        else:
433          namespace_end = _ord_to_namespace(
434              _namespace_to_ord(ns_ranges[i+1].namespace_start) - 1)
435
436        continuous_ns_ranges.append(NamespaceRange(namespace_start,
437                                                   namespace_end,
438                                                   _app=_app))
439      return continuous_ns_ranges
440    else:
441      return ns_ranges
442
443  def __iter__(self):
444    """Iterate over all the namespaces within this range."""
445    cursor = None
446    while True:
447      query = self.make_datastore_query(cursor=cursor)
448      count = 0
449      for ns_key in query.Run(limit=NAMESPACE_BATCH_SIZE):
450        count += 1
451        yield ns_key.name() or ''
452      if count < NAMESPACE_BATCH_SIZE:
453        break
454      cursor = query.GetCursor()
455
456
457def get_namespace_keys(app, limit):
458  """Get namespace keys."""
459  ns_query = datastore.Query('__namespace__', keys_only=True, _app=app)
460  return list(ns_query.Run(limit=limit, batch_size=limit))
461