1"""Manage shelves of pickled objects.
2
3A "shelf" is a persistent, dictionary-like object.  The difference
4with dbm databases is that the values (not the keys!) in a shelf can
5be essentially arbitrary Python objects -- anything that the "pickle"
6module can handle.  This includes most class instances, recursive data
7types, and objects containing lots of shared sub-objects.  The keys
8are ordinary strings.
9
10To summarize the interface (key is a string, data is an arbitrary
11object):
12
13        import shelve
14        d = shelve.open(filename) # open, with (g)dbm filename -- no suffix
15
16        d[key] = data   # store data at key (overwrites old data if
17                        # using an existing key)
18        data = d[key]   # retrieve a COPY of the data at key (raise
19                        # KeyError if no such key) -- NOTE that this
20                        # access returns a *copy* of the entry!
21        del d[key]      # delete data stored at key (raises KeyError
22                        # if no such key)
23        flag = d.has_key(key)   # true if the key exists; same as "key in d"
24        list = d.keys() # a list of all existing keys (slow!)
25
26        d.close()       # close it
27
Depending on the implementation, closing a persistent dictionary may
or may not be necessary to flush changes to disk.
30
31Normally, d[key] returns a COPY of the entry.  This needs care when
32mutable entries are mutated: for example, if d[key] is a list,
33        d[key].append(anitem)
34does NOT modify the entry d[key] itself, as stored in the persistent
35mapping -- it only modifies the copy, which is then immediately
36discarded, so that the append has NO effect whatsoever.  To append an
37item to d[key] in a way that will affect the persistent mapping, use:
38        data = d[key]
39        data.append(anitem)
40        d[key] = data
41
42To avoid the problem with mutable entries, you may pass the keyword
43argument writeback=True in the call to shelve.open.  When you use:
44        d = shelve.open(filename, writeback=True)
45then d keeps a cache of all entries you access, and writes them all back
46to the persistent mapping when you call d.close().  This ensures that
47such usage as d[key].append(anitem) works as intended.
48
However, using keyword argument writeback=True may consume vast amounts
of memory for the cache, and it may make d.close() very slow, if you
51access many of d's entries after opening it in this way: d has no way to
52check which of the entries you access are mutable and/or which ones you
53actually mutate, so it must cache, and write back at close, all of the
54entries that you access.  You can call d.sync() to write back all the
55entries in the cache, and empty the cache (d.sync() also synchronizes
56the persistent dictionary on disk, if feasible).
57"""
58
59# Try using cPickle and cStringIO if available.
60
61try:
62    from cPickle import Pickler, Unpickler
63except ImportError:
64    from pickle import Pickler, Unpickler
65
66try:
67    from cStringIO import StringIO
68except ImportError:
69    from StringIO import StringIO
70
71import UserDict
72
73__all__ = ["Shelf","BsdDbShelf","DbfilenameShelf","open"]
74
class _ClosedDict(UserDict.DictMixin):
    """Stand-in mapping that a Shelf swaps in for its dict on close().

    Reading, writing, deleting or listing keys raises ValueError.
    """

    def closed(self, *args):
        # Single failure path shared by every mapping primitive below.
        raise ValueError('invalid operation on closed shelf')

    # Point each primitive that DictMixin builds upon at the failing stub.
    __getitem__ = closed
    __setitem__ = closed
    __delitem__ = closed
    keys = closed

    def __repr__(self):
        return '<Closed Dictionary>'
84
class Shelf(UserDict.DictMixin):
    """Base class for shelf implementations.

    This is initialized with a dictionary-like object.
    See the module's __doc__ string for an overview of the interface.

    Values are pickled before being stored in the underlying mapping
    and unpickled when they are read back.  With writeback=True every
    value that is read or stored is also kept in an in-memory cache,
    which sync() (and therefore close()) writes back to the mapping.

    A shelf may be used as a context manager; it is closed when the
    "with" block is left.
    """

    def __init__(self, dict, protocol=None, writeback=False):
        """Wrap the dictionary-like object *dict*.

        protocol  -- pickle protocol used when storing values; None
                     selects protocol 0.
        writeback -- if true, cache every accessed entry and write the
                     cache back on sync() / close().
        """
        self.dict = dict
        if protocol is None:
            protocol = 0
        self._protocol = protocol
        # __del__ uses the presence of this attribute to tell whether
        # __init__ got this far.
        self.writeback = writeback
        self.cache = {}

    def __enter__(self):
        """Return the shelf itself for use in a "with" statement."""
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        """Close the shelf on leaving a "with" block.

        Returns None, so an exception raised inside the block propagates.
        """
        self.close()

    def keys(self):
        """Return the keys of the underlying mapping."""
        return self.dict.keys()

    def __len__(self):
        """Return the number of entries in the underlying mapping."""
        return len(self.dict)

    def has_key(self, key):
        """Return true if *key* is in the underlying mapping."""
        return key in self.dict

    def __contains__(self, key):
        """Return true if *key* is in the underlying mapping."""
        return key in self.dict

    def get(self, key, default=None):
        """Return the unpickled value for *key*, or *default* if absent."""
        if key in self.dict:
            return self[key]
        return default

    def __getitem__(self, key):
        """Return the value stored at *key*.

        A cached object is returned if there is one; otherwise the
        stored pickle is loaded (and remembered in the cache when
        writeback is enabled).  Errors from the underlying mapping,
        such as KeyError for a missing key, propagate unchanged.
        """
        try:
            value = self.cache[key]
        except KeyError:
            f = StringIO(self.dict[key])
            value = Unpickler(f).load()
            if self.writeback:
                self.cache[key] = value
        return value

    def __setitem__(self, key, value):
        """Pickle *value* and store it at *key* in the underlying mapping.

        With writeback enabled the live object is cached as well, so a
        later in-place mutation is picked up by sync().
        """
        if self.writeback:
            self.cache[key] = value
        f = StringIO()
        p = Pickler(f, self._protocol)
        p.dump(value)
        self.dict[key] = f.getvalue()

    def __delitem__(self, key):
        """Delete *key* from the underlying mapping and from the cache."""
        del self.dict[key]
        try:
            del self.cache[key]
        except KeyError:
            # The key was simply never cached.
            pass

    def close(self):
        """Write back cached entries and close the underlying mapping.

        The mapping's own close() is called when it has one.  Whether
        or not that succeeds, the mapping is then replaced by a
        _ClosedDict, so later access through it raises ValueError.
        """
        if self.dict is None:
            return
        try:
            self.sync()
            try:
                self.dict.close()
            except AttributeError:
                # The underlying mapping has no close() method.
                pass
        finally:
            # Catch errors that may happen when close is called from __del__
            # because CPython is in interpreter shutdown.
            try:
                self.dict = _ClosedDict()
            except:
                self.dict = None

    def __del__(self):
        if not hasattr(self, 'writeback'):
            # __init__ didn't succeed, so don't bother closing
            return
        self.close()

    def sync(self):
        """Write all cached entries back and sync the underlying mapping.

        With writeback enabled and a non-empty cache, every cached
        entry is re-pickled into the mapping and the cache is emptied.
        The mapping's own sync() is then called if it has one.

        If storing an entry raises, the exception propagates, the
        writeback setting is restored and the cache is left intact, so
        nothing already cached is silently dropped.
        """
        if self.writeback and self.cache:
            # Switch caching off while flushing so __setitem__ does not
            # modify the cache that is being iterated over.
            self.writeback = False
            try:
                for key, entry in self.cache.iteritems():
                    self[key] = entry
            finally:
                # Restore the flag even on failure; otherwise the shelf
                # would silently stop caching after a failed sync.
                self.writeback = True
            self.cache = {}
        if hasattr(self.dict, 'sync'):
            self.dict.sync()
174
175
class BsdDbShelf(Shelf):
    """Shelf built on a database object offering the "BSD" db interface.

    Besides the regular shelf operations this class provides first(),
    next(), previous(), last() and set_location(), none of which has
    a counterpart in [g]dbm databases.

    The database itself has to be opened by the caller with one of the
    "bsddb" module's "open" routines (i.e. bsddb.hashopen, bsddb.btopen
    or bsddb.rnopen) and handed to the constructor.

    See the module's __doc__ string for an overview of the interface.
    """

    def __init__(self, dict, protocol=None, writeback=False):
        Shelf.__init__(self, dict, protocol, writeback)

    def _unpickled_pair(self, record):
        # The db hands back (key, pickled value); only the value half
        # needs decoding before the pair is returned to the caller.
        key, pickled = record
        stream = StringIO(pickled)
        return (key, Unpickler(stream).load())

    def set_location(self, key):
        """Return (key, value) from the db's set_location(key), unpickled."""
        return self._unpickled_pair(self.dict.set_location(key))

    def next(self):
        """Return (key, value) from the db's next(), unpickled."""
        return self._unpickled_pair(self.dict.next())

    def previous(self):
        """Return (key, value) from the db's previous(), unpickled."""
        return self._unpickled_pair(self.dict.previous())

    def first(self):
        """Return (key, value) from the db's first(), unpickled."""
        return self._unpickled_pair(self.dict.first())

    def last(self):
        """Return (key, value) from the db's last(), unpickled."""
        return self._unpickled_pair(self.dict.last())
216
217
class DbfilenameShelf(Shelf):
    """Shelf that opens its own database through the generic "anydbm"
    interface.

    The constructor takes the filename of the dbm database rather than
    an already opened mapping.
    See the module's __doc__ string for an overview of the interface.
    """

    def __init__(self, filename, flag='c', protocol=None, writeback=False):
        # Imported here rather than at module level, so anydbm is only
        # loaded once a filename-based shelf is actually created.
        import anydbm
        database = anydbm.open(filename, flag)
        Shelf.__init__(self, database, protocol, writeback)
228
229
def open(filename, flag='c', protocol=None, writeback=False):
    """Open a persistent dictionary for reading and writing.

    filename is the base filename of the underlying database; as a
    side-effect an extension may be added to it and more than one file
    may be created.  flag is interpreted exactly like the flag
    parameter of anydbm.open().  protocol selects the version of the
    pickle protocol (0, 1, or 2).  writeback is passed on to the shelf
    unchanged.

    The result is a DbfilenameShelf.  See the module's __doc__ string
    for an overview of the interface.
    """
    shelf = DbfilenameShelf(filename, flag, protocol, writeback)
    return shelf
244