dumbdbm.py revision 793d4b49361a010e4baa485ec77e3adc430f0236
19f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossum"""A dumb and slow but simple dbm clone.
29f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossum
39f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van RossumFor database spam, spam.dir contains the index (a text file),
49f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossumspam.bak *may* contain a backup of the index (also a text file),
59f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossumwhile spam.dat contains the data (a binary file).
69f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossum
79f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van RossumXXX TO DO:
89f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossum
99f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossum- seems to contain a bug when updating...
109f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossum
119f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossum- reclaim free space (currently, space once occupied by deleted or expanded
129f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossumitems is never reused)
139f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossum
149f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossum- support concurrent access (currently, if two processes take turns making
159f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossumupdates, they can mess up the index)
169f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossum
179f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossum- support efficient access to large databases (currently, the whole index
189f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossumis read when the database is opened, and some updates rewrite the whole index)
199f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossum
209f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossum- support opening for read-only (flag = 'm')
219f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossum
229f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossum"""
239f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossum
24d0cd95ce7febd97683d6e113e1b486b93bcf88ebMartin v. Löwisimport os as _os
259f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossumimport __builtin__
269f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossum
# Keep a reference to the built-in open(): this module defines its own
# open() function further down, which takes over the name at module level.
_open = __builtin__.open

# Granularity of the data file: _Database._addval() starts every newly
# appended value at an offset that is a multiple of this many bytes.
_BLOCKSIZE = 512

error = IOError                         # For anydbm
3222a18904e43c47902ae1e58922abeb87b530b0aeGuido van Rossum
class _Database:
    """Dictionary-like string store kept in three sibling files.

    For a base name ``file`` the instance works with ``file.dir`` (a text
    index holding one ``repr(key), (pos, siz)`` entry per line), ``file.dat``
    (the values, each one starting on a _BLOCKSIZE boundary) and ``file.bak``
    (the previous index, left behind by _commit()).  The complete index is
    held in memory in self._index, mapping key -> (pos, siz).
    """

    def __init__(self, file, mode):
        """Record the file names, create the data file if needed and load
        the index.

        file -- base path; 'dir', 'dat' and 'bak' are appended with os.extsep.
        mode -- permission bits requested for files this object creates.
        """
        # None means "no index loaded".  Set it before anything can fail so
        # that _commit() and __del__() always find the attribute.
        self._index = None
        self._mode = mode
        self._dirfile = file + _os.extsep + 'dir'
        self._datfile = file + _os.extsep + 'dat'
        self._bakfile = file + _os.extsep + 'bak'
        # Mod by Jack: create data file if needed
        try:
            f = _open(self._datfile, 'r')
        except IOError:
            f = self._create(self._datfile, 'w')
        f.close()
        self._update()

    def _create(self, path, how):
        """Return a text file object writing to path, creating the file
        with self._mode if it does not exist yet.

        how -- 'w' to truncate an existing file, 'a' to append to it.

        The built-in open() has no permission argument (its third parameter
        is the buffer size), so the file is opened through os.open(), which
        does take one.  Raises IOError on failure, like the built-in open().
        """
        flags = _os.O_WRONLY | _os.O_CREAT
        if how == 'a':
            flags = flags | _os.O_APPEND
        else:
            flags = flags | _os.O_TRUNC
        try:
            fd = _os.open(path, flags, self._mode)
        except OSError:
            # Keep the exception type the built-in open() raised here, which
            # is also what the module-level ``error`` name stands for.
            import sys
            why = sys.exc_info()[1]
            raise IOError(why.errno, why.strerror, path)
        return _os.fdopen(fd, how)

    def _update(self):
        """Load self._index from the directory file.

        A missing directory file yields an empty index.  Reading stops at
        the first empty line.  The index is built in a local dict and only
        published once every line has parsed, so a corrupt directory file
        cannot leave a truncated index behind for _commit() to write out.
        """
        index = {}
        try:
            f = _open(self._dirfile)
        except IOError:
            pass                        # no directory file yet
        else:
            try:
                for line in f:
                    line = line.rstrip()
                    if not line:
                        break
                    # SECURITY: eval() executes whatever expression the
                    # directory file contains; only open databases whose
                    # files come from a trusted source.
                    key, (pos, siz) = eval(line)
                    index[key] = (pos, siz)
            finally:
                f.close()
        self._index = index

    def _commit(self):
        """Rewrite the directory file from self._index.

        The previous directory file is kept as the backup file.  Does
        nothing when no index is loaded (for instance after close()).
        """
        if self._index is None:
            return
        # Best effort on purpose: there may be no old backup to remove ...
        try:
            _os.unlink(self._bakfile)
        except _os.error:
            pass
        # ... and no directory file yet to turn into the new backup.
        try:
            _os.rename(self._dirfile, self._bakfile)
        except _os.error:
            pass
        f = self._create(self._dirfile, 'w')
        try:
            for key, (pos, siz) in self._index.items():
                f.write("%s, (%s, %s)\n" % (repr(key), repr(pos), repr(siz)))
        finally:
            f.close()

    def __getitem__(self, key):
        """Return the value stored under key; raise KeyError if absent."""
        pos, siz = self._index[key]     # may raise KeyError
        f = _open(self._datfile, 'rb')
        try:
            f.seek(pos)
            return f.read(siz)
        finally:
            f.close()

    def _addval(self, val):
        """Append val to the data file on the next _BLOCKSIZE boundary.

        Returns the (pos, siz) pair describing where val was written.
        """
        f = _open(self._datfile, 'rb+')
        try:
            f.seek(0, 2)
            pos = int(f.tell())
            npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
            # Pad with NUL bytes up to the boundary instead of seeking past
            # the end of the file: the seek variant was noted not to work
            # under the MW compiler.
            f.write('\0' * (npos - pos))
            f.write(val)
        finally:
            f.close()
        return (npos, len(val))

    def _setval(self, pos, val):
        """Overwrite the data file at offset pos with val.

        Returns (pos, len(val)).  The caller must make sure val fits in
        the space available at pos.
        """
        f = _open(self._datfile, 'rb+')
        try:
            f.seek(pos)
            f.write(val)
        finally:
            f.close()
        return (pos, len(val))

    def _addkey(self, key, pos_and_siz):
        """Record key -> (pos, siz) in memory and append the matching line
        to the directory file."""
        pos, siz = pos_and_siz
        self._index[key] = (pos, siz)
        f = self._create(self._dirfile, 'a')
        try:
            f.write("%s, (%s, %s)\n" % (repr(key), repr(pos), repr(siz)))
        finally:
            f.close()

    def __setitem__(self, key, val):
        """Store val under key.

        Both must be plain strings, otherwise TypeError is raised.  A new
        key is appended to the data and directory files.  For an existing
        key the value is overwritten in place when it needs no more blocks
        than the old one, and appended otherwise (the old blocks are not
        reused).
        """
        # Exact type test kept from the original: subclasses are rejected.
        if type(key) is not str or type(val) is not str:
            raise TypeError("keys and values must be strings")
        if key not in self._index:
            self._addkey(key, self._addval(val))
            return
        pos, siz = self._index[key]
        # Floor division, as in _addval(): these are whole block counts.
        oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
        newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
        if newblocks <= oldblocks:
            self._index[key] = self._setval(pos, val)
        else:
            self._index[key] = self._addval(val)
        # NOTE: only the in-memory index is updated on this path; the
        # directory file catches up at the next _commit().

    def __delitem__(self, key):
        """Remove key (KeyError if absent) and rewrite the directory file."""
        del self._index[key]
        self._commit()

    def keys(self):
        """Return the keys of the in-memory index."""
        return self._index.keys()

    def has_key(self, key):
        """Return true if key is in the index."""
        return key in self._index

    def __contains__(self, key):
        """Support ``key in db``."""
        return key in self._index

    def iterkeys(self):
        """Return an iterator over the keys."""
        return iter(self._index)
    __iter__ = iterkeys

    def __len__(self):
        """Return the number of keys."""
        return len(self._index)

    def close(self):
        """Write the index out and detach from the files.

        Calling close() again afterwards is a no-op.
        """
        self._commit()
        self._index = None
        self._datfile = self._dirfile = self._bakfile = None

    def __del__(self):
        """Commit on garbage collection unless the database was closed or
        never finished opening."""
        if getattr(self, '_index', None) is not None:
            self._commit()
153e4418609f79b94b91bda2621b5e6f067fb6a31d5Tim Peters
154ed9057083bcdcce37ddb66bc01e9b7fddb5ea206Anthony Baxter
1559f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossum
1562c8373bc234039fff699df9c28d2cb74efd6a37cFred Drakedef open(file, flag=None, mode=0666):
157aef22fb9cdf31fb7f0afc28ad049f08a89e23761Raymond Hettinger    """Open the database file, filename, and return corresponding object.
158aef22fb9cdf31fb7f0afc28ad049f08a89e23761Raymond Hettinger
159aef22fb9cdf31fb7f0afc28ad049f08a89e23761Raymond Hettinger    The flag argument, used to control how the database is opened in the
160aef22fb9cdf31fb7f0afc28ad049f08a89e23761Raymond Hettinger    other DBM implementations, is ignored in the dumbdbm module; the
161aef22fb9cdf31fb7f0afc28ad049f08a89e23761Raymond Hettinger    database is always opened for update, and will be created if it does
162aef22fb9cdf31fb7f0afc28ad049f08a89e23761Raymond Hettinger    not exist.
163aef22fb9cdf31fb7f0afc28ad049f08a89e23761Raymond Hettinger
164aef22fb9cdf31fb7f0afc28ad049f08a89e23761Raymond Hettinger    The optional mode argument is the UNIX mode of the file, used only when
165aef22fb9cdf31fb7f0afc28ad049f08a89e23761Raymond Hettinger    the database has to be created.  It defaults to octal code 0666 (and
166aef22fb9cdf31fb7f0afc28ad049f08a89e23761Raymond Hettinger    will be modified by the prevailing umask).
167aef22fb9cdf31fb7f0afc28ad049f08a89e23761Raymond Hettinger
168aef22fb9cdf31fb7f0afc28ad049f08a89e23761Raymond Hettinger    """
16988869f9787cd4ceb2298e4b13980beb057687824Tim Peters    # flag, mode arguments are currently ignored
1702c8373bc234039fff699df9c28d2cb74efd6a37cFred Drake    return _Database(file, mode)
171