# dumbdbm.py revision a7cc69e02ef9ec7014600911a66978898e545cf1
"""A dumb and slow but simple dbm clone.

For database spam, spam.dir contains the index (a text file),
spam.bak *may* contain a backup of the index (also a text file),
while spam.dat contains the data (a binary file).

XXX TO DO:

- seems to contain a bug when updating...

- reclaim free space (currently, space once occupied by deleted or expanded
items is never reused)

- support concurrent access (currently, if two processes take turns making
updates, they can mess up the index)

- support efficient access to large databases (currently, the whole index
is read when the database is opened, and some updates rewrite the whole index)

- support opening for read-only (flag = 'm')

"""
239f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossum
# Bind the os module to the private name _os.
_os = __import__('os')
import __builtin__

# Keep a reference to the builtin open(): this module defines its own
# open() function, which would otherwise hide the builtin.
_open = __builtin__.open

# Values appended to the data file start at offsets that are multiples of
# this many bytes.
_BLOCKSIZE = 512

error = IOError                         # For anydbm
3222a18904e43c47902ae1e58922abeb87b530b0aeGuido van Rossum
class _Database:
    """Dictionary-like store of string keys and string values kept on disk.

    Three files derived from the name given to the constructor are used:

    - the "dir" file, a text index with one ``key, (pos, siz)`` line per
      entry;
    - the "dat" file, holding the values, each one starting on a
      _BLOCKSIZE boundary;
    - the "bak" file, the previous index, kept whenever the index is
      rewritten by _commit().

    The whole index is held in memory in ``self._index`` as a dict mapping
    each key to its ``(pos, siz)`` pair.
    """

    def __init__(self, file):
        """Set up the file names for *file* and load the existing index.

        The data file is created (empty) when it does not exist yet.
        """
        # When the path separator itself is '.', use '/' to attach the
        # extension instead.
        if _os.sep == '.':
            endsep = '/'
        else:
            endsep = '.'
        self._dirfile = file + endsep + 'dir'
        self._datfile = file + endsep + 'dat'
        self._bakfile = file + endsep + 'bak'
        # Mod by Jack: create data file if needed
        try:
            f = _open(self._datfile, 'r')
        except IOError:
            f = _open(self._datfile, 'w')
        f.close()
        self._update()

    def _update(self):
        """(Re)build the in-memory index from the directory file.

        A missing or unreadable directory file yields an empty index.
        Reading stops at the first empty line.  When a key occurs on more
        than one line, the last line wins.
        """
        self._index = {}
        try:
            f = _open(self._dirfile)
        except IOError:
            return
        try:
            for line in f:
                line = line.rstrip()
                if not line:
                    break
                # NOTE(security): eval() executes whatever expression the
                # index file contains; only open databases whose files come
                # from a trusted source.
                key, (pos, siz) = eval(line)
                self._index[key] = (pos, siz)
        finally:
            f.close()

    def _commit(self):
        """Rewrite the directory file from the in-memory index.

        The previous directory file, if any, is kept as the backup file.
        """
        # Best effort: the backup or the directory file may not exist yet.
        try:
            _os.unlink(self._bakfile)
        except _os.error:
            pass
        try:
            _os.rename(self._dirfile, self._bakfile)
        except _os.error:
            pass
        f = _open(self._dirfile, 'w')
        try:
            for key, (pos, siz) in self._index.items():
                f.write("%r, (%r, %r)\n" % (key, pos, siz))
        finally:
            f.close()

    def __getitem__(self, key):
        """Return the value stored under *key*; raise KeyError if absent."""
        pos, siz = self._index[key]     # may raise KeyError
        f = _open(self._datfile, 'rb')
        try:
            f.seek(pos)
            return f.read(siz)
        finally:
            f.close()

    def _addval(self, val):
        """Append *val* to the data file and return its (pos, siz) pair.

        The value is placed at the first _BLOCKSIZE boundary at or after
        the current end of the file.
        """
        f = _open(self._datfile, 'rb+')
        try:
            f.seek(0, 2)
            pos = int(f.tell())
            # Reach the block boundary by writing NUL padding rather than by
            # seeking past the end of the file: the seek variant did not
            # work under the MW compiler.
            npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
            f.write('\0' * (npos - pos))
            f.write(val)
        finally:
            f.close()
        return (npos, len(val))

    def _setval(self, pos, val):
        """Write *val* at offset *pos* of the data file.

        Returns the (pos, siz) pair describing the stored value.  The
        caller must make sure that *val* fits in the space available at
        *pos*.
        """
        f = _open(self._datfile, 'rb+')
        try:
            f.seek(pos)
            f.write(val)
        finally:
            f.close()
        return (pos, len(val))

    def _addkey(self, key, pos_and_siz):
        """Record *key* -> *pos_and_siz* in memory and in the directory file.

        *pos_and_siz* is a ``(pos, siz)`` pair.  The entry is appended to
        the directory file; it overrides any earlier line for the same key
        the next time the index is loaded.
        """
        pos, siz = pos_and_siz
        self._index[key] = (pos, siz)
        f = _open(self._dirfile, 'a')
        try:
            f.write("%r, (%r, %r)\n" % (key, pos, siz))
        finally:
            f.close()

    def __setitem__(self, key, val):
        """Store *val* under *key*.

        Raises TypeError unless both *key* and *val* are strings.
        """
        if not (isinstance(key, str) and isinstance(val, str)):
            raise TypeError("keys and values must be strings")
        if key not in self._index:
            self._addkey(key, self._addval(val))
            return
        oldpos, oldsiz = self._index[key]
        oldblocks = (oldsiz + _BLOCKSIZE - 1) // _BLOCKSIZE
        newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
        if newblocks <= oldblocks:
            # The new value fits in the blocks of the old one: overwrite.
            entry = self._setval(oldpos, val)
        else:
            # Too big: store it at the end of the data file (the space used
            # by the old value is not reclaimed).
            entry = self._addval(val)
        if entry != (oldpos, oldsiz):
            # Persist the new location/size.  Without this the directory
            # file keeps the stale entry and a reopened database returns
            # the old value or data of the wrong length.
            self._addkey(key, entry)

    def __delitem__(self, key):
        """Remove *key* (KeyError if absent) and rewrite the directory file."""
        del self._index[key]
        self._commit()

    def keys(self):
        """Return the keys currently in the index."""
        return self._index.keys()

    def has_key(self, key):
        """Return true if *key* is in the database."""
        return key in self._index

    def __contains__(self, key):
        """Support ``key in db``."""
        return key in self._index

    def iterkeys(self):
        """Return an iterator over the keys."""
        return iter(self._index)
    __iter__ = iterkeys

    def __len__(self):
        """Return the number of entries."""
        return len(self._index)

    def close(self):
        """Drop the in-memory index and forget the file names.

        Nothing is written to disk here.  The index and the file names are
        set to None, so the object must not be used afterwards.
        """
        self._index = None
        self._datfile = self._dirfile = self._bakfile = None
1519f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossum
1529f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossum
def open(file, flag=None, mode=None):
    """Return a _Database object for the database named *file*.

    *file* is the path without extension; the database files are derived
    from it.  *flag* and *mode* are accepted but currently ignored.
    """
    # Neither flag nor mode influences how the database is opened (yet).
    database = _Database(file)
    return database
156