dumbdbm.py revision a7cc69e02ef9ec7014600911a66978898e545cf1
"""A dumb and slow but simple dbm clone.

For database spam, spam.dir contains the index (a text file),
spam.bak *may* contain a backup of the index (also a text file),
while spam.dat contains the data (a binary file).

The index file holds one line per entry of the form

    'key', (pos, siz)

where pos is the byte offset of the value in the data file and siz is
its length.  When a key occurs more than once, the last line wins.

XXX TO DO:

- seems to contain a bug when updating... (replacing the value of an
  existing key is now recorded in the index file; other problems may
  remain)

- reclaim free space (currently, space once occupied by deleted or expanded
  items is never reused)

- support concurrent access (currently, if two processes take turns making
  updates, they can mess up the index)

- support efficient access to large databases (currently, the whole index
  is read when the database is opened, and some updates rewrite the whole
  index)

- support opening for read-only (flag = 'm')

"""

_os = __import__('os')
try:
    import __builtin__
except ImportError:
    # Python 3 renamed the module; keep the historical local name.
    import builtins as __builtin__

# Keep a handle on the builtin: this module defines its own open() below.
_open = __builtin__.open

# Values in the data file start on multiples of this many bytes.
_BLOCKSIZE = 512

error = IOError                         # For anydbm

# True where str already is a byte string (Python 2).  Elsewhere values are
# converted with Latin-1, which maps one character to one byte, so the
# sizes kept in the index stay equal to len(value).
_STR_IS_BYTES = type(''.encode('latin-1')) is type('')


def _to_bytes(text):
    """Return the string *text* in the form written to the data file.

    Where str is not a byte string this raises UnicodeEncodeError for
    characters outside Latin-1.
    """
    if _STR_IS_BYTES:
        return text
    return text.encode('latin-1')


def _from_bytes(raw):
    """Return data read from the data file as a string (see _to_bytes)."""
    if _STR_IS_BYTES:
        return raw
    return raw.decode('latin-1')


def _entry_line(key, pos, siz):
    """Return the index-file line describing one entry."""
    return "%s, (%s, %s)\n" % (repr(key), repr(pos), repr(siz))


class _Database:
    """Mapping of string keys to string values kept in three files.

    self._index maps each key to the (pos, siz) pair locating its value in
    the data file.  The whole index is held in memory and loaded by
    _update() when the object is created.
    """

    def __init__(self, file):
        """Open the database whose files share the base name *file*.

        The data file is created (empty) when it cannot be opened for
        reading; the index is then loaded from the index file, if any.
        """
        # Where '.' is the path separator, '/' separates the extension.
        if _os.sep == '.':
            endsep = '/'
        else:
            endsep = '.'
        self._dirfile = file + endsep + 'dir'
        self._datfile = file + endsep + 'dat'
        self._bakfile = file + endsep + 'bak'
        # Mod by Jack: create data file if needed
        try:
            f = _open(self._datfile, 'r')
        except IOError:
            f = _open(self._datfile, 'w')
        f.close()
        self._update()

    def _update(self):
        """(Re)load self._index from the index file.

        A missing or unreadable index file yields an empty index.  Reading
        stops at the first empty line.
        """
        self._index = {}
        try:
            f = _open(self._dirfile)
        except IOError:
            return
        try:
            for line in f:
                line = line.rstrip()
                if not line:
                    break
                # NOTE(review): eval() executes whatever the index file
                # contains; only open databases from a trusted source.
                key, (pos, siz) = eval(line)
                # Later lines replace earlier ones for the same key.
                self._index[key] = (pos, siz)
        finally:
            f.close()

    def _commit(self):
        """Rewrite the index file from self._index.

        The previous index file is kept as the backup file when it can be
        renamed; failures to remove the old backup or to rename are
        deliberately ignored (either file may simply not exist yet).
        """
        try:
            _os.unlink(self._bakfile)
        except _os.error:
            pass
        try:
            _os.rename(self._dirfile, self._bakfile)
        except _os.error:
            pass
        f = _open(self._dirfile, 'w')
        try:
            for key, (pos, siz) in self._index.items():
                f.write(_entry_line(key, pos, siz))
        finally:
            f.close()

    def __getitem__(self, key):
        """Return the value stored for *key*; raise KeyError if absent."""
        pos, siz = self._index[key]     # may raise KeyError
        f = _open(self._datfile, 'rb')
        try:
            f.seek(pos)
            dat = f.read(siz)
        finally:
            f.close()
        return _from_bytes(dat)

    def _addval(self, val):
        """Append *val* to the data file at the next block boundary.

        Returns the (pos, siz) pair of the stored value.  The index is not
        touched.
        """
        data = _to_bytes(val)           # convert before touching the file
        f = _open(self._datfile, 'rb+')
        try:
            f.seek(0, 2)
            pos = int(f.tell())
            # Round up to a multiple of _BLOCKSIZE.  The gap is filled with
            # explicit NUL bytes instead of seeking past the end of the
            # file, which did not work under the MW compiler.
            npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
            f.write(_to_bytes('\0' * (npos - pos)))
            pos = npos
            f.write(data)
        finally:
            f.close()
        return (pos, len(data))

    def _setval(self, pos, val):
        """Write *val* into the data file at offset *pos*.

        Returns the (pos, siz) pair of the stored value.  The caller is
        responsible for making sure the value fits; the index is not
        touched.
        """
        data = _to_bytes(val)
        f = _open(self._datfile, 'rb+')
        try:
            f.seek(pos)
            f.write(data)
        finally:
            f.close()
        return (pos, len(data))

    def _addkey(self, key, pos_and_siz):
        """Record *key* -> (pos, siz) in memory and append it to the index
        file.

        *pos_and_siz* is the (pos, siz) pair, passed as a single tuple.
        """
        pos, siz = pos_and_siz
        self._index[key] = (pos, siz)
        f = _open(self._dirfile, 'a')
        try:
            f.write(_entry_line(key, pos, siz))
        finally:
            f.close()

    def __setitem__(self, key, val):
        """Store *val* under *key*.

        Raises TypeError unless both are (exactly) strings.  A new value
        for an existing key overwrites the old one in place when it needs
        no more blocks than the old value; otherwise it is appended and the
        old space is lost.
        """
        if not type(key) == type('') == type(val):
            raise TypeError("keys and values must be strings")
        if key not in self._index:
            self._addkey(key, self._addval(val))
            return
        old = self._index[key]
        pos, siz = old
        oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
        newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
        if newblocks <= oldblocks:
            new = self._setval(pos, val)
        else:
            new = self._addval(val)
        if new != old:
            # Persist the changed location/size.  Without this the index
            # file kept describing the old value, so the update was lost
            # (or truncated) the next time the database was opened.
            self._addkey(key, new)

    def __delitem__(self, key):
        """Remove *key* (KeyError if absent) and rewrite the index file."""
        del self._index[key]
        self._commit()

    def keys(self):
        """Return a list of all keys."""
        return list(self._index.keys())

    def has_key(self, key):
        """Return true if *key* is in the database."""
        return key in self._index

    def __contains__(self, key):
        """Support ``key in db``."""
        return key in self._index

    def iterkeys(self):
        """Return an iterator over the keys."""
        return iter(self._index)
    __iter__ = iterkeys

    def __len__(self):
        """Return the number of keys."""
        return len(self._index)

    def close(self):
        """Drop the in-memory index and file names.

        The object cannot be used afterwards.
        """
        self._index = None
        self._datfile = self._dirfile = self._bakfile = None


def open(file, flag=None, mode=None):
    """Open the database with base name *file* and return a _Database.

    The files are created when missing.
    """
    # flag, mode arguments are currently ignored
    return _Database(file)