dumbdbm.py revision 6d06815b562e1540ee334d30953354210dfebd24
"""A dumb and slow but simple dbm clone.

For database spam, spam.dir contains the index (a text file),
spam.bak *may* contain a backup of the index (also a text file),
while spam.dat contains the data (a binary file).

XXX TO DO:

- seems to contain a bug when updating...

- reclaim free space (currently, space once occupied by deleted or expanded
items is never reused)

- support concurrent access (currently, if two processes take turns making
updates, they can mess up the index)

- support efficient access to large databases (currently, the whole index
is read when the database is opened, and some updates rewrite the whole index)

- support opening for read-only (flag = 'm')

"""
239f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossum 24d0cd95ce7febd97683d6e113e1b486b93bcf88ebMartin v. Löwisimport os as _os 259f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossumimport __builtin__ 262dd1ed69b425d2f2ac8152548c7581aa1f01216dRaymond Hettingerimport UserDict 279f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossum 289f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossum_open = __builtin__.open 299f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossum 309f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossum_BLOCKSIZE = 512 319f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossum 3288869f9787cd4ceb2298e4b13980beb057687824Tim Peterserror = IOError # For anydbm 3322a18904e43c47902ae1e58922abeb87b530b0aeGuido van Rossum 342dd1ed69b425d2f2ac8152548c7581aa1f01216dRaymond Hettingerclass _Database(UserDict.DictMixin): 359f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossum 36d7472ec13a65c6c5ff00365b1477677d1fecbb3cTim Peters # The on-disk directory and data files can remain in mutually 37d7472ec13a65c6c5ff00365b1477677d1fecbb3cTim Peters # inconsistent states for an arbitrarily long time (see comments 38d7472ec13a65c6c5ff00365b1477677d1fecbb3cTim Peters # at the end of __setitem__). This is only repaired when _commit() 39d7472ec13a65c6c5ff00365b1477677d1fecbb3cTim Peters # gets called. One place _commit() gets called is from __del__(), 40d7472ec13a65c6c5ff00365b1477677d1fecbb3cTim Peters # and if that occurs at program shutdown time, module globals may 41d7472ec13a65c6c5ff00365b1477677d1fecbb3cTim Peters # already have gotten rebound to None. Since it's crucial that 420320464583957258031ebee1b8897343211e4339Tim Peters # _commit() finish successfully, we can't ignore shutdown races 43d7472ec13a65c6c5ff00365b1477677d1fecbb3cTim Peters # here, and _commit() must not reference any globals. 
44d7472ec13a65c6c5ff00365b1477677d1fecbb3cTim Peters _os = _os # for _commit() 45d7472ec13a65c6c5ff00365b1477677d1fecbb3cTim Peters _open = _open # for _commit() 46d7472ec13a65c6c5ff00365b1477677d1fecbb3cTim Peters 477dfd5701b20c3566166e148f77591a2912164221Tim Peters def __init__(self, filebasename, mode): 482c8373bc234039fff699df9c28d2cb74efd6a37cFred Drake self._mode = mode 497dfd5701b20c3566166e148f77591a2912164221Tim Peters 507dfd5701b20c3566166e148f77591a2912164221Tim Peters # The directory file is a text file. Each line looks like 517dfd5701b20c3566166e148f77591a2912164221Tim Peters # "%r, (%d, %d)\n" % (key, pos, siz) 527dfd5701b20c3566166e148f77591a2912164221Tim Peters # where key is the string key, pos is the offset into the dat 537dfd5701b20c3566166e148f77591a2912164221Tim Peters # file of the associated value's first byte, and siz is the number 547dfd5701b20c3566166e148f77591a2912164221Tim Peters # of bytes in the associated value. 557dfd5701b20c3566166e148f77591a2912164221Tim Peters self._dirfile = filebasename + _os.extsep + 'dir' 567dfd5701b20c3566166e148f77591a2912164221Tim Peters 577dfd5701b20c3566166e148f77591a2912164221Tim Peters # The data file is a binary file pointed into by the directory 587dfd5701b20c3566166e148f77591a2912164221Tim Peters # file, and holds the values associated with keys. Each value 597dfd5701b20c3566166e148f77591a2912164221Tim Peters # begins at a _BLOCKSIZE-aligned byte offset, and is a raw 607dfd5701b20c3566166e148f77591a2912164221Tim Peters # binary 8-bit string value. 617dfd5701b20c3566166e148f77591a2912164221Tim Peters self._datfile = filebasename + _os.extsep + 'dat' 627dfd5701b20c3566166e148f77591a2912164221Tim Peters self._bakfile = filebasename + _os.extsep + 'bak' 637dfd5701b20c3566166e148f77591a2912164221Tim Peters 647dfd5701b20c3566166e148f77591a2912164221Tim Peters # The index is an in-memory dict, mirroring the directory file. 
657dfd5701b20c3566166e148f77591a2912164221Tim Peters self._index = None # maps keys to (pos, siz) pairs 667dfd5701b20c3566166e148f77591a2912164221Tim Peters 6788869f9787cd4ceb2298e4b13980beb057687824Tim Peters # Mod by Jack: create data file if needed 6888869f9787cd4ceb2298e4b13980beb057687824Tim Peters try: 6988869f9787cd4ceb2298e4b13980beb057687824Tim Peters f = _open(self._datfile, 'r') 7088869f9787cd4ceb2298e4b13980beb057687824Tim Peters except IOError: 712c8373bc234039fff699df9c28d2cb74efd6a37cFred Drake f = _open(self._datfile, 'w', self._mode) 7288869f9787cd4ceb2298e4b13980beb057687824Tim Peters f.close() 7388869f9787cd4ceb2298e4b13980beb057687824Tim Peters self._update() 7488869f9787cd4ceb2298e4b13980beb057687824Tim Peters 757dfd5701b20c3566166e148f77591a2912164221Tim Peters # Read directory file into the in-memory index dict. 7688869f9787cd4ceb2298e4b13980beb057687824Tim Peters def _update(self): 7788869f9787cd4ceb2298e4b13980beb057687824Tim Peters self._index = {} 7888869f9787cd4ceb2298e4b13980beb057687824Tim Peters try: 7988869f9787cd4ceb2298e4b13980beb057687824Tim Peters f = _open(self._dirfile) 8088869f9787cd4ceb2298e4b13980beb057687824Tim Peters except IOError: 8188869f9787cd4ceb2298e4b13980beb057687824Tim Peters pass 8288869f9787cd4ceb2298e4b13980beb057687824Tim Peters else: 837dfd5701b20c3566166e148f77591a2912164221Tim Peters for line in f: 847dfd5701b20c3566166e148f77591a2912164221Tim Peters key, pos_and_siz_pair = eval(line) 857dfd5701b20c3566166e148f77591a2912164221Tim Peters self._index[key] = pos_and_siz_pair 8688869f9787cd4ceb2298e4b13980beb057687824Tim Peters f.close() 8788869f9787cd4ceb2298e4b13980beb057687824Tim Peters 887dfd5701b20c3566166e148f77591a2912164221Tim Peters # Write the index dict to the directory file. The original directory 897dfd5701b20c3566166e148f77591a2912164221Tim Peters # file (if any) is renamed with a .bak extension first. 
If a .bak 907dfd5701b20c3566166e148f77591a2912164221Tim Peters # file currently exists, it's deleted. 9188869f9787cd4ceb2298e4b13980beb057687824Tim Peters def _commit(self): 92d7472ec13a65c6c5ff00365b1477677d1fecbb3cTim Peters # CAUTION: It's vital that _commit() succeed, and _commit() can 93d7472ec13a65c6c5ff00365b1477677d1fecbb3cTim Peters # be called from __del__(). Therefore we must never reference a 94d7472ec13a65c6c5ff00365b1477677d1fecbb3cTim Peters # global in this routine. 957a6c733c3b8ee131d7860ecb0877837e412c2959Tim Peters if self._index is None: 967a6c733c3b8ee131d7860ecb0877837e412c2959Tim Peters return # nothing to do 977a6c733c3b8ee131d7860ecb0877837e412c2959Tim Peters 987dfd5701b20c3566166e148f77591a2912164221Tim Peters try: 99d7472ec13a65c6c5ff00365b1477677d1fecbb3cTim Peters self._os.unlink(self._bakfile) 100d7472ec13a65c6c5ff00365b1477677d1fecbb3cTim Peters except self._os.error: 1017dfd5701b20c3566166e148f77591a2912164221Tim Peters pass 1027dfd5701b20c3566166e148f77591a2912164221Tim Peters 1037dfd5701b20c3566166e148f77591a2912164221Tim Peters try: 104d7472ec13a65c6c5ff00365b1477677d1fecbb3cTim Peters self._os.rename(self._dirfile, self._bakfile) 105d7472ec13a65c6c5ff00365b1477677d1fecbb3cTim Peters except self._os.error: 1067dfd5701b20c3566166e148f77591a2912164221Tim Peters pass 1077dfd5701b20c3566166e148f77591a2912164221Tim Peters 108d7472ec13a65c6c5ff00365b1477677d1fecbb3cTim Peters f = self._open(self._dirfile, 'w', self._mode) 1093898a70bcf3be9e21911cd7b99bb498d45f03789Tim Peters for key, pos_and_siz_pair in self._index.iteritems(): 1103898a70bcf3be9e21911cd7b99bb498d45f03789Tim Peters f.write("%r, %r\n" % (key, pos_and_siz_pair)) 11188869f9787cd4ceb2298e4b13980beb057687824Tim Peters f.close() 11288869f9787cd4ceb2298e4b13980beb057687824Tim Peters 1136d06815b562e1540ee334d30953354210dfebd24Skip Montanaro sync = _commit 1146d06815b562e1540ee334d30953354210dfebd24Skip Montanaro 11588869f9787cd4ceb2298e4b13980beb057687824Tim Peters def 
__getitem__(self, key): 11688869f9787cd4ceb2298e4b13980beb057687824Tim Peters pos, siz = self._index[key] # may raise KeyError 11788869f9787cd4ceb2298e4b13980beb057687824Tim Peters f = _open(self._datfile, 'rb') 11888869f9787cd4ceb2298e4b13980beb057687824Tim Peters f.seek(pos) 11988869f9787cd4ceb2298e4b13980beb057687824Tim Peters dat = f.read(siz) 12088869f9787cd4ceb2298e4b13980beb057687824Tim Peters f.close() 12188869f9787cd4ceb2298e4b13980beb057687824Tim Peters return dat 12288869f9787cd4ceb2298e4b13980beb057687824Tim Peters 1237dfd5701b20c3566166e148f77591a2912164221Tim Peters # Append val to the data file, starting at a _BLOCKSIZE-aligned 1247dfd5701b20c3566166e148f77591a2912164221Tim Peters # offset. The data file is first padded with NUL bytes (if needed) 1257dfd5701b20c3566166e148f77591a2912164221Tim Peters # to get to an aligned offset. Return pair 1267dfd5701b20c3566166e148f77591a2912164221Tim Peters # (starting offset of val, len(val)) 12788869f9787cd4ceb2298e4b13980beb057687824Tim Peters def _addval(self, val): 12888869f9787cd4ceb2298e4b13980beb057687824Tim Peters f = _open(self._datfile, 'rb+') 12988869f9787cd4ceb2298e4b13980beb057687824Tim Peters f.seek(0, 2) 13088869f9787cd4ceb2298e4b13980beb057687824Tim Peters pos = int(f.tell()) 13154e54c6877329e105406c48490f218faff59db39Guido van Rossum npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE 13288869f9787cd4ceb2298e4b13980beb057687824Tim Peters f.write('\0'*(npos-pos)) 13388869f9787cd4ceb2298e4b13980beb057687824Tim Peters pos = npos 13488869f9787cd4ceb2298e4b13980beb057687824Tim Peters f.write(val) 13588869f9787cd4ceb2298e4b13980beb057687824Tim Peters f.close() 13688869f9787cd4ceb2298e4b13980beb057687824Tim Peters return (pos, len(val)) 13788869f9787cd4ceb2298e4b13980beb057687824Tim Peters 1387dfd5701b20c3566166e148f77591a2912164221Tim Peters # Write val to the data file, starting at offset pos. 
The caller 1397dfd5701b20c3566166e148f77591a2912164221Tim Peters # is responsible for ensuring that there's enough room starting at 1407dfd5701b20c3566166e148f77591a2912164221Tim Peters # pos to hold val, without overwriting some other value. Return 1417dfd5701b20c3566166e148f77591a2912164221Tim Peters # pair (pos, len(val)). 14288869f9787cd4ceb2298e4b13980beb057687824Tim Peters def _setval(self, pos, val): 14388869f9787cd4ceb2298e4b13980beb057687824Tim Peters f = _open(self._datfile, 'rb+') 14488869f9787cd4ceb2298e4b13980beb057687824Tim Peters f.seek(pos) 14588869f9787cd4ceb2298e4b13980beb057687824Tim Peters f.write(val) 14688869f9787cd4ceb2298e4b13980beb057687824Tim Peters f.close() 14788869f9787cd4ceb2298e4b13980beb057687824Tim Peters return (pos, len(val)) 14888869f9787cd4ceb2298e4b13980beb057687824Tim Peters 1497dfd5701b20c3566166e148f77591a2912164221Tim Peters # key is a new key whose associated value starts in the data file 1501d8d729af80c7d45161f20a40428d26698c11af8Tim Peters # at offset pos and with length siz. Add an index record to 1511d8d729af80c7d45161f20a40428d26698c11af8Tim Peters # the in-memory index dict, and append one to the directory file. 
1527dfd5701b20c3566166e148f77591a2912164221Tim Peters def _addkey(self, key, pos_and_siz_pair): 1537dfd5701b20c3566166e148f77591a2912164221Tim Peters self._index[key] = pos_and_siz_pair 1542c8373bc234039fff699df9c28d2cb74efd6a37cFred Drake f = _open(self._dirfile, 'a', self._mode) 1557dfd5701b20c3566166e148f77591a2912164221Tim Peters f.write("%r, %r\n" % (key, pos_and_siz_pair)) 15688869f9787cd4ceb2298e4b13980beb057687824Tim Peters f.close() 15788869f9787cd4ceb2298e4b13980beb057687824Tim Peters 15888869f9787cd4ceb2298e4b13980beb057687824Tim Peters def __setitem__(self, key, val): 15988869f9787cd4ceb2298e4b13980beb057687824Tim Peters if not type(key) == type('') == type(val): 16088869f9787cd4ceb2298e4b13980beb057687824Tim Peters raise TypeError, "keys and values must be strings" 1617dfd5701b20c3566166e148f77591a2912164221Tim Peters if key not in self._index: 1627dfd5701b20c3566166e148f77591a2912164221Tim Peters self._addkey(key, self._addval(val)) 16388869f9787cd4ceb2298e4b13980beb057687824Tim Peters else: 1647dfd5701b20c3566166e148f77591a2912164221Tim Peters # See whether the new value is small enough to fit in the 1657dfd5701b20c3566166e148f77591a2912164221Tim Peters # (padded) space currently occupied by the old value. 16688869f9787cd4ceb2298e4b13980beb057687824Tim Peters pos, siz = self._index[key] 167ef6573e52946c70778e29e3b33d61a8a0c6e4052Tim Peters oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE 168ef6573e52946c70778e29e3b33d61a8a0c6e4052Tim Peters newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE 16988869f9787cd4ceb2298e4b13980beb057687824Tim Peters if newblocks <= oldblocks: 1707dfd5701b20c3566166e148f77591a2912164221Tim Peters self._index[key] = self._setval(pos, val) 17188869f9787cd4ceb2298e4b13980beb057687824Tim Peters else: 1727dfd5701b20c3566166e148f77591a2912164221Tim Peters # The new value doesn't fit in the (padded) space used 1737dfd5701b20c3566166e148f77591a2912164221Tim Peters # by the old value. 
The blocks used by the old value are 1747dfd5701b20c3566166e148f77591a2912164221Tim Peters # forever lost. 1757dfd5701b20c3566166e148f77591a2912164221Tim Peters self._index[key] = self._addval(val) 1767dfd5701b20c3566166e148f77591a2912164221Tim Peters 1777dfd5701b20c3566166e148f77591a2912164221Tim Peters # Note that _index may be out of synch with the directory 1787dfd5701b20c3566166e148f77591a2912164221Tim Peters # file now: _setval() and _addval() don't update the directory 1791d8d729af80c7d45161f20a40428d26698c11af8Tim Peters # file. This also means that the on-disk directory and data 1801d8d729af80c7d45161f20a40428d26698c11af8Tim Peters # files are in a mutually inconsistent state, and they'll 1811d8d729af80c7d45161f20a40428d26698c11af8Tim Peters # remain that way until _commit() is called. Note that this 1821d8d729af80c7d45161f20a40428d26698c11af8Tim Peters # is a disaster (for the database) if the program crashes 1831d8d729af80c7d45161f20a40428d26698c11af8Tim Peters # (so that _commit() never gets called). 18488869f9787cd4ceb2298e4b13980beb057687824Tim Peters 18588869f9787cd4ceb2298e4b13980beb057687824Tim Peters def __delitem__(self, key): 1867dfd5701b20c3566166e148f77591a2912164221Tim Peters # The blocks used by the associated value are lost. 18788869f9787cd4ceb2298e4b13980beb057687824Tim Peters del self._index[key] 1887dfd5701b20c3566166e148f77591a2912164221Tim Peters # XXX It's unclear why we do a _commit() here (the code always 1897dfd5701b20c3566166e148f77591a2912164221Tim Peters # XXX has, so I'm not changing it). _setitem__ doesn't try to 1907dfd5701b20c3566166e148f77591a2912164221Tim Peters # XXX keep the directory file in synch. Why should we? Or 1917dfd5701b20c3566166e148f77591a2912164221Tim Peters # XXX why shouldn't __setitem__? 
19288869f9787cd4ceb2298e4b13980beb057687824Tim Peters self._commit() 19388869f9787cd4ceb2298e4b13980beb057687824Tim Peters 19488869f9787cd4ceb2298e4b13980beb057687824Tim Peters def keys(self): 19588869f9787cd4ceb2298e4b13980beb057687824Tim Peters return self._index.keys() 19688869f9787cd4ceb2298e4b13980beb057687824Tim Peters 19788869f9787cd4ceb2298e4b13980beb057687824Tim Peters def has_key(self, key): 198793d4b49361a010e4baa485ec77e3adc430f0236Raymond Hettinger return key in self._index 19988869f9787cd4ceb2298e4b13980beb057687824Tim Peters 200a7cc69e02ef9ec7014600911a66978898e545cf1Fred Drake def __contains__(self, key): 201793d4b49361a010e4baa485ec77e3adc430f0236Raymond Hettinger return key in self._index 202a7cc69e02ef9ec7014600911a66978898e545cf1Fred Drake 203a7cc69e02ef9ec7014600911a66978898e545cf1Fred Drake def iterkeys(self): 204a7cc69e02ef9ec7014600911a66978898e545cf1Fred Drake return self._index.iterkeys() 205a7cc69e02ef9ec7014600911a66978898e545cf1Fred Drake __iter__ = iterkeys 206a7cc69e02ef9ec7014600911a66978898e545cf1Fred Drake 20788869f9787cd4ceb2298e4b13980beb057687824Tim Peters def __len__(self): 20888869f9787cd4ceb2298e4b13980beb057687824Tim Peters return len(self._index) 20988869f9787cd4ceb2298e4b13980beb057687824Tim Peters 21088869f9787cd4ceb2298e4b13980beb057687824Tim Peters def close(self): 211ed9057083bcdcce37ddb66bc01e9b7fddb5ea206Anthony Baxter self._commit() 2127a6c733c3b8ee131d7860ecb0877837e412c2959Tim Peters self._index = self._datfile = self._dirfile = self._bakfile = None 2139f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossum 2147a6c733c3b8ee131d7860ecb0877837e412c2959Tim Peters __del__ = close 215e4418609f79b94b91bda2621b5e6f067fb6a31d5Tim Peters 216ed9057083bcdcce37ddb66bc01e9b7fddb5ea206Anthony Baxter 2179f824a7984fcd98d00dfc795e6a1d95317fd4b93Guido van Rossum 2182c8373bc234039fff699df9c28d2cb74efd6a37cFred Drakedef open(file, flag=None, mode=0666): 219aef22fb9cdf31fb7f0afc28ad049f08a89e23761Raymond Hettinger """Open the 
database file, filename, and return corresponding object. 220aef22fb9cdf31fb7f0afc28ad049f08a89e23761Raymond Hettinger 221aef22fb9cdf31fb7f0afc28ad049f08a89e23761Raymond Hettinger The flag argument, used to control how the database is opened in the 222aef22fb9cdf31fb7f0afc28ad049f08a89e23761Raymond Hettinger other DBM implementations, is ignored in the dumbdbm module; the 223aef22fb9cdf31fb7f0afc28ad049f08a89e23761Raymond Hettinger database is always opened for update, and will be created if it does 224aef22fb9cdf31fb7f0afc28ad049f08a89e23761Raymond Hettinger not exist. 225aef22fb9cdf31fb7f0afc28ad049f08a89e23761Raymond Hettinger 226aef22fb9cdf31fb7f0afc28ad049f08a89e23761Raymond Hettinger The optional mode argument is the UNIX mode of the file, used only when 227aef22fb9cdf31fb7f0afc28ad049f08a89e23761Raymond Hettinger the database has to be created. It defaults to octal code 0666 (and 228aef22fb9cdf31fb7f0afc28ad049f08a89e23761Raymond Hettinger will be modified by the prevailing umask). 229aef22fb9cdf31fb7f0afc28ad049f08a89e23761Raymond Hettinger 230aef22fb9cdf31fb7f0afc28ad049f08a89e23761Raymond Hettinger """ 231f421e81e4112809380542af378575756247eba00Neal Norwitz # flag argument is currently ignored 2322c8373bc234039fff699df9c28d2cb74efd6a37cFred Drake return _Database(file, mode) 233