dumbdbm.py revision 2dd1ed69b425d2f2ac8152548c7581aa1f01216d
1"""A dumb and slow but simple dbm clone. 2 3For database spam, spam.dir contains the index (a text file), 4spam.bak *may* contain a backup of the index (also a text file), 5while spam.dat contains the data (a binary file). 6 7XXX TO DO: 8 9- seems to contain a bug when updating... 10 11- reclaim free space (currently, space once occupied by deleted or expanded 12items is never reused) 13 14- support concurrent access (currently, if two processes take turns making 15updates, they can mess up the index) 16 17- support efficient access to large databases (currently, the whole index 18is read when the database is opened, and some updates rewrite the whole index) 19 20- support opening for read-only (flag = 'm') 21 22""" 23 24import os as _os 25import __builtin__ 26import UserDict 27 28_open = __builtin__.open 29 30_BLOCKSIZE = 512 31 32error = IOError # For anydbm 33 34class _Database(UserDict.DictMixin): 35 36 def __init__(self, file, mode): 37 self._mode = mode 38 self._dirfile = file + _os.extsep + 'dir' 39 self._datfile = file + _os.extsep + 'dat' 40 self._bakfile = file + _os.extsep + 'bak' 41 # Mod by Jack: create data file if needed 42 try: 43 f = _open(self._datfile, 'r') 44 except IOError: 45 f = _open(self._datfile, 'w', self._mode) 46 f.close() 47 self._update() 48 49 def _update(self): 50 self._index = {} 51 try: 52 f = _open(self._dirfile) 53 except IOError: 54 pass 55 else: 56 while 1: 57 line = f.readline().rstrip() 58 if not line: break 59 key, (pos, siz) = eval(line) 60 self._index[key] = (pos, siz) 61 f.close() 62 63 def _commit(self): 64 try: _os.unlink(self._bakfile) 65 except _os.error: pass 66 try: _os.rename(self._dirfile, self._bakfile) 67 except _os.error: pass 68 f = _open(self._dirfile, 'w', self._mode) 69 for key, (pos, siz) in self._index.items(): 70 f.write("%s, (%s, %s)\n" % (`key`, `pos`, `siz`)) 71 f.close() 72 73 def __getitem__(self, key): 74 pos, siz = self._index[key] # may raise KeyError 75 f = _open(self._datfile, 'rb') 76 f.seek(pos) 77 dat = f.read(siz) 78 f.close() 79 return dat 80 81 def _addval(self, val): 82 f = _open(self._datfile, 'rb+') 83 f.seek(0, 2) 84 pos = int(f.tell()) 85## Does not work under MW compiler 86## pos = ((pos + _BLOCKSIZE - 1) / _BLOCKSIZE) * _BLOCKSIZE 87## f.seek(pos) 88 npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE 89 f.write('\0'*(npos-pos)) 90 pos = npos 91 92 f.write(val) 93 f.close() 94 return (pos, len(val)) 95 96 def _setval(self, pos, val): 97 f = _open(self._datfile, 'rb+') 98 f.seek(pos) 99 f.write(val) 100 f.close() 101 return (pos, len(val)) 102 103 def _addkey(self, key, (pos, siz)): 104 self._index[key] = (pos, siz) 105 f = _open(self._dirfile, 'a', self._mode) 106 f.write("%s, (%s, %s)\n" % (`key`, `pos`, `siz`)) 107 f.close() 108 109 def __setitem__(self, key, val): 110 if not type(key) == type('') == type(val): 111 raise TypeError, "keys and values must be strings" 112 if not key in self._index: 113 (pos, siz) = self._addval(val) 114 self._addkey(key, (pos, siz)) 115 else: 116 pos, siz = self._index[key] 117 oldblocks = (siz + _BLOCKSIZE - 1) / _BLOCKSIZE 118 newblocks = (len(val) + _BLOCKSIZE - 1) / _BLOCKSIZE 119 if newblocks <= oldblocks: 120 pos, siz = self._setval(pos, val) 121 self._index[key] = pos, siz 122 else: 123 pos, siz = self._addval(val) 124 self._index[key] = pos, siz 125 126 def __delitem__(self, key): 127 del self._index[key] 128 self._commit() 129 130 def keys(self): 131 return self._index.keys() 132 133 def has_key(self, key): 134 return key in self._index 135 136 def __contains__(self, key): 137 return key in self._index 138 139 def iterkeys(self): 140 return self._index.iterkeys() 141 __iter__ = iterkeys 142 143 def __len__(self): 144 return len(self._index) 145 146 def close(self): 147 self._commit() 148 self._index = None 149 self._datfile = self._dirfile = self._bakfile = None 150 151 def __del__(self): 152 if self._index is not None: 153 self._commit() 154 155 156 157def open(file, flag=None, mode=0666): 158 """Open the database file, filename, and return corresponding object. 159 160 The flag argument, used to control how the database is opened in the 161 other DBM implementations, is ignored in the dumbdbm module; the 162 database is always opened for update, and will be created if it does 163 not exist. 164 165 The optional mode argument is the UNIX mode of the file, used only when 166 the database has to be created. It defaults to octal code 0666 (and 167 will be modified by the prevailing umask). 168 169 """ 170 # flag argument is currently ignored 171 return _Database(file, mode) 172