# dumbdbm.py revision 2dd1ed69b425d2f2ac8152548c7581aa1f01216d
"""A dumb and slow but simple dbm clone.

For a database named "spam", spam.dir contains the index (a text file),
spam.bak *may* contain a backup of the index (also a text file),
while spam.dat contains the data (a binary file).

XXX TO DO:

- seems to contain a bug when updating...

- reclaim free space (currently, space once occupied by deleted or expanded
items is never reused)

- support concurrent access (currently, if two processes take turns making
updates, they can mess up the index)

- support efficient access to large databases (currently, the whole index
is read when the database is opened, and some updates rewrite the whole index)

- support opening for read-only (flag = 'm')

"""
23
24import os as _os
25import __builtin__
26import UserDict
27
28_open = __builtin__.open
29
30_BLOCKSIZE = 512
31
32error = IOError                         # For anydbm
33
class _Database(UserDict.DictMixin):
    """Dictionary-like database keeping string keys and values in plain files.

    The index, a mapping of key -> (pos, siz), is held in memory in
    self._index and mirrored in the text file <file>.dir, one
    "key, (pos, siz)" line per entry.  The values themselves live in the
    binary file <file>.dat, each starting at a multiple of _BLOCKSIZE.
    <file>.bak receives the previous index file whenever the index file
    is rewritten by _commit().
    """

    def __init__(self, file, mode):
        """Open the database with base name file, creating it if needed.

        file is the path without extension; mode holds the permission
        bits requested for files this object has to create.
        """
        # Set before anything can fail, so that __del__ never sees a
        # half-built object and never commits an index that was not loaded.
        self._index = None
        self._mode = mode
        self._dirfile = file + _os.extsep + 'dir'
        self._datfile = file + _os.extsep + 'dat'
        self._bakfile = file + _os.extsep + 'bak'
        # Mod by Jack: create data file if needed
        try:
            f = _open(self._datfile, 'r')
        except IOError:
            f = self._open_for_write(self._datfile, 'w')
        f.close()
        self._update()

    def _open_for_write(self, path, how):
        """Open path for text output with the built-in open().

        how is 'w' or 'a'.  If path does not exist yet it is created
        beforehand through os.open(), so that the permission bits in
        self._mode (as filtered by the umask) are applied.  The third
        argument of the built-in open() is a buffer size, not a permission
        mode, so it cannot be used for that purpose.
        """
        if not _os.path.exists(path):
            try:
                _os.close(_os.open(path, _os.O_WRONLY | _os.O_CREAT,
                                   self._mode))
            except _os.error:
                # Best effort only: the open() below reports any real
                # problem as the IOError callers already expect.
                pass
        return _open(path, how)

    def _update(self):
        """Load the index file into self._index (empty if it is missing)."""
        # Build into a local dict so that a failure part-way through does
        # not leave a truncated index behind to be committed later.
        index = {}
        try:
            f = _open(self._dirfile)
        except IOError:
            pass
        else:
            try:
                for line in f:
                    line = line.rstrip()
                    if not line:
                        break
                    # NOTE(review): eval() executes whatever expression the
                    # index file contains; only open trusted databases.
                    key, (pos, siz) = eval(line)
                    index[key] = (pos, siz)
            finally:
                f.close()
        self._index = index

    def _commit(self):
        """Rewrite the index file from self._index, keeping a .bak copy.

        Does nothing once the database has been closed.
        """
        if self._index is None:
            return                      # already closed; nothing to write
        # Remove the old backup first: rename() may refuse to overwrite it.
        try:
            _os.unlink(self._bakfile)
        except _os.error:
            pass
        try:
            _os.rename(self._dirfile, self._bakfile)
        except _os.error:
            pass
        f = self._open_for_write(self._dirfile, 'w')
        try:
            for key, (pos, siz) in self._index.items():
                f.write("%r, (%r, %r)\n" % (key, pos, siz))
        finally:
            f.close()

    def __getitem__(self, key):
        """Return the value stored under key; raise KeyError if absent."""
        pos, siz = self._index[key]     # may raise KeyError
        f = _open(self._datfile, 'rb')
        try:
            f.seek(pos)
            return f.read(siz)
        finally:
            f.close()

    def _addval(self, val):
        """Append val to the data file at the next block boundary.

        Returns the (pos, siz) pair describing where val was stored.
        """
        f = _open(self._datfile, 'rb+')
        try:
            f.seek(0, 2)
            pos = int(f.tell())
            npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
            # Pad with NUL bytes instead of seeking past the end of the
            # file: seeking did not work under the MW compiler.
            f.write('\0' * (npos - pos))
            f.write(val)
        finally:
            f.close()
        return (npos, len(val))

    def _setval(self, pos, val):
        """Overwrite the data file at offset pos with val.

        Returns the (pos, siz) pair describing the stored value.
        """
        f = _open(self._datfile, 'rb+')
        try:
            f.seek(pos)
            f.write(val)
        finally:
            f.close()
        return (pos, len(val))

    def _addkey(self, key, pos_and_siz):
        """Record key -> (pos, siz) in memory and append it to the index file."""
        pos, siz = pos_and_siz
        self._index[key] = (pos, siz)
        f = self._open_for_write(self._dirfile, 'a')
        try:
            f.write("%r, (%r, %r)\n" % (key, pos, siz))
        finally:
            f.close()

    def __setitem__(self, key, val):
        """Store val under key.

        Raises TypeError unless both key and val are plain strings.
        """
        if not type(key) == type('') == type(val):
            raise TypeError("keys and values must be strings")
        if key not in self._index:
            self._addkey(key, self._addval(val))
        else:
            # Reuse the old location when the new value needs no more
            # blocks than the old one; otherwise append it, which leaves
            # the old blocks unused.
            pos, siz = self._index[key]
            oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
            newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
            if newblocks <= oldblocks:
                self._index[key] = self._setval(pos, val)
            else:
                self._index[key] = self._addval(val)
            # Only the in-memory index is updated here; the index file is
            # brought up to date by the next _commit().

    def __delitem__(self, key):
        """Remove key (KeyError if absent) and rewrite the index file."""
        del self._index[key]
        self._commit()

    def keys(self):
        """Return a list of all keys."""
        return self._index.keys()

    def has_key(self, key):
        """Return true if key is in the database."""
        return key in self._index

    def __contains__(self, key):
        return key in self._index

    def iterkeys(self):
        """Return an iterator over the keys."""
        return self._index.iterkeys()
    __iter__ = iterkeys

    def __len__(self):
        return len(self._index)

    def close(self):
        """Write the index file and release the database.

        Calling close() again afterwards is harmless.
        """
        self._commit()
        self._index = None
        self._datfile = self._dirfile = self._bakfile = None

    def __del__(self):
        # Commit pending index changes unless close() already ran or
        # __init__ never finished loading the index.
        if self._index is not None:
            self._commit()
155
156
157def open(file, flag=None, mode=0666):
158    """Open the database file, filename, and return corresponding object.
159
160    The flag argument, used to control how the database is opened in the
161    other DBM implementations, is ignored in the dumbdbm module; the
162    database is always opened for update, and will be created if it does
163    not exist.
164
165    The optional mode argument is the UNIX mode of the file, used only when
166    the database has to be created.  It defaults to octal code 0666 (and
167    will be modified by the prevailing umask).
168
169    """
170    # flag argument is currently ignored
171    return _Database(file, mode)
172