dumbdbm.py revision f421e81e4112809380542af378575756247eba00
1"""A dumb and slow but simple dbm clone.
2
3For database spam, spam.dir contains the index (a text file),
4spam.bak *may* contain a backup of the index (also a text file),
5while spam.dat contains the data (a binary file).
6
7XXX TO DO:
8
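- seems to contain a bug when updating (note: replacing the value of an
existing key changes only the in-memory index; the .dir file is not
rewritten until the next delete or close)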
10
11- reclaim free space (currently, space once occupied by deleted or expanded
12items is never reused)
13
14- support concurrent access (currently, if two processes take turns making
15updates, they can mess up the index)
16
17- support efficient access to large databases (currently, the whole index
18is read when the database is opened, and some updates rewrite the whole index)
19
20- support opening for read-only (flag = 'm')
21
22"""
23
24import os as _os
25import __builtin__
26
# Keep a reference to the builtin open(); this module defines its own
# open() function further down, which would otherwise hide it.
_open = __builtin__.open

# Values appended to the data file start at a multiple of this many bytes.
_BLOCKSIZE = 512

error = IOError                         # For anydbm
32
class _Database:
    """Mapping of string keys to string values stored in plain files.

    For a base name ``spam`` three files are used: ``spam.dat`` holds the
    values, ``spam.dir`` holds one text line per key of the form
    ``'key', (pos, siz)`` and ``spam.bak`` holds the previous copy of the
    index.  The whole index is kept in memory in ``self._index``.

    New keys are appended to the index file immediately.  Replacing the
    value of an existing key only updates the in-memory index; the index
    file catches up at the next _commit() (triggered by deleting a key,
    close() or object destruction).
    """

    def __init__(self, file, mode):
        """Open (creating if necessary) the database with base name *file*.

        *mode* is the permission mode used when a file has to be created.
        """
        # Set first so that __del__ is harmless if anything below fails.
        self._index = None
        self._mode = mode
        self._dirfile = file + _os.extsep + 'dir'
        self._datfile = file + _os.extsep + 'dat'
        self._bakfile = file + _os.extsep + 'bak'
        # Mod by Jack: create data file if needed
        try:
            f = _open(self._datfile, 'r')
        except IOError:
            f = self._create(self._datfile, 'w')
        f.close()
        self._update()

    def _create(self, path, how):
        """Open *path* for writing ('w') or appending ('a'), creating it
        with permission bits self._mode if it does not exist yet.

        The builtin open() cannot do this: its third argument is a buffer
        size, not a permission mode.  Going through os.open() lets the
        operating system apply the mode (and the umask) on creation only.
        Raises IOError on failure, like the builtin open() would.
        """
        flags = _os.O_WRONLY | _os.O_CREAT
        if how == 'a':
            flags = flags | _os.O_APPEND
        else:
            flags = flags | _os.O_TRUNC
        try:
            fd = _os.open(path, flags, self._mode)
        except _os.error:
            # Callers (and anydbm) catch this module's `error`, which is
            # IOError, so translate.  exc_info() is used instead of binding
            # the exception in the except clause to keep the syntax
            # independent of the Python version.
            import sys
            why = sys.exc_info()[1]
            raise IOError(why.errno, why.strerror, path)
        return _os.fdopen(fd, how)

    def _update(self):
        """(Re)load self._index from the index file.

        A missing index file yields an empty index.  Reading stops at the
        first empty line.
        """
        self._index = {}
        try:
            f = _open(self._dirfile)
        except IOError:
            return
        try:
            while 1:
                line = f.readline().rstrip()
                if not line:
                    break
                # SECURITY: eval() executes whatever the index file
                # contains.  Only open databases from trusted sources.
                key, (pos, siz) = eval(line)
                self._index[key] = (pos, siz)
        finally:
            f.close()

    def _commit(self):
        """Rewrite the index file from self._index, keeping the previous
        index file as the backup file.  Does nothing once closed."""
        if self._index is None:
            return
        # Both steps are best effort: the backup or the index file may
        # legitimately not exist yet.
        try:
            _os.unlink(self._bakfile)
        except _os.error:
            pass
        try:
            _os.rename(self._dirfile, self._bakfile)
        except _os.error:
            pass
        f = self._create(self._dirfile, 'w')
        try:
            for key, (pos, siz) in self._index.items():
                f.write("%s, (%s, %s)\n" % (repr(key), repr(pos), repr(siz)))
        finally:
            f.close()

    def __getitem__(self, key):
        """Return the value stored for *key*; raise KeyError if absent."""
        pos, siz = self._index[key]     # may raise KeyError
        f = _open(self._datfile, 'rb')
        try:
            f.seek(pos)
            return f.read(siz)
        finally:
            f.close()

    def _addval(self, val):
        """Append *val* to the data file at the next block boundary.

        Returns the (pos, siz) pair describing where it was written.
        """
        f = _open(self._datfile, 'rb+')
        try:
            f.seek(0, 2)
            end = int(f.tell())
            pos = ((end + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
            # Pad with explicit NUL bytes up to the block boundary; simply
            # seeking past the end of the file does not work under the
            # MW compiler.
            f.write('\0' * (pos - end))
            f.write(val)
        finally:
            f.close()
        return (pos, len(val))

    def _setval(self, pos, val):
        """Overwrite the data file at offset *pos* with *val*.

        Returns the (pos, siz) pair describing the written value.
        """
        f = _open(self._datfile, 'rb+')
        try:
            f.seek(pos)
            f.write(val)
        finally:
            f.close()
        return (pos, len(val))

    def _addkey(self, key, pos_and_siz):
        """Record *key* -> (pos, siz) in memory and append it to the index
        file."""
        pos, siz = pos_and_siz
        self._index[key] = (pos, siz)
        f = self._create(self._dirfile, 'a')
        try:
            f.write("%s, (%s, %s)\n" % (repr(key), repr(pos), repr(siz)))
        finally:
            f.close()

    def __setitem__(self, key, val):
        """Store *val* under *key*; both must be plain strings.

        Raises TypeError otherwise.
        """
        if not type(key) == type('') == type(val):
            raise TypeError("keys and values must be strings")
        if key not in self._index:
            self._addkey(key, self._addval(val))
            return
        # Existing key: reuse the old location when the new value needs no
        # more blocks than the old one, otherwise append (the old blocks are
        # never reclaimed).  Floor division keeps the block counts integral.
        pos, siz = self._index[key]
        oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
        newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
        if newblocks <= oldblocks:
            self._index[key] = self._setval(pos, val)
        else:
            self._index[key] = self._addval(val)
        # NOTE: only the in-memory index was updated here; the index file
        # is stale until the next _commit().

    def __delitem__(self, key):
        """Remove *key* (KeyError if absent) and rewrite the index file."""
        del self._index[key]
        self._commit()

    def keys(self):
        """Return the keys currently in the index."""
        return self._index.keys()

    def has_key(self, key):
        """Return true if *key* is in the database."""
        return key in self._index

    def __contains__(self, key):
        return key in self._index

    def iterkeys(self):
        """Return an iterator over the keys."""
        return iter(self._index)
    __iter__ = iterkeys

    def __len__(self):
        return len(self._index)

    def close(self):
        """Write the index out and mark the object closed.

        Calling close() again is a no-op.
        """
        if self._index is None:
            return
        self._commit()
        self._index = None
        self._datfile = self._dirfile = self._bakfile = None

    def __del__(self):
        # getattr: the instance may not have been fully initialised.
        if getattr(self, '_index', None) is not None:
            self._commit()
154
155
156def open(file, flag=None, mode=0666):
157    """Open the database file, filename, and return corresponding object.
158
159    The flag argument, used to control how the database is opened in the
160    other DBM implementations, is ignored in the dumbdbm module; the
161    database is always opened for update, and will be created if it does
162    not exist.
163
164    The optional mode argument is the UNIX mode of the file, used only when
165    the database has to be created.  It defaults to octal code 0666 (and
166    will be modified by the prevailing umask).
167
168    """
169    # flag argument is currently ignored
170    return _Database(file, mode)
171