dumbdbm.py revision 78349072f7c8f2d3da6f552665f9f381930c3c42
1"""A dumb and slow but simple dbm clone.
2
3For database spam, spam.dir contains the index (a text file),
4spam.bak *may* contain a backup of the index (also a text file),
5while spam.dat contains the data (a binary file).
6
7XXX TO DO:
8
9- seems to contain a bug when updating...
10
11- reclaim free space (currently, space once occupied by deleted or expanded
12items is never reused)
13
14- support concurrent access (currently, if two processes take turns making
15updates, they can mess up the index)
16
17- support efficient access to large databases (currently, the whole index
18is read when the database is opened, and some updates rewrite the whole index)
19
20- support opening for read-only (flag = 'm')
21
22"""
23
# The os module, bound to a private name.
_os = __import__('os')
import __builtin__

# Keep a reference to the builtin open(); this module defines its own
# open() function further down, which would otherwise hide it.
_open = __builtin__.open

# Values appended to the data file start at a multiple of this many bytes,
# and an updated value is rewritten in place only if it needs no more
# blocks of this size than the value it replaces.
_BLOCKSIZE = 512

error = IOError                         # For anydbm
32
class _Database:
    """Dictionary-like string store kept in three files with a common base.

    <file>.dat holds the values; every value appended to it starts at an
    offset that is a multiple of _BLOCKSIZE.  <file>.dir is the index, a
    text file with one line per entry of the form

        'key', (pos, siz)

    where a later line for the same key overrides an earlier one.
    <file>.bak is the previous index file, kept whenever the index is
    rewritten by _commit().

    The whole index is held in memory in self._index, a dictionary mapping
    each key to its (pos, siz) pair.  Keys and values must be strings.
    """

    def __init__(self, file):
        """Open the database with base name *file*.

        The data file is created if it does not exist, and the index is
        loaded from the directory file if there is one.
        """
        self._dirfile = file + '.dir'
        self._datfile = file + '.dat'
        self._bakfile = file + '.bak'
        # Mod by Jack: create data file if needed
        try:
            f = _open(self._datfile, 'r')
        except IOError:
            f = _open(self._datfile, 'w')
        f.close()
        self._update()

    def _update(self):
        """Rebuild self._index from the directory file.

        A missing or unreadable directory file yields an empty index.
        Reading stops at the first empty line or at end of file.
        """
        self._index = {}
        try:
            f = _open(self._dirfile)
        except IOError:
            # No index on disk yet: start out empty.
            return
        try:
            while 1:
                line = f.readline().rstrip()
                if not line: break
                # NOTE(security): eval() runs whatever expression the index
                # file contains.  Only open databases from trusted sources.
                key, (pos, siz) = eval(line)
                # Later lines win, so appended entries supersede older ones.
                self._index[key] = (pos, siz)
        finally:
            f.close()

    def _dirline(self, key, pos, siz):
        """Return the index-file line describing one entry."""
        return "%s, (%s, %s)\n" % (repr(key), repr(pos), repr(siz))

    def _commit(self):
        """Rewrite the directory file from self._index.

        The previous directory file, if any, is kept as the .bak file.
        """
        # Both steps are best effort: there may be no old backup to remove
        # and no directory file to turn into the new backup.
        try: _os.unlink(self._bakfile)
        except _os.error: pass
        try: _os.rename(self._dirfile, self._bakfile)
        except _os.error: pass
        f = _open(self._dirfile, 'w')
        try:
            for key, (pos, siz) in self._index.items():
                f.write(self._dirline(key, pos, siz))
        finally:
            f.close()

    def __getitem__(self, key):
        """Return the value stored under *key*; raise KeyError if absent."""
        pos, siz = self._index[key]     # may raise KeyError
        f = _open(self._datfile, 'rb')
        try:
            f.seek(pos)
            return f.read(siz)
        finally:
            f.close()

    def _addval(self, val):
        """Append *val* to the data file on a block boundary.

        Returns the (pos, siz) pair locating the value.
        """
        f = _open(self._datfile, 'rb+')
        try:
            f.seek(0, 2)
            end = int(f.tell())
            # Round the end of the file up to the next block boundary and
            # fill the gap with explicit NUL bytes.  Seeking past the end
            # of the file instead was noted not to work under the MW
            # compiler.
            pos = ((end + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
            f.write('\0'*(pos-end))
            f.write(val)
        finally:
            f.close()
        return (pos, len(val))

    def _setval(self, pos, val):
        """Write *val* into the data file at offset *pos*.

        Returns the (pos, siz) pair locating the value.
        """
        f = _open(self._datfile, 'rb+')
        try:
            f.seek(pos)
            f.write(val)
        finally:
            f.close()
        return (pos, len(val))

    def _addkey(self, key, pos_and_siz):
        """Record *key* in the index and append its entry to the directory file.

        *pos_and_siz* is the (pos, siz) pair locating the key's value.
        """
        pos, siz = pos_and_siz
        self._index[key] = (pos, siz)
        f = _open(self._dirfile, 'a')
        try:
            f.write(self._dirline(key, pos, siz))
        finally:
            f.close()

    def __setitem__(self, key, val):
        """Store *val* under *key*.

        Raises TypeError unless both key and value are strings.  A value
        replacing an existing one is written in place when it needs no more
        blocks than the old value; otherwise it is appended and the space
        used by the old value is not reclaimed.
        """
        if not type(key) == type('') == type(val):
            raise TypeError("keys and values must be strings")
        if key not in self._index:
            self._addkey(key, self._addval(val))
            return
        old = self._index[key]
        pos, siz = old
        oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
        newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
        if newblocks <= oldblocks:
            new = self._setval(pos, val)
        else:
            new = self._addval(val)
        if new != old:
            # Persist the new location/size as well.  Without this the
            # directory file keeps describing the old value, and reopening
            # the database would return stale or truncated data.  The
            # appended line overrides the earlier one when the index is
            # next loaded.
            self._addkey(key, new)

    def __delitem__(self, key):
        """Remove *key* (KeyError if absent) and rewrite the directory file."""
        del self._index[key]
        self._commit()

    def keys(self):
        """Return the keys currently in the index."""
        return self._index.keys()

    def has_key(self, key):
        """Return true if *key* is in the index."""
        return key in self._index

    def __len__(self):
        """Return the number of keys in the index."""
        return len(self._index)

    def close(self):
        """Discard the in-memory index and forget the file names.

        Nothing needs flushing here: every change to the index has already
        been written to the directory file.  The object must not be used
        after this call.
        """
        self._index = None
        self._datfile = self._dirfile = self._bakfile = None
140
141
def open(file, flag = None, mode = None):
    """Return a _Database for the database with base name *file*.

    The '.dir', '.dat' and '.bak' suffixes are appended to *file* by
    _Database itself.  *flag* and *mode* are accepted but currently
    ignored.
    """
    database = _Database(file)
    return database
145