13eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o/*
23eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'oURL: svn://svnanon.samba.org/samba/branches/SAMBA_4_0/source/lib/tdb/common
33eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'oRev: 23590
43eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'oLast Changed Date: 2007-06-22 13:36:10 -0400 (Fri, 22 Jun 2007)
5106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
6efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o /*
7106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   trivial database library - standalone version
8106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
9106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   Copyright (C) Andrew Tridgell              1999-2005
10106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   Copyright (C) Jeremy Allison               2000-2006
11106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   Copyright (C) Paul `Rusty' Russell         2000
12efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
13106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o     ** NOTE! The following LGPL license applies to the tdb
14106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o     ** library. This does NOT imply that all of Samba is released
15106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o     ** under the LGPL
16efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
17106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   This library is free software; you can redistribute it and/or
18106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   modify it under the terms of the GNU Lesser General Public
19106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   License as published by the Free Software Foundation; either
20106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   version 2 of the License, or (at your option) any later version.
21106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
22106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   This library is distributed in the hope that it will be useful,
23106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   but WITHOUT ANY WARRANTY; without even the implied warranty of
24106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
25106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   Lesser General Public License for more details.
26106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
27106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   You should have received a copy of the GNU Lesser General Public
28106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   License along with this library; if not, write to the Free Software
29106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
30106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
31106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
32106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#ifdef CONFIG_STAND_ALONE
33106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#define HAVE_MMAP
34106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#define HAVE_STRDUP
35106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#define HAVE_SYS_MMAN_H
36106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#define HAVE_UTIME_H
37106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#define HAVE_UTIME
38106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#endif
39ebabf2ad6d19af5c674b624bafe619dedbc94403Theodore Ts'o#define _XOPEN_SOURCE 600
40106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
41106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#include <unistd.h>
42106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#include <stdio.h>
43106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#include <stdlib.h>
44106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#include <stdarg.h>
45106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#include <stddef.h>
46106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#include <errno.h>
47106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#include <string.h>
48e7cc6f7d0b86d76963058ef099ca553da29d2c3fChristophe GRENIER#ifdef HAVE_SYS_SELECT_H
49106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#include <sys/select.h>
50e7cc6f7d0b86d76963058ef099ca553da29d2c3fChristophe GRENIER#endif
51106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#include <sys/time.h>
52106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#include <sys/types.h>
53106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#include <time.h>
54106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#ifdef HAVE_UTIME_H
55106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#include <utime.h>
56106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#endif
57106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#include <sys/stat.h>
58106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#include <sys/file.h>
59106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#include <fcntl.h>
60106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
61106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#ifdef HAVE_SYS_MMAN_H
62106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#include <sys/mman.h>
63106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#endif
64106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
65106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#ifndef MAP_FILE
66106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#define MAP_FILE 0
67106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#endif
68106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
69106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#ifndef MAP_FAILED
70106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#define MAP_FAILED ((void *)-1)
71106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#endif
72106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
73106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#ifndef HAVE_STRDUP
74106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#define strdup rep_strdup
/*
 * Replacement for strdup() on platforms that do not provide one.
 *
 * Returns a malloc()ed, NUL-terminated copy of s that the caller must
 * free(), or NULL when s is NULL or the allocation fails.
 */
static char *rep_strdup(const char *s)
{
	char *ret;
	size_t length;

	if (!s)
		return NULL;

	/* Always measure the source string: the length must never be
	 * consulted before it has been assigned. */
	length = strlen(s);

	ret = malloc(length + 1);
	if (ret) {
		/* length + 1 so the terminating NUL is copied as well */
		memcpy(ret, s, length + 1);
	}
	return ret;
}
92106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#endif
93106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
#ifndef PRINTF_ATTRIBUTE
/* Enable the attribute for gcc 3.1 and every later release.  The major
 * and minor numbers are compared together so that x.0 releases (for
 * example 4.0) are not rejected just because their minor number is 0. */
#if defined(__GNUC__) && \
	((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
/** Use gcc attribute to check printf fns.  a1 is the 1-based index of
 * the parameter containing the format, and a2 the index of the first
 * argument. Note that some gcc 2.x versions don't handle this
 * properly **/
#define PRINTF_ATTRIBUTE(a1, a2) __attribute__ ((format (__printf__, a1, a2)))
#else
/* No usable format attribute: the annotation expands to nothing */
#define PRINTF_ATTRIBUTE(a1, a2)
#endif
#endif
105106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1063eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'otypedef int bool;
1073eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o
108106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#include "tdb.h"
109106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
110e0ed7404719a9ddd2ba427a80db5365c8bad18c0JP Abgrallstatic TDB_DATA tdb_null;
111e0ed7404719a9ddd2ba427a80db5365c8bad18c0JP Abgrall
/* Unsigned integer type used for the fields of the structures below;
 * the build may pre-define u32 to override the default. */
#ifndef u32
#define u32 unsigned
#endif

typedef u32 tdb_len_t;	/* a length in bytes */
typedef u32 tdb_off_t;	/* a byte offset within the database */

/* Fallback for toolchains whose headers do not supply offsetof.  The
 * result is cast to size_t so the value is not truncated on platforms
 * where pointers are wider than unsigned int. */
#ifndef offsetof
#define offsetof(t,f) ((size_t)&((t *)0)->f)
#endif
122106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
/* Identification and magic values */
#define TDB_MAGIC_FOOD "TDB file\n"
#define TDB_VERSION (0x26011967 + 6)
#define TDB_MAGIC (0x26011999U)
#define TDB_FREE_MAGIC (~TDB_MAGIC)
#define TDB_DEAD_MAGIC (0xFEE1DEAD)
#define TDB_RECOVERY_MAGIC (0xf53bc0e7U)

/* Sizes and layout */
#define TDB_ALIGNMENT 4
#define MIN_REC_SIZE (2*sizeof(struct list_struct) + TDB_ALIGNMENT)
#define DEFAULT_HASH_SIZE 131
#define FREELIST_TOP (sizeof(struct tdb_header))

/* Round x up to a multiple of a (a must be a power of two) */
#define TDB_ALIGN(x,a) (((x) + (a)-1) & ~((a)-1))

/* Reverse the byte order of a 32-bit unsigned value */
#define TDB_BYTEREV(x) ((((x)&0xff)<<24)|(((x)&0xFF00)<<8)|(((x)>>8)&0xFF00)|((x)>>24))

/* Record magic checks; r points at a struct list_struct */
#define TDB_DEAD(r) ((r)->magic == TDB_DEAD_MAGIC)
#define TDB_BAD_MAGIC(r) ((r)->magic != TDB_MAGIC && !TDB_DEAD(r))

/* Hash table geometry.  TDB_HASH_TOP and TDB_DATA_START expand BUCKET(),
 * which reads the hash_size of the "tdb" variable in scope at the call
 * site.  Macro arguments are parenthesized so that expressions such as
 * "&ctx" or "a + b" expand as intended. */
#define TDB_HASH_TOP(hash) (FREELIST_TOP + (BUCKET(hash)+1)*sizeof(tdb_off_t))
#define TDB_HASHTABLE_SIZE(tdb) (((tdb)->header.hash_size+1)*sizeof(tdb_off_t))
#define TDB_DATA_START(hash_size) TDB_HASH_TOP((hash_size)-1)

/* Offsets of individual fields inside struct tdb_header */
#define TDB_RECOVERY_HEAD offsetof(struct tdb_header, recovery_start)
#define TDB_SEQNUM_OFS    offsetof(struct tdb_header, sequence_number)

/* Padding patterns: one byte, and the same byte repeated four times */
#define TDB_PAD_BYTE 0x42
#define TDB_PAD_U32  0x42424242
144106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
/*
 * Emit a message through the context's log callback.
 *
 * The call site must have a variable named "tdb" in scope, and the
 * whole printf-style argument list has to be wrapped in a second pair
 * of parentheses: TDB_LOG((tdb, level, "fmt", args...)).
 */
#define TDB_LOG(x) tdb->log.log_fn x

/* lock offsets */
#define GLOBAL_LOCK      0
#define ACTIVE_LOCK      4
#define TRANSACTION_LOCK 8

/* Release x when it is non-NULL and reset it to NULL afterwards, so a
 * second SAFE_FREE on the same pointer does nothing. */
#ifndef SAFE_FREE
#define SAFE_FREE(x) do { if ((x) != NULL) {free(x); (x)=NULL;} } while(0)
#endif

/* Reduce a hash value modulo the hash_size of the "tdb" in scope */
#define BUCKET(hash) ((hash) % tdb->header.hash_size)

/* DOCONV() tests the TDB_CONVERT flag of the "tdb" in scope.  CONVERT(x)
 * yields the address of x, routed through tdb_convert() when that flag
 * is set (presumably a byte-order conversion - see tdb_convert()). */
#define DOCONV() (tdb->flags & TDB_CONVERT)
#define CONVERT(x) (DOCONV() ? tdb_convert(&x, sizeof(x)) : &x)
165106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
166106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* the body of the database is made of one list_struct for the free space
167106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   plus a separate data list for each hash value */
168106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostruct list_struct {
169106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t next; /* offset of the next record in the list */
170106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_len_t rec_len; /* total byte length of record */
171106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_len_t key_len; /* byte length of key */
172106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_len_t data_len; /* byte length of data */
173106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	u32 full_hash; /* the full 32 bit hash of the key */
174106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	u32 magic;   /* try to catch errors */
175106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* the following union is implied:
176106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		union {
177106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			char record[rec_len];
178106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			struct {
179106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				char key[key_len];
180106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				char data[data_len];
181106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			}
182106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			u32 totalsize; (tailer)
183106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
184106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	*/
185106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o};
186106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
187106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
188106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* this is stored at the front of every database */
189106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostruct tdb_header {
190106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	char magic_food[32]; /* for /etc/magic */
191106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	u32 version; /* version of the code */
192106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	u32 hash_size; /* number of hash entries */
193106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t rwlocks; /* obsolete - kept to detect old formats */
194106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t recovery_start; /* offset of transaction recovery region */
195106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t sequence_number; /* used when TDB_SEQNUM is set */
196106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t reserved[29];
197106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o};
198106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
199106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostruct tdb_lock_type {
200106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int list;
201106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	u32 count;
202106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	u32 ltype;
203106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o};
204106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
205106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostruct tdb_traverse_lock {
206106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct tdb_traverse_lock *next;
207106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	u32 off;
208106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	u32 hash;
209106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int lock_rw;
210106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o};
211106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
212106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
213106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostruct tdb_methods {
214106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int (*tdb_read)(struct tdb_context *, tdb_off_t , void *, tdb_len_t , int );
215106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int (*tdb_write)(struct tdb_context *, tdb_off_t, const void *, tdb_len_t);
216106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	void (*next_hash_chain)(struct tdb_context *, u32 *);
217106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int (*tdb_oob)(struct tdb_context *, tdb_off_t , int );
218106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int (*tdb_expand_file)(struct tdb_context *, tdb_off_t , tdb_off_t );
219106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int (*tdb_brlock)(struct tdb_context *, tdb_off_t , int, int, int, size_t);
220106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o};
221106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
222106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostruct tdb_context {
223106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	char *name; /* the name of the database */
224106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	void *map_ptr; /* where it is currently mapped */
225106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int fd; /* open file descriptor for the database */
226106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_len_t map_size; /* how much space has been mapped */
227106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int read_only; /* opened read-only */
228106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int traverse_read; /* read-only traversal */
229106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct tdb_lock_type global_lock;
230106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int num_lockrecs;
231106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct tdb_lock_type *lockrecs; /* only real locks, all with count>0 */
232106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	enum TDB_ERROR ecode; /* error code for last tdb error */
233106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct tdb_header header; /* a cached copy of the header */
234106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	u32 flags; /* the flags passed to tdb_open */
235106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct tdb_traverse_lock travlocks; /* current traversal locks */
236106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct tdb_context *next; /* all tdbs to avoid multiple opens */
237106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	dev_t device;	/* uniquely identifies this tdb */
238106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	ino_t inode;	/* uniquely identifies this tdb */
239106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct tdb_logging_context log;
240106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	unsigned int (*hash_fn)(TDB_DATA *key);
241106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int open_flags; /* flags used in the open - needed by reopen */
242106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	unsigned int num_locks; /* number of chain locks held */
243106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	const struct tdb_methods *methods;
244106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct tdb_transaction *transaction;
245106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int page_size;
246106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int max_dead_records;
2473eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	bool have_transaction_lock;
248106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o};
249106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
250106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
251106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
252106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  internal prototypes
253106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
254106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int tdb_munmap(struct tdb_context *tdb);
255106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic void tdb_mmap(struct tdb_context *tdb);
256106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int tdb_lock(struct tdb_context *tdb, int list, int ltype);
257106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int tdb_unlock(struct tdb_context *tdb, int list, int ltype);
258106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int tdb_brlock(struct tdb_context *tdb, tdb_off_t offset, int rw_type, int lck_type, int probe, size_t len);
2593eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'ostatic int tdb_transaction_lock(struct tdb_context *tdb, int ltype);
2603eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'ostatic int tdb_transaction_unlock(struct tdb_context *tdb);
261106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int tdb_brlock_upgrade(struct tdb_context *tdb, tdb_off_t offset, size_t len);
262106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int tdb_write_lock_record(struct tdb_context *tdb, tdb_off_t off);
263106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int tdb_write_unlock_record(struct tdb_context *tdb, tdb_off_t off);
264106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d);
265106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d);
266106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic void *tdb_convert(void *buf, u32 size);
267106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int tdb_free(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec);
268106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic tdb_off_t tdb_allocate(struct tdb_context *tdb, tdb_len_t length, struct list_struct *rec);
269106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d);
270106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d);
271106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int tdb_lock_record(struct tdb_context *tdb, tdb_off_t off);
272106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int tdb_unlock_record(struct tdb_context *tdb, tdb_off_t off);
273106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec);
274106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int tdb_rec_write(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec);
275106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct list_struct *rec);
276106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic unsigned char *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len);
277106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int tdb_parse_data(struct tdb_context *tdb, TDB_DATA key,
278106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		   tdb_off_t offset, tdb_len_t len,
279106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		   int (*parser)(TDB_DATA key, TDB_DATA data,
280106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				 void *private_data),
281106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		   void *private_data);
282106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash, int locktype,
283106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			   struct list_struct *rec);
284106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic void tdb_io_init(struct tdb_context *tdb);
285106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int tdb_expand(struct tdb_context *tdb, tdb_off_t size);
2863eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'ostatic int tdb_rec_free_read(struct tdb_context *tdb, tdb_off_t off,
2873eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o		      struct list_struct *rec);
288106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
289106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
290106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* file: error.c */
291106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
292106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oenum TDB_ERROR tdb_error(struct tdb_context *tdb)
293106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
294106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return tdb->ecode;
295106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
296106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
297106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic struct tdb_errname {
298106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	enum TDB_ERROR ecode; const char *estring;
299106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o} emap[] = { {TDB_SUCCESS, "Success"},
300106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	     {TDB_ERR_CORRUPT, "Corrupt database"},
301106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	     {TDB_ERR_IO, "IO Error"},
302106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	     {TDB_ERR_LOCK, "Locking error"},
303106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	     {TDB_ERR_OOM, "Out of memory"},
304106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	     {TDB_ERR_EXISTS, "Record exists"},
305106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	     {TDB_ERR_NOLOCK, "Lock exists on other keys"},
306106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	     {TDB_ERR_EINVAL, "Invalid parameter"},
307106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	     {TDB_ERR_NOEXIST, "Record does not exist"},
308106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	     {TDB_ERR_RDONLY, "write not permitted"} };
309106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
310106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* Error string for the last tdb error */
311106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oconst char *tdb_errorstr(struct tdb_context *tdb)
312106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
313106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	u32 i;
314106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	for (i = 0; i < sizeof(emap) / sizeof(struct tdb_errname); i++)
315106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb->ecode == emap[i].ecode)
316106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return emap[i].estring;
317106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return "Invalid error code";
318106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
319106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
320106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* file: lock.c */
321106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3223eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o#define TDB_MARK_LOCK 0x80000000
3233eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o
324106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* a byte range locking function - return 0 on success
325106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   this functions locks/unlocks 1 byte at the specified offset.
326106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
327106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   On error, errno is also set so that errors are passed back properly
328efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o   through tdb_open().
329106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
330106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   note that a len of zero means lock to end of file
331106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
332efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'oint tdb_brlock(struct tdb_context *tdb, tdb_off_t offset,
333106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	       int rw_type, int lck_type, int probe, size_t len)
334106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
335106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct flock fl;
336106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int ret;
337106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
338106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->flags & TDB_NOLOCK) {
339106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
340106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
341106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
342106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if ((rw_type == F_WRLCK) && (tdb->read_only || tdb->traverse_read)) {
343106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_RDONLY;
344106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
345106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
346106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
347106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	fl.l_type = rw_type;
348106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	fl.l_whence = SEEK_SET;
349106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	fl.l_start = offset;
350106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	fl.l_len = len;
351106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	fl.l_pid = 0;
352106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
353106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	do {
354106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		ret = fcntl(tdb->fd,lck_type,&fl);
355106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	} while (ret == -1 && errno == EINTR);
356106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
357106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (ret == -1) {
358106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* Generic lock error. errno set by fcntl.
359106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 * EAGAIN is an expected return from non-blocking
360106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 * locks. */
361106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (!probe && lck_type != F_SETLK) {
362106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			/* Ensure error code is set for log fun to examine. */
363106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb->ecode = TDB_ERR_LOCK;
364efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o			TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_brlock failed (fd=%d) at offset %d rw_type=%d lck_type=%d len=%d\n",
365106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				 tdb->fd, offset, rw_type, lck_type, (int)len));
366106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
367106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return TDB_ERRCODE(TDB_ERR_LOCK, -1);
368106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
369106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return 0;
370106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
371106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
372106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
373106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
374106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  upgrade a read lock to a write lock. This needs to be handled in a
375106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  special way as some OSes (such as solaris) have too conservative
376106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  deadlock detection and claim a deadlock when progress can be
377efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o  made. For those OSes we may loop for a while.
378106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
379106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_brlock_upgrade(struct tdb_context *tdb, tdb_off_t offset, size_t len)
380106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
381106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int count = 1000;
382106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	while (count--) {
383106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		struct timeval tv;
384106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb_brlock(tdb, offset, F_WRLCK, F_SETLKW, 1, len) == 0) {
385106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return 0;
386106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
387106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (errno != EDEADLK) {
388106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			break;
389106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
390106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* sleep for as short a time as we can - more portable than usleep() */
391106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tv.tv_sec = 0;
392106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tv.tv_usec = 1;
393106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		select(0, NULL, NULL, NULL, &tv);
394106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
395106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_brlock_upgrade failed at offset %d\n", offset));
396106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return -1;
397106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
398106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
399106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
400106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* lock a list in the database. list -1 is the alloc list */
4013eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'ostatic int _tdb_lock(struct tdb_context *tdb, int list, int ltype, int op)
402106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
403106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct tdb_lock_type *new_lck;
404106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int i;
4053eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	bool mark_lock = ((ltype & TDB_MARK_LOCK) == TDB_MARK_LOCK);
4063eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o
4073eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	ltype &= ~TDB_MARK_LOCK;
408106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
409106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* a global lock allows us to avoid per chain locks */
410efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o	if (tdb->global_lock.count &&
411106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	    (ltype == tdb->global_lock.ltype || ltype == F_RDLCK)) {
412106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
413106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
414106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
415106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->global_lock.count) {
416106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return TDB_ERRCODE(TDB_ERR_LOCK, -1);
417106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
418106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
419106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (list < -1 || list >= (int)tdb->header.hash_size) {
420efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_ERROR,"tdb_lock: invalid list %d for ltype=%d\n",
421106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			   list, ltype));
422106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
423106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
424106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->flags & TDB_NOLOCK)
425106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
426106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
427106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	for (i=0; i<tdb->num_lockrecs; i++) {
428106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb->lockrecs[i].list == list) {
429106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			if (tdb->lockrecs[i].count == 0) {
430106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				/*
431106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				 * Can't happen, see tdb_unlock(). It should
432106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				 * be an assert.
433106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				 */
434106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_lock: "
435106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o					 "lck->count == 0 for list %d", list));
436106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			}
437106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			/*
438106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			 * Just increment the in-memory struct, posix locks
439106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			 * don't stack.
440106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			 */
441106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb->lockrecs[i].count++;
442106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return 0;
443106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
444106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
445106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
446106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	new_lck = (struct tdb_lock_type *)realloc(
447106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->lockrecs,
448106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		sizeof(*tdb->lockrecs) * (tdb->num_lockrecs+1));
449106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (new_lck == NULL) {
450106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		errno = ENOMEM;
451106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
452106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
453106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->lockrecs = new_lck;
454106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
455106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* Since fcntl locks don't nest, we do a lock for the first one,
456106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   and simply bump the count for future ones */
4573eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	if (!mark_lock &&
4583eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	    tdb->methods->tdb_brlock(tdb,FREELIST_TOP+4*list, ltype, op,
459106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				     0, 1)) {
460106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
461106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
462106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
463106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->num_locks++;
464106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
465106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->lockrecs[tdb->num_lockrecs].list = list;
466106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->lockrecs[tdb->num_lockrecs].count = 1;
467106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->lockrecs[tdb->num_lockrecs].ltype = ltype;
468106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->num_lockrecs += 1;
469106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
470106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return 0;
471106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
472106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
4733eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o/* lock a list in the database. list -1 is the alloc list */
4743eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'oint tdb_lock(struct tdb_context *tdb, int list, int ltype)
4753eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o{
4763eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	int ret;
4773eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	ret = _tdb_lock(tdb, list, ltype, F_SETLKW);
4783eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	if (ret) {
4793eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_lock failed on list %d "
4803eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o			 "ltype=%d (%s)\n",  list, ltype, strerror(errno)));
4813eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	}
4823eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	return ret;
4833eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o}
4843eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o
/*
  Non-blocking variant of tdb_lock(): lock a list in the database using
  F_SETLK; list -1 is the alloc list.  Nothing is logged here on
  failure; the result of _tdb_lock() is returned unchanged.
*/
int tdb_lock_nonblock(struct tdb_context *tdb, int list, int ltype)
{
	const int op = F_SETLK;

	return _tdb_lock(tdb, list, ltype, op);
}
4903eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o
4913eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o
492106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* unlock the database: returns void because it's too late for errors. */
493106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* changed to return int it may be interesting to know there
494106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   has been an error  --simo */
495106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_unlock(struct tdb_context *tdb, int list, int ltype)
496106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
497106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int ret = -1;
498106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int i;
499106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct tdb_lock_type *lck = NULL;
5003eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	bool mark_lock = ((ltype & TDB_MARK_LOCK) == TDB_MARK_LOCK);
5013eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o
5023eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	ltype &= ~TDB_MARK_LOCK;
503106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
504106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* a global lock allows us to avoid per chain locks */
505efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o	if (tdb->global_lock.count &&
506106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	    (ltype == tdb->global_lock.ltype || ltype == F_RDLCK)) {
507106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
508106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
509106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
510106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->global_lock.count) {
511106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return TDB_ERRCODE(TDB_ERR_LOCK, -1);
512106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
513106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
514106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->flags & TDB_NOLOCK)
515106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
516106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
517106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* Sanity checks */
518106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (list < -1 || list >= (int)tdb->header.hash_size) {
519106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlock: list %d invalid (%d)\n", list, tdb->header.hash_size));
520106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return ret;
521106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
522106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
523106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	for (i=0; i<tdb->num_lockrecs; i++) {
524106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb->lockrecs[i].list == list) {
525106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			lck = &tdb->lockrecs[i];
526106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			break;
527106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
528106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
529106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
530106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if ((lck == NULL) || (lck->count == 0)) {
531106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlock: count is 0\n"));
532106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
533106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
534106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
535106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (lck->count > 1) {
536106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		lck->count--;
537106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
538106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
539106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
540106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/*
541106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	 * This lock has count==1 left, so we need to unlock it in the
542106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	 * kernel. We don't bother with decrementing the in-memory array
543106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	 * element, we're about to overwrite it with the last array element
544106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	 * anyway.
545106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	 */
546106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
5473eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	if (mark_lock) {
5483eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o		ret = 0;
5493eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	} else {
5503eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o		ret = tdb->methods->tdb_brlock(tdb, FREELIST_TOP+4*list, F_UNLCK,
5513eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o					       F_SETLKW, 0, 1);
5523eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	}
553106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->num_locks--;
554106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
555106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/*
556106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	 * Shrink the array by overwriting the element just unlocked with the
557106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	 * last array element.
558106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	 */
559106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
560106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->num_lockrecs > 1) {
561106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		*lck = tdb->lockrecs[tdb->num_lockrecs-1];
562106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
563106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->num_lockrecs -= 1;
564106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
565106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/*
566106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	 * We don't bother with realloc when the array shrinks, but if we have
567106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	 * a completely idle tdb we should get rid of the locked array.
568106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	 */
569106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
570106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->num_lockrecs == 0) {
571106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		SAFE_FREE(tdb->lockrecs);
572106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
573106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
574106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (ret)
575efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlock: An error occurred unlocking!\n"));
576106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return ret;
577106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
578106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
5793eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o/*
5803eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o  get the transaction lock
5813eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o */
5823eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'oint tdb_transaction_lock(struct tdb_context *tdb, int ltype)
5833eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o{
5843eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	if (tdb->have_transaction_lock || tdb->global_lock.count) {
5853eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o		return 0;
5863eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	}
587efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o	if (tdb->methods->tdb_brlock(tdb, TRANSACTION_LOCK, ltype,
5883eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o				     F_SETLKW, 0, 1) == -1) {
5893eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_lock: failed to get transaction lock\n"));
5903eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o		tdb->ecode = TDB_ERR_LOCK;
5913eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o		return -1;
5923eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	}
5933eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	tdb->have_transaction_lock = 1;
5943eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	return 0;
5953eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o}
5963eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o
5973eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o/*
5983eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o  release the transaction lock
5993eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o */
6003eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'oint tdb_transaction_unlock(struct tdb_context *tdb)
6013eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o{
6023eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	int ret;
6033eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	if (!tdb->have_transaction_lock) {
6043eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o		return 0;
6053eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	}
6063eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	ret = tdb->methods->tdb_brlock(tdb, TRANSACTION_LOCK, F_UNLCK, F_SETLKW, 0, 1);
6073eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	if (ret == 0) {
6083eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o		tdb->have_transaction_lock = 0;
6093eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	}
6103eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	return ret;
6113eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o}
6123eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o
6133eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o
614106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
615106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
616106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* lock/unlock entire database */
6173eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'ostatic int _tdb_lockall(struct tdb_context *tdb, int ltype, int op)
618106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
6193eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	bool mark_lock = ((ltype & TDB_MARK_LOCK) == TDB_MARK_LOCK);
6203eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o
6213eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	ltype &= ~TDB_MARK_LOCK;
6223eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o
623106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* There are no locks on read-only dbs */
624106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->read_only || tdb->traverse_read)
625106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return TDB_ERRCODE(TDB_ERR_LOCK, -1);
626106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
627106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->global_lock.count && tdb->global_lock.ltype == ltype) {
628106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->global_lock.count++;
629106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
630106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
631106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
632106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->global_lock.count) {
633106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* a global lock of a different type exists */
634106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return TDB_ERRCODE(TDB_ERR_LOCK, -1);
635106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
636efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
637106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->num_locks != 0) {
638106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* can't combine global and chain locks */
639106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return TDB_ERRCODE(TDB_ERR_LOCK, -1);
640106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
641106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
6423eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	if (!mark_lock &&
6433eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	    tdb->methods->tdb_brlock(tdb, FREELIST_TOP, ltype, op,
644106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				     0, 4*tdb->header.hash_size)) {
6453eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o		if (op == F_SETLKW) {
6463eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o			TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_lockall failed (%s)\n", strerror(errno)));
6473eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o		}
648106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
649106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
650106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
651106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->global_lock.count = 1;
652106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->global_lock.ltype = ltype;
653106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
654106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return 0;
655106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
656106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
6573eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o
6583eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o
659106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* unlock entire db */
660106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int _tdb_unlockall(struct tdb_context *tdb, int ltype)
661106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
6623eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	bool mark_lock = ((ltype & TDB_MARK_LOCK) == TDB_MARK_LOCK);
6633eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o
6643eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	ltype &= ~TDB_MARK_LOCK;
6653eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o
666106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* There are no locks on read-only dbs */
667106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->read_only || tdb->traverse_read) {
668106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return TDB_ERRCODE(TDB_ERR_LOCK, -1);
669106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
670106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
671106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->global_lock.ltype != ltype || tdb->global_lock.count == 0) {
672106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return TDB_ERRCODE(TDB_ERR_LOCK, -1);
673106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
674106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
675106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->global_lock.count > 1) {
676106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->global_lock.count--;
677106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
678106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
679106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
6803eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	if (!mark_lock &&
681efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o	    tdb->methods->tdb_brlock(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW,
682106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				     0, 4*tdb->header.hash_size)) {
683106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlockall failed (%s)\n", strerror(errno)));
684106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
685106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
686106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
687106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->global_lock.count = 0;
688106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->global_lock.ltype = 0;
689106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
690106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return 0;
691106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
692106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
/* Take a write lock on the entire database, using F_SETLKW. */
int tdb_lockall(struct tdb_context *tdb)
{
	const int ltype = F_WRLCK;

	return _tdb_lockall(tdb, ltype, F_SETLKW);
}
6983eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o
6993eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o/* lock entire database with write lock - mark only */
7003eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'oint tdb_lockall_mark(struct tdb_context *tdb)
7013eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o{
7023eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	return _tdb_lockall(tdb, F_WRLCK | TDB_MARK_LOCK, F_SETLKW);
7033eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o}
7043eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o
7053eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o/* unlock entire database with write lock - unmark only */
7063eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'oint tdb_lockall_unmark(struct tdb_context *tdb)
7073eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o{
7083eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	return _tdb_unlockall(tdb, F_WRLCK | TDB_MARK_LOCK);
7093eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o}
7103eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o
/* Take a write lock on the entire database - nonblocking variant
   (F_SETLK). */
int tdb_lockall_nonblock(struct tdb_context *tdb)
{
	const int ltype = F_WRLCK;

	return _tdb_lockall(tdb, ltype, F_SETLK);
}
716106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
/* Release the write lock held on the entire database. */
int tdb_unlockall(struct tdb_context *tdb)
{
	const int ltype = F_WRLCK;

	return _tdb_unlockall(tdb, ltype);
}
722106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
/* Take a read lock on the entire database, using F_SETLKW. */
int tdb_lockall_read(struct tdb_context *tdb)
{
	const int ltype = F_RDLCK;

	return _tdb_lockall(tdb, ltype, F_SETLKW);
}
7283eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o
/* Take a read lock on the entire database - nonblocking variant
   (F_SETLK). */
int tdb_lockall_read_nonblock(struct tdb_context *tdb)
{
	const int ltype = F_RDLCK;

	return _tdb_lockall(tdb, ltype, F_SETLK);
}
734106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
/* Release the read lock held on the entire database. */
int tdb_unlockall_read(struct tdb_context *tdb)
{
	const int ltype = F_RDLCK;

	return _tdb_unlockall(tdb, ltype);
}
740106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
741106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* lock/unlock one hash chain. This is meant to be used to reduce
742106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   contention - it cannot guarantee how many records will be locked */
743106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_chainlock(struct tdb_context *tdb, TDB_DATA key)
744106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
745106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return tdb_lock(tdb, BUCKET(tdb->hash_fn(&key)), F_WRLCK);
746106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
747106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
7483eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o/* lock/unlock one hash chain, non-blocking. This is meant to be used
7493eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o   to reduce contention - it cannot guarantee how many records will be
7503eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o   locked */
7513eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'oint tdb_chainlock_nonblock(struct tdb_context *tdb, TDB_DATA key)
7523eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o{
7533eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	return tdb_lock_nonblock(tdb, BUCKET(tdb->hash_fn(&key)), F_WRLCK);
7543eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o}
7553eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o
7563eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o/* mark a chain as locked without actually locking it. Warning! use with great caution! */
7573eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'oint tdb_chainlock_mark(struct tdb_context *tdb, TDB_DATA key)
7583eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o{
7593eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	return tdb_lock(tdb, BUCKET(tdb->hash_fn(&key)), F_WRLCK | TDB_MARK_LOCK);
7603eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o}
7613eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o
7623eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o/* unmark a chain as locked without actually locking it. Warning! use with great caution! */
7633eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'oint tdb_chainlock_unmark(struct tdb_context *tdb, TDB_DATA key)
7643eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o{
7653eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	return tdb_unlock(tdb, BUCKET(tdb->hash_fn(&key)), F_WRLCK | TDB_MARK_LOCK);
7663eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o}
7673eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o
768106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_chainunlock(struct tdb_context *tdb, TDB_DATA key)
769106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
770106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return tdb_unlock(tdb, BUCKET(tdb->hash_fn(&key)), F_WRLCK);
771106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
772106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
773106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_chainlock_read(struct tdb_context *tdb, TDB_DATA key)
774106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
775106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return tdb_lock(tdb, BUCKET(tdb->hash_fn(&key)), F_RDLCK);
776106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
777106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
778106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_chainunlock_read(struct tdb_context *tdb, TDB_DATA key)
779106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
780106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return tdb_unlock(tdb, BUCKET(tdb->hash_fn(&key)), F_RDLCK);
781106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
782106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
783106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
784106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
785106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* record lock stops delete underneath */
786106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_lock_record(struct tdb_context *tdb, tdb_off_t off)
787106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
788106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return off ? tdb->methods->tdb_brlock(tdb, off, F_RDLCK, F_SETLKW, 0, 1) : 0;
789106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
790106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
791106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
792106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  Write locks override our own fcntl readlocks, so check it here.
793106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  Note this is meant to be F_SETLK, *not* F_SETLKW, as it's not
794106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  an error to fail to get the lock here.
795106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
796106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_write_lock_record(struct tdb_context *tdb, tdb_off_t off)
797106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
798106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct tdb_traverse_lock *i;
799106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	for (i = &tdb->travlocks; i; i = i->next)
800106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (i->off == off)
801106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return -1;
802106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return tdb->methods->tdb_brlock(tdb, off, F_WRLCK, F_SETLK, 1, 1);
803106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
804106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
805106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
806106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  Note this is meant to be F_SETLK, *not* F_SETLKW, as it's not
807106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  an error to fail to get the lock here.
808106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
809106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_write_unlock_record(struct tdb_context *tdb, tdb_off_t off)
810106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
811106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return tdb->methods->tdb_brlock(tdb, off, F_UNLCK, F_SETLK, 0, 1);
812106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
813106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
814106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* fcntl locks don't stack: avoid unlocking someone else's */
815106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_unlock_record(struct tdb_context *tdb, tdb_off_t off)
816106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
817106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct tdb_traverse_lock *i;
818106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	u32 count = 0;
819106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
820106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (off == 0)
821106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
822106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	for (i = &tdb->travlocks; i; i = i->next)
823106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (i->off == off)
824106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			count++;
825106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return (count == 1 ? tdb->methods->tdb_brlock(tdb, off, F_UNLCK, F_SETLKW, 0, 1) : 0);
826106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
827106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
828106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* file: io.c */
829106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
830106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* check for an out of bounds access - if it is out of bounds then
831106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   see if the database has been expanded by someone else and expand
832efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o   if necessary
833106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   note that "len" is the minimum length needed for the db
834106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
835106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int tdb_oob(struct tdb_context *tdb, tdb_off_t len, int probe)
836106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
837106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct stat st;
838106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (len <= tdb->map_size)
839106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
840106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->flags & TDB_INTERNAL) {
841106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (!probe) {
842106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			/* Ensure ecode is set for log fn. */
843106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb->ecode = TDB_ERR_IO;
844106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob len %d beyond internal malloc size %d\n",
845106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				 (int)len, (int)tdb->map_size));
846106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
847106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return TDB_ERRCODE(TDB_ERR_IO, -1);
848106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
849106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
850106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (fstat(tdb->fd, &st) == -1) {
851106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return TDB_ERRCODE(TDB_ERR_IO, -1);
852106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
853106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
854106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (st.st_size < (size_t)len) {
855106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (!probe) {
856106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			/* Ensure ecode is set for log fn. */
857106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb->ecode = TDB_ERR_IO;
858106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob len %d beyond eof at %d\n",
859106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				 (int)len, (int)st.st_size));
860106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
861106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return TDB_ERRCODE(TDB_ERR_IO, -1);
862106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
863106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
864106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* Unmap, update size, remap */
865106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_munmap(tdb) == -1)
866106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return TDB_ERRCODE(TDB_ERR_IO, -1);
867106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->map_size = st.st_size;
868106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_mmap(tdb);
869106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return 0;
870106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
871106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
872106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* write a lump of data at a specified offset */
873efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'ostatic int tdb_write(struct tdb_context *tdb, tdb_off_t off,
874106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		     const void *buf, tdb_len_t len)
875106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
876106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (len == 0) {
877106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
878106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
879106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
880106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->read_only || tdb->traverse_read) {
881106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_RDONLY;
882106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
883106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
884106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
885106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->methods->tdb_oob(tdb, off + len, 0) != 0)
886106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
887106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
888106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->map_ptr) {
889106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		memcpy(off + (char *)tdb->map_ptr, buf, len);
890106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	} else if (pwrite(tdb->fd, buf, len, off) != (ssize_t)len) {
891106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* Ensure ecode is set for log fn. */
892106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_IO;
893106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_write failed at %d len=%d (%s)\n",
894106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			   off, len, strerror(errno)));
895106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return TDB_ERRCODE(TDB_ERR_IO, -1);
896106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
897106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return 0;
898106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
899106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
900106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* Endian conversion: we only ever deal with 4 byte quantities */
901106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ovoid *tdb_convert(void *buf, u32 size)
902106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
903106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	u32 i, *p = (u32 *)buf;
904106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	for (i = 0; i < size / 4; i++)
905106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		p[i] = TDB_BYTEREV(p[i]);
906106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return buf;
907106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
908106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
909106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
910106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* read a lump of data at a specified offset, maybe convert */
911efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'ostatic int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
912106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		    tdb_len_t len, int cv)
913106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
914106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->methods->tdb_oob(tdb, off + len, 0) != 0) {
915106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
916106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
917106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
918106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->map_ptr) {
919106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		memcpy(buf, off + (char *)tdb->map_ptr, len);
920106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	} else {
921106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		ssize_t ret = pread(tdb->fd, buf, len, off);
922106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (ret != (ssize_t)len) {
923106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			/* Ensure ecode is set for log fn. */
924106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb->ecode = TDB_ERR_IO;
925106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_read failed at %d "
926106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				 "len=%d ret=%d (%s) map_size=%d\n",
927106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				 (int)off, (int)len, (int)ret, strerror(errno),
928106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				 (int)tdb->map_size));
929106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return TDB_ERRCODE(TDB_ERR_IO, -1);
930106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
931106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
932106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (cv) {
933106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb_convert(buf, len);
934106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
935106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return 0;
936106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
937106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
938106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
939106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
940106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
941106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  do an unlocked scan of the hash table heads to find the next non-zero head. The value
942106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  will then be confirmed with the lock held
943efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o*/
944106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic void tdb_next_hash_chain(struct tdb_context *tdb, u32 *chain)
945106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
946106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	u32 h = *chain;
947106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->map_ptr) {
948106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		for (;h < tdb->header.hash_size;h++) {
949106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			if (0 != *(u32 *)(TDB_HASH_TOP(h) + (unsigned char *)tdb->map_ptr)) {
950106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				break;
951106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			}
952106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
953106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	} else {
954106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		u32 off=0;
955106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		for (;h < tdb->header.hash_size;h++) {
956106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			if (tdb_ofs_read(tdb, TDB_HASH_TOP(h), &off) != 0 || off != 0) {
957106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				break;
958106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			}
959106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
960106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
961106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	(*chain) = h;
962106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
963106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
964106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
965106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_munmap(struct tdb_context *tdb)
966106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
967106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->flags & TDB_INTERNAL)
968106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
969106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
970106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#ifdef HAVE_MMAP
971106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->map_ptr) {
972106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		int ret = munmap(tdb->map_ptr, tdb->map_size);
973106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (ret != 0)
974106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return ret;
975106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
976106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#endif
977106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->map_ptr = NULL;
978106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return 0;
979106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
980106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
981106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ovoid tdb_mmap(struct tdb_context *tdb)
982106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
983106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->flags & TDB_INTERNAL)
984106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return;
985106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
986106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#ifdef HAVE_MMAP
987106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (!(tdb->flags & TDB_NOMMAP)) {
988efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o		tdb->map_ptr = mmap(NULL, tdb->map_size,
989efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o				    PROT_READ|(tdb->read_only? 0:PROT_WRITE),
990106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				    MAP_SHARED|MAP_FILE, tdb->fd, 0);
991106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
992106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/*
993106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
994106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 */
995106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
996106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb->map_ptr == MAP_FAILED) {
997106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb->map_ptr = NULL;
998efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o			TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_mmap failed for size %d (%s)\n",
999106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				 tdb->map_size, strerror(errno)));
1000106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
1001106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	} else {
1002106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->map_ptr = NULL;
1003106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1004106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#else
1005106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->map_ptr = NULL;
1006106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#endif
1007106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
1008106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1009106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* expand a file.  we prefer to use ftruncate, as that is what posix
1010106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  says to use for mmap expansion */
1011106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t addition)
1012106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
1013106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	char buf[1024];
1014106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1015106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->read_only || tdb->traverse_read) {
1016106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_RDONLY;
1017106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
1018106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1019106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1020106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (ftruncate(tdb->fd, size+addition) == -1) {
1021106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		char b = 0;
1022106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (pwrite(tdb->fd,  &b, 1, (size+addition) - 1) != 1) {
1023efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o			TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file to %d failed (%s)\n",
1024106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				 size+addition, strerror(errno)));
1025106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return -1;
1026106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
1027106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1028106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1029106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* now fill the file with something. This ensures that the
1030106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   file isn't sparse, which would be very bad if we ran out of
1031106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   disk. This must be done with write, not via mmap */
1032106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	memset(buf, TDB_PAD_BYTE, sizeof(buf));
1033106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	while (addition) {
1034106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		int n = addition>sizeof(buf)?sizeof(buf):addition;
1035106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		int ret = pwrite(tdb->fd, buf, n, size);
1036106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (ret != n) {
1037efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o			TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write of %d failed (%s)\n",
1038106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				   n, strerror(errno)));
1039106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return -1;
1040106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
1041106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		addition -= n;
1042106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		size += n;
1043106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1044106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return 0;
1045106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
1046106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1047106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1048106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* expand the database at least size bytes by expanding the underlying
1049106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   file and doing the mmap again if necessary */
1050106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_expand(struct tdb_context *tdb, tdb_off_t size)
1051106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
1052106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct list_struct rec;
1053106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t offset;
1054106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1055106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
1056106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_ERROR, "lock failed in tdb_expand\n"));
1057106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
1058106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1059106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1060106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* must know about any previous expansions by another process */
1061106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->methods->tdb_oob(tdb, tdb->map_size + 1, 1);
1062106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1063106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* always make room for at least 10 more records, and round
1064106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o           the database up to a multiple of the page size */
1065106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	size = TDB_ALIGN(tdb->map_size + size*10, tdb->page_size) - tdb->map_size;
1066106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1067106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (!(tdb->flags & TDB_INTERNAL))
1068106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb_munmap(tdb);
1069106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1070106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/*
1071106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	 * We must ensure the file is unmapped before doing this
1072106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	 * to ensure consistency with systems like OpenBSD where
1073106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	 * writes and mmaps are not consistent.
1074106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	 */
1075106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1076106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* expand the file itself */
1077106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (!(tdb->flags & TDB_INTERNAL)) {
1078106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb->methods->tdb_expand_file(tdb, tdb->map_size, size) != 0)
1079106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			goto fail;
1080106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1081106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1082106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->map_size += size;
1083106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1084106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->flags & TDB_INTERNAL) {
1085106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		char *new_map_ptr = (char *)realloc(tdb->map_ptr,
1086106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o						    tdb->map_size);
1087106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (!new_map_ptr) {
1088106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb->map_size -= size;
1089106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			goto fail;
1090106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
1091106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->map_ptr = new_map_ptr;
1092106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	} else {
1093106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/*
1094106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 * We must ensure the file is remapped before adding the space
1095106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 * to ensure consistency with systems like OpenBSD where
1096106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 * writes and mmaps are not consistent.
1097106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 */
1098106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1099106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* We're ok if the mmap fails as we'll fallback to read/write */
1100106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb_mmap(tdb);
1101106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1102106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1103106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* form a new freelist record */
1104106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	memset(&rec,'\0',sizeof(rec));
1105106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	rec.rec_len = size - sizeof(rec);
1106106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1107106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* link it into the free list */
1108106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	offset = tdb->map_size - size;
1109106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_free(tdb, offset, &rec) == -1)
1110106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
1111106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1112106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_unlock(tdb, -1, F_WRLCK);
1113106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return 0;
1114106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o fail:
1115106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_unlock(tdb, -1, F_WRLCK);
1116106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return -1;
1117106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
1118106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1119106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* read/write a tdb_off_t */
1120106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
1121106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
1122106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return tdb->methods->tdb_read(tdb, offset, (char*)d, sizeof(*d), DOCONV());
1123106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
1124106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1125106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
1126106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
1127106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t off = *d;
1128106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return tdb->methods->tdb_write(tdb, offset, CONVERT(off), sizeof(*d));
1129106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
1130106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1131106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1132106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* read a lump of data, allocating the space for it */
1133106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ounsigned char *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
1134106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
1135106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	unsigned char *buf;
1136106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1137106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* some systems don't like zero length malloc */
1138106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (len == 0) {
1139106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		len = 1;
1140106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1141106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1142106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (!(buf = (unsigned char *)malloc(len))) {
1143106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* Ensure ecode is set for log fn. */
1144106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_OOM;
1145106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_ERROR,"tdb_alloc_read malloc failed len=%d (%s)\n",
1146106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			   len, strerror(errno)));
1147106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return TDB_ERRCODE(TDB_ERR_OOM, buf);
1148106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1149106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->methods->tdb_read(tdb, offset, buf, len, 0) == -1) {
1150106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		SAFE_FREE(buf);
1151106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return NULL;
1152106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1153106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return buf;
1154106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
1155106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1156106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* Give a piece of tdb data to a parser */
1157106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1158106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_parse_data(struct tdb_context *tdb, TDB_DATA key,
1159106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		   tdb_off_t offset, tdb_len_t len,
1160106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		   int (*parser)(TDB_DATA key, TDB_DATA data,
1161106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				 void *private_data),
1162106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		   void *private_data)
1163106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
1164106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	TDB_DATA data;
1165106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int result;
1166106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1167106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	data.dsize = len;
1168106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1169106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if ((tdb->transaction == NULL) && (tdb->map_ptr != NULL)) {
1170106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/*
1171106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 * Optimize by avoiding the malloc/memcpy/free, point the
1172106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 * parser directly at the mmap area.
1173106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 */
1174106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb->methods->tdb_oob(tdb, offset+len, 0) != 0) {
1175106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return -1;
1176106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
1177106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		data.dptr = offset + (unsigned char *)tdb->map_ptr;
1178106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return parser(key, data, private_data);
1179106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1180106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1181106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (!(data.dptr = tdb_alloc_read(tdb, offset, len))) {
1182106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
1183106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1184106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1185106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	result = parser(key, data, private_data);
1186106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	free(data.dptr);
1187106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return result;
1188106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
1189106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1190106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* read/write a record */
1191106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec)
1192106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
1193106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->methods->tdb_read(tdb, offset, rec, sizeof(*rec),DOCONV()) == -1)
1194106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
1195106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (TDB_BAD_MAGIC(rec)) {
1196106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* Ensure ecode is set for log fn. */
1197106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_CORRUPT;
1198106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_rec_read bad magic 0x%x at offset=%d\n", rec->magic, offset));
1199106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
1200106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1201106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return tdb->methods->tdb_oob(tdb, rec->next+sizeof(*rec), 0);
1202106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
1203106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1204106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_rec_write(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec)
1205106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
1206106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct list_struct r = *rec;
1207106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return tdb->methods->tdb_write(tdb, offset, CONVERT(r), sizeof(r));
1208106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
1209106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1210106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic const struct tdb_methods io_methods = {
1211106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_read,
1212106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_write,
1213106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_next_hash_chain,
1214106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_oob,
1215106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_expand_file,
1216106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_brlock
1217106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o};
1218106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1219106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
1220106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  initialise the default methods table
1221106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
1222106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ovoid tdb_io_init(struct tdb_context *tdb)
1223106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
1224106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->methods = &io_methods;
1225106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
1226106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1227106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* file: transaction.c */
1228106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1229106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
1230106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  transaction design:
1231106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1232106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  - only allow a single transaction at a time per database. This makes
1233106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o    using the transaction API simpler, as otherwise the caller would
1234106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o    have to cope with temporary failures in transactions that conflict
1235106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o    with other current transactions
1236106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1237106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  - keep the transaction recovery information in the same file as the
1238106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o    database, using a special 'transaction recovery' record pointed at
1239106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o    by the header. This removes the need for extra journal files as
1240106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o    used by some other databases
1241106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
  - dynamically allocate the transaction recovery record, re-using it
    for subsequent transactions. If a larger record is needed then
    tdb_free() the old record to place it on the normal tdb freelist
    before allocating the new record
1246106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
  - during transactions, keep a linked list of all writes that have
    been performed by intercepting all tdb_write() calls. The hooked
    transaction versions of tdb_read() and tdb_write() check this
    linked list and try to use the elements of the list in preference
    to the real database.
1252106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1253106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  - don't allow any locks to be held when a transaction starts,
1254106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o    otherwise we can end up with deadlock (plus lack of lock nesting
1255106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o    in posix locks would mean the lock is lost)
1256106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1257106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  - if the caller gains a lock during the transaction but doesn't
1258106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o    release it then fail the commit
1259106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1260106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  - allow for nested calls to tdb_transaction_start(), re-using the
1261106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o    existing transaction record. If the inner transaction is cancelled
1262106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o    then a subsequent commit will fail
1263efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
1264106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  - keep a mirrored copy of the tdb hash chain heads to allow for the
1265106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o    fast hash heads scan on traverse, updating the mirrored copy in
1266106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o    the transaction version of tdb_write
1267106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1268106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  - allow callers to mix transaction and non-transaction use of tdb,
1269106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o    although once a transaction is started then an exclusive lock is
1270106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o    gained until the transaction is committed or cancelled
1271106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
  - the commit strategy involves first saving away all modified data
    into a linearised buffer in the transaction recovery area, then
    marking the transaction recovery area with a magic value to
    indicate a valid recovery record. In total 4 fsync/msync calls are
    needed per commit to prevent race conditions. It might be possible
    to reduce this to 3 or even 2 with some more work.
1278106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1279106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  - check for a valid recovery record on open of the tdb, while the
1280106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o    global lock is held. Automatically recover from the transaction
1281106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o    recovery area if needed, then continue with the open as
1282106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o    usual. This allows for smooth crash recovery with no administrator
1283106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o    intervention.
1284106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1285106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  - if TDB_NOSYNC is passed to flags in tdb_open then transactions are
1286106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o    still available, but no transaction recovery area is used and no
1287106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o    fsync/msync calls are made.
1288106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1289106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
1290106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1291106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostruct tdb_transaction_el {
1292106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct tdb_transaction_el *next, *prev;
1293106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t offset;
1294106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_len_t length;
1295106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	unsigned char *data;
1296106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o};
1297106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1298106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
1299106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  hold the context of any current transaction
1300106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
1301106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostruct tdb_transaction {
1302106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* we keep a mirrored copy of the tdb hash heads here so
1303106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   tdb_next_hash_chain() can operate efficiently */
1304106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	u32 *hash_heads;
1305106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1306106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* the original io methods - used to do IOs to the real db */
1307106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	const struct tdb_methods *io_methods;
1308106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1309106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* the list of transaction elements. We use a doubly linked
1310106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   list with a last pointer to allow us to keep the list
1311106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   ordered, with first element at the front of the list. It
1312106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   needs to be doubly linked as the read/write traversals need
1313106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   to be backwards, while the commit needs to be forwards */
1314106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct tdb_transaction_el *elements, *elements_last;
1315106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1316106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* non-zero when an internal transaction error has
1317106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   occurred. All write operations will then fail until the
1318106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   transaction is ended */
1319106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int transaction_error;
1320106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1321106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* when inside a transaction we need to keep track of any
1322106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   nested tdb_transaction_start() calls, as these are allowed,
1323106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   but don't create a new transaction */
1324106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int nesting;
1325106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1326106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* old file size before transaction */
1327106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_len_t old_map_size;
1328106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o};
1329106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1330106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1331106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
1332106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  read while in a transaction. We need to check first if the data is in our list
1333106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  of transaction elements, then if not do a real read
1334106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
1335efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'ostatic int transaction_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
1336106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			    tdb_len_t len, int cv)
1337106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
1338106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct tdb_transaction_el *el;
1339106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1340106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* we need to walk the list backwards to get the most recent data */
1341106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	for (el=tdb->transaction->elements_last;el;el=el->prev) {
1342106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb_len_t partial;
1343106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1344106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (off+len <= el->offset) {
1345106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			continue;
1346106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
1347106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (off >= el->offset + el->length) {
1348106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			continue;
1349106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
1350106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1351106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* an overlapping read - needs to be split into up to
1352106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		   2 reads and a memcpy */
1353106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (off < el->offset) {
1354106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			partial = el->offset - off;
1355106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			if (transaction_read(tdb, off, buf, partial, cv) != 0) {
1356106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				goto fail;
1357106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			}
1358106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			len -= partial;
1359106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			off += partial;
1360106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			buf = (void *)(partial + (char *)buf);
1361106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
1362106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (off + len <= el->offset + el->length) {
1363106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			partial = len;
1364106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		} else {
1365106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			partial = el->offset + el->length - off;
1366106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
1367106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		memcpy(buf, el->data + (off - el->offset), partial);
1368106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (cv) {
1369106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb_convert(buf, len);
1370106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
1371106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		len -= partial;
1372106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		off += partial;
1373106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		buf = (void *)(partial + (char *)buf);
1374efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
1375106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (len != 0 && transaction_read(tdb, off, buf, len, cv) != 0) {
1376106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			goto fail;
1377106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
1378106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1379106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
1380106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1381106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1382106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* its not in the transaction elements - do a real read */
1383106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return tdb->transaction->io_methods->tdb_read(tdb, off, buf, len, cv);
1384106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1385106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ofail:
1386106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	TDB_LOG((tdb, TDB_DEBUG_FATAL, "transaction_read: failed at off=%d len=%d\n", off, len));
1387106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->ecode = TDB_ERR_IO;
1388106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->transaction->transaction_error = 1;
1389106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return -1;
1390106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
1391106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1392106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1393106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
1394106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  write while in a transaction
1395106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
1396efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'ostatic int transaction_write(struct tdb_context *tdb, tdb_off_t off,
1397106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			     const void *buf, tdb_len_t len)
1398106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
1399106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct tdb_transaction_el *el, *best_el=NULL;
1400106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1401106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (len == 0) {
1402106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
1403106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1404efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
1405106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* if the write is to a hash head, then update the transaction
1406106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   hash heads */
1407106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (len == sizeof(tdb_off_t) && off >= FREELIST_TOP &&
1408106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	    off < FREELIST_TOP+TDB_HASHTABLE_SIZE(tdb)) {
1409106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		u32 chain = (off-FREELIST_TOP) / sizeof(tdb_off_t);
1410106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		memcpy(&tdb->transaction->hash_heads[chain], buf, len);
1411106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1412106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1413106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* first see if we can replace an existing entry */
1414106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	for (el=tdb->transaction->elements_last;el;el=el->prev) {
1415106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb_len_t partial;
1416106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1417106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (best_el == NULL && off == el->offset+el->length) {
1418106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			best_el = el;
1419106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
1420106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1421106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (off+len <= el->offset) {
1422106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			continue;
1423106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
1424106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (off >= el->offset + el->length) {
1425106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			continue;
1426106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
1427106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1428106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* an overlapping write - needs to be split into up to
1429106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		   2 writes and a memcpy */
1430106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (off < el->offset) {
1431106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			partial = el->offset - off;
1432106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			if (transaction_write(tdb, off, buf, partial) != 0) {
1433106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				goto fail;
1434106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			}
1435106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			len -= partial;
1436106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			off += partial;
1437106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			buf = (const void *)(partial + (const char *)buf);
1438106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
1439106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (off + len <= el->offset + el->length) {
1440106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			partial = len;
1441106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		} else {
1442106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			partial = el->offset + el->length - off;
1443106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
1444106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		memcpy(el->data + (off - el->offset), buf, partial);
1445106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		len -= partial;
1446106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		off += partial;
1447106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		buf = (const void *)(partial + (const char *)buf);
1448efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
1449106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (len != 0 && transaction_write(tdb, off, buf, len) != 0) {
1450106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			goto fail;
1451106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
1452106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1453106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
1454106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1455106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1456106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* see if we can append the new entry to an existing entry */
1457efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o	if (best_el && best_el->offset + best_el->length == off &&
1458106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	    (off+len < tdb->transaction->old_map_size ||
1459106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	     off > tdb->transaction->old_map_size)) {
1460106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		unsigned char *data = best_el->data;
1461106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		el = best_el;
1462106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		el->data = (unsigned char *)realloc(el->data,
1463106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o						    el->length + len);
1464106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (el->data == NULL) {
1465106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb->ecode = TDB_ERR_OOM;
1466106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb->transaction->transaction_error = 1;
1467106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			el->data = data;
1468106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return -1;
1469106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
1470106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (buf) {
1471106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			memcpy(el->data + el->length, buf, len);
1472106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		} else {
1473106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			memset(el->data + el->length, TDB_PAD_BYTE, len);
1474106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
1475106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		el->length += len;
1476106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
1477106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1478106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1479106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* add a new entry at the end of the list */
1480106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	el = (struct tdb_transaction_el *)malloc(sizeof(*el));
1481106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (el == NULL) {
1482106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_OOM;
1483efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o		tdb->transaction->transaction_error = 1;
1484106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
1485106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1486106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	el->next = NULL;
1487106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	el->prev = tdb->transaction->elements_last;
1488106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	el->offset = off;
1489106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	el->length = len;
1490106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	el->data = (unsigned char *)malloc(len);
1491106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (el->data == NULL) {
1492106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		free(el);
1493106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_OOM;
1494efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o		tdb->transaction->transaction_error = 1;
1495106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
1496106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1497106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (buf) {
1498106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		memcpy(el->data, buf, len);
1499106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	} else {
1500106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		memset(el->data, TDB_PAD_BYTE, len);
1501106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1502106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (el->prev) {
1503106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		el->prev->next = el;
1504106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	} else {
1505106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->transaction->elements = el;
1506106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1507106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->transaction->elements_last = el;
1508106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return 0;
1509106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1510106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ofail:
1511106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	TDB_LOG((tdb, TDB_DEBUG_FATAL, "transaction_write: failed at off=%d len=%d\n", off, len));
1512106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->ecode = TDB_ERR_IO;
1513106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->transaction->transaction_error = 1;
1514106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return -1;
1515106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
1516106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1517106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
1518106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  accelerated hash chain head search, using the cached hash heads
1519106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
1520106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic void transaction_next_hash_chain(struct tdb_context *tdb, u32 *chain)
1521106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
1522106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	u32 h = *chain;
1523106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	for (;h < tdb->header.hash_size;h++) {
1524106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* the +1 takes account of the freelist */
1525106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (0 != tdb->transaction->hash_heads[h+1]) {
1526106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			break;
1527106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
1528106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1529106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	(*chain) = h;
1530106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
1531106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1532106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
1533106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  out of bounds check during a transaction
1534106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
1535106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int transaction_oob(struct tdb_context *tdb, tdb_off_t len, int probe)
1536106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
1537106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (len <= tdb->map_size) {
1538106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
1539106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1540106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return TDB_ERRCODE(TDB_ERR_IO, -1);
1541106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
1542106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1543106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
1544106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  transaction version of tdb_expand().
1545106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
1546efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'ostatic int transaction_expand_file(struct tdb_context *tdb, tdb_off_t size,
1547106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				   tdb_off_t addition)
1548106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
1549106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* add a write to the transaction elements, so subsequent
1550106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   reads see the zero data */
1551106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (transaction_write(tdb, size, NULL, addition) != 0) {
1552106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
1553106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1554106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1555106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return 0;
1556106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
1557106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1558106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
1559106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  brlock during a transaction - ignore them
1560106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
1561efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'ostatic int transaction_brlock(struct tdb_context *tdb, tdb_off_t offset,
1562106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			      int rw_type, int lck_type, int probe, size_t len)
1563106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
1564106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return 0;
1565106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
1566106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
/*
  io method table used while a transaction is active.
  tdb_transaction_start() installs it in tdb->methods and
  tdb_transaction_cancel() puts the saved io_methods back.

  The initialisers are positional, so their order has to follow the member
  order of struct tdb_methods (declared elsewhere - confirm before
  reordering or adding entries).
*/
static const struct tdb_methods transaction_methods = {
	transaction_read,
	transaction_write,
	transaction_next_hash_chain,
	transaction_oob,
	transaction_expand_file,
	transaction_brlock
};
1575106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1576106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1577106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
1578106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  start a tdb transaction. No token is returned, as only a single
1579106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  transaction is allowed to be pending per tdb_context
1580106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
1581106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_transaction_start(struct tdb_context *tdb)
1582106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
1583106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* some sanity checks */
1584106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->read_only || (tdb->flags & TDB_INTERNAL) || tdb->traverse_read) {
1585106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_start: cannot start a transaction on a read-only or internal db\n"));
1586106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_EINVAL;
1587106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
1588106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1589106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1590106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* cope with nested tdb_transaction_start() calls */
1591106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->transaction != NULL) {
1592106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->transaction->nesting++;
1593efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_TRACE, "tdb_transaction_start: nesting %d\n",
1594106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			 tdb->transaction->nesting));
1595106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
1596106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1597106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1598106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->num_locks != 0 || tdb->global_lock.count) {
1599106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* the caller must not have any locks when starting a
1600106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		   transaction as otherwise we'll be screwed by lack
1601106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		   of nested locks in posix */
1602106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_start: cannot start a transaction with locks held\n"));
1603106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_LOCK;
1604106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
1605106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1606106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1607106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->travlocks.next != NULL) {
1608106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* you cannot use transactions inside a traverse (although you can use
1609106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		   traverse inside a transaction) as otherwise you can end up with
1610106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		   deadlock */
1611106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_start: cannot start a transaction within a traverse\n"));
1612106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_LOCK;
1613106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
1614106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1615106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1616106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->transaction = (struct tdb_transaction *)
1617106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		calloc(sizeof(struct tdb_transaction), 1);
1618106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->transaction == NULL) {
1619106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_OOM;
1620106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
1621106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1622106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1623106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* get the transaction write lock. This is a blocking lock. As
1624106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   discussed with Volker, there are a number of ways we could
1625106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   make this async, which we will probably do in the future */
16263eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	if (tdb_transaction_lock(tdb, F_WRLCK) == -1) {
1627106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		SAFE_FREE(tdb->transaction);
1628106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
1629106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1630efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
1631106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* get a read lock from the freelist to the end of file. This
1632106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   is upgraded to a write lock during the commit */
1633106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_brlock(tdb, FREELIST_TOP, F_RDLCK, F_SETLKW, 0, 0) == -1) {
1634106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_start: failed to get hash locks\n"));
1635106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_LOCK;
1636106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
1637106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1638106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1639106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* setup a copy of the hash table heads so the hash scan in
1640106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   traverse can be fast */
1641106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->transaction->hash_heads = (u32 *)
1642106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		calloc(tdb->header.hash_size+1, sizeof(u32));
1643106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->transaction->hash_heads == NULL) {
1644106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_OOM;
1645106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
1646106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1647106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->methods->tdb_read(tdb, FREELIST_TOP, tdb->transaction->hash_heads,
1648106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				   TDB_HASHTABLE_SIZE(tdb), 0) != 0) {
1649106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_start: failed to read hash heads\n"));
1650106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_IO;
1651106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
1652106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1653106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1654106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* make sure we know about any file expansions already done by
1655106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   anyone else */
1656106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->methods->tdb_oob(tdb, tdb->map_size + 1, 1);
1657106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->transaction->old_map_size = tdb->map_size;
1658106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1659106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* finally hook the io methods, replacing them with
1660106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   transaction specific methods */
1661106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->transaction->io_methods = tdb->methods;
1662106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->methods = &transaction_methods;
1663106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1664106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* by calling this transaction write here, we ensure that we don't grow the
1665106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   transaction linked list due to hash table updates */
1666efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o	if (transaction_write(tdb, FREELIST_TOP, tdb->transaction->hash_heads,
1667106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			      TDB_HASHTABLE_SIZE(tdb)) != 0) {
1668106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_start: failed to prime hash table\n"));
1669106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_IO;
16703eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o		tdb->methods = tdb->transaction->io_methods;
1671106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
1672106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1673106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1674106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return 0;
1675efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
1676106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ofail:
1677106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_brlock(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 0, 0);
16783eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	tdb_transaction_unlock(tdb);
1679106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	SAFE_FREE(tdb->transaction->hash_heads);
1680106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	SAFE_FREE(tdb->transaction);
1681106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return -1;
1682106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
1683106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1684106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1685106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
1686106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  cancel the current transaction
1687106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
1688106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_transaction_cancel(struct tdb_context *tdb)
1689efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o{
1690106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->transaction == NULL) {
1691106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_cancel: no transaction\n"));
1692106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
1693106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1694106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1695106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->transaction->nesting != 0) {
1696106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->transaction->transaction_error = 1;
1697106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->transaction->nesting--;
1698106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
1699efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o	}
1700106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1701106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->map_size = tdb->transaction->old_map_size;
1702106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1703106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* free all the transaction elements */
1704106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	while (tdb->transaction->elements) {
1705106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		struct tdb_transaction_el *el = tdb->transaction->elements;
1706106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->transaction->elements = el->next;
1707106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		free(el->data);
1708106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		free(el);
1709106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1710106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1711106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* remove any global lock created during the transaction */
1712106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->global_lock.count != 0) {
1713106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb_brlock(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 0, 4*tdb->header.hash_size);
1714106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->global_lock.count = 0;
1715106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1716106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1717106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* remove any locks created during the transaction */
1718106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->num_locks != 0) {
1719106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		int i;
1720106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		for (i=0;i<tdb->num_lockrecs;i++) {
1721106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb_brlock(tdb,FREELIST_TOP+4*tdb->lockrecs[i].list,
1722106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				   F_UNLCK,F_SETLKW, 0, 1);
1723106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
1724106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->num_locks = 0;
17253eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o		tdb->num_lockrecs = 0;
17263eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o		SAFE_FREE(tdb->lockrecs);
1727106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1728106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1729106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* restore the normal io methods */
1730106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->methods = tdb->transaction->io_methods;
1731106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1732106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_brlock(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 0, 0);
17333eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	tdb_transaction_unlock(tdb);
1734106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	SAFE_FREE(tdb->transaction->hash_heads);
1735106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	SAFE_FREE(tdb->transaction);
1736efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
1737106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return 0;
1738106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
1739106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1740106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
1741106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  sync to disk
1742106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
1743106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int transaction_sync(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t length)
1744efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o{
1745106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (fsync(tdb->fd) != 0) {
1746106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_IO;
1747106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction: fsync failed\n"));
1748106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
1749106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1750e0ed7404719a9ddd2ba427a80db5365c8bad18c0JP Abgrall#if defined(HAVE_MSYNC) && defined(MS_SYNC)
1751106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->map_ptr) {
1752106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb_off_t moffset = offset & ~(tdb->page_size-1);
1753efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o		if (msync(moffset + (char *)tdb->map_ptr,
1754106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			  length + (offset - moffset), MS_SYNC) != 0) {
1755106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb->ecode = TDB_ERR_IO;
1756106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction: msync failed - %s\n",
1757106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				 strerror(errno)));
1758106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return -1;
1759106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
1760106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1761106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#endif
1762106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return 0;
1763106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
1764106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1765106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1766106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
1767106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  work out how much space the linearised recovery data will consume
1768106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
1769106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic tdb_len_t tdb_recovery_size(struct tdb_context *tdb)
1770106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
1771106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct tdb_transaction_el *el;
1772106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_len_t recovery_size = 0;
1773106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1774106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	recovery_size = sizeof(u32);
1775106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	for (el=tdb->transaction->elements;el;el=el->next) {
1776106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (el->offset >= tdb->transaction->old_map_size) {
1777106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			continue;
1778106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
1779106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		recovery_size += 2*sizeof(tdb_off_t) + el->length;
1780106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1781106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1782106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return recovery_size;
1783106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
1784106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1785106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
1786106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  allocate the recovery area, or use an existing recovery area if it is
1787106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  large enough
1788106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
1789efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'ostatic int tdb_recovery_allocate(struct tdb_context *tdb,
1790106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				 tdb_len_t *recovery_size,
1791106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				 tdb_off_t *recovery_offset,
1792106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				 tdb_len_t *recovery_max_size)
1793106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
1794106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct list_struct rec;
1795106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	const struct tdb_methods *methods = tdb->transaction->io_methods;
1796106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t recovery_head;
1797106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1798106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) {
1799106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: failed to read recovery head\n"));
1800106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
1801106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1802106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1803106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	rec.rec_len = 0;
1804106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1805efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o	if (recovery_head != 0 &&
1806106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	    methods->tdb_read(tdb, recovery_head, &rec, sizeof(rec), DOCONV()) == -1) {
1807106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: failed to read recovery record\n"));
1808106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
1809106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1810106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1811106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	*recovery_size = tdb_recovery_size(tdb);
1812106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1813106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (recovery_head != 0 && *recovery_size <= rec.rec_len) {
1814106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* it fits in the existing area */
1815106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		*recovery_max_size = rec.rec_len;
1816106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		*recovery_offset = recovery_head;
1817106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
1818106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1819106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1820106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* we need to free up the old recovery area, then allocate a
1821106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   new one at the end of the file. Note that we cannot use
1822106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   tdb_allocate() to allocate the new one as that might return
1823106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   us an area that is being currently used (as of the start of
1824106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   the transaction) */
1825106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (recovery_head != 0) {
1826106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb_free(tdb, recovery_head, &rec) == -1) {
1827106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: failed to free previous recovery area\n"));
1828106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return -1;
1829106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
1830106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1831106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1832106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* the tdb_free() call might have increased the recovery size */
1833106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	*recovery_size = tdb_recovery_size(tdb);
1834106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1835106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* round up to a multiple of page size */
1836106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	*recovery_max_size = TDB_ALIGN(sizeof(rec) + *recovery_size, tdb->page_size) - sizeof(rec);
1837106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	*recovery_offset = tdb->map_size;
1838106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	recovery_head = *recovery_offset;
1839106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1840efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o	if (methods->tdb_expand_file(tdb, tdb->transaction->old_map_size,
1841106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				     (tdb->map_size - tdb->transaction->old_map_size) +
1842106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				     sizeof(rec) + *recovery_max_size) == -1) {
1843106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: failed to create recovery area\n"));
1844106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
1845106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1846106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1847106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* remap the file (if using mmap) */
1848106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	methods->tdb_oob(tdb, tdb->map_size + 1, 1);
1849106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1850106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* we have to reset the old map size so that we don't try to expand the file
1851106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   again in the transaction commit, which would destroy the recovery area */
1852106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->transaction->old_map_size = tdb->map_size;
1853106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1854106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* write the recovery header offset and sync - we can sync without a race here
1855106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   as the magic ptr in the recovery record has not been set */
1856106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	CONVERT(recovery_head);
1857efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o	if (methods->tdb_write(tdb, TDB_RECOVERY_HEAD,
1858106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			       &recovery_head, sizeof(tdb_off_t)) == -1) {
1859106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: failed to write recovery head\n"));
1860106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
1861106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1862106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1863106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return 0;
1864106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
1865106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1866106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1867106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
1868106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  setup the recovery data that will be used on a crash during commit
1869106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
1870efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'ostatic int transaction_setup_recovery(struct tdb_context *tdb,
1871106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				      tdb_off_t *magic_offset)
1872106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
1873106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct tdb_transaction_el *el;
1874106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_len_t recovery_size;
1875106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	unsigned char *data, *p;
1876106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	const struct tdb_methods *methods = tdb->transaction->io_methods;
1877106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct list_struct *rec;
1878106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t recovery_offset, recovery_max_size;
1879106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t old_map_size = tdb->transaction->old_map_size;
1880106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	u32 magic, tailer;
1881106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1882106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/*
1883106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	  check that the recovery area has enough space
1884106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	*/
1885efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o	if (tdb_recovery_allocate(tdb, &recovery_size,
1886106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				  &recovery_offset, &recovery_max_size) == -1) {
1887106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
1888106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1889106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1890106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	data = (unsigned char *)malloc(recovery_size + sizeof(*rec));
1891106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (data == NULL) {
1892106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_OOM;
1893106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
1894106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1895106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1896106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	rec = (struct list_struct *)data;
1897106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	memset(rec, 0, sizeof(*rec));
1898106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1899106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	rec->magic    = 0;
1900106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	rec->data_len = recovery_size;
1901106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	rec->rec_len  = recovery_max_size;
1902106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	rec->key_len  = old_map_size;
1903106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	CONVERT(rec);
1904106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1905106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* build the recovery data into a single blob to allow us to do a single
1906106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   large write, which should be more efficient */
1907106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	p = data + sizeof(*rec);
1908106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	for (el=tdb->transaction->elements;el;el=el->next) {
1909106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (el->offset >= old_map_size) {
1910106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			continue;
1911106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
1912106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (el->offset + el->length > tdb->transaction->old_map_size) {
1913106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_setup_recovery: transaction data over new region boundary\n"));
1914106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			free(data);
1915106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb->ecode = TDB_ERR_CORRUPT;
1916106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return -1;
1917106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
1918106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		memcpy(p, &el->offset, 4);
1919106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		memcpy(p+4, &el->length, 4);
1920106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (DOCONV()) {
1921106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb_convert(p, 8);
1922106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
1923106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* the recovery area contains the old data, not the
1924106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		   new data, so we have to call the original tdb_read
1925106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		   method to get it */
1926106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (methods->tdb_read(tdb, el->offset, p + 8, el->length, 0) != 0) {
1927106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			free(data);
1928106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb->ecode = TDB_ERR_IO;
1929106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return -1;
1930106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
1931106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		p += 8 + el->length;
1932106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1933106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1934106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* and the tailer */
1935106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tailer = sizeof(*rec) + recovery_max_size;
1936106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	memcpy(p, &tailer, 4);
1937106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	CONVERT(p);
1938106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1939106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* write the recovery data to the recovery area */
1940106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (methods->tdb_write(tdb, recovery_offset, data, sizeof(*rec) + recovery_size) == -1) {
1941106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_setup_recovery: failed to write recovery data\n"));
1942106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		free(data);
1943106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_IO;
1944106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
1945106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1946106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1947106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* as we don't have ordered writes, we have to sync the recovery
1948106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   data before we update the magic to indicate that the recovery
1949106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   data is present */
1950106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (transaction_sync(tdb, recovery_offset, sizeof(*rec) + recovery_size) == -1) {
1951106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		free(data);
1952106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
1953106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1954106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1955106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	free(data);
1956106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1957106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	magic = TDB_RECOVERY_MAGIC;
1958106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	CONVERT(magic);
1959106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1960106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	*magic_offset = recovery_offset + offsetof(struct list_struct, magic);
1961106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1962106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (methods->tdb_write(tdb, *magic_offset, &magic, sizeof(magic)) == -1) {
1963106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_setup_recovery: failed to write recovery magic\n"));
1964106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_IO;
1965106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
1966106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1967106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1968106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* ensure the recovery magic marker is on disk */
1969106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (transaction_sync(tdb, *magic_offset, sizeof(magic)) == -1) {
1970106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
1971106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1972106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1973106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return 0;
1974106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
1975106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1976106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
1977106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  commit the current transaction
1978106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
1979106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_transaction_commit(struct tdb_context *tdb)
1980efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o{
1981106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	const struct tdb_methods *methods;
1982106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t magic_offset = 0;
1983106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	u32 zero = 0;
1984106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1985106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->transaction == NULL) {
1986106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_commit: no transaction\n"));
1987106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
1988106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1989106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1990106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->transaction->transaction_error) {
1991106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_IO;
1992106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb_transaction_cancel(tdb);
1993106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_commit: transaction error pending\n"));
1994106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
1995106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
1996106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
1997106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->transaction->nesting != 0) {
1998106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->transaction->nesting--;
1999106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
2000efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o	}
2001106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2002106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* check for a null transaction */
2003106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->transaction->elements == NULL) {
2004106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb_transaction_cancel(tdb);
2005106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
2006106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2007106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2008106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	methods = tdb->transaction->io_methods;
2009efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
2010106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* if there are any locks pending then the caller has not
2011106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   nested their locks properly, so fail the transaction */
2012106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->num_locks || tdb->global_lock.count) {
2013106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_LOCK;
2014106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_commit: locks pending on commit\n"));
2015106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb_transaction_cancel(tdb);
2016106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
2017106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2018106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2019106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* upgrade the main transaction lock region to a write lock */
2020106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_brlock_upgrade(tdb, FREELIST_TOP, 0) == -1) {
2021106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_start: failed to upgrade hash locks\n"));
2022106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_LOCK;
2023106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb_transaction_cancel(tdb);
2024106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
2025106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2026106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2027106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* get the global lock - this prevents new users attaching to the database
2028106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   during the commit */
2029106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_brlock(tdb, GLOBAL_LOCK, F_WRLCK, F_SETLKW, 0, 1) == -1) {
2030106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_commit: failed to get global lock\n"));
2031106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_LOCK;
2032106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb_transaction_cancel(tdb);
2033106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
2034106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2035106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2036106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (!(tdb->flags & TDB_NOSYNC)) {
2037106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* write the recovery data to the end of the file */
2038106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (transaction_setup_recovery(tdb, &magic_offset) == -1) {
2039106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: failed to setup recovery data\n"));
2040106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1);
2041106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb_transaction_cancel(tdb);
2042106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return -1;
2043106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
2044106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2045106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2046106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* expand the file to the new size if needed */
2047106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->map_size != tdb->transaction->old_map_size) {
2048efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o		if (methods->tdb_expand_file(tdb, tdb->transaction->old_map_size,
2049efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o					     tdb->map_size -
2050106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o					     tdb->transaction->old_map_size) == -1) {
2051106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb->ecode = TDB_ERR_IO;
2052106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: expansion failed\n"));
2053106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1);
2054106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb_transaction_cancel(tdb);
2055106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return -1;
2056106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
2057106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->map_size = tdb->transaction->old_map_size;
2058106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		methods->tdb_oob(tdb, tdb->map_size + 1, 1);
2059106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2060106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2061106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* perform all the writes */
2062106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	while (tdb->transaction->elements) {
2063106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		struct tdb_transaction_el *el = tdb->transaction->elements;
2064106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2065106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (methods->tdb_write(tdb, el->offset, el->data, el->length) == -1) {
2066106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: write failed during commit\n"));
2067efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
2068106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			/* we've overwritten part of the data and
2069106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			   possibly expanded the file, so we need to
2070106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			   run the crash recovery code */
2071106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb->methods = methods;
2072efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o			tdb_transaction_recover(tdb);
2073106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2074106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb_transaction_cancel(tdb);
2075106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1);
2076106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2077106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: write failed\n"));
2078106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return -1;
2079106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
2080106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->transaction->elements = el->next;
2081efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o		free(el->data);
2082106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		free(el);
2083efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o	}
2084106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2085106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (!(tdb->flags & TDB_NOSYNC)) {
2086106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* ensure the new data is on disk */
2087106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (transaction_sync(tdb, 0, tdb->map_size) == -1) {
2088106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return -1;
2089106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
2090106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2091106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* remove the recovery marker */
2092106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (methods->tdb_write(tdb, magic_offset, &zero, 4) == -1) {
2093106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: failed to remove recovery magic\n"));
2094106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return -1;
2095106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
2096106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2097106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* ensure the recovery marker has been removed on disk */
2098106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (transaction_sync(tdb, magic_offset, 4) == -1) {
2099106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return -1;
2100106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
2101106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2102106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2103106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1);
2104106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2105106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/*
2106106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	  TODO: maybe write to some dummy hdr field, or write to magic
2107106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	  offset without mmap, before the last sync, instead of the
2108106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	  utime() call
2109106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	*/
2110106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2111106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* on some systems (like Linux 2.6.x) changes via mmap/msync
2112106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   don't change the mtime of the file, this means the file may
2113106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   not be backed up (as tdb rounding to block sizes means that
2114106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   file size changes are quite rare too). The following forces
2115106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   mtime changes when a transaction completes */
2116106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#ifdef HAVE_UTIME
2117106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	utime(tdb->name, NULL);
2118106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o#endif
2119106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2120106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* use a transaction cancel to free memory and remove the
2121106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   transaction locks */
2122106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_transaction_cancel(tdb);
2123106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return 0;
2124106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
2125106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2126106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2127106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
2128106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  recover from an aborted transaction. Must be called with exclusive
2129106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  database write access already established (including the global
2130106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  lock to prevent new processes attaching)
2131106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
2132106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_transaction_recover(struct tdb_context *tdb)
2133106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
2134106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t recovery_head, recovery_eof;
2135106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	unsigned char *data, *p;
2136106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	u32 zero = 0;
2137106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct list_struct rec;
2138106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2139106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* find the recovery area */
2140106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) {
2141106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to read recovery head\n"));
2142106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_IO;
2143106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
2144106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2145106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2146106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (recovery_head == 0) {
2147106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* we have never allocated a recovery record */
2148106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
2149106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2150106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2151106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* read the recovery record */
2152efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o	if (tdb->methods->tdb_read(tdb, recovery_head, &rec,
2153106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				   sizeof(rec), DOCONV()) == -1) {
2154efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to read recovery record\n"));
2155106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_IO;
2156106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
2157106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2158106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2159106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (rec.magic != TDB_RECOVERY_MAGIC) {
2160106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* there is no valid recovery data */
2161106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
2162106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2163106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2164106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->read_only) {
2165106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: attempt to recover read only database\n"));
2166106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_CORRUPT;
2167106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
2168106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2169106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2170106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	recovery_eof = rec.key_len;
2171106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2172106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	data = (unsigned char *)malloc(rec.data_len);
2173106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (data == NULL) {
2174efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to allocate recovery data\n"));
2175106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_OOM;
2176106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
2177106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2178106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2179106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* read the full recovery data */
2180106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->methods->tdb_read(tdb, recovery_head + sizeof(rec), data,
2181106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				   rec.data_len, 0) == -1) {
2182efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to read recovery data\n"));
2183106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_IO;
2184106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
2185106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2186106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2187106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* recover the file data */
2188106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	p = data;
2189106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	while (p+8 < data + rec.data_len) {
2190106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		u32 ofs, len;
2191106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (DOCONV()) {
2192106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb_convert(p, 8);
2193106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
2194106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		memcpy(&ofs, p, 4);
2195106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		memcpy(&len, p+4, 4);
2196106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2197106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb->methods->tdb_write(tdb, ofs, p+8, len) == -1) {
2198106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			free(data);
2199106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to recover %d bytes at offset %d\n", len, ofs));
2200106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb->ecode = TDB_ERR_IO;
2201106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return -1;
2202106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
2203106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		p += 8 + len;
2204106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2205106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2206106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	free(data);
2207106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2208106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (transaction_sync(tdb, 0, tdb->map_size) == -1) {
2209106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to sync recovery\n"));
2210106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_IO;
2211106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
2212106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2213106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2214106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* if the recovery area is after the recovered eof then remove it */
2215106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (recovery_eof <= recovery_head) {
2216106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb_ofs_write(tdb, TDB_RECOVERY_HEAD, &zero) == -1) {
2217106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to remove recovery head\n"));
2218106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb->ecode = TDB_ERR_IO;
2219efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o			return -1;
2220106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
2221106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2222106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2223106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* remove the recovery magic */
2224efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o	if (tdb_ofs_write(tdb, recovery_head + offsetof(struct list_struct, magic),
2225106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			  &zero) == -1) {
2226106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to remove recovery magic\n"));
2227106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_IO;
2228efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o		return -1;
2229106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2230efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
2231106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* reduce the file size to the old size */
2232106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_munmap(tdb);
2233106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (ftruncate(tdb->fd, recovery_eof) != 0) {
2234106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to reduce to recovery size\n"));
2235106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_IO;
2236efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o		return -1;
2237106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2238106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->map_size = recovery_eof;
2239106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_mmap(tdb);
2240106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2241106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (transaction_sync(tdb, 0, recovery_eof) == -1) {
2242106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to sync2 recovery\n"));
2243106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_IO;
2244106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
2245106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2246106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2247efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o	TDB_LOG((tdb, TDB_DEBUG_TRACE, "tdb_transaction_recover: recovered %d byte database\n",
2248106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 recovery_eof));
2249106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2250106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* all done */
2251106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return 0;
2252106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
2253106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2254106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* file: freelist.c */
2255106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2256106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* read a freelist record and check for simple errors */
22573eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'ostatic int tdb_rec_free_read(struct tdb_context *tdb, tdb_off_t off, struct list_struct *rec)
2258106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
2259106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->methods->tdb_read(tdb, off, rec, sizeof(*rec),DOCONV()) == -1)
2260106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
2261106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2262106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (rec->magic == TDB_MAGIC) {
2263106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* this happens when a app is showdown while deleting a record - we should
2264106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		   not completely fail when this happens */
2265efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_rec_free_read non-free magic 0x%x at offset=%d - fixing\n",
2266106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			 rec->magic, off));
2267106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		rec->magic = TDB_FREE_MAGIC;
2268106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb->methods->tdb_write(tdb, off, rec, sizeof(*rec)) == -1)
2269106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return -1;
2270106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2271106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2272106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (rec->magic != TDB_FREE_MAGIC) {
2273106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* Ensure ecode is set for log fn. */
2274106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_CORRUPT;
2275efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_rec_free_read bad magic 0x%x at offset=%d\n",
2276106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			   rec->magic, off));
2277106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
2278106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2279106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->methods->tdb_oob(tdb, rec->next+sizeof(*rec), 0) != 0)
2280106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
2281106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return 0;
2282106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
2283106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2284106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2285106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2286106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* Remove an element from the freelist.  Must have alloc lock. */
2287106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int remove_from_freelist(struct tdb_context *tdb, tdb_off_t off, tdb_off_t next)
2288106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
2289106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t last_ptr, i;
2290106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2291106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* read in the freelist top */
2292106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	last_ptr = FREELIST_TOP;
2293106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	while (tdb_ofs_read(tdb, last_ptr, &i) != -1 && i != 0) {
2294106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (i == off) {
2295106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			/* We've found it! */
2296106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return tdb_ofs_write(tdb, last_ptr, &next);
2297106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
2298106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* Follow chain (next offset is at start of record) */
2299106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		last_ptr = i;
2300106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2301106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	TDB_LOG((tdb, TDB_DEBUG_FATAL,"remove_from_freelist: not on list at off=%d\n", off));
2302106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
2303106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
2304106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2305106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2306106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* update a record tailer (must hold allocation lock) */
2307106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int update_tailer(struct tdb_context *tdb, tdb_off_t offset,
2308106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			 const struct list_struct *rec)
2309106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
2310106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t totalsize;
2311106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2312106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* Offset of tailer from record header */
2313106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	totalsize = sizeof(*rec) + rec->rec_len;
2314106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return tdb_ofs_write(tdb, offset + totalsize - sizeof(tdb_off_t),
2315106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			 &totalsize);
2316106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
2317106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2318106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* Add an element into the freelist. Merge adjacent records if
2319106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   neccessary. */
2320106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_free(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec)
2321106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
2322106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t right, left;
2323106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2324106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* Allocation and tailer lock */
2325106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_lock(tdb, -1, F_WRLCK) != 0)
2326106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
2327106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2328106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* set an initial tailer, so if we fail we don't leave a bogus record */
2329106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (update_tailer(tdb, offset, rec) != 0) {
2330106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: update_tailer failed!\n"));
2331106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
2332106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2333106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2334106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* Look right first (I'm an Australian, dammit) */
2335106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	right = offset + sizeof(*rec) + rec->rec_len;
2336106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (right + sizeof(*rec) <= tdb->map_size) {
2337106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		struct list_struct r;
2338106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2339106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb->methods->tdb_read(tdb, right, &r, sizeof(r), DOCONV()) == -1) {
2340106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: right read failed at %u\n", right));
2341106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			goto left;
2342106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
2343106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2344106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* If it's free, expand to include it. */
2345106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (r.magic == TDB_FREE_MAGIC) {
2346106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			if (remove_from_freelist(tdb, right, r.next) == -1) {
2347106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: right free failed at %u\n", right));
2348106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				goto left;
2349106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			}
2350106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			rec->rec_len += sizeof(r) + r.rec_len;
2351106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
2352106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2353106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2354106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oleft:
2355106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* Look left */
2356106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	left = offset - sizeof(tdb_off_t);
2357106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (left > TDB_DATA_START(tdb->header.hash_size)) {
2358106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		struct list_struct l;
2359106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb_off_t leftsize;
2360efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
2361106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* Read in tailer and jump back to header */
2362106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb_ofs_read(tdb, left, &leftsize) == -1) {
2363106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: left offset read failed at %u\n", left));
2364106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			goto update;
2365106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
2366106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2367106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* it could be uninitialised data */
2368106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (leftsize == 0 || leftsize == TDB_PAD_U32) {
2369106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			goto update;
2370106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
2371106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2372106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		left = offset - leftsize;
2373106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2374106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* Now read in record */
2375106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb->methods->tdb_read(tdb, left, &l, sizeof(l), DOCONV()) == -1) {
2376106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: left read failed at %u (%u)\n", left, leftsize));
2377106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			goto update;
2378106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
2379106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2380106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* If it's free, expand to include it. */
2381106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (l.magic == TDB_FREE_MAGIC) {
2382106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			if (remove_from_freelist(tdb, left, l.next) == -1) {
2383106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: left free failed at %u\n", left));
2384106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				goto update;
2385106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			} else {
2386106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				offset = left;
2387106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				rec->rec_len += leftsize;
2388106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			}
2389106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
2390106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2391106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2392106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oupdate:
2393106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (update_tailer(tdb, offset, rec) == -1) {
2394106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: update_tailer failed at %u\n", offset));
2395106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
2396106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2397106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2398106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* Now, prepend to free list */
2399106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	rec->magic = TDB_FREE_MAGIC;
2400106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2401106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_ofs_read(tdb, FREELIST_TOP, &rec->next) == -1 ||
2402106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	    tdb_rec_write(tdb, offset, rec) == -1 ||
2403106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	    tdb_ofs_write(tdb, FREELIST_TOP, &offset) == -1) {
2404106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free record write failed at offset=%d\n", offset));
2405106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
2406106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2407106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2408106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* And we're done. */
2409106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_unlock(tdb, -1, F_WRLCK);
2410106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return 0;
2411106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2412106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o fail:
2413106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_unlock(tdb, -1, F_WRLCK);
2414106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return -1;
2415106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
2416106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2417106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2418efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o/*
2419106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   the core of tdb_allocate - called when we have decided which
2420106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   free list entry to use
2421106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o */
2422106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic tdb_off_t tdb_allocate_ofs(struct tdb_context *tdb, tdb_len_t length, tdb_off_t rec_ptr,
2423106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				struct list_struct *rec, tdb_off_t last_ptr)
2424106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
2425106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct list_struct newrec;
2426106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t newrec_ptr;
2427106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2428106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	memset(&newrec, '\0', sizeof(newrec));
2429106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2430106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* found it - now possibly split it up  */
2431106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (rec->rec_len > length + MIN_REC_SIZE) {
2432106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* Length of left piece */
2433106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		length = TDB_ALIGN(length, TDB_ALIGNMENT);
2434efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
2435106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* Right piece to go on free list */
2436106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		newrec.rec_len = rec->rec_len - (sizeof(*rec) + length);
2437106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		newrec_ptr = rec_ptr + sizeof(*rec) + length;
2438efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
2439106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* And left record is shortened */
2440106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		rec->rec_len = length;
2441106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	} else {
2442106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		newrec_ptr = 0;
2443106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2444efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
2445106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* Remove allocated record from the free list */
2446106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_ofs_write(tdb, last_ptr, &rec->next) == -1) {
2447106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
2448106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2449efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
2450106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* Update header: do this before we drop alloc
2451106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   lock, otherwise tdb_free() might try to
2452106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   merge with us, thinking we're free.
2453106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   (Thanks Jeremy Allison). */
2454106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	rec->magic = TDB_MAGIC;
2455106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_rec_write(tdb, rec_ptr, rec) == -1) {
2456106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
2457106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2458efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
2459106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* Did we create new block? */
2460106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (newrec_ptr) {
2461106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* Update allocated record tailer (we
2462106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		   shortened it). */
2463106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (update_tailer(tdb, rec_ptr, rec) == -1) {
2464106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return 0;
2465106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
2466efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
2467106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* Free new record */
2468106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb_free(tdb, newrec_ptr, &newrec) == -1) {
2469106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return 0;
2470106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
2471106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2472efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
2473106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* all done - return the new record offset */
2474106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return rec_ptr;
2475106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
2476106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2477106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* allocate some space from the free list. The offset returned points
2478106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   to a unconnected list_struct within the database with room for at
2479106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   least length bytes of total data
2480106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2481106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   0 is returned if the space could not be allocated
2482106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o */
2483106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'otdb_off_t tdb_allocate(struct tdb_context *tdb, tdb_len_t length, struct list_struct *rec)
2484106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
2485106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t rec_ptr, last_ptr, newrec_ptr;
2486106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct {
2487106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb_off_t rec_ptr, last_ptr;
2488106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb_len_t rec_len;
2489106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	} bestfit;
2490106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2491106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_lock(tdb, -1, F_WRLCK) == -1)
2492106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
2493106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2494106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* Extra bytes required for tailer */
2495106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	length += sizeof(tdb_off_t);
2496106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2497106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o again:
2498106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	last_ptr = FREELIST_TOP;
2499106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2500106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* read in the freelist top */
2501106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_ofs_read(tdb, FREELIST_TOP, &rec_ptr) == -1)
2502106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
2503106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2504106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	bestfit.rec_ptr = 0;
2505106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	bestfit.last_ptr = 0;
2506106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	bestfit.rec_len = 0;
2507106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2508efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o	/*
2509106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   this is a best fit allocation strategy. Originally we used
2510106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   a first fit strategy, but it suffered from massive fragmentation
2511106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   issues when faced with a slowly increasing record size.
2512106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	 */
2513106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	while (rec_ptr) {
25143eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o		if (tdb_rec_free_read(tdb, rec_ptr, rec) == -1) {
2515106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			goto fail;
2516106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
2517106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2518106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (rec->rec_len >= length) {
2519106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			if (bestfit.rec_ptr == 0 ||
2520106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			    rec->rec_len < bestfit.rec_len) {
2521106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				bestfit.rec_len = rec->rec_len;
2522106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				bestfit.rec_ptr = rec_ptr;
2523106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				bestfit.last_ptr = last_ptr;
2524106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				/* consider a fit to be good enough if
2525106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				   we aren't wasting more than half
2526106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				   the space */
2527106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				if (bestfit.rec_len < 2*length) {
2528106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o					break;
2529106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				}
2530106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			}
2531106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
2532106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2533106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* move to the next record */
2534106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		last_ptr = rec_ptr;
2535106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		rec_ptr = rec->next;
2536106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2537106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2538106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (bestfit.rec_ptr != 0) {
25393eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o		if (tdb_rec_free_read(tdb, bestfit.rec_ptr, rec) == -1) {
2540106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			goto fail;
2541106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
2542106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2543106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		newrec_ptr = tdb_allocate_ofs(tdb, length, bestfit.rec_ptr, rec, bestfit.last_ptr);
2544106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb_unlock(tdb, -1, F_WRLCK);
2545106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return newrec_ptr;
2546106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2547106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2548106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* we didn't find enough space. See if we can expand the
2549106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   database and if we can then try again */
2550106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_expand(tdb, length + sizeof(*rec)) == 0)
2551106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto again;
2552106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o fail:
2553106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_unlock(tdb, -1, F_WRLCK);
2554106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return 0;
2555106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
2556106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2557106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* file: freelistcheck.c */
2558106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2559106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* Check the freelist is good and contains no loops.
2560106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   Very memory intensive - only do this as a consistency
2561106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   checker. Heh heh - uses an in memory tdb as the storage
2562106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   for the "seen" record list. For some reason this strikes
2563106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   me as extremely clever as I don't have to write another tree
2564106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   data structure implementation :-).
2565106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o */
2566106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2567106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int seen_insert(struct tdb_context *mem_tdb, tdb_off_t rec_ptr)
2568106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
2569106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	TDB_DATA key, data;
2570106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2571106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	memset(&data, '\0', sizeof(data));
2572106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	key.dptr = (unsigned char *)&rec_ptr;
2573106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	key.dsize = sizeof(rec_ptr);
2574106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return tdb_store(mem_tdb, key, data, TDB_INSERT);
2575106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
2576106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2577106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_validate_freelist(struct tdb_context *tdb, int *pnum_entries)
2578106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
2579106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct tdb_context *mem_tdb = NULL;
2580106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct list_struct rec;
2581106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t rec_ptr, last_ptr;
2582106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int ret = -1;
2583106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2584106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	*pnum_entries = 0;
2585106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2586106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	mem_tdb = tdb_open("flval", tdb->header.hash_size,
2587106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				TDB_INTERNAL, O_RDWR, 0600);
2588106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (!mem_tdb) {
2589106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
2590106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2591106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2592106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
2593106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb_close(mem_tdb);
2594106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
2595106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2596106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2597106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	last_ptr = FREELIST_TOP;
2598106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2599106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* Store the FREELIST_TOP record. */
2600106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (seen_insert(mem_tdb, last_ptr) == -1) {
2601106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		ret = TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
2602106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
2603106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2604106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2605106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* read in the freelist top */
2606106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_ofs_read(tdb, FREELIST_TOP, &rec_ptr) == -1) {
2607106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
2608106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2609106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2610106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	while (rec_ptr) {
2611106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2612106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* If we can't store this record (we've seen it
2613106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		   before) then the free list has a loop and must
2614106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		   be corrupt. */
2615106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2616106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (seen_insert(mem_tdb, rec_ptr)) {
2617106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			ret = TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
2618106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			goto fail;
2619106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
2620106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
26213eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o		if (tdb_rec_free_read(tdb, rec_ptr, &rec) == -1) {
2622106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			goto fail;
2623106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
2624106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2625106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* move to the next record */
2626106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		last_ptr = rec_ptr;
2627106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		rec_ptr = rec.next;
2628106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		*pnum_entries += 1;
2629106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2630106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2631106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	ret = 0;
2632106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2633106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  fail:
2634106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2635106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_close(mem_tdb);
2636106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_unlock(tdb, -1, F_WRLCK);
2637106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return ret;
2638106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
2639106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2640106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* file: traverse.c */
2641106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2642106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* Uses traverse lock: 0 = finish, -1 = error, other = record offset */
2643106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int tdb_next_lock(struct tdb_context *tdb, struct tdb_traverse_lock *tlock,
2644106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			 struct list_struct *rec)
2645106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
2646106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int want_next = (tlock->off != 0);
2647106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2648106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* Lock each chain from the start one. */
2649106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	for (; tlock->hash < tdb->header.hash_size; tlock->hash++) {
2650106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (!tlock->off && tlock->hash != 0) {
2651106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			/* this is an optimisation for the common case where
2652106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			   the hash chain is empty, which is particularly
2653106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			   common for the use of tdb with ldb, where large
2654106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			   hashes are used. In that case we spend most of our
2655106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			   time in tdb_brlock(), locking empty hash chains.
2656efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
2657106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			   To avoid this, we do an unlocked pre-check to see
2658106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			   if the hash chain is empty before starting to look
2659106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			   inside it. If it is empty then we can avoid that
2660106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			   hash chain. If it isn't empty then we can't believe
2661106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			   the value we get back, as we read it without a
2662106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			   lock, so instead we get the lock and re-fetch the
2663106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			   value below.
2664efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
2665106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			   Notice that not doing this optimisation on the
2666106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			   first hash chain is critical. We must guarantee
2667106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			   that we have done at least one fcntl lock at the
2668106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			   start of a search to guarantee that memory is
2669106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			   coherent on SMP systems. If records are added by
2670106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			   others during the search then thats OK, and we
2671106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			   could possibly miss those with this trick, but we
2672106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			   could miss them anyway without this trick, so the
2673106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			   semantics don't change.
2674efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
2675106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			   With a non-indexed ldb search this trick gains us a
2676106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			   factor of around 80 in speed on a linux 2.6.x
2677106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			   system (testing using ldbtest).
2678106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			*/
2679106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb->methods->next_hash_chain(tdb, &tlock->hash);
2680106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			if (tlock->hash == tdb->header.hash_size) {
2681106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				continue;
2682106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			}
2683106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
2684106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2685106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb_lock(tdb, tlock->hash, tlock->lock_rw) == -1)
2686106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return -1;
2687106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2688106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* No previous record?  Start at top of chain. */
2689106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (!tlock->off) {
2690106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			if (tdb_ofs_read(tdb, TDB_HASH_TOP(tlock->hash),
2691106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				     &tlock->off) == -1)
2692106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				goto fail;
2693106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		} else {
2694106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			/* Otherwise unlock the previous record. */
2695106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			if (tdb_unlock_record(tdb, tlock->off) != 0)
2696106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				goto fail;
2697106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
2698106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2699106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (want_next) {
2700106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			/* We have offset of old record: grab next */
2701106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			if (tdb_rec_read(tdb, tlock->off, rec) == -1)
2702106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				goto fail;
2703106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tlock->off = rec->next;
2704106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
2705106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2706106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* Iterate through chain */
2707106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		while( tlock->off) {
2708106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb_off_t current;
2709106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			if (tdb_rec_read(tdb, tlock->off, rec) == -1)
2710106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				goto fail;
2711106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2712106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			/* Detect infinite loops. From "Shlomi Yaakobovich" <Shlomi@exanet.com>. */
2713106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			if (tlock->off == rec->next) {
2714106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_next_lock: loop detected.\n"));
2715106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				goto fail;
2716106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			}
2717106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2718106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			if (!TDB_DEAD(rec)) {
2719106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				/* Woohoo: we found one! */
2720106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				if (tdb_lock_record(tdb, tlock->off) != 0)
2721106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o					goto fail;
2722106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				return tlock->off;
2723106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			}
2724106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2725106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			/* Try to clean dead ones from old traverses */
2726106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			current = tlock->off;
2727106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tlock->off = rec->next;
2728efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o			if (!(tdb->read_only || tdb->traverse_read) &&
2729106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			    tdb_do_delete(tdb, current, rec) != 0)
2730106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				goto fail;
2731106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
2732106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb_unlock(tdb, tlock->hash, tlock->lock_rw);
2733106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		want_next = 0;
2734106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2735106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* We finished iteration without finding anything */
2736106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return TDB_ERRCODE(TDB_SUCCESS, 0);
2737106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2738106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o fail:
2739106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tlock->off = 0;
2740106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_unlock(tdb, tlock->hash, tlock->lock_rw) != 0)
2741106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_next_lock: On error unlock failed!\n"));
2742106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return -1;
2743106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
2744106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2745106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* traverse the entire database - calling fn(tdb, key, data) on each element.
2746106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   return -1 on error or the record count traversed
2747106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   if fn is NULL then it is not called
2748106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   a non-zero return value from fn() indicates that the traversal should stop
2749106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  */
2750efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'ostatic int tdb_traverse_internal(struct tdb_context *tdb,
2751106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				 tdb_traverse_func fn, void *private_data,
2752106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				 struct tdb_traverse_lock *tl)
2753106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
2754106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	TDB_DATA key, dbuf;
2755106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct list_struct rec;
2756106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int ret, count = 0;
2757106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2758106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* This was in the initializaton, above, but the IRIX compiler
2759106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	 * did not like it.  crh
2760106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	 */
2761106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tl->next = tdb->travlocks.next;
2762106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2763106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* fcntl locks don't stack: beware traverse inside traverse */
2764106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->travlocks.next = tl;
2765106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2766106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* tdb_next_lock places locks on the record returned, and its chain */
2767106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	while ((ret = tdb_next_lock(tdb, tl, &rec)) > 0) {
2768106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		count++;
2769106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* now read the full record */
2770efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o		key.dptr = tdb_alloc_read(tdb, tl->off + sizeof(rec),
2771106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o					  rec.key_len + rec.data_len);
2772106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (!key.dptr) {
2773106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			ret = -1;
2774106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			if (tdb_unlock(tdb, tl->hash, tl->lock_rw) != 0)
2775106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				goto out;
2776106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			if (tdb_unlock_record(tdb, tl->off) != 0)
2777106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_traverse: key.dptr == NULL and unlock_record failed!\n"));
2778106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			goto out;
2779106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
2780106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		key.dsize = rec.key_len;
2781106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		dbuf.dptr = key.dptr + rec.key_len;
2782106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		dbuf.dsize = rec.data_len;
2783106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2784106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* Drop chain lock, call out */
2785106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb_unlock(tdb, tl->hash, tl->lock_rw) != 0) {
2786106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			ret = -1;
2787106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			SAFE_FREE(key.dptr);
2788106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			goto out;
2789106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
2790106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (fn && fn(tdb, key, dbuf, private_data)) {
2791106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			/* They want us to terminate traversal */
2792106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			ret = count;
2793106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			if (tdb_unlock_record(tdb, tl->off) != 0) {
2794106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_traverse: unlock_record failed!\n"));;
2795106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				ret = -1;
2796106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			}
2797106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			SAFE_FREE(key.dptr);
2798106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			goto out;
2799106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
2800106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		SAFE_FREE(key.dptr);
2801106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2802106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oout:
2803106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->travlocks.next = tl->next;
2804106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (ret < 0)
2805106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
2806106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	else
2807106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return count;
2808106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
2809106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2810106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2811106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
2812106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  a write style traverse - temporarily marks the db read only
2813106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
2814efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'oint tdb_traverse_read(struct tdb_context *tdb,
2815106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		      tdb_traverse_func fn, void *private_data)
2816106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
2817106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct tdb_traverse_lock tl = { NULL, 0, 0, F_RDLCK };
2818106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int ret;
28193eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o
2820106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* we need to get a read lock on the transaction lock here to
2821106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   cope with the lock ordering semantics of solaris10 */
28223eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	if (tdb_transaction_lock(tdb, F_RDLCK)) {
2823106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
2824106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2825106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2826106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->traverse_read++;
2827106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	ret = tdb_traverse_internal(tdb, fn, private_data, &tl);
2828106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->traverse_read--;
2829106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
28303eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	tdb_transaction_unlock(tdb);
2831106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2832106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return ret;
2833106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
2834106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2835106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
2836106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  a write style traverse - needs to get the transaction lock to
2837106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  prevent deadlocks
2838106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
2839efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'oint tdb_traverse(struct tdb_context *tdb,
2840106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 tdb_traverse_func fn, void *private_data)
2841106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
2842106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct tdb_traverse_lock tl = { NULL, 0, 0, F_WRLCK };
2843106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int ret;
2844106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2845106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->read_only || tdb->traverse_read) {
2846106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return tdb_traverse_read(tdb, fn, private_data);
2847106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2848efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
28493eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	if (tdb_transaction_lock(tdb, F_WRLCK)) {
2850106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
2851106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2852106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2853106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	ret = tdb_traverse_internal(tdb, fn, private_data, &tl);
2854106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
28553eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	tdb_transaction_unlock(tdb);
2856106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2857106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return ret;
2858106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
2859106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2860106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2861106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* find the first entry in the database and return its key */
2862106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oTDB_DATA tdb_firstkey(struct tdb_context *tdb)
2863106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
2864106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	TDB_DATA key;
2865106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct list_struct rec;
2866106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2867106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* release any old lock */
2868106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_unlock_record(tdb, tdb->travlocks.off) != 0)
2869106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return tdb_null;
2870106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->travlocks.off = tdb->travlocks.hash = 0;
2871106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->travlocks.lock_rw = F_RDLCK;
2872106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
28733eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	/* Grab first record: locks chain and returned record. */
2874106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_next_lock(tdb, &tdb->travlocks, &rec) <= 0)
2875106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return tdb_null;
2876106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* now read the key */
2877106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	key.dsize = rec.key_len;
2878106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	key.dptr =tdb_alloc_read(tdb,tdb->travlocks.off+sizeof(rec),key.dsize);
28793eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o
28803eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	/* Unlock the hash chain of the record we just read. */
28813eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	if (tdb_unlock(tdb, tdb->travlocks.hash, tdb->travlocks.lock_rw) != 0)
2882106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_firstkey: error occurred while tdb_unlocking!\n"));
2883106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return key;
2884106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
2885106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2886106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* find the next entry in the database, returning its key */
2887106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oTDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA oldkey)
2888106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
2889106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	u32 oldhash;
2890106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	TDB_DATA key = tdb_null;
2891106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct list_struct rec;
2892106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	unsigned char *k = NULL;
2893106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2894106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* Is locked key the old key?  If so, traverse will be reliable. */
2895106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->travlocks.off) {
28963eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o		if (tdb_lock(tdb,tdb->travlocks.hash,tdb->travlocks.lock_rw))
2897106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return tdb_null;
2898106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb_rec_read(tdb, tdb->travlocks.off, &rec) == -1
2899106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		    || !(k = tdb_alloc_read(tdb,tdb->travlocks.off+sizeof(rec),
2900106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o					    rec.key_len))
2901106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		    || memcmp(k, oldkey.dptr, oldkey.dsize) != 0) {
2902106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			/* No, it wasn't: unlock it and start from scratch */
2903106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			if (tdb_unlock_record(tdb, tdb->travlocks.off) != 0) {
2904106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				SAFE_FREE(k);
2905106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				return tdb_null;
2906106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			}
29073eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o			if (tdb_unlock(tdb, tdb->travlocks.hash, tdb->travlocks.lock_rw) != 0) {
2908106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				SAFE_FREE(k);
2909106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				return tdb_null;
2910106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			}
2911106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb->travlocks.off = 0;
2912106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
2913106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2914106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		SAFE_FREE(k);
2915106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2916106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2917106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (!tdb->travlocks.off) {
2918106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* No previous element: do normal find, and lock record */
29193eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o		tdb->travlocks.off = tdb_find_lock_hash(tdb, oldkey, tdb->hash_fn(&oldkey), tdb->travlocks.lock_rw, &rec);
2920106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (!tdb->travlocks.off)
2921106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return tdb_null;
2922106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->travlocks.hash = BUCKET(rec.full_hash);
2923106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb_lock_record(tdb, tdb->travlocks.off) != 0) {
2924106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_nextkey: lock_record failed (%s)!\n", strerror(errno)));
2925106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return tdb_null;
2926106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
2927106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2928106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	oldhash = tdb->travlocks.hash;
2929106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2930106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* Grab next record: locks chain and returned record,
2931106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   unlocks old record */
2932106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_next_lock(tdb, &tdb->travlocks, &rec) > 0) {
2933106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		key.dsize = rec.key_len;
2934106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		key.dptr = tdb_alloc_read(tdb, tdb->travlocks.off+sizeof(rec),
2935106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o					  key.dsize);
2936106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* Unlock the chain of this new record */
29373eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o		if (tdb_unlock(tdb, tdb->travlocks.hash, tdb->travlocks.lock_rw) != 0)
2938106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_nextkey: WARNING tdb_unlock failed!\n"));
2939106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2940106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* Unlock the chain of old record */
29413eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	if (tdb_unlock(tdb, BUCKET(oldhash), tdb->travlocks.lock_rw) != 0)
2942106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_nextkey: WARNING tdb_unlock failed!\n"));
2943106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return key;
2944106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
2945106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2946106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* file: dump.c */
2947106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
29483eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'ostatic tdb_off_t tdb_dump_record(struct tdb_context *tdb, int hash,
29493eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o				 tdb_off_t offset)
2950106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
2951106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct list_struct rec;
2952106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t tailer_ofs, tailer;
2953106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2954efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o	if (tdb->methods->tdb_read(tdb, offset, (char *)&rec,
2955106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				   sizeof(rec), DOCONV()) == -1) {
2956106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		printf("ERROR: failed to read record at %u\n", offset);
2957106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
2958106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2959106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
29603eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	printf(" rec: hash=%d offset=0x%08x next=0x%08x rec_len=%d "
29613eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	       "key_len=%d data_len=%d full_hash=0x%x magic=0x%x\n",
29623eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	       hash, offset, rec.next, rec.rec_len, rec.key_len, rec.data_len,
29633eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	       rec.full_hash, rec.magic);
2964106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2965106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tailer_ofs = offset + sizeof(rec) + rec.rec_len - sizeof(tdb_off_t);
2966106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2967106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_ofs_read(tdb, tailer_ofs, &tailer) == -1) {
2968106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		printf("ERROR: failed to read tailer at %u\n", tailer_ofs);
2969106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return rec.next;
2970106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2971106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2972106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tailer != rec.rec_len + sizeof(rec)) {
2973106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		printf("ERROR: tailer does not match record! tailer=%u totalsize=%u\n",
2974106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				(unsigned int)tailer, (unsigned int)(rec.rec_len + sizeof(rec)));
2975106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2976106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return rec.next;
2977106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
2978106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2979106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int tdb_dump_chain(struct tdb_context *tdb, int i)
2980106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
2981106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t rec_ptr, top;
2982106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2983106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	top = TDB_HASH_TOP(i);
2984106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2985106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_lock(tdb, i, F_WRLCK) != 0)
2986106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
2987106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2988106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_ofs_read(tdb, top, &rec_ptr) == -1)
2989106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return tdb_unlock(tdb, i, F_WRLCK);
2990106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2991106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (rec_ptr)
2992106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		printf("hash=%d\n", i);
2993106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2994106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	while (rec_ptr) {
29953eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o		rec_ptr = tdb_dump_record(tdb, i, rec_ptr);
2996106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
2997106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
2998106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return tdb_unlock(tdb, i, F_WRLCK);
2999106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
3000106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3001106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ovoid tdb_dump_all(struct tdb_context *tdb)
3002106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
3003106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int i;
3004106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	for (i=0;i<tdb->header.hash_size;i++) {
3005106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb_dump_chain(tdb, i);
3006106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3007106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	printf("freelist:\n");
3008106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_dump_chain(tdb, -1);
3009106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
3010106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3011106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_printfreelist(struct tdb_context *tdb)
3012106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
3013106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int ret;
3014106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	long total_free = 0;
3015106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t offset, rec_ptr;
3016106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct list_struct rec;
3017106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3018106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if ((ret = tdb_lock(tdb, -1, F_WRLCK)) != 0)
3019106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return ret;
3020106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3021106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	offset = FREELIST_TOP;
3022106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3023106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* read in the freelist top */
3024106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_ofs_read(tdb, offset, &rec_ptr) == -1) {
3025106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb_unlock(tdb, -1, F_WRLCK);
3026106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
3027106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3028106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3029106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	printf("freelist top=[0x%08x]\n", rec_ptr );
3030106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	while (rec_ptr) {
3031efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o		if (tdb->methods->tdb_read(tdb, rec_ptr, (char *)&rec,
3032106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o					   sizeof(rec), DOCONV()) == -1) {
3033106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb_unlock(tdb, -1, F_WRLCK);
3034106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return -1;
3035106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
3036106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3037106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (rec.magic != TDB_FREE_MAGIC) {
3038106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			printf("bad magic 0x%08x in free list\n", rec.magic);
3039106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb_unlock(tdb, -1, F_WRLCK);
3040106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return -1;
3041106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
3042106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3043efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o		printf("entry offset=[0x%08x], rec.rec_len = [0x%08x (%d)] (end = 0x%08x)\n",
3044106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		       rec_ptr, rec.rec_len, rec.rec_len, rec_ptr + rec.rec_len);
3045106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		total_free += rec.rec_len;
3046106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3047106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* move to the next record */
3048106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		rec_ptr = rec.next;
3049106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3050efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o	printf("total rec_len = [0x%08x (%d)]\n", (int)total_free,
3051106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o               (int)total_free);
3052106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3053106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return tdb_unlock(tdb, -1, F_WRLCK);
3054106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
3055106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3056106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* file: tdb.c */
3057106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3058106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
30593eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o  non-blocking increment of the tdb sequence number if the tdb has been opened using
3060106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  the TDB_SEQNUM flag
3061106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
30623eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'ovoid tdb_increment_seqnum_nonblock(struct tdb_context *tdb)
3063106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
3064106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t seqnum=0;
3065efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
3066106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (!(tdb->flags & TDB_SEQNUM)) {
3067106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return;
3068106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3069106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3070106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* we ignore errors from this, as we have no sane way of
3071106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   dealing with them.
3072106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	*/
3073106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
3074106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	seqnum++;
3075106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_ofs_write(tdb, TDB_SEQNUM_OFS, &seqnum);
30763eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o}
30773eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o
30783eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o/*
30793eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o  increment the tdb sequence number if the tdb has been opened using
30803eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o  the TDB_SEQNUM flag
30813eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o*/
30823eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'ostatic void tdb_increment_seqnum(struct tdb_context *tdb)
30833eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o{
30843eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	if (!(tdb->flags & TDB_SEQNUM)) {
30853eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o		return;
30863eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	}
30873eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o
30883eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	if (tdb_brlock(tdb, TDB_SEQNUM_OFS, F_WRLCK, F_SETLKW, 1, 1) != 0) {
30893eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o		return;
30903eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	}
30913eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o
30923eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	tdb_increment_seqnum_nonblock(tdb);
3093106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3094106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_brlock(tdb, TDB_SEQNUM_OFS, F_UNLCK, F_SETLKW, 1, 1);
3095106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
3096106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3097106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int tdb_key_compare(TDB_DATA key, TDB_DATA data, void *private_data)
3098106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
3099106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return memcmp(data.dptr, key.dptr, data.dsize);
3100106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
3101106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3102106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* Returns 0 on fail.  On success, return offset of record, and fills
3103106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   in rec */
3104106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic tdb_off_t tdb_find(struct tdb_context *tdb, TDB_DATA key, u32 hash,
3105106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			struct list_struct *r)
3106106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
3107106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t rec_ptr;
3108efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
3109106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* read in the hash top */
3110106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
3111106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
3112106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3113106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* keep looking until we find the right record */
3114106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	while (rec_ptr) {
3115106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb_rec_read(tdb, rec_ptr, r) == -1)
3116106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return 0;
3117106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3118106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (!TDB_DEAD(r) && hash==r->full_hash
3119106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		    && key.dsize==r->key_len
3120106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		    && tdb_parse_data(tdb, key, rec_ptr + sizeof(*r),
3121106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				      r->key_len, tdb_key_compare,
3122106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				      NULL) == 0) {
3123106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return rec_ptr;
3124106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
3125106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		rec_ptr = r->next;
3126106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3127106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return TDB_ERRCODE(TDB_ERR_NOEXIST, 0);
3128106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
3129106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3130106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* As tdb_find, but if you succeed, keep the lock */
3131106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'otdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash, int locktype,
3132106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			   struct list_struct *rec)
3133106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
3134106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	u32 rec_ptr;
3135106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3136106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_lock(tdb, BUCKET(hash), locktype) == -1)
3137106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
3138106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (!(rec_ptr = tdb_find(tdb, key, hash, rec)))
3139106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb_unlock(tdb, BUCKET(hash), locktype);
3140106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return rec_ptr;
3141106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
3142106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3143106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3144106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* update an entry in place - this only works if the new data size
3145106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   is <= the old data size and the key exists.
3146106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   on failure return -1.
3147106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
3148106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int tdb_update_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash, TDB_DATA dbuf)
3149106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
3150106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct list_struct rec;
3151106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t rec_ptr;
3152106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3153106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* find entry */
3154106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (!(rec_ptr = tdb_find(tdb, key, hash, &rec)))
3155106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
3156106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3157106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* must be long enough key, data and tailer */
3158106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb_off_t)) {
3159106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_SUCCESS; /* Not really an error */
3160106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
3161106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3162106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3163106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->methods->tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,
3164106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		      dbuf.dptr, dbuf.dsize) == -1)
3165106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
3166106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3167106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (dbuf.dsize != rec.data_len) {
3168106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* update size */
3169106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		rec.data_len = dbuf.dsize;
3170106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return tdb_rec_write(tdb, rec_ptr, &rec);
3171106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3172efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
3173106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return 0;
3174106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
3175106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3176106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* find an entry in the database given a key */
3177106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* If an entry doesn't exist tdb_err will be set to
3178106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o * TDB_ERR_NOEXIST. If a key has no data attached
3179106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o * then the TDB_DATA will have zero length but
3180106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o * a non-zero pointer
3181106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o */
3182106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oTDB_DATA tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
3183106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
3184106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t rec_ptr;
3185106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct list_struct rec;
3186106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	TDB_DATA ret;
3187106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	u32 hash;
3188106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3189106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* find which hash bucket it is in */
3190106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	hash = tdb->hash_fn(&key);
3191106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec)))
3192106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return tdb_null;
3193106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3194106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
3195106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				  rec.data_len);
3196106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	ret.dsize = rec.data_len;
3197106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
3198106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return ret;
3199106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
3200106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3201106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
3202106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o * Find an entry in the database and hand the record's data to a parsing
3203106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o * function. The parsing function is executed under the chain read lock, so it
3204106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o * should be fast and should not block on other syscalls.
3205106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o *
3206106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o * DONT CALL OTHER TDB CALLS FROM THE PARSER, THIS MIGHT LEAD TO SEGFAULTS.
3207106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o *
3208106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o * For mmapped tdb's that do not have a transaction open it points the parsing
3209106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o * function directly at the mmap area, it avoids the malloc/memcpy in this
3210106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o * case. If a transaction is open or no mmap is available, it has to do
3211106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o * malloc/read/parse/free.
3212106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o *
3213106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o * This is interesting for all readers of potentially large data structures in
3214106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o * the tdb records, ldb indexes being one example.
3215106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o */
3216106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3217106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
3218106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		     int (*parser)(TDB_DATA key, TDB_DATA data,
3219106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				   void *private_data),
3220106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		     void *private_data)
3221106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
3222106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t rec_ptr;
3223106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct list_struct rec;
3224106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int ret;
3225106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	u32 hash;
3226106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3227106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* find which hash bucket it is in */
3228106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	hash = tdb->hash_fn(&key);
3229106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3230106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
3231106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return TDB_ERRCODE(TDB_ERR_NOEXIST, 0);
3232106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3233106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3234106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	ret = tdb_parse_data(tdb, key, rec_ptr + sizeof(rec) + rec.key_len,
3235106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			     rec.data_len, parser, private_data);
3236106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3237106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
3238106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3239106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return ret;
3240106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
3241106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3242efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o/* check if an entry in the database exists
3243106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3244106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   note that 1 is returned if the key is found and 0 is returned if not found
3245106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   this doesn't match the conventions in the rest of this module, but is
3246106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   compatible with gdbm
3247106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
3248106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int tdb_exists_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash)
3249106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
3250106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct list_struct rec;
3251efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
3252106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
3253106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
3254106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
3255106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return 1;
3256106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
3257106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3258106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_exists(struct tdb_context *tdb, TDB_DATA key)
3259106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
3260106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	u32 hash = tdb->hash_fn(&key);
3261106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return tdb_exists_hash(tdb, key, hash);
3262106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
3263106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3264106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* actually delete an entry in the database given the offset */
3265106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct list_struct*rec)
3266106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
3267106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t last_ptr, i;
3268106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct list_struct lastrec;
3269106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3270106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->read_only || tdb->traverse_read) return -1;
3271106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3272106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_write_lock_record(tdb, rec_ptr) == -1) {
3273106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* Someone traversing here: mark it as dead */
3274106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		rec->magic = TDB_DEAD_MAGIC;
3275106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return tdb_rec_write(tdb, rec_ptr, rec);
3276106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3277106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_write_unlock_record(tdb, rec_ptr) != 0)
3278106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
3279106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3280106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* find previous record in hash chain */
3281106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_ofs_read(tdb, TDB_HASH_TOP(rec->full_hash), &i) == -1)
3282106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
3283106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
3284106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb_rec_read(tdb, i, &lastrec) == -1)
3285106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return -1;
3286106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3287106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* unlink it: next ptr is at start of record. */
3288106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (last_ptr == 0)
3289106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		last_ptr = TDB_HASH_TOP(rec->full_hash);
3290106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_ofs_write(tdb, last_ptr, &rec->next) == -1)
3291106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
3292106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3293106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* recover the space */
3294106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_free(tdb, rec_ptr, rec) == -1)
3295106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
3296106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return 0;
3297106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
3298106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3299106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int tdb_count_dead(struct tdb_context *tdb, u32 hash)
3300106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
3301106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int res = 0;
3302106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t rec_ptr;
3303106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct list_struct rec;
3304efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
3305106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* read in the hash top */
3306106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
3307106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
3308106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3309106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	while (rec_ptr) {
3310106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb_rec_read(tdb, rec_ptr, &rec) == -1)
3311106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return 0;
3312106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3313106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (rec.magic == TDB_DEAD_MAGIC) {
3314106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			res += 1;
3315106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
3316106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		rec_ptr = rec.next;
3317106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3318106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return res;
3319106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
3320106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3321106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
3322106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o * Purge all DEAD records from a hash chain
3323106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o */
3324106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int tdb_purge_dead(struct tdb_context *tdb, u32 hash)
3325106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
3326106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int res = -1;
3327106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct list_struct rec;
3328106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t rec_ptr;
3329106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3330106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
3331106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
3332106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3333efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
3334106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* read in the hash top */
3335106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
3336106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
3337106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3338106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	while (rec_ptr) {
3339106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb_off_t next;
3340106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3341106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb_rec_read(tdb, rec_ptr, &rec) == -1) {
3342106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			goto fail;
3343106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
3344106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3345106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		next = rec.next;
3346106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3347106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (rec.magic == TDB_DEAD_MAGIC
3348106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		    && tdb_do_delete(tdb, rec_ptr, &rec) == -1) {
3349106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			goto fail;
3350106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
3351106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		rec_ptr = next;
3352106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3353106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	res = 0;
3354106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o fail:
3355106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_unlock(tdb, -1, F_WRLCK);
3356106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return res;
3357106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
3358106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3359106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* delete an entry in the database given a key */
3360106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash)
3361106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
3362106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t rec_ptr;
3363106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct list_struct rec;
3364106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int ret;
3365106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3366106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->max_dead_records != 0) {
3367106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3368106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/*
3369106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 * Allow for some dead records per hash chain, mainly for
3370106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 * tdb's with a very high create/delete rate like locking.tdb.
3371106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 */
3372106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3373106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
3374106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return -1;
3375106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3376106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb_count_dead(tdb, hash) >= tdb->max_dead_records) {
3377106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			/*
3378106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			 * Don't let the per-chain freelist grow too large,
3379106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			 * delete all existing dead records
3380106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			 */
3381106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb_purge_dead(tdb, hash);
3382106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
3383106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3384106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (!(rec_ptr = tdb_find(tdb, key, hash, &rec))) {
3385106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
3386106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return -1;
3387106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
3388106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3389106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/*
3390106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 * Just mark the record as dead.
3391106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 */
3392106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		rec.magic = TDB_DEAD_MAGIC;
3393106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		ret = tdb_rec_write(tdb, rec_ptr, &rec);
3394106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3395106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	else {
3396106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (!(rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK,
3397106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o						   &rec)))
3398106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return -1;
3399106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3400106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		ret = tdb_do_delete(tdb, rec_ptr, &rec);
3401106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3402106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3403106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (ret == 0) {
3404106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb_increment_seqnum(tdb);
3405106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3406106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3407106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_unlock(tdb, BUCKET(rec.full_hash), F_WRLCK) != 0)
3408106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_delete: WARNING tdb_unlock failed!\n"));
3409106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return ret;
3410106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
3411106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3412106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_delete(struct tdb_context *tdb, TDB_DATA key)
3413106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
3414106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	u32 hash = tdb->hash_fn(&key);
3415106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return tdb_delete_hash(tdb, key, hash);
3416106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
3417106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3418106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
3419106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o * See if we have a dead record around with enough space
3420106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o */
3421106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic tdb_off_t tdb_find_dead(struct tdb_context *tdb, u32 hash,
3422106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			       struct list_struct *r, tdb_len_t length)
3423106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
3424106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t rec_ptr;
3425efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
3426106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* read in the hash top */
3427106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
3428106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
3429106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3430106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* keep looking until we find the right record */
3431106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	while (rec_ptr) {
3432106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb_rec_read(tdb, rec_ptr, r) == -1)
3433106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return 0;
3434106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3435106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (TDB_DEAD(r) && r->rec_len >= length) {
3436106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			/*
3437106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			 * First fit for simple coding, TODO: change to best
3438106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			 * fit
3439106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			 */
3440106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return rec_ptr;
3441106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
3442106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		rec_ptr = r->next;
3443106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3444106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return 0;
3445106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
3446106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3447106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* store an element in the database, replacing any existing element
3448efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o   with the same key
3449106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3450106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   return 0 on success, -1 on failure
3451106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
3452106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
3453106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
3454106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct list_struct rec;
3455106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	u32 hash;
3456106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t rec_ptr;
3457106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	char *p = NULL;
3458106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int ret = -1;
3459106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3460106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->read_only || tdb->traverse_read) {
3461106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_RDONLY;
3462106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
3463106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3464106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3465106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* find which hash bucket it is in */
3466106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	hash = tdb->hash_fn(&key);
3467106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
3468106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
3469106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3470106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* check for it existing, on insert. */
3471106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (flag == TDB_INSERT) {
3472106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb_exists_hash(tdb, key, hash)) {
3473106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb->ecode = TDB_ERR_EXISTS;
3474106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			goto fail;
3475106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
3476106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	} else {
3477106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* first try in-place update, on modify or replace. */
3478106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb_update_hash(tdb, key, hash, dbuf) == 0) {
3479106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			goto done;
3480106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
3481106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb->ecode == TDB_ERR_NOEXIST &&
3482106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		    flag == TDB_MODIFY) {
3483106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			/* if the record doesn't exist and we are in TDB_MODIFY mode then
3484106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			 we should fail the store */
3485106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			goto fail;
3486106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
3487106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3488106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* reset the error code potentially set by the tdb_update() */
3489106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->ecode = TDB_SUCCESS;
3490106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3491106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* delete any existing record - if it doesn't exist we don't
3492106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o           care.  Doing this first reduces fragmentation, and avoids
3493106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o           coalescing with `allocated' block before it's updated. */
3494106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (flag != TDB_INSERT)
3495106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb_delete_hash(tdb, key, hash);
3496106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3497106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* Copy key+value *before* allocating free space in case malloc
3498106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   fails and we are left with a dead spot in the tdb. */
3499106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3500106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (!(p = (char *)malloc(key.dsize + dbuf.dsize))) {
3501106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_OOM;
3502106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
3503106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3504106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3505106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	memcpy(p, key.dptr, key.dsize);
3506106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (dbuf.dsize)
3507106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		memcpy(p+key.dsize, dbuf.dptr, dbuf.dsize);
3508106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3509106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->max_dead_records != 0) {
3510106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/*
3511106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 * Allow for some dead records per hash chain, look if we can
3512106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 * find one that can hold the new record. We need enough space
3513106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 * for key, data and tailer. If we find one, we don't have to
3514106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 * consult the central freelist.
3515106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 */
3516106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		rec_ptr = tdb_find_dead(
3517106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb, hash, &rec,
3518106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			key.dsize + dbuf.dsize + sizeof(tdb_off_t));
3519106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3520106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (rec_ptr != 0) {
3521106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			rec.key_len = key.dsize;
3522106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			rec.data_len = dbuf.dsize;
3523106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			rec.full_hash = hash;
3524106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			rec.magic = TDB_MAGIC;
3525106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
3526106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			    || tdb->methods->tdb_write(
3527106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				    tdb, rec_ptr + sizeof(rec),
3528106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				    p, key.dsize + dbuf.dsize) == -1) {
3529106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				goto fail;
3530106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			}
3531106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			goto done;
3532106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
3533106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3534106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3535106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/*
3536106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	 * We have to allocate some space from the freelist, so this means we
3537106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	 * have to lock it. Use the chance to purge all the DEAD records from
3538106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	 * the hash chain under the freelist lock.
3539106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	 */
3540106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3541106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
3542106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
3543106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3544106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3545106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if ((tdb->max_dead_records != 0)
3546106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	    && (tdb_purge_dead(tdb, hash) == -1)) {
3547106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb_unlock(tdb, -1, F_WRLCK);
3548106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
3549106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3550106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3551106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* we have to allocate some space */
3552106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	rec_ptr = tdb_allocate(tdb, key.dsize + dbuf.dsize, &rec);
3553106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3554106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_unlock(tdb, -1, F_WRLCK);
3555106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3556106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (rec_ptr == 0) {
3557106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
3558106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3559106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3560106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* Read hash top into next ptr */
3561106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1)
3562106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
3563106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3564106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	rec.key_len = key.dsize;
3565106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	rec.data_len = dbuf.dsize;
3566106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	rec.full_hash = hash;
3567106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	rec.magic = TDB_MAGIC;
3568106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3569106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* write out and point the top of the hash chain at it */
3570106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
3571106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	    || tdb->methods->tdb_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+dbuf.dsize)==-1
3572106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	    || tdb_ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) {
3573106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* Need to tdb_unallocate() here */
3574106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
3575106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3576106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3577106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o done:
3578106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	ret = 0;
3579106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o fail:
3580106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (ret == 0) {
3581106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb_increment_seqnum(tdb);
3582106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3583106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3584efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o	SAFE_FREE(p);
3585106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
3586106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return ret;
3587106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
3588106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3589106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3590106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* Append to an entry. Create if not exist. */
3591106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
3592106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
3593106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	u32 hash;
3594106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	TDB_DATA dbuf;
3595106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int ret = -1;
3596106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3597106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* find which hash bucket it is in */
3598106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	hash = tdb->hash_fn(&key);
3599106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
3600106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return -1;
3601106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3602106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	dbuf = tdb_fetch(tdb, key);
3603106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3604106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (dbuf.dptr == NULL) {
3605106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		dbuf.dptr = (unsigned char *)malloc(new_dbuf.dsize);
3606106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	} else {
36073eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o		unsigned char *new_dptr = (unsigned char *)realloc(dbuf.dptr,
3608106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o						     dbuf.dsize + new_dbuf.dsize);
36093eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o		if (new_dptr == NULL) {
36103eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o			free(dbuf.dptr);
36113eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o		}
36123eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o		dbuf.dptr = new_dptr;
3613106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3614106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3615106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (dbuf.dptr == NULL) {
3616106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->ecode = TDB_ERR_OOM;
3617106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto failed;
3618106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3619106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3620106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	memcpy(dbuf.dptr + dbuf.dsize, new_dbuf.dptr, new_dbuf.dsize);
3621106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	dbuf.dsize += new_dbuf.dsize;
3622106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3623106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	ret = tdb_store(tdb, key, dbuf, 0);
3624efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
3625106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ofailed:
3626106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
3627106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	SAFE_FREE(dbuf.dptr);
3628106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return ret;
3629106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
3630106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3631106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3632106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
3633106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  return the name of the current tdb file
3634106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  useful for external logging functions
3635106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
3636106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oconst char *tdb_name(struct tdb_context *tdb)
3637106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
3638106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return tdb->name;
3639106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
3640106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3641106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
3642106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  return the underlying file descriptor being used by tdb, or -1
3643106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  useful for external routines that want to check the device/inode
3644106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  of the fd
3645106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
3646106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_fd(struct tdb_context *tdb)
3647106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
3648106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return tdb->fd;
3649106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
3650106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3651106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
3652106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  return the current logging function
3653106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  useful for external tdb routines that wish to log tdb errors
3654106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
3655106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'otdb_log_func tdb_log_fn(struct tdb_context *tdb)
3656106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
3657106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return tdb->log.log_fn;
3658106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
3659106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3660106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3661106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
3662106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  get the tdb sequence number. Only makes sense if the writers opened
3663106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  with TDB_SEQNUM set. Note that this sequence number will wrap quite
3664106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  quickly, so it should only be used for a 'has something changed'
3665106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  test, not for code that relies on the count of the number of changes
3666106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  made. If you want a counter then use a tdb record.
3667106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3668106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  The aim of this sequence number is to allow for a very lightweight
3669106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  test of a possible tdb change.
3670106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o*/
3671106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_get_seqnum(struct tdb_context *tdb)
3672106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
3673106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_off_t seqnum=0;
3674106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3675106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
3676106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return seqnum;
3677106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
3678106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3679106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_hash_size(struct tdb_context *tdb)
3680106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
3681106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return tdb->header.hash_size;
3682106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
3683106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3684106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'osize_t tdb_map_size(struct tdb_context *tdb)
3685106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
3686106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return tdb->map_size;
3687106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
3688106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3689106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_get_flags(struct tdb_context *tdb)
3690106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
3691106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return tdb->flags;
3692106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
3693106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
36943eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o
36953eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o/*
36963eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o  enable sequence number handling on an open tdb
36973eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o*/
36983eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'ovoid tdb_enable_seqnum(struct tdb_context *tdb)
36993eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o{
37003eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o	tdb->flags |= TDB_SEQNUM;
37013eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o}
37023eed36b25409dea0cc41c001766456026b5504c8Theodore Ts'o
3703106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* file: open.c */
3704106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
/*
 * Head of the list of all contexts, kept to ensure a file is never
 * opened twice (fcntl locks don't nest!). Starts out empty: static
 * storage is zero-initialised.
 */
static struct tdb_context *tdbs;
3707106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3708106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3709e0ed7404719a9ddd2ba427a80db5365c8bad18c0JP Abgrall/* This is from a hash algorithm suggested by Rogier Wolff */
3710106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic unsigned int default_tdb_hash(TDB_DATA *key)
3711106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
3712106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	u32 value;	/* Used to compute the hash value.  */
3713106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	u32   i;	/* Used to cycle through random values. */
3714106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3715106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* Set the initial value from the key size. */
3716e0ed7404719a9ddd2ba427a80db5365c8bad18c0JP Abgrall	for (value = 0, i=0; i < key->dsize; i++)
3717e0ed7404719a9ddd2ba427a80db5365c8bad18c0JP Abgrall		value = value * 256 + key->dptr[i] + (value >> 24) * 241;
3718106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3719e0ed7404719a9ddd2ba427a80db5365c8bad18c0JP Abgrall	return value;
3720106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
3721106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3722106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3723106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* initialise a new database with a specified hash size */
3724106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int tdb_new_database(struct tdb_context *tdb, int hash_size)
3725106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
3726106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct tdb_header *newdb;
3727106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int size, ret = -1;
3728106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3729106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* We make it up in memory, then write it out if not internal */
3730106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	size = sizeof(struct tdb_header) + (hash_size+1)*sizeof(tdb_off_t);
3731106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (!(newdb = (struct tdb_header *)calloc(size, 1)))
3732106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return TDB_ERRCODE(TDB_ERR_OOM, -1);
3733106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3734106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* Fill in the header */
3735106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	newdb->version = TDB_VERSION;
3736106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	newdb->hash_size = hash_size;
3737106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->flags & TDB_INTERNAL) {
3738106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->map_size = size;
3739106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->map_ptr = (char *)newdb;
3740106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		memcpy(&tdb->header, newdb, sizeof(tdb->header));
3741106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* Convert the `ondisk' version if asked. */
3742106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		CONVERT(*newdb);
3743106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0;
3744106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3745106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (lseek(tdb->fd, 0, SEEK_SET) == -1)
3746106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
3747106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3748106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (ftruncate(tdb->fd, 0) == -1)
3749106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
3750106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3751106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* This creates an endian-converted header, as if read from disk */
3752106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	CONVERT(*newdb);
3753106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	memcpy(&tdb->header, newdb, sizeof(tdb->header));
3754106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* Don't endian-convert the magic food! */
3755106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	memcpy(newdb->magic_food, TDB_MAGIC_FOOD, strlen(TDB_MAGIC_FOOD)+1);
3756106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (write(tdb->fd, newdb, size) != size) {
3757106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		ret = -1;
3758106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	} else {
3759106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		ret = 0;
3760106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3761106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3762106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o  fail:
3763106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	SAFE_FREE(newdb);
3764106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return ret;
3765106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
3766106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3767106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3768106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3769106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic int tdb_already_open(dev_t device,
3770106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			    ino_t ino)
3771106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
3772106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct tdb_context *i;
3773efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
3774106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	for (i = tdbs; i; i = i->next) {
3775106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (i->device == device && i->inode == ino) {
3776106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return 1;
3777106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
3778106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3779106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3780106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return 0;
3781106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
3782106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
/* Open the database, creating it if necessary.

   This is tdb_open_ex() called with NULL for both the logging context
   and the hash function; every other argument is forwarded unchanged.

   open_flags and mode are passed straight to the open call on the
   database file. A flags value of O_WRONLY is invalid. The hash size
   is advisory; use zero for a default value.

   Returns NULL on error, in which case errno is also set.  Don't try
   to call tdb_error or tdb_errname then, just do strerror(errno).

   @param name may be NULL for internal databases. */
struct tdb_context *tdb_open(const char *name, int hash_size, int tdb_flags,
		      int open_flags, mode_t mode)
{
	return tdb_open_ex(name, hash_size, tdb_flags,
			   open_flags, mode,
			   NULL,	/* log_ctx */
			   NULL);	/* hash_fn */
}
3798106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3799106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* a default logging function */
3800106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic void null_log_fn(struct tdb_context *tdb, enum tdb_debug_level level, const char *fmt, ...) PRINTF_ATTRIBUTE(3, 4);
3801106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostatic void null_log_fn(struct tdb_context *tdb, enum tdb_debug_level level, const char *fmt, ...)
3802106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
3803106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
3804106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3805106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3806106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ostruct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags,
3807106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				int open_flags, mode_t mode,
3808106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				const struct tdb_logging_context *log_ctx,
3809106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				tdb_hash_func hash_fn)
3810106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
3811106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct tdb_context *tdb;
3812106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct stat st;
3813106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int rev = 0, locked = 0;
3814106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	unsigned char *vp;
3815106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	u32 vertest;
3816106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3817106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (!(tdb = (struct tdb_context *)calloc(1, sizeof *tdb))) {
3818106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* Can't log this */
3819106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		errno = ENOMEM;
3820106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
3821106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3822106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_io_init(tdb);
3823106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->fd = -1;
3824106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->name = NULL;
3825106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->map_ptr = NULL;
3826106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->flags = tdb_flags;
3827106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->open_flags = open_flags;
3828106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (log_ctx) {
3829106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->log = *log_ctx;
3830106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	} else {
3831106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->log.log_fn = null_log_fn;
3832106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->log.log_private = NULL;
3833106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3834106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->hash_fn = hash_fn ? hash_fn : default_tdb_hash;
3835106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3836106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* cache the page size */
3837e0ed7404719a9ddd2ba427a80db5365c8bad18c0JP Abgrall	tdb->page_size = sysconf(_SC_PAGESIZE);
3838106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->page_size <= 0) {
3839106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->page_size = 0x2000;
3840106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3841106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3842106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if ((open_flags & O_ACCMODE) == O_WRONLY) {
3843106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: can't open tdb %s write-only\n",
3844106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			 name));
3845106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		errno = EINVAL;
3846106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
3847106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3848efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
3849106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (hash_size == 0)
3850106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		hash_size = DEFAULT_HASH_SIZE;
3851106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if ((open_flags & O_ACCMODE) == O_RDONLY) {
3852106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->read_only = 1;
3853106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* read only databases don't do locking or clear if first */
3854106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->flags |= TDB_NOLOCK;
3855106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->flags &= ~TDB_CLEAR_IF_FIRST;
3856106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3857106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3858106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* internal databases don't mmap or lock, and start off cleared */
3859106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->flags & TDB_INTERNAL) {
3860106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->flags |= (TDB_NOLOCK | TDB_NOMMAP);
3861106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->flags &= ~TDB_CLEAR_IF_FIRST;
3862106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb_new_database(tdb, hash_size) != 0) {
3863106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: tdb_new_database failed!"));
3864106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			goto fail;
3865106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
3866106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto internal;
3867106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3868106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3869106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if ((tdb->fd = open(name, open_flags, mode)) == -1) {
3870106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_open_ex: could not open file %s: %s\n",
3871106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			 name, strerror(errno)));
3872106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;	/* errno set by open(2) */
3873106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3874106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3875106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* ensure there is only one process initialising at once */
3876106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->methods->tdb_brlock(tdb, GLOBAL_LOCK, F_WRLCK, F_SETLKW, 0, 1) == -1) {
3877106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: failed to get global lock on %s: %s\n",
3878106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			 name, strerror(errno)));
3879106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;	/* errno set by tdb_brlock */
3880106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3881106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3882106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* we need to zero database if we are the only one with it open */
3883106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if ((tdb_flags & TDB_CLEAR_IF_FIRST) &&
3884106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	    (locked = (tdb->methods->tdb_brlock(tdb, ACTIVE_LOCK, F_WRLCK, F_SETLK, 0, 1) == 0))) {
3885106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		open_flags |= O_CREAT;
3886106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (ftruncate(tdb->fd, 0) == -1) {
3887106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: "
3888106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				 "failed to truncate %s: %s\n",
3889106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				 name, strerror(errno)));
3890106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			goto fail; /* errno set by ftruncate */
3891106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
3892106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3893106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3894106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (read(tdb->fd, &tdb->header, sizeof(tdb->header)) != sizeof(tdb->header)
3895106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	    || strcmp(tdb->header.magic_food, TDB_MAGIC_FOOD) != 0
3896106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	    || (tdb->header.version != TDB_VERSION
3897106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		&& !(rev = (tdb->header.version==TDB_BYTEREV(TDB_VERSION))))) {
3898106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* its not a valid database - possibly initialise it */
3899106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (!(open_flags & O_CREAT) || tdb_new_database(tdb, hash_size) == -1) {
3900106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			errno = EIO; /* ie bad format or something */
3901106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			goto fail;
3902106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
3903106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		rev = (tdb->flags & TDB_CONVERT);
3904106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3905106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	vp = (unsigned char *)&tdb->header.version;
3906106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	vertest = (((u32)vp[0]) << 24) | (((u32)vp[1]) << 16) |
3907106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		  (((u32)vp[2]) << 8) | (u32)vp[3];
3908106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->flags |= (vertest==TDB_VERSION) ? TDB_BIGENDIAN : 0;
3909106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (!rev)
3910106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->flags &= ~TDB_CONVERT;
3911106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	else {
3912106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb->flags |= TDB_CONVERT;
3913106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb_convert(&tdb->header, sizeof(tdb->header));
3914106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3915106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (fstat(tdb->fd, &st) == -1)
3916106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
3917106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3918106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->header.rwlocks != 0) {
3919106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: spinlocks no longer supported\n"));
3920106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
3921106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3922106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3923106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* Is it already in the open list?  If so, fail. */
3924106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_already_open(st.st_dev, st.st_ino)) {
3925106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: "
3926106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			 "%s (%d,%d) is already open in this process\n",
3927106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			 name, (int)st.st_dev, (int)st.st_ino));
3928106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		errno = EBUSY;
3929106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
3930106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3931106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3932106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (!(tdb->name = (char *)strdup(name))) {
3933106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		errno = ENOMEM;
3934106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
3935106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3936106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3937106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->map_size = st.st_size;
3938106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->device = st.st_dev;
3939106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->inode = st.st_ino;
3940106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->max_dead_records = 0;
3941106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_mmap(tdb);
3942106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (locked) {
3943106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb->methods->tdb_brlock(tdb, ACTIVE_LOCK, F_UNLCK, F_SETLK, 0, 1) == -1) {
3944106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: "
3945106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				 "failed to take ACTIVE_LOCK on %s: %s\n",
3946106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o				 name, strerror(errno)));
3947106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			goto fail;
3948106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
3949106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3950106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3951106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3952106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* We always need to do this if the CLEAR_IF_FIRST flag is set, even if
3953106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   we didn't get the initial exclusive lock as we need to let all other
3954106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	   users know we're using it. */
3955106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3956106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_flags & TDB_CLEAR_IF_FIRST) {
3957106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/* leave this lock in place to indicate it's in use */
3958106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb->methods->tdb_brlock(tdb, ACTIVE_LOCK, F_RDLCK, F_SETLKW, 0, 1) == -1)
3959106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			goto fail;
3960106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3961106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3962106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* if needed, run recovery */
3963106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_transaction_recover(tdb) == -1) {
3964106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
3965106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3966106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3967106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o internal:
3968106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* Internal (memory-only) databases skip all the code above to
3969106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	 * do with disk files, and resume here by releasing their
3970106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	 * global lock and hooking into the active list. */
3971106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->methods->tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1) == -1)
3972106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
3973106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->next = tdbs;
3974106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdbs = tdb;
3975106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return tdb;
3976106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3977106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o fail:
3978106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	{ int save_errno = errno;
3979106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3980106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (!tdb)
3981106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return NULL;
3982efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o
3983106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->map_ptr) {
3984106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb->flags & TDB_INTERNAL)
3985106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			SAFE_FREE(tdb->map_ptr);
3986106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		else
3987106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb_munmap(tdb);
3988106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3989106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	SAFE_FREE(tdb->name);
3990106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->fd != -1)
3991106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (close(tdb->fd) != 0)
3992106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: failed to close tdb->fd on error!\n"));
3993106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	SAFE_FREE(tdb);
3994106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	errno = save_errno;
3995106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return NULL;
3996106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
3997106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
3998106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
3999106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/*
4000106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o * Set the maximum number of dead records per hash chain
4001106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o */
4002106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
4003106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ovoid tdb_set_max_dead(struct tdb_context *tdb, int max_dead)
4004106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
4005106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->max_dead_records = max_dead;
4006106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
4007106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
4008106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/**
4009106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o * Close a database.
4010106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o *
4011106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o * @returns -1 for error; 0 for success.
4012106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o **/
4013106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_close(struct tdb_context *tdb)
4014106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
4015106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct tdb_context **i;
4016106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	int ret = 0;
4017106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
4018106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->transaction) {
4019106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		tdb_transaction_cancel(tdb);
4020106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
4021106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
4022106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->map_ptr) {
4023106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb->flags & TDB_INTERNAL)
4024106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			SAFE_FREE(tdb->map_ptr);
4025106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		else
4026106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb_munmap(tdb);
4027106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
4028106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	SAFE_FREE(tdb->name);
4029106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->fd != -1)
4030106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		ret = close(tdb->fd);
4031106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	SAFE_FREE(tdb->lockrecs);
4032106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
4033106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	/* Remove from contexts list */
4034106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	for (i = &tdbs; *i; i = &(*i)->next) {
4035106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (*i == tdb) {
4036106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			*i = tdb->next;
4037106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			break;
4038106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
4039106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
4040106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
4041106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	memset(tdb, 0, sizeof(*tdb));
4042106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	SAFE_FREE(tdb);
4043106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
4044106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return ret;
4045106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
4046106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
4047106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* register a loging function */
4048106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ovoid tdb_set_logging_function(struct tdb_context *tdb,
4049106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o                              const struct tdb_logging_context *log_ctx)
4050106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
4051106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o        tdb->log = *log_ctx;
4052106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
4053106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
4054106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ovoid *tdb_get_logging_private(struct tdb_context *tdb)
4055106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
4056106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return tdb->log.log_private;
4057106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
4058106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
4059106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* reopen a tdb - this can be used after a fork to ensure that we have an independent
4060106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o   seek pointer from our parent and to re-establish locks */
4061106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_reopen(struct tdb_context *tdb)
4062106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
4063106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct stat st;
4064106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
4065106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->flags & TDB_INTERNAL) {
4066106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		return 0; /* Nothing to do. */
4067106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
4068106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
4069106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->num_locks != 0 || tdb->global_lock.count) {
4070106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_reopen: reopen not allowed with locks held\n"));
4071106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
4072106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
4073106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
4074106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->transaction != 0) {
4075106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_reopen: reopen not allowed inside a transaction\n"));
4076106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
4077106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
4078106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
4079106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb_munmap(tdb) != 0) {
4080106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: munmap failed (%s)\n", strerror(errno)));
4081106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
4082106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
4083106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (close(tdb->fd) != 0)
4084106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: WARNING closing tdb->fd failed!\n"));
4085106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb->fd = open(tdb->name, tdb->open_flags & ~(O_CREAT|O_TRUNC), 0);
4086106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (tdb->fd == -1) {
4087106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: open failed (%s)\n", strerror(errno)));
4088106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
4089106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
4090efc6f628e15de95bcd13e4f0ee223cb42115d520Theodore Ts'o	if ((tdb->flags & TDB_CLEAR_IF_FIRST) &&
4091106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	    (tdb->methods->tdb_brlock(tdb, ACTIVE_LOCK, F_RDLCK, F_SETLKW, 0, 1) == -1)) {
4092106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: failed to obtain active lock\n"));
4093106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
4094106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
4095106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (fstat(tdb->fd, &st) != 0) {
4096106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: fstat failed (%s)\n", strerror(errno)));
4097106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
4098106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
4099106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	if (st.st_ino != tdb->inode || st.st_dev != tdb->device) {
4100106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: file dev/inode has changed!\n"));
4101106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		goto fail;
4102106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
4103106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_mmap(tdb);
4104106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
4105106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return 0;
4106106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
4107106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'ofail:
4108106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	tdb_close(tdb);
4109106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return -1;
4110106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
4111106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
4112106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o/* reopen all tdb's */
4113106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'oint tdb_reopen_all(int parent_longlived)
4114106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o{
4115106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	struct tdb_context *tdb;
4116106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
4117106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	for (tdb=tdbs; tdb; tdb = tdb->next) {
4118106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		/*
4119106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 * If the parent is longlived (ie. a
4120106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 * parent daemon architecture), we know
4121106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 * it will keep it's active lock on a
4122106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 * tdb opened with CLEAR_IF_FIRST. Thus
4123106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 * for child processes we don't have to
4124106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 * add an active lock. This is essential
4125106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 * to improve performance on systems that
4126106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 * keep POSIX locks as a non-scalable data
4127106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 * structure in the kernel.
4128106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		 */
4129106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (parent_longlived) {
4130106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			/* Ensure no clear-if-first. */
4131106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			tdb->flags &= ~TDB_CLEAR_IF_FIRST;
4132106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		}
4133106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
4134106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o		if (tdb_reopen(tdb) != 0)
4135106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o			return -1;
4136106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	}
4137106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o
4138106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o	return 0;
4139106ad96daee151064aa44a857f82ba10c8258b40Theodore Ts'o}
4140