1/*
2   md_k.h : kernel internal structure of the Linux MD driver
3          Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman
4
5   This program is free software; you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 2, or (at your option)
8   any later version.
9
10   You should have received a copy of the GNU General Public License
11   (for example /usr/src/linux/COPYING); if not, write to the Free
12   Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
13*/
14
15#ifndef _MD_K_H
16#define _MD_K_H
17
18/* and dm-bio-list.h is not under include/linux because.... ??? */
19#include "../../../drivers/md/dm-bio-list.h"
20
/*
 * Negative 'level' numbers.
 */
#define LEVEL_LINEAR		(-1)
#define LEVEL_MULTIPATH		(-4)
#define LEVEL_FAULTY		(-5)

/*
 * 'No level specified'.  0 cannot serve for this because it already
 * means 'raid0', so a distinct value is needed.  Internal use only.
 */
#define LEVEL_NONE		(-1000000)

/* Every bit of a sector_t set. */
#define MaxSector		(~(sector_t)0)

#define MD_THREAD_NAME_MAX	14

/* Short type names for the two central structures. */
typedef struct mdk_rdev_s mdk_rdev_t;
typedef struct mddev_s mddev_t;

/* Max number of md dev */
#define MAX_MD_DEVS		256

/*
 * options passed in raidrun:
 */

/* Currently this must fit in an 'int' */
#define MAX_CHUNK_SIZE		(1<<30)
45
46/*
47 * MD's 'extended' device
48 */
49struct mdk_rdev_s
50{
51	struct list_head same_set;	/* RAID devices within the same set */
52
53	sector_t size;			/* Device size (in blocks) */
54	mddev_t *mddev;			/* RAID array if running */
55	unsigned long last_events;	/* IO event timestamp */
56
57	struct block_device *bdev;	/* block device handle */
58
59	struct page	*sb_page;
60	int		sb_loaded;
61	__u64		sb_events;
62	sector_t	data_offset;	/* start of data in array */
63	sector_t	sb_offset;
64	int		sb_size;	/* bytes in the superblock */
65	int		preferred_minor;	/* autorun support */
66
67	struct kobject	kobj;
68
69	/* A device can be in one of three states based on two flags:
70	 * Not working:   faulty==1 in_sync==0
71	 * Fully working: faulty==0 in_sync==1
72	 * Working, but not
73	 * in sync with array
74	 *                faulty==0 in_sync==0
75	 *
76	 * It can never have faulty==1, in_sync==1
77	 * This reduces the burden of testing multiple flags in many cases
78	 */
79
80	unsigned long	flags;
81#define	Faulty		1		/* device is known to have a fault */
82#define	In_sync		2		/* device is in_sync with rest of array */
83#define	WriteMostly	4		/* Avoid reading if at all possible */
84#define	BarriersNotsupp	5		/* BIO_RW_BARRIER is not supported */
85
86	int desc_nr;			/* descriptor index in the superblock */
87	int raid_disk;			/* role of device in array */
88	int saved_raid_disk;		/* role that device used to have in the
89					 * array and could again if we did a partial
90					 * resync from the bitmap
91					 */
92	sector_t	recovery_offset;/* If this device has been partially
93					 * recovered, this is where we were
94					 * up to.
95					 */
96
97	atomic_t	nr_pending;	/* number of pending requests.
98					 * only maintained for arrays that
99					 * support hot removal
100					 */
101	atomic_t	read_errors;	/* number of consecutive read errors that
102					 * we have tried to ignore.
103					 */
104	atomic_t	corrected_errors; /* number of corrected read errors,
105					   * for reporting to userspace and storing
106					   * in superblock.
107					   */
108};
109
110struct mddev_s
111{
112	void				*private;
113	struct mdk_personality		*pers;
114	dev_t				unit;
115	int				md_minor;
116	struct list_head 		disks;
117	int				sb_dirty;
118	int				ro;
119
120	struct gendisk			*gendisk;
121
122	struct kobject			kobj;
123
124	/* Superblock information */
125	int				major_version,
126					minor_version,
127					patch_version;
128	int				persistent;
129	int				chunk_size;
130	time_t				ctime, utime;
131	int				level, layout;
132	char				clevel[16];
133	int				raid_disks;
134	int				max_disks;
135	sector_t			size; /* used size of component devices */
136	sector_t			array_size; /* exported array size */
137	__u64				events;
138
139	char				uuid[16];
140
141	/* If the array is being reshaped, we need to record the
142	 * new shape and an indication of where we are up to.
143	 * This is written to the superblock.
144	 * If reshape_position is MaxSector, then no reshape is happening (yet).
145	 */
146	sector_t			reshape_position;
147	int				delta_disks, new_level, new_layout, new_chunk;
148
149	struct mdk_thread_s		*thread;	/* management thread */
150	struct mdk_thread_s		*sync_thread;	/* doing resync or reconstruct */
151	sector_t			curr_resync;	/* last block scheduled */
152	unsigned long			resync_mark;	/* a recent timestamp */
153	sector_t			resync_mark_cnt;/* blocks written at resync_mark */
154	sector_t			curr_mark_cnt; /* blocks scheduled now */
155
156	sector_t			resync_max_sectors; /* may be set by personality */
157
158	sector_t			resync_mismatches; /* count of sectors where
159							    * parity/replica mismatch found
160							    */
161
162	/* allow user-space to request suspension of IO to regions of the array */
163	sector_t			suspend_lo;
164	sector_t			suspend_hi;
165	/* if zero, use the system-wide default */
166	int				sync_speed_min;
167	int				sync_speed_max;
168
169	int				ok_start_degraded;
170	/* recovery/resync flags
171	 * NEEDED:   we might need to start a resync/recover
172	 * RUNNING:  a thread is running, or about to be started
173	 * SYNC:     actually doing a resync, not a recovery
174	 * ERR:      and IO error was detected - abort the resync/recovery
175	 * INTR:     someone requested a (clean) early abort.
176	 * DONE:     thread is done and is waiting to be reaped
177	 * REQUEST:  user-space has requested a sync (used with SYNC)
178	 * CHECK:    user-space request for for check-only, no repair
179	 * RESHAPE:  A reshape is happening
180	 *
181	 * If neither SYNC or RESHAPE are set, then it is a recovery.
182	 */
183#define	MD_RECOVERY_RUNNING	0
184#define	MD_RECOVERY_SYNC	1
185#define	MD_RECOVERY_ERR		2
186#define	MD_RECOVERY_INTR	3
187#define	MD_RECOVERY_DONE	4
188#define	MD_RECOVERY_NEEDED	5
189#define	MD_RECOVERY_REQUESTED	6
190#define	MD_RECOVERY_CHECK	7
191#define MD_RECOVERY_RESHAPE	8
192#define	MD_RECOVERY_FROZEN	9
193
194	unsigned long			recovery;
195
196	int				in_sync;	/* know to not need resync */
197	struct mutex			reconfig_mutex;
198	atomic_t			active;
199
200	int				changed;	/* true if we might need to reread partition info */
201	int				degraded;	/* whether md should consider
202							 * adding a spare
203							 */
204	int				barriers_work;	/* initialised to true, cleared as soon
205							 * as a barrier request to slave
206							 * fails.  Only supported
207							 */
208	struct bio			*biolist; 	/* bios that need to be retried
209							 * because BIO_RW_BARRIER is not supported
210							 */
211
212	atomic_t			recovery_active; /* blocks scheduled, but not written */
213	wait_queue_head_t		recovery_wait;
214	sector_t			recovery_cp;
215
216	spinlock_t			write_lock;
217	wait_queue_head_t		sb_wait;	/* for waiting on superblock updates */
218	atomic_t			pending_writes;	/* number of active superblock writes */
219
220	unsigned int			safemode;	/* if set, update "clean" superblock
221							 * when no writes pending.
222							 */
223	unsigned int			safemode_delay;
224	struct timer_list		safemode_timer;
225	atomic_t			writes_pending;
226	request_queue_t			*queue;	/* for plugging ... */
227
228	atomic_t                        write_behind; /* outstanding async IO */
229	unsigned int                    max_write_behind; /* 0 = sync */
230
231	struct bitmap                   *bitmap; /* the bitmap for the device */
232	struct file			*bitmap_file; /* the bitmap file */
233	long				bitmap_offset; /* offset from superblock of
234							* start of bitmap. May be
235							* negative, but not '0'
236							*/
237	long				default_bitmap_offset; /* this is the offset to use when
238								* hot-adding a bitmap.  It should
239								* eventually be settable by sysfs.
240								*/
241
242	struct list_head		all_mddevs;
243};
244
245
246static inline void rdev_dec_pending(mdk_rdev_t *rdev, mddev_t *mddev)
247{
248	int faulty = test_bit(Faulty, &rdev->flags);
249	if (atomic_dec_and_test(&rdev->nr_pending) && faulty)
250		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
251}
252
253static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors)
254{
255        atomic_add(nr_sectors, &bdev->bd_contains->bd_disk->sync_io);
256}
257
258struct mdk_personality
259{
260	char *name;
261	int level;
262	struct list_head list;
263	struct module *owner;
264	int (*make_request)(request_queue_t *q, struct bio *bio);
265	int (*run)(mddev_t *mddev);
266	int (*stop)(mddev_t *mddev);
267	void (*status)(struct seq_file *seq, mddev_t *mddev);
268	/* error_handler must set ->faulty and clear ->in_sync
269	 * if appropriate, and should abort recovery if needed
270	 */
271	void (*error_handler)(mddev_t *mddev, mdk_rdev_t *rdev);
272	int (*hot_add_disk) (mddev_t *mddev, mdk_rdev_t *rdev);
273	int (*hot_remove_disk) (mddev_t *mddev, int number);
274	int (*spare_active) (mddev_t *mddev);
275	sector_t (*sync_request)(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster);
276	int (*resize) (mddev_t *mddev, sector_t sectors);
277	int (*check_reshape) (mddev_t *mddev);
278	int (*start_reshape) (mddev_t *mddev);
279	int (*reconfig) (mddev_t *mddev, int layout, int chunk_size);
280	/* quiesce moves between quiescence states
281	 * 0 - fully active
282	 * 1 - no new requests allowed
283	 * others - reserved
284	 */
285	void (*quiesce) (mddev_t *mddev, int state);
286};
287
288
289struct md_sysfs_entry {
290	struct attribute attr;
291	ssize_t (*show)(mddev_t *, char *);
292	ssize_t (*store)(mddev_t *, const char *, size_t);
293};
294
295
296static inline char * mdname (mddev_t * mddev)
297{
298	return mddev->gendisk ? mddev->gendisk->disk_name : "mdX";
299}
300
/*
 * Walk a ring list of rdevs that are linked through 'same_set'.
 *
 * 'tmp' is advanced to the following element before the loop body runs,
 * so the body may remove the current 'rdev' from the list.  Don't touch
 * 'tmp' though.  Once the loop has finished, 'rdev' has been computed
 * from the list head itself and must not be used.
 */
#define ITERATE_RDEV_GENERIC(head,rdev,tmp)				\
	for ((tmp) = (head).next;					\
	     (rdev) = list_entry((tmp), mdk_rdev_t, same_set),		\
	     (tmp) = (tmp)->next,					\
	     (tmp)->prev != &(head);					\
	     )

/*
 * Walk the 'same array disks' ring list of an mddev.
 */
#define ITERATE_RDEV(mddev,rdev,tmp)					\
	ITERATE_RDEV_GENERIC((mddev)->disks, rdev, tmp)

/*
 * Walk the 'pending RAID disks' list.
 */
#define ITERATE_RDEV_PENDING(rdev,tmp)					\
	ITERATE_RDEV_GENERIC(pending_raid_disks, rdev, tmp)
322
323typedef struct mdk_thread_s {
324	void			(*run) (mddev_t *mddev);
325	mddev_t			*mddev;
326	wait_queue_head_t	wqueue;
327	unsigned long           flags;
328	struct task_struct	*tsk;
329	unsigned long		timeout;
330} mdk_thread_t;
331
332#define THREAD_WAKEUP  0
333
/*
 * Slow path of wait_event_lock_irq().
 *
 * Queues the current task on 'wq' and loops until 'condition' is true.
 * On every pass in which the condition is still false, 'lock' is released
 * with spin_unlock_irq(), 'cmd' is executed, the task calls schedule(),
 * and 'lock' is retaken with spin_lock_irq() before the condition is
 * looked at again.  The task state is put back to TASK_RUNNING and the
 * wait-queue entry is removed on the way out.
 *
 * Assumes the caller holds 'lock' on entry (it is unlocked before the
 * first sleep); 'condition' is evaluated several times.
 */
#define __wait_event_lock_irq(wq, condition, lock, cmd)			\
do {									\
	wait_queue_t __wait;						\
									\
	init_waitqueue_entry(&__wait, current);				\
	add_wait_queue(&(wq), &__wait);					\
	for (;;) {							\
		set_current_state(TASK_UNINTERRUPTIBLE);		\
		if (condition)						\
			break;						\
		spin_unlock_irq(&(lock));				\
		cmd;							\
		schedule();						\
		spin_lock_irq(&(lock));					\
	}								\
	current->state = TASK_RUNNING;					\
	remove_wait_queue(&(wq), &__wait);				\
} while (0)

/*
 * Wait for 'condition' to become true.  When it already holds, nothing
 * else is done; otherwise the slow path above takes over.
 */
#define wait_event_lock_irq(wq, condition, lock, cmd)			\
do {									\
	if (!(condition))						\
		__wait_event_lock_irq(wq, condition, lock, cmd);	\
} while (0)
359
/*
 * put_page() that tolerates a NULL page pointer: nothing is done for NULL.
 */
static inline void safe_put_page(struct page *p)
{
	if (!p)
		return;

	put_page(p);
}
364
365#endif
366
367