unix_io.c revision 64e1b274edc48553c76511ff9b30f85c52aff046
1/*
2 * unix_io.c --- This is the Unix I/O interface to the I/O manager.
3 *
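 * Implements a small block cache (CACHE_SIZE entries).  The cache is
 * write-through when CHANNEL_FLAGS_WRITETHROUGH is set on the channel
 * and write-back otherwise.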
5 *
6 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
7 * 	2002 by Theodore Ts'o.
8 *
9 * %Begin-Header%
10 * This file may be redistributed under the terms of the GNU Public
11 * License.
12 * %End-Header%
13 */
14
15#define _LARGEFILE_SOURCE
16#define _LARGEFILE64_SOURCE
17
18#include <stdio.h>
19#include <string.h>
20#if HAVE_UNISTD_H
21#include <unistd.h>
22#endif
23#if HAVE_ERRNO_H
24#include <errno.h>
25#endif
26#include <fcntl.h>
27#include <time.h>
28#if HAVE_SYS_STAT_H
29#include <sys/stat.h>
30#endif
31#if HAVE_SYS_TYPES_H
32#include <sys/types.h>
33#endif
34#include <sys/resource.h>
35
36#include "ext2_fs.h"
37#include "ext2fs.h"
38
39/*
40 * For checking structure magic numbers...
41 */
42
/*
 * Return `code' from the enclosing function when the object's magic
 * field does not match it.  Wrapped in do { } while (0) so that the
 * macro behaves as a single statement and cannot capture a following
 * `else'.
 */
#define EXT2_CHECK_MAGIC(obj, code) \
	do { \
		if ((obj)->magic != (code)) \
			return (code); \
	} while (0)
45
/*
 * One slot of the block cache.
 *
 * The flag bit-fields are unsigned: a one-bit plain `int' bit-field may
 * be signed, in which case it can only hold 0 and -1 and a stored 1
 * would not compare equal to 1.
 */
struct unix_cache {
	char		*buf;		/* block-sized data buffer */
	unsigned long	block;		/* block number held in buf */
	int		access_time;	/* stamp of the last use, for LRU */
	unsigned	dirty:1;	/* buf has data not yet written out */
	unsigned	in_use:1;	/* slot currently holds a block */
};
53
/* Number of slots in the block cache. */
#define CACHE_SIZE 8
/*
 * Largest multi-block write (in blocks) that is routed through the
 * cache; anything bigger is written directly.
 */
#define WRITE_VIA_CACHE_SIZE 4	/* Must be smaller than CACHE_SIZE */

#if WRITE_VIA_CACHE_SIZE >= CACHE_SIZE
#error "WRITE_VIA_CACHE_SIZE must be smaller than CACHE_SIZE"
#endif
56
57struct unix_private_data {
58	int	magic;
59	int	dev;
60	int	flags;
61	int	access_time;
62	struct unix_cache cache[CACHE_SIZE];
63};
64
65static errcode_t unix_open(const char *name, int flags, io_channel *channel);
66static errcode_t unix_close(io_channel channel);
67static errcode_t unix_set_blksize(io_channel channel, int blksize);
68static errcode_t unix_read_blk(io_channel channel, unsigned long block,
69			       int count, void *data);
70static errcode_t unix_write_blk(io_channel channel, unsigned long block,
71				int count, const void *data);
72static errcode_t unix_flush(io_channel channel);
73static errcode_t unix_write_byte(io_channel channel, unsigned long offset,
74				int size, const void *data);
75
76static struct struct_io_manager struct_unix_manager = {
77	EXT2_ET_MAGIC_IO_MANAGER,
78	"Unix I/O Manager",
79	unix_open,
80	unix_close,
81	unix_set_blksize,
82	unix_read_blk,
83	unix_write_blk,
84	unix_flush,
85	unix_write_byte
86};
87
88io_manager unix_io_manager = &struct_unix_manager;
89
90/*
91 * Here are the raw I/O functions
92 */
93static errcode_t raw_read_blk(io_channel channel,
94			      struct unix_private_data *data,
95			      unsigned long block,
96			      int count, void *buf)
97{
98	errcode_t	retval;
99	size_t		size;
100	ext2_loff_t	location;
101	int		actual = 0;
102
103	size = (count < 0) ? -count : count * channel->block_size;
104	location = (ext2_loff_t) block * channel->block_size;
105	if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
106		retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
107		goto error_out;
108	}
109	actual = read(data->dev, buf, size);
110	if (actual != size) {
111		if (actual < 0)
112			actual = 0;
113		retval = EXT2_ET_SHORT_READ;
114		goto error_out;
115	}
116	return 0;
117
118error_out:
119	memset((char *) buf+actual, 0, size-actual);
120	if (channel->read_error)
121		retval = (channel->read_error)(channel, block, count, buf,
122					       size, actual, retval);
123	return retval;
124}
125
126static errcode_t raw_write_blk(io_channel channel,
127			       struct unix_private_data *data,
128			       unsigned long block,
129			       int count, const void *buf)
130{
131	size_t		size;
132	ext2_loff_t	location;
133	int		actual = 0;
134	errcode_t	retval;
135
136	if (count == 1)
137		size = channel->block_size;
138	else {
139		if (count < 0)
140			size = -count;
141		else
142			size = count * channel->block_size;
143	}
144
145	location = (ext2_loff_t) block * channel->block_size;
146	if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
147		retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
148		goto error_out;
149	}
150
151	actual = write(data->dev, buf, size);
152	if (actual != size) {
153		retval = EXT2_ET_SHORT_WRITE;
154		goto error_out;
155	}
156	return 0;
157
158error_out:
159	if (channel->write_error)
160		retval = (channel->write_error)(channel, block, count, buf,
161						size, actual, retval);
162	return retval;
163}
164
165
166/*
167 * Here we implement the cache functions
168 */
169
170/* Allocate the cache buffers */
171static errcode_t alloc_cache(io_channel channel,
172			     struct unix_private_data *data)
173{
174	errcode_t		retval;
175	struct unix_cache	*cache;
176	int			i;
177
178	data->access_time = 0;
179	for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
180		cache->block = 0;
181		cache->access_time = 0;
182		cache->dirty = 0;
183		cache->in_use = 0;
184		if ((retval = ext2fs_get_mem(channel->block_size,
185					     (void **) &cache->buf)))
186			return retval;
187	}
188	return 0;
189}
190
191/* Free the cache buffers */
192static void free_cache(io_channel channel,
193		       struct unix_private_data *data)
194{
195	struct unix_cache	*cache;
196	int			i;
197
198	data->access_time = 0;
199	for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
200		cache->block = 0;
201		cache->access_time = 0;
202		cache->dirty = 0;
203		cache->in_use = 0;
204		if (cache->buf)
205			ext2fs_free_mem((void **) &cache->buf);
206		cache->buf = 0;
207	}
208}
209
210/*
211 * Try to find a block in the cache.  If get_cache is non-zero, then
212 * if the block isn't in the cache, evict the oldest block in the
213 * cache and create a new cache entry for the requested block.
214 */
215static struct unix_cache *find_cached_block(io_channel channel,
216					    struct unix_private_data *data,
217					    unsigned long block,
218					    int get_cache)
219{
220	struct unix_cache	*cache, *unused_cache, *oldest_cache;
221	int			i;
222
223	unused_cache = oldest_cache = 0;
224	for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
225		if (!cache->in_use) {
226			unused_cache = cache;
227			continue;
228		}
229		if (cache->block == block) {
230			cache->access_time = ++data->access_time;
231			return cache;
232		}
233		if (!oldest_cache ||
234		    (cache->access_time < oldest_cache->access_time))
235			oldest_cache = cache;
236	}
237	if (!get_cache)
238		return 0;
239
240	/*
241	 * Try to allocate cache slot.
242	 */
243	if (unused_cache)
244		cache = unused_cache;
245	else {
246		cache = oldest_cache;
247		if (cache->dirty)
248			raw_write_blk(channel, data,
249				      cache->block, 1, cache->buf);
250	}
251	cache->in_use = 1;
252	cache->block = block;
253	cache->access_time = ++data->access_time;
254	return cache;
255}
256
257/*
258 * Flush all of the blocks in the cache
259 */
260static errcode_t flush_cached_blocks(io_channel channel,
261				     struct unix_private_data *data,
262				     int invalidate)
263
264{
265	struct unix_cache	*cache;
266	errcode_t		retval, retval2;
267	int			i;
268
269	retval2 = 0;
270	for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
271		if (!cache->in_use)
272			continue;
273
274		if (invalidate)
275			cache->in_use = 0;
276
277		if (!cache->dirty)
278			continue;
279
280		retval = raw_write_blk(channel, data,
281				       cache->block, 1, cache->buf);
282		if (retval)
283			retval2 = retval;
284		else
285			cache->dirty = 0;
286	}
287	return retval2;
288}
289
290
291
292static errcode_t unix_open(const char *name, int flags, io_channel *channel)
293{
294	io_channel	io = NULL;
295	struct unix_private_data *data = NULL;
296	errcode_t	retval;
297	int		open_flags;
298	struct stat	st;
299
300	if (name == 0)
301		return EXT2_ET_BAD_DEVICE_NAME;
302	retval = ext2fs_get_mem(sizeof(struct struct_io_channel),
303				(void **) &io);
304	if (retval)
305		return retval;
306	memset(io, 0, sizeof(struct struct_io_channel));
307	io->magic = EXT2_ET_MAGIC_IO_CHANNEL;
308	retval = ext2fs_get_mem(sizeof(struct unix_private_data),
309				(void **) &data);
310	if (retval)
311		goto cleanup;
312
313	io->manager = unix_io_manager;
314	retval = ext2fs_get_mem(strlen(name)+1, (void **) &io->name);
315	if (retval)
316		goto cleanup;
317
318	strcpy(io->name, name);
319	io->private_data = data;
320	io->block_size = 1024;
321	io->read_error = 0;
322	io->write_error = 0;
323	io->refcount = 1;
324
325	memset(data, 0, sizeof(struct unix_private_data));
326	data->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL;
327
328	if ((retval = alloc_cache(io, data)))
329		goto cleanup;
330
331	open_flags = (flags & IO_FLAG_RW) ? O_RDWR : O_RDONLY;
332#ifdef HAVE_OPEN64
333	data->dev = open64(name, open_flags);
334#else
335	data->dev = open(name, open_flags);
336#endif
337	if (data->dev < 0) {
338		retval = errno;
339		goto cleanup;
340	}
341
342#ifdef __linux__
343#undef RLIM_INFINITY
344#if (defined(__alpha__) || ((defined(__sparc__) || defined(__mips__)) && (SIZEOF_LONG == 4)))
345#define RLIM_INFINITY	((unsigned long)(~0UL>>1))
346#else
347#define RLIM_INFINITY  (~0UL)
348#endif
349	/*
350	 * Work around a bug in 2.4.10+ kernels where writes to block
351	 * devices are wrongly getting hit by the filesize limit.
352	 */
353	if ((flags & IO_FLAG_RW) &&
354	    (fstat(data->dev, &st) == 0) &&
355	    (S_ISBLK(st.st_mode))) {
356		struct rlimit	rlim;
357
358		rlim.rlim_cur = rlim.rlim_max = (unsigned long) RLIM_INFINITY;
359		setrlimit(RLIMIT_FSIZE, &rlim);
360		getrlimit(RLIMIT_FSIZE, &rlim);
361		if (((unsigned long) rlim.rlim_cur) <
362		    ((unsigned long) rlim.rlim_max)) {
363			rlim.rlim_cur = rlim.rlim_max;
364			setrlimit(RLIMIT_FSIZE, &rlim);
365		}
366	}
367#endif
368	*channel = io;
369	return 0;
370
371cleanup:
372	if (data) {
373		free_cache(io, data);
374		ext2fs_free_mem((void **) &data);
375	}
376	if (io)
377		ext2fs_free_mem((void **) &io);
378	return retval;
379}
380
381static errcode_t unix_close(io_channel channel)
382{
383	struct unix_private_data *data;
384	errcode_t	retval = 0;
385
386	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
387	data = (struct unix_private_data *) channel->private_data;
388	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
389
390	if (--channel->refcount > 0)
391		return 0;
392
393	retval = flush_cached_blocks(channel, data, 0);
394
395	if (close(data->dev) < 0)
396		retval = errno;
397	free_cache(channel, data);
398
399	ext2fs_free_mem((void **) &channel->private_data);
400	if (channel->name)
401		ext2fs_free_mem((void **) &channel->name);
402	ext2fs_free_mem((void **) &channel);
403	return retval;
404}
405
406static errcode_t unix_set_blksize(io_channel channel, int blksize)
407{
408	struct unix_private_data *data;
409	errcode_t		retval;
410
411	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
412	data = (struct unix_private_data *) channel->private_data;
413	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
414
415	if (channel->block_size != blksize) {
416		if ((retval = flush_cached_blocks(channel, data, 0)))
417			return retval;
418
419		channel->block_size = blksize;
420		free_cache(channel, data);
421		if ((retval = alloc_cache(channel, data)))
422			return retval;
423	}
424	return 0;
425}
426
427
428static errcode_t unix_read_blk(io_channel channel, unsigned long block,
429			       int count, void *buf)
430{
431	struct unix_private_data *data;
432	struct unix_cache *cache;
433	errcode_t	retval;
434	char		*cp;
435	int		i, j;
436
437	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
438	data = (struct unix_private_data *) channel->private_data;
439	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
440
441	/*
442	 * If we're doing an odd-sized read, flush out the cache and
443	 * then do a direct read.
444	 */
445	if (count < 0) {
446		if ((retval = flush_cached_blocks(channel, data, 0)))
447			return retval;
448		return raw_read_blk(channel, data, block, count, buf);
449	}
450
451	cp = buf;
452	while (count > 0) {
453		/* If it's in the cache, use it! */
454		if ((cache = find_cached_block(channel, data, block, 0))) {
455#ifdef DEBUG
456			printf("Using cached block %d\n", block);
457#endif
458			memcpy(cp, cache->buf, channel->block_size);
459			count--;
460			block++;
461			cp += channel->block_size;
462			continue;
463		}
464		/*
465		 * Find the number of uncached blocks so we can do a
466		 * single read request
467		 */
468		for (i=1; i < count; i++)
469			if (find_cached_block(channel, data, block+i, 0))
470				break;
471#ifdef DEBUG
472		printf("Reading %d blocks starting at %d\n", i, block);
473#endif
474		if ((retval = raw_read_blk(channel, data, block, i, cp)))
475			return retval;
476
477		/* Save the results in the cache */
478		for (j=0; j < i; j++) {
479			count--;
480			cache = find_cached_block(channel, data, block++, 1);
481			if (cache)
482				memcpy(cache->buf, cp, channel->block_size);
483			cp += channel->block_size;
484		}
485	}
486	return 0;
487}
488
489static errcode_t unix_write_blk(io_channel channel, unsigned long block,
490				int count, const void *buf)
491{
492	struct unix_private_data *data;
493	struct unix_cache *cache;
494	errcode_t	retval = 0, retval2;
495	const char	*cp;
496	int		writethrough;
497
498	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
499	data = (struct unix_private_data *) channel->private_data;
500	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
501
502	/*
503	 * If we're doing an odd-sized write or a very large write,
504	 * flush out the cache completely and then do a direct write.
505	 */
506	if (count < 0 || count > WRITE_VIA_CACHE_SIZE) {
507		if ((retval = flush_cached_blocks(channel, data, 1)))
508			return retval;
509		return raw_write_blk(channel, data, block, count, buf);
510	}
511
512	/*
513	 * For a moderate-sized multi-block write, first force a write
514	 * if we're in write-through cache mode, and then fill the
515	 * cache with the blocks.
516	 */
517	writethrough = channel->flags & CHANNEL_FLAGS_WRITETHROUGH;
518	if (writethrough)
519		retval = raw_write_blk(channel, data, block, count, buf);
520
521	cp = buf;
522	while (count > 0) {
523		cache = find_cached_block(channel, data, block, 1);
524		if (!cache) {
525			/*
526			 * Oh shit, we couldn't get cache descriptor.
527			 * Force the write directly.
528			 */
529			if ((retval2 = raw_write_blk(channel, data, block,
530						1, cp)))
531				retval = retval2;
532		} else {
533			memcpy(cache->buf, cp, channel->block_size);
534			cache->dirty = !writethrough;
535		}
536		count--;
537		block++;
538		cp += channel->block_size;
539	}
540	return retval;
541}
542
543static errcode_t unix_write_byte(io_channel channel, unsigned long offset,
544				 int size, const void *buf)
545{
546	struct unix_private_data *data;
547	errcode_t	retval = 0;
548	size_t		actual;
549
550	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
551	data = (struct unix_private_data *) channel->private_data;
552	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
553
554	/*
555	 * Flush out the cache completely
556	 */
557	if ((retval = flush_cached_blocks(channel, data, 1)))
558		return retval;
559
560	if (lseek(data->dev, offset, SEEK_SET) < 0)
561		return errno;
562
563	actual = write(data->dev, buf, size);
564	if (actual != size)
565		return EXT2_ET_SHORT_WRITE;
566
567	return 0;
568}
569
570/*
571 * Flush data buffers to disk.
572 */
573static errcode_t unix_flush(io_channel channel)
574{
575	struct unix_private_data *data;
576	errcode_t retval = 0;
577
578	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
579	data = (struct unix_private_data *) channel->private_data;
580	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
581
582	retval = flush_cached_blocks(channel, data, 0);
583	fsync(data->dev);
584	return retval;
585}
586
587