unix_io.c revision 289e0557c24c68290b6d9b73b09674447801fdac
1/*
2 * unix_io.c --- This is the Unix (well, really POSIX) implementation
3 * 	of the I/O manager.
4 *
 * Implements a small block cache (CACHE_SIZE entries) that is
 * write-through only when CHANNEL_FLAGS_WRITETHROUGH is set.
6 *
 * Includes support for Windows NT under Cygwin.
8 *
9 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
10 * 	2002 by Theodore Ts'o.
11 *
12 * %Begin-Header%
13 * This file may be redistributed under the terms of the GNU Public
14 * License.
15 * %End-Header%
16 */
17
18#define _LARGEFILE_SOURCE
19#define _LARGEFILE64_SOURCE
20
21#include <stdio.h>
22#include <string.h>
23#if HAVE_UNISTD_H
24#include <unistd.h>
25#endif
26#if HAVE_ERRNO_H
27#include <errno.h>
28#endif
29#include <fcntl.h>
30#include <time.h>
31#ifdef __linux__
32#include <sys/utsname.h>
33#endif
34#if HAVE_SYS_STAT_H
35#include <sys/stat.h>
36#endif
37#if HAVE_SYS_TYPES_H
38#include <sys/types.h>
39#endif
40#if HAVE_SYS_RESOURCE_H
41#include <sys/resource.h>
42#endif
43
44#include "ext2_fs.h"
45#include "ext2fs.h"
46
/*
 * For checking structure magic numbers...
 *
 * Makes the enclosing function return `code` when the `magic` member of
 * the structure pointed to by `obj` is not equal to `code`.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as
 * one statement: used as the body of an if/else it can no longer capture
 * the caller's `else`.  It must be followed by a semicolon.
 */

#define EXT2_CHECK_MAGIC(obj, code) \
	do { \
		if ((obj)->magic != (code)) \
			return (code); \
	} while (0)
53
/*
 * One entry of the block cache.
 *
 * The two flags are unsigned bit-fields: a signed one-bit field can only
 * hold 0 and -1, so storing 1 in it would not read back as 1.
 */
struct unix_cache {
	char		*buf;		/* block-sized data buffer */
	unsigned long	block;		/* block number held in buf */
	int		access_time;	/* stamp of the most recent use */
	unsigned int	dirty:1;	/* buf has not been written out yet */
	unsigned int	in_use:1;	/* entry currently holds a block */
};
61
62#define CACHE_SIZE 8
63#define WRITE_DIRECT_SIZE 4	/* Must be smaller than CACHE_SIZE */
64#define READ_DIRECT_SIZE 4	/* Should be smaller than CACHE_SIZE */
65
66struct unix_private_data {
67	int	magic;
68	int	dev;
69	int	flags;
70	int	access_time;
71	struct unix_cache cache[CACHE_SIZE];
72};
73
74static errcode_t unix_open(const char *name, int flags, io_channel *channel);
75static errcode_t unix_close(io_channel channel);
76static errcode_t unix_set_blksize(io_channel channel, int blksize);
77static errcode_t unix_read_blk(io_channel channel, unsigned long block,
78			       int count, void *data);
79static errcode_t unix_write_blk(io_channel channel, unsigned long block,
80				int count, const void *data);
81static errcode_t unix_flush(io_channel channel);
82static errcode_t unix_write_byte(io_channel channel, unsigned long offset,
83				int size, const void *data);
84
85static void reuse_cache(io_channel channel, struct unix_private_data *data,
86		 struct unix_cache *cache, unsigned long block);
87
/*
 * Decide whether reads have to go through a sector-sized bounce buffer.
 *
 * __FreeBSD_kernel__ is defined by GNU/kFreeBSD - the FreeBSD kernel
 * does not know buffered block devices - everything is raw.
 */
#undef NEED_BOUNCE_BUFFER
#if defined(__CYGWIN__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
#define NEED_BOUNCE_BUFFER
#endif
95
96static struct struct_io_manager struct_unix_manager = {
97	EXT2_ET_MAGIC_IO_MANAGER,
98	"Unix I/O Manager",
99	unix_open,
100	unix_close,
101	unix_set_blksize,
102	unix_read_blk,
103	unix_write_blk,
104	unix_flush,
105#ifdef NEED_BOUNCE_BUFFER
106	0
107#else
108	unix_write_byte
109#endif
110};
111
112io_manager unix_io_manager = &struct_unix_manager;
113
114/*
115 * Here are the raw I/O functions
116 */
117#ifndef NEED_BOUNCE_BUFFER
118static errcode_t raw_read_blk(io_channel channel,
119			      struct unix_private_data *data,
120			      unsigned long block,
121			      int count, void *buf)
122{
123	errcode_t	retval;
124	ssize_t		size;
125	ext2_loff_t	location;
126	int		actual = 0;
127
128	size = (count < 0) ? -count : count * channel->block_size;
129	location = (ext2_loff_t) block * channel->block_size;
130	if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
131		retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
132		goto error_out;
133	}
134	actual = read(data->dev, buf, size);
135	if (actual != size) {
136		if (actual < 0)
137			actual = 0;
138		retval = EXT2_ET_SHORT_READ;
139		goto error_out;
140	}
141	return 0;
142
143error_out:
144	memset((char *) buf+actual, 0, size-actual);
145	if (channel->read_error)
146		retval = (channel->read_error)(channel, block, count, buf,
147					       size, actual, retval);
148	return retval;
149}
150#else /* NEED_BOUNCE_BUFFER */
151/*
152 * Windows and FreeBSD block devices only allow sector alignment IO in offset and size
153 */
154static errcode_t raw_read_blk(io_channel channel,
155			      struct unix_private_data *data,
156			      unsigned long block,
157			      int count, void *buf)
158{
159	errcode_t	retval;
160	size_t		size, alignsize, fragment;
161	ext2_loff_t	location;
162	int		total = 0, actual;
163#define BLOCKALIGN 512
164	char		sector[BLOCKALIGN];
165
166	size = (count < 0) ? -count : count * channel->block_size;
167	location = (ext2_loff_t) block * channel->block_size;
168#ifdef DEBUG
169	printf("count=%d, size=%d, block=%d, blk_size=%d, location=%lx\n",
170	 		count, size, block, channel->block_size, location);
171#endif
172	if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
173		retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
174		goto error_out;
175	}
176	fragment = size % BLOCKALIGN;
177	alignsize = size - fragment;
178	if (alignsize) {
179		actual = read(data->dev, buf, alignsize);
180		if (actual != alignsize)
181			goto short_read;
182	}
183	if (fragment) {
184		actual = read(data->dev, sector, BLOCKALIGN);
185		if (actual != BLOCKALIGN)
186			goto short_read;
187		memcpy(buf+alignsize, sector, fragment);
188	}
189	return 0;
190
191short_read:
192	if (actual>0)
193		total += actual;
194	retval = EXT2_ET_SHORT_READ;
195
196error_out:
197	memset((char *) buf+total, 0, size-actual);
198	if (channel->read_error)
199		retval = (channel->read_error)(channel, block, count, buf,
200					       size, actual, retval);
201	return retval;
202}
203#endif
204
205static errcode_t raw_write_blk(io_channel channel,
206			       struct unix_private_data *data,
207			       unsigned long block,
208			       int count, const void *buf)
209{
210	ssize_t		size;
211	ext2_loff_t	location;
212	int		actual = 0;
213	errcode_t	retval;
214
215	if (count == 1)
216		size = channel->block_size;
217	else {
218		if (count < 0)
219			size = -count;
220		else
221			size = count * channel->block_size;
222	}
223
224	location = (ext2_loff_t) block * channel->block_size;
225	if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
226		retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
227		goto error_out;
228	}
229
230	actual = write(data->dev, buf, size);
231	if (actual != size) {
232		retval = EXT2_ET_SHORT_WRITE;
233		goto error_out;
234	}
235	return 0;
236
237error_out:
238	if (channel->write_error)
239		retval = (channel->write_error)(channel, block, count, buf,
240						size, actual, retval);
241	return retval;
242}
243
244
245/*
246 * Here we implement the cache functions
247 */
248
249/* Allocate the cache buffers */
250static errcode_t alloc_cache(io_channel channel,
251			     struct unix_private_data *data)
252{
253	errcode_t		retval;
254	struct unix_cache	*cache;
255	int			i;
256
257	data->access_time = 0;
258	for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
259		cache->block = 0;
260		cache->access_time = 0;
261		cache->dirty = 0;
262		cache->in_use = 0;
263		if ((retval = ext2fs_get_mem(channel->block_size,
264					     &cache->buf)))
265			return retval;
266	}
267	return 0;
268}
269
270/* Free the cache buffers */
271static void free_cache(struct unix_private_data *data)
272{
273	struct unix_cache	*cache;
274	int			i;
275
276	data->access_time = 0;
277	for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
278		cache->block = 0;
279		cache->access_time = 0;
280		cache->dirty = 0;
281		cache->in_use = 0;
282		if (cache->buf)
283			ext2fs_free_mem(&cache->buf);
284		cache->buf = 0;
285	}
286}
287
288#ifndef NO_IO_CACHE
289/*
290 * Try to find a block in the cache.  If the block is not found, and
291 * eldest is a non-zero pointer, then fill in eldest with the cache
292 * entry to that should be reused.
293 */
294static struct unix_cache *find_cached_block(struct unix_private_data *data,
295					    unsigned long block,
296					    struct unix_cache **eldest)
297{
298	struct unix_cache	*cache, *unused_cache, *oldest_cache;
299	int			i;
300
301	unused_cache = oldest_cache = 0;
302	for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
303		if (!cache->in_use) {
304			if (!unused_cache)
305				unused_cache = cache;
306			continue;
307		}
308		if (cache->block == block) {
309			cache->access_time = ++data->access_time;
310			return cache;
311		}
312		if (!oldest_cache ||
313		    (cache->access_time < oldest_cache->access_time))
314			oldest_cache = cache;
315	}
316	if (eldest)
317		*eldest = (unused_cache) ? unused_cache : oldest_cache;
318	return 0;
319}
320
321/*
322 * Reuse a particular cache entry for another block.
323 */
324static void reuse_cache(io_channel channel, struct unix_private_data *data,
325		 struct unix_cache *cache, unsigned long block)
326{
327	if (cache->dirty && cache->in_use)
328		raw_write_blk(channel, data, cache->block, 1, cache->buf);
329
330	cache->in_use = 1;
331	cache->dirty = 0;
332	cache->block = block;
333	cache->access_time = ++data->access_time;
334}
335
336/*
337 * Flush all of the blocks in the cache
338 */
339static errcode_t flush_cached_blocks(io_channel channel,
340				     struct unix_private_data *data,
341				     int invalidate)
342
343{
344	struct unix_cache	*cache;
345	errcode_t		retval, retval2;
346	int			i;
347
348	retval2 = 0;
349	for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
350		if (!cache->in_use)
351			continue;
352
353		if (invalidate)
354			cache->in_use = 0;
355
356		if (!cache->dirty)
357			continue;
358
359		retval = raw_write_blk(channel, data,
360				       cache->block, 1, cache->buf);
361		if (retval)
362			retval2 = retval;
363		else
364			cache->dirty = 0;
365	}
366	return retval2;
367}
368#endif /* NO_IO_CACHE */
369
370static errcode_t unix_open(const char *name, int flags, io_channel *channel)
371{
372	io_channel	io = NULL;
373	struct unix_private_data *data = NULL;
374	errcode_t	retval;
375	int		open_flags;
376	struct stat	st;
377#ifdef __linux__
378	struct 		utsname ut;
379#endif
380
381	if (name == 0)
382		return EXT2_ET_BAD_DEVICE_NAME;
383	retval = ext2fs_get_mem(sizeof(struct struct_io_channel), &io);
384	if (retval)
385		return retval;
386	memset(io, 0, sizeof(struct struct_io_channel));
387	io->magic = EXT2_ET_MAGIC_IO_CHANNEL;
388	retval = ext2fs_get_mem(sizeof(struct unix_private_data), &data);
389	if (retval)
390		goto cleanup;
391
392	io->manager = unix_io_manager;
393	retval = ext2fs_get_mem(strlen(name)+1, &io->name);
394	if (retval)
395		goto cleanup;
396
397	strcpy(io->name, name);
398	io->private_data = data;
399	io->block_size = 1024;
400	io->read_error = 0;
401	io->write_error = 0;
402	io->refcount = 1;
403
404	memset(data, 0, sizeof(struct unix_private_data));
405	data->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL;
406
407	if ((retval = alloc_cache(io, data)))
408		goto cleanup;
409
410	open_flags = (flags & IO_FLAG_RW) ? O_RDWR : O_RDONLY;
411#ifdef HAVE_OPEN64
412	data->dev = open64(name, open_flags);
413#else
414	data->dev = open(name, open_flags);
415#endif
416	if (data->dev < 0) {
417		retval = errno;
418		goto cleanup;
419	}
420
421#ifdef __linux__
422#undef RLIM_INFINITY
423#if (defined(__alpha__) || ((defined(__sparc__) || defined(__mips__)) && (SIZEOF_LONG == 4)))
424#define RLIM_INFINITY	((unsigned long)(~0UL>>1))
425#else
426#define RLIM_INFINITY  (~0UL)
427#endif
428	/*
429	 * Work around a bug in 2.4.10-2.4.18 kernels where writes to
430	 * block devices are wrongly getting hit by the filesize
431	 * limit.  This workaround isn't perfect, since it won't work
432	 * if glibc wasn't built against 2.2 header files.  (Sigh.)
433	 *
434	 */
435	if ((flags & IO_FLAG_RW) &&
436	    (uname(&ut) == 0) &&
437	    ((ut.release[0] == '2') && (ut.release[1] == '.') &&
438	     (ut.release[2] == '4') && (ut.release[3] == '.') &&
439	     (ut.release[4] == '1') && (ut.release[5] >= '0') &&
440	     (ut.release[5] < '8')) &&
441	    (fstat(data->dev, &st) == 0) &&
442	    (S_ISBLK(st.st_mode))) {
443		struct rlimit	rlim;
444
445		rlim.rlim_cur = rlim.rlim_max = (unsigned long) RLIM_INFINITY;
446		setrlimit(RLIMIT_FSIZE, &rlim);
447		getrlimit(RLIMIT_FSIZE, &rlim);
448		if (((unsigned long) rlim.rlim_cur) <
449		    ((unsigned long) rlim.rlim_max)) {
450			rlim.rlim_cur = rlim.rlim_max;
451			setrlimit(RLIMIT_FSIZE, &rlim);
452		}
453	}
454#endif
455	*channel = io;
456	return 0;
457
458cleanup:
459	if (data) {
460		free_cache(data);
461		ext2fs_free_mem(&data);
462	}
463	if (io)
464		ext2fs_free_mem(&io);
465	return retval;
466}
467
468static errcode_t unix_close(io_channel channel)
469{
470	struct unix_private_data *data;
471	errcode_t	retval = 0;
472
473	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
474	data = (struct unix_private_data *) channel->private_data;
475	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
476
477	if (--channel->refcount > 0)
478		return 0;
479
480#ifndef NO_IO_CACHE
481	retval = flush_cached_blocks(channel, data, 0);
482#endif
483
484	if (close(data->dev) < 0)
485		retval = errno;
486	free_cache(data);
487
488	ext2fs_free_mem(&channel->private_data);
489	if (channel->name)
490		ext2fs_free_mem(&channel->name);
491	ext2fs_free_mem(&channel);
492	return retval;
493}
494
495static errcode_t unix_set_blksize(io_channel channel, int blksize)
496{
497	struct unix_private_data *data;
498	errcode_t		retval;
499
500	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
501	data = (struct unix_private_data *) channel->private_data;
502	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
503
504	if (channel->block_size != blksize) {
505#ifndef NO_IO_CACHE
506		if ((retval = flush_cached_blocks(channel, data, 0)))
507			return retval;
508#endif
509
510		channel->block_size = blksize;
511		free_cache(data);
512		if ((retval = alloc_cache(channel, data)))
513			return retval;
514	}
515	return 0;
516}
517
518
519static errcode_t unix_read_blk(io_channel channel, unsigned long block,
520			       int count, void *buf)
521{
522	struct unix_private_data *data;
523	struct unix_cache *cache, *reuse[READ_DIRECT_SIZE];
524	errcode_t	retval;
525	char		*cp;
526	int		i, j;
527
528	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
529	data = (struct unix_private_data *) channel->private_data;
530	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
531
532#ifdef NO_IO_CACHE
533	return raw_read_blk(channel, data, block, count, buf);
534#else
535	/*
536	 * If we're doing an odd-sized read or a very large read,
537	 * flush out the cache and then do a direct read.
538	 */
539	if (count < 0 || count > WRITE_DIRECT_SIZE) {
540		if ((retval = flush_cached_blocks(channel, data, 0)))
541			return retval;
542		return raw_read_blk(channel, data, block, count, buf);
543	}
544
545	cp = buf;
546	while (count > 0) {
547		/* If it's in the cache, use it! */
548		if ((cache = find_cached_block(data, block, &reuse[0]))) {
549#ifdef DEBUG
550			printf("Using cached block %d\n", block);
551#endif
552			memcpy(cp, cache->buf, channel->block_size);
553			count--;
554			block++;
555			cp += channel->block_size;
556			continue;
557		}
558		/*
559		 * Find the number of uncached blocks so we can do a
560		 * single read request
561		 */
562		for (i=1; i < count; i++)
563			if (find_cached_block(data, block+i, &reuse[i]))
564				break;
565#ifdef DEBUG
566		printf("Reading %d blocks starting at %d\n", i, block);
567#endif
568		if ((retval = raw_read_blk(channel, data, block, i, cp)))
569			return retval;
570
571		/* Save the results in the cache */
572		for (j=0; j < i; j++) {
573			count--;
574			cache = reuse[j];
575			reuse_cache(channel, data, cache, block++);
576			memcpy(cache->buf, cp, channel->block_size);
577			cp += channel->block_size;
578		}
579	}
580	return 0;
581#endif /* NO_IO_CACHE */
582}
583
584static errcode_t unix_write_blk(io_channel channel, unsigned long block,
585				int count, const void *buf)
586{
587	struct unix_private_data *data;
588	struct unix_cache *cache, *reuse;
589	errcode_t	retval = 0;
590	const char	*cp;
591	int		writethrough;
592
593	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
594	data = (struct unix_private_data *) channel->private_data;
595	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
596
597#ifdef NO_IO_CACHE
598	return raw_write_blk(channel, data, block, count, buf);
599#else
600	/*
601	 * If we're doing an odd-sized write or a very large write,
602	 * flush out the cache completely and then do a direct write.
603	 */
604	if (count < 0 || count > WRITE_DIRECT_SIZE) {
605		if ((retval = flush_cached_blocks(channel, data, 1)))
606			return retval;
607		return raw_write_blk(channel, data, block, count, buf);
608	}
609
610	/*
611	 * For a moderate-sized multi-block write, first force a write
612	 * if we're in write-through cache mode, and then fill the
613	 * cache with the blocks.
614	 */
615	writethrough = channel->flags & CHANNEL_FLAGS_WRITETHROUGH;
616	if (writethrough)
617		retval = raw_write_blk(channel, data, block, count, buf);
618
619	cp = buf;
620	while (count > 0) {
621		cache = find_cached_block(data, block, &reuse);
622		if (!cache) {
623			cache = reuse;
624			reuse_cache(channel, data, cache, block);
625		}
626		memcpy(cache->buf, cp, channel->block_size);
627		cache->dirty = !writethrough;
628		count--;
629		block++;
630		cp += channel->block_size;
631	}
632	return retval;
633#endif /* NO_IO_CACHE */
634}
635
636static errcode_t unix_write_byte(io_channel channel, unsigned long offset,
637				 int size, const void *buf)
638{
639	struct unix_private_data *data;
640	errcode_t	retval = 0;
641	ssize_t		actual;
642
643	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
644	data = (struct unix_private_data *) channel->private_data;
645	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
646
647#ifndef NO_IO_CACHE
648	/*
649	 * Flush out the cache completely
650	 */
651	if ((retval = flush_cached_blocks(channel, data, 1)))
652		return retval;
653#endif
654
655	if (lseek(data->dev, offset, SEEK_SET) < 0)
656		return errno;
657
658	actual = write(data->dev, buf, size);
659	if (actual != size)
660		return EXT2_ET_SHORT_WRITE;
661
662	return 0;
663}
664
665/*
666 * Flush data buffers to disk.
667 */
668static errcode_t unix_flush(io_channel channel)
669{
670	struct unix_private_data *data;
671	errcode_t retval = 0;
672
673	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
674	data = (struct unix_private_data *) channel->private_data;
675	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
676
677#ifndef NO_IO_CACHE
678	retval = flush_cached_blocks(channel, data, 0);
679#endif
680	fsync(data->dev);
681	return retval;
682}
683
684