unix_io.c revision a85e81a2ff4fb1afc05ff74d5da573031c3495e0
/*
 * unix_io.c --- This is the Unix (well, really POSIX) implementation
 * 	of the I/O manager.
 *
 * Implements a small block cache (CACHE_SIZE entries); it is
 * write-through when the channel has CHANNEL_FLAGS_WRITETHROUGH set
 * and write-back otherwise.
 *
 * Includes support for Windows NT under Cygwin.
 *
 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
 * 	2002 by Theodore Ts'o.
 *
 * %Begin-Header%
 * This file may be redistributed under the terms of the GNU Public
 * License.
 * %End-Header%
 */
17
18#define _LARGEFILE_SOURCE
19#define _LARGEFILE64_SOURCE
20
21#include <stdio.h>
22#include <string.h>
23#if HAVE_UNISTD_H
24#include <unistd.h>
25#endif
26#if HAVE_ERRNO_H
27#include <errno.h>
28#endif
29#include <fcntl.h>
30#include <time.h>
31#ifdef __linux__
32#include <sys/utsname.h>
33#endif
34#if HAVE_SYS_STAT_H
35#include <sys/stat.h>
36#endif
37#if HAVE_SYS_TYPES_H
38#include <sys/types.h>
39#endif
40#if HAVE_SYS_RESOURCE_H
41#include <sys/resource.h>
42#endif
43
44#include "ext2_fs.h"
45#include "ext2fs.h"
46
/*
 * For checking structure magic numbers...
 *
 * Makes the enclosing function return `code' when obj->magic does not
 * match it.  The body is wrapped in do { } while (0) so that the macro
 * behaves as a single statement: it can be used as the unbraced body of
 * an if/else without capturing the caller's `else'.
 */

#define EXT2_CHECK_MAGIC(obj, code) \
	  do { if ((obj)->magic != (code)) return (code); } while (0)
53
/*
 * One slot of the block cache.
 *
 * The two flags are unsigned bit-fields: a signed one-bit field can only
 * represent 0 and -1, so storing 1 in it is implementation-defined.
 */
struct unix_cache {
	char		*buf;		/* block-sized data buffer */
	unsigned long	block;		/* block number held in buf */
	int		access_time;	/* stamp of last use; smallest is evicted first */
	unsigned int	dirty:1;	/* buf has not been written to the device yet */
	unsigned int	in_use:1;	/* slot currently holds a valid block */
};
61
#define CACHE_SIZE 8		/* Number of slots in the block cache */
#define WRITE_DIRECT_SIZE 4	/* Must be smaller than CACHE_SIZE */
#define READ_DIRECT_SIZE 4	/* Should be smaller than CACHE_SIZE */

/* Enforce the hard requirement stated above at compile time. */
#if WRITE_DIRECT_SIZE >= CACHE_SIZE
#error "WRITE_DIRECT_SIZE must be smaller than CACHE_SIZE"
#endif
65
66struct unix_private_data {
67	int	magic;
68	int	dev;
69	int	flags;
70	int	access_time;
71	struct unix_cache cache[CACHE_SIZE];
72};
73
74static errcode_t unix_open(const char *name, int flags, io_channel *channel);
75static errcode_t unix_close(io_channel channel);
76static errcode_t unix_set_blksize(io_channel channel, int blksize);
77static errcode_t unix_read_blk(io_channel channel, unsigned long block,
78			       int count, void *data);
79static errcode_t unix_write_blk(io_channel channel, unsigned long block,
80				int count, const void *data);
81static errcode_t unix_flush(io_channel channel);
82static errcode_t unix_write_byte(io_channel channel, unsigned long offset,
83				int size, const void *data);
84
85static void reuse_cache(io_channel channel, struct unix_private_data *data,
86		 struct unix_cache *cache, unsigned long block);
87
88static struct struct_io_manager struct_unix_manager = {
89	EXT2_ET_MAGIC_IO_MANAGER,
90	"Unix I/O Manager",
91	unix_open,
92	unix_close,
93	unix_set_blksize,
94	unix_read_blk,
95	unix_write_blk,
96	unix_flush,
97#ifdef __CYGWIN__
98	0
99#else
100	unix_write_byte
101#endif
102};
103
104io_manager unix_io_manager = &struct_unix_manager;
105
/*
 * Here are the raw I/O functions
 */
109#ifndef __CYGWIN__
110static errcode_t raw_read_blk(io_channel channel,
111			      struct unix_private_data *data,
112			      unsigned long block,
113			      int count, void *buf)
114{
115	errcode_t	retval;
116	size_t		size;
117	ext2_loff_t	location;
118	int		actual = 0;
119
120	size = (count < 0) ? -count : count * channel->block_size;
121	location = (ext2_loff_t) block * channel->block_size;
122	if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
123		retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
124		goto error_out;
125	}
126	actual = read(data->dev, buf, size);
127	if (actual != size) {
128		if (actual < 0)
129			actual = 0;
130		retval = EXT2_ET_SHORT_READ;
131		goto error_out;
132	}
133	return 0;
134
135error_out:
136	memset((char *) buf+actual, 0, size-actual);
137	if (channel->read_error)
138		retval = (channel->read_error)(channel, block, count, buf,
139					       size, actual, retval);
140	return retval;
141}
142#else /* __CYGWIN__ */
143/*
144 * Windows block devices only allow sector alignment IO in offset and size
145 */
146static errcode_t raw_read_blk(io_channel channel,
147			      struct unix_private_data *data,
148			      unsigned long block,
149			      int count, void *buf)
150{
151	errcode_t	retval;
152	size_t		size, alignsize, fragment;
153	ext2_loff_t	location;
154	int		total = 0, actual;
155#define BLOCKALIGN 512
156	char		sector[BLOCKALIGN];
157
158	size = (count < 0) ? -count : count * channel->block_size;
159	location = (ext2_loff_t) block * channel->block_size;
160#ifdef DEBUG
161	printf("count=%d, size=%d, block=%d, blk_size=%d, location=%lx\n",
162	 		count, size, block, channel->block_size, location);
163#endif
164	if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
165		retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
166		goto error_out;
167	}
168	fragment = size % BLOCKALIGN;
169	alignsize = size - fragment;
170	if (alignsize) {
171		actual = read(data->dev, buf, alignsize);
172		if (actual != alignsize)
173			goto short_read;
174	}
175	if (fragment) {
176		actual = read(data->dev, sector, BLOCKALIGN);
177		if (actual != BLOCKALIGN)
178			goto short_read;
179		memcpy(buf+alignsize, sector, fragment);
180	}
181	return 0;
182
183short_read:
184	if (actual>0)
185		total += actual;
186	retval = EXT2_ET_SHORT_READ;
187
188error_out:
189	memset((char *) buf+total, 0, size-actual);
190	if (channel->read_error)
191		retval = (channel->read_error)(channel, block, count, buf,
192					       size, actual, retval);
193	return retval;
194}
195#endif
196
197static errcode_t raw_write_blk(io_channel channel,
198			       struct unix_private_data *data,
199			       unsigned long block,
200			       int count, const void *buf)
201{
202	size_t		size;
203	ext2_loff_t	location;
204	int		actual = 0;
205	errcode_t	retval;
206
207	if (count == 1)
208		size = channel->block_size;
209	else {
210		if (count < 0)
211			size = -count;
212		else
213			size = count * channel->block_size;
214	}
215
216	location = (ext2_loff_t) block * channel->block_size;
217	if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
218		retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
219		goto error_out;
220	}
221
222	actual = write(data->dev, buf, size);
223	if (actual != size) {
224		retval = EXT2_ET_SHORT_WRITE;
225		goto error_out;
226	}
227	return 0;
228
229error_out:
230	if (channel->write_error)
231		retval = (channel->write_error)(channel, block, count, buf,
232						size, actual, retval);
233	return retval;
234}
235
236
/*
 * Here we implement the cache functions
 */
240
241/* Allocate the cache buffers */
242static errcode_t alloc_cache(io_channel channel,
243			     struct unix_private_data *data)
244{
245	errcode_t		retval;
246	struct unix_cache	*cache;
247	int			i;
248
249	data->access_time = 0;
250	for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
251		cache->block = 0;
252		cache->access_time = 0;
253		cache->dirty = 0;
254		cache->in_use = 0;
255		if ((retval = ext2fs_get_mem(channel->block_size,
256					     (void **) &cache->buf)))
257			return retval;
258	}
259	return 0;
260}
261
262/* Free the cache buffers */
263static void free_cache(io_channel channel,
264		       struct unix_private_data *data)
265{
266	struct unix_cache	*cache;
267	int			i;
268
269	data->access_time = 0;
270	for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
271		cache->block = 0;
272		cache->access_time = 0;
273		cache->dirty = 0;
274		cache->in_use = 0;
275		if (cache->buf)
276			ext2fs_free_mem((void **) &cache->buf);
277		cache->buf = 0;
278	}
279}
280
281/*
282 * Try to find a block in the cache.  If the block is not found, and
283 * eldest is a non-zero pointer, then fill in eldest with the cache
284 * entry to that should be reused.
285 */
286static struct unix_cache *find_cached_block(io_channel channel,
287					    struct unix_private_data *data,
288					    unsigned long block,
289					    struct unix_cache **eldest)
290{
291	struct unix_cache	*cache, *unused_cache, *oldest_cache;
292	int			i;
293
294	unused_cache = oldest_cache = 0;
295	for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
296		if (!cache->in_use) {
297			if (!unused_cache)
298				unused_cache = cache;
299			continue;
300		}
301		if (cache->block == block) {
302			cache->access_time = ++data->access_time;
303			return cache;
304		}
305		if (!oldest_cache ||
306		    (cache->access_time < oldest_cache->access_time))
307			oldest_cache = cache;
308	}
309	if (eldest)
310		*eldest = (unused_cache) ? unused_cache : oldest_cache;
311	return 0;
312}
313
314/*
315 * Reuse a particular cache entry for another block.
316 */
317static void reuse_cache(io_channel channel, struct unix_private_data *data,
318		 struct unix_cache *cache, unsigned long block)
319{
320	if (cache->dirty && cache->in_use)
321		raw_write_blk(channel, data, cache->block, 1, cache->buf);
322
323	cache->in_use = 1;
324	cache->dirty = 0;
325	cache->block = block;
326	cache->access_time = ++data->access_time;
327}
328
329/*
330 * Flush all of the blocks in the cache
331 */
332static errcode_t flush_cached_blocks(io_channel channel,
333				     struct unix_private_data *data,
334				     int invalidate)
335
336{
337	struct unix_cache	*cache;
338	errcode_t		retval, retval2;
339	int			i;
340
341	retval2 = 0;
342	for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
343		if (!cache->in_use)
344			continue;
345
346		if (invalidate)
347			cache->in_use = 0;
348
349		if (!cache->dirty)
350			continue;
351
352		retval = raw_write_blk(channel, data,
353				       cache->block, 1, cache->buf);
354		if (retval)
355			retval2 = retval;
356		else
357			cache->dirty = 0;
358	}
359	return retval2;
360}
361
362static errcode_t unix_open(const char *name, int flags, io_channel *channel)
363{
364	io_channel	io = NULL;
365	struct unix_private_data *data = NULL;
366	errcode_t	retval;
367	int		open_flags;
368	struct stat	st;
369#ifdef __linux__
370	struct 		utsname ut;
371#endif
372
373	if (name == 0)
374		return EXT2_ET_BAD_DEVICE_NAME;
375	retval = ext2fs_get_mem(sizeof(struct struct_io_channel),
376				(void **) &io);
377	if (retval)
378		return retval;
379	memset(io, 0, sizeof(struct struct_io_channel));
380	io->magic = EXT2_ET_MAGIC_IO_CHANNEL;
381	retval = ext2fs_get_mem(sizeof(struct unix_private_data),
382				(void **) &data);
383	if (retval)
384		goto cleanup;
385
386	io->manager = unix_io_manager;
387	retval = ext2fs_get_mem(strlen(name)+1, (void **) &io->name);
388	if (retval)
389		goto cleanup;
390
391	strcpy(io->name, name);
392	io->private_data = data;
393	io->block_size = 1024;
394	io->read_error = 0;
395	io->write_error = 0;
396	io->refcount = 1;
397
398	memset(data, 0, sizeof(struct unix_private_data));
399	data->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL;
400
401	if ((retval = alloc_cache(io, data)))
402		goto cleanup;
403
404	open_flags = (flags & IO_FLAG_RW) ? O_RDWR : O_RDONLY;
405#ifdef HAVE_OPEN64
406	data->dev = open64(name, open_flags);
407#else
408	data->dev = open(name, open_flags);
409#endif
410	if (data->dev < 0) {
411		retval = errno;
412		goto cleanup;
413	}
414
415#ifdef __linux__
416#undef RLIM_INFINITY
417#if (defined(__alpha__) || ((defined(__sparc__) || defined(__mips__)) && (SIZEOF_LONG == 4)))
418#define RLIM_INFINITY	((unsigned long)(~0UL>>1))
419#else
420#define RLIM_INFINITY  (~0UL)
421#endif
422	/*
423	 * Work around a bug in 2.4.10-2.4.18 kernels where writes to
424	 * block devices are wrongly getting hit by the filesize
425	 * limit.  This workaround isn't perfect, since it won't work
426	 * if glibc wasn't built against 2.2 header files.  (Sigh.)
427	 *
428	 */
429	if ((flags & IO_FLAG_RW) &&
430	    (uname(&ut) == 0) &&
431	    ((ut.release[0] == '2') && (ut.release[1] == '.') &&
432	     (ut.release[2] == '4') && (ut.release[3] == '.') &&
433	     (ut.release[4] == '1') && (ut.release[5] >= '0') &&
434	     (ut.release[5] < '8')) &&
435	    (fstat(data->dev, &st) == 0) &&
436	    (S_ISBLK(st.st_mode))) {
437		struct rlimit	rlim;
438
439		rlim.rlim_cur = rlim.rlim_max = (unsigned long) RLIM_INFINITY;
440		setrlimit(RLIMIT_FSIZE, &rlim);
441		getrlimit(RLIMIT_FSIZE, &rlim);
442		if (((unsigned long) rlim.rlim_cur) <
443		    ((unsigned long) rlim.rlim_max)) {
444			rlim.rlim_cur = rlim.rlim_max;
445			setrlimit(RLIMIT_FSIZE, &rlim);
446		}
447	}
448#endif
449	*channel = io;
450	return 0;
451
452cleanup:
453	if (data) {
454		free_cache(io, data);
455		ext2fs_free_mem((void **) &data);
456	}
457	if (io)
458		ext2fs_free_mem((void **) &io);
459	return retval;
460}
461
462static errcode_t unix_close(io_channel channel)
463{
464	struct unix_private_data *data;
465	errcode_t	retval = 0;
466
467	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
468	data = (struct unix_private_data *) channel->private_data;
469	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
470
471	if (--channel->refcount > 0)
472		return 0;
473
474	retval = flush_cached_blocks(channel, data, 0);
475
476	if (close(data->dev) < 0)
477		retval = errno;
478	free_cache(channel, data);
479
480	ext2fs_free_mem((void **) &channel->private_data);
481	if (channel->name)
482		ext2fs_free_mem((void **) &channel->name);
483	ext2fs_free_mem((void **) &channel);
484	return retval;
485}
486
487static errcode_t unix_set_blksize(io_channel channel, int blksize)
488{
489	struct unix_private_data *data;
490	errcode_t		retval;
491
492	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
493	data = (struct unix_private_data *) channel->private_data;
494	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
495
496	if (channel->block_size != blksize) {
497		if ((retval = flush_cached_blocks(channel, data, 0)))
498			return retval;
499
500		channel->block_size = blksize;
501		free_cache(channel, data);
502		if ((retval = alloc_cache(channel, data)))
503			return retval;
504	}
505	return 0;
506}
507
508
509static errcode_t unix_read_blk(io_channel channel, unsigned long block,
510			       int count, void *buf)
511{
512	struct unix_private_data *data;
513	struct unix_cache *cache, *reuse[READ_DIRECT_SIZE];
514	errcode_t	retval;
515	char		*cp;
516	int		i, j;
517
518	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
519	data = (struct unix_private_data *) channel->private_data;
520	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
521
522	/*
523	 * If we're doing an odd-sized read or a very large read,
524	 * flush out the cache and then do a direct read.
525	 */
526	if (count < 0 || count > WRITE_DIRECT_SIZE) {
527		if ((retval = flush_cached_blocks(channel, data, 0)))
528			return retval;
529		return raw_read_blk(channel, data, block, count, buf);
530	}
531
532	cp = buf;
533	while (count > 0) {
534		/* If it's in the cache, use it! */
535		if ((cache = find_cached_block(channel, data, block,
536					       &reuse[0]))) {
537#ifdef DEBUG
538			printf("Using cached block %d\n", block);
539#endif
540			memcpy(cp, cache->buf, channel->block_size);
541			count--;
542			block++;
543			cp += channel->block_size;
544			continue;
545		}
546		/*
547		 * Find the number of uncached blocks so we can do a
548		 * single read request
549		 */
550		for (i=1; i < count; i++)
551			if (find_cached_block(channel, data, block+i,
552					      &reuse[i]))
553				break;
554#ifdef DEBUG
555		printf("Reading %d blocks starting at %d\n", i, block);
556#endif
557		if ((retval = raw_read_blk(channel, data, block, i, cp)))
558			return retval;
559
560		/* Save the results in the cache */
561		for (j=0; j < i; j++) {
562			count--;
563			cache = reuse[j];
564			reuse_cache(channel, data, cache, block++);
565			memcpy(cache->buf, cp, channel->block_size);
566			cp += channel->block_size;
567		}
568	}
569	return 0;
570}
571
572static errcode_t unix_write_blk(io_channel channel, unsigned long block,
573				int count, const void *buf)
574{
575	struct unix_private_data *data;
576	struct unix_cache *cache, *reuse;
577	errcode_t	retval = 0;
578	const char	*cp;
579	int		writethrough;
580
581	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
582	data = (struct unix_private_data *) channel->private_data;
583	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
584
585	/*
586	 * If we're doing an odd-sized write or a very large write,
587	 * flush out the cache completely and then do a direct write.
588	 */
589	if (count < 0 || count > WRITE_DIRECT_SIZE) {
590		if ((retval = flush_cached_blocks(channel, data, 1)))
591			return retval;
592		return raw_write_blk(channel, data, block, count, buf);
593	}
594
595	/*
596	 * For a moderate-sized multi-block write, first force a write
597	 * if we're in write-through cache mode, and then fill the
598	 * cache with the blocks.
599	 */
600	writethrough = channel->flags & CHANNEL_FLAGS_WRITETHROUGH;
601	if (writethrough)
602		retval = raw_write_blk(channel, data, block, count, buf);
603
604	cp = buf;
605	while (count > 0) {
606		cache = find_cached_block(channel, data, block, &reuse);
607		if (!cache) {
608			cache = reuse;
609			reuse_cache(channel, data, cache, block);
610		}
611		memcpy(cache->buf, cp, channel->block_size);
612		cache->dirty = !writethrough;
613		count--;
614		block++;
615		cp += channel->block_size;
616	}
617	return retval;
618}
619
620static errcode_t unix_write_byte(io_channel channel, unsigned long offset,
621				 int size, const void *buf)
622{
623	struct unix_private_data *data;
624	errcode_t	retval = 0;
625	size_t		actual;
626
627	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
628	data = (struct unix_private_data *) channel->private_data;
629	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
630
631	/*
632	 * Flush out the cache completely
633	 */
634	if ((retval = flush_cached_blocks(channel, data, 1)))
635		return retval;
636
637	if (lseek(data->dev, offset, SEEK_SET) < 0)
638		return errno;
639
640	actual = write(data->dev, buf, size);
641	if (actual != size)
642		return EXT2_ET_SHORT_WRITE;
643
644	return 0;
645}
646
647/*
648 * Flush data buffers to disk.
649 */
650static errcode_t unix_flush(io_channel channel)
651{
652	struct unix_private_data *data;
653	errcode_t retval = 0;
654
655	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
656	data = (struct unix_private_data *) channel->private_data;
657	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
658
659	retval = flush_cached_blocks(channel, data, 0);
660	fsync(data->dev);
661	return retval;
662}
663
664