unix_io.c revision f12e285ffd9ff0b37c4f91d5ab2b021ed1eb43be
1/*
2 * unix_io.c --- This is the Unix I/O interface to the I/O manager.
3 *
 * Implements a small multi-block cache (write-back by default,
 * write-through when the channel requests it).
5 *
6 * Copyright (C) 1993, 1994, 1995 Theodore Ts'o.
7 *
8 * %Begin-Header%
9 * This file may be redistributed under the terms of the GNU Public
10 * License.
11 * %End-Header%
12 */
13
14#define _LARGEFILE_SOURCE
15#define _LARGEFILE64_SOURCE
16
17#include <stdio.h>
18#include <string.h>
19#if HAVE_UNISTD_H
20#include <unistd.h>
21#endif
22#if HAVE_ERRNO_H
23#include <errno.h>
24#endif
25#include <fcntl.h>
26#include <time.h>
27#if HAVE_SYS_STAT_H
28#include <sys/stat.h>
29#endif
30#if HAVE_SYS_TYPES_H
31#include <sys/types.h>
32#endif
33#include <sys/resource.h>
34
35#include "ext2_fs.h"
36#include "ext2fs.h"
37
38/*
39 * For checking structure magic numbers...
40 */
41
/*
 * Return `code` from the enclosing function when the magic field of the
 * structure pointed to by `struct` does not equal `code`.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement: used as the body of an if/else it cannot steal
 * the caller's `else` branch.
 */
#define EXT2_CHECK_MAGIC(struct, code) \
	do { \
		if ((struct)->magic != (code)) \
			return (code); \
	} while (0)
44
/*
 * One slot of the block cache.
 *
 * The two flags are declared unsigned: a signed one-bit field can only
 * hold 0 and -1, so storing 1 and later comparing against 1 would fail.
 */
struct unix_cache {
	char		*buf;		/* buffer holding one block of data */
	unsigned long	block;		/* block number stored in buf */
	int		access_time;	/* stamp taken on the last lookup hit */
	unsigned int	dirty:1;	/* buf still has to be written out */
	unsigned int	in_use:1;	/* slot currently holds a block */
};
52
/* Number of slots in each channel's block cache. */
#define CACHE_SIZE 8

/*
 * Writes longer than this many blocks skip the cache and go straight
 * to the device.  Must be smaller than CACHE_SIZE.
 */
#define WRITE_VIA_CACHE_SIZE 4
55
56struct unix_private_data {
57	int	magic;
58	int	dev;
59	int	flags;
60	int	access_time;
61	struct unix_cache cache[CACHE_SIZE];
62};
63
64static errcode_t unix_open(const char *name, int flags, io_channel *channel);
65static errcode_t unix_close(io_channel channel);
66static errcode_t unix_set_blksize(io_channel channel, int blksize);
67static errcode_t unix_read_blk(io_channel channel, unsigned long block,
68			       int count, void *data);
69static errcode_t unix_write_blk(io_channel channel, unsigned long block,
70				int count, const void *data);
71static errcode_t unix_flush(io_channel channel);
72static errcode_t unix_write_byte(io_channel channel, unsigned long offset,
73				int size, const void *data);
74
75static struct struct_io_manager struct_unix_manager = {
76	EXT2_ET_MAGIC_IO_MANAGER,
77	"Unix I/O Manager",
78	unix_open,
79	unix_close,
80	unix_set_blksize,
81	unix_read_blk,
82	unix_write_blk,
83	unix_flush,
84	unix_write_byte
85};
86
87io_manager unix_io_manager = &struct_unix_manager;
88
89/*
90 * Here are the raw I/O functions
91 */
92static errcode_t raw_read_blk(io_channel channel,
93			      struct unix_private_data *data,
94			      unsigned long block,
95			      int count, void *buf)
96{
97	errcode_t	retval;
98	size_t		size;
99	ext2_loff_t	location;
100	int		actual = 0;
101
102	size = (count < 0) ? -count : count * channel->block_size;
103	location = (ext2_loff_t) block * channel->block_size;
104	if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
105		retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
106		goto error_out;
107	}
108	actual = read(data->dev, buf, size);
109	if (actual != size) {
110		if (actual < 0)
111			actual = 0;
112		retval = EXT2_ET_SHORT_READ;
113		goto error_out;
114	}
115	return 0;
116
117error_out:
118	memset((char *) buf+actual, 0, size-actual);
119	if (channel->read_error)
120		retval = (channel->read_error)(channel, block, count, buf,
121					       size, actual, retval);
122	return retval;
123}
124
125static errcode_t raw_write_blk(io_channel channel,
126			       struct unix_private_data *data,
127			       unsigned long block,
128			       int count, const void *buf)
129{
130	size_t		size;
131	ext2_loff_t	location;
132	int		actual = 0;
133	errcode_t	retval;
134
135	if (count == 1)
136		size = channel->block_size;
137	else {
138		if (count < 0)
139			size = -count;
140		else
141			size = count * channel->block_size;
142	}
143
144	location = (ext2_loff_t) block * channel->block_size;
145	if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
146		retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
147		goto error_out;
148	}
149
150	actual = write(data->dev, buf, size);
151	if (actual != size) {
152		retval = EXT2_ET_SHORT_WRITE;
153		goto error_out;
154	}
155	return 0;
156
157error_out:
158	if (channel->write_error)
159		retval = (channel->write_error)(channel, block, count, buf,
160						size, actual, retval);
161	return retval;
162}
163
164
165/*
166 * Here we implement the cache functions
167 */
168
169/* Allocate the cache buffers */
170static errcode_t alloc_cache(io_channel channel,
171			     struct unix_private_data *data)
172{
173	errcode_t		retval;
174	struct unix_cache	*cache;
175	int			i;
176
177	data->access_time = 0;
178	for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
179		cache->block = 0;
180		cache->access_time = 0;
181		cache->dirty = 0;
182		cache->in_use = 0;
183		if ((retval = ext2fs_get_mem(channel->block_size,
184					     (void **) &cache->buf)))
185			return retval;
186	}
187	return 0;
188}
189
190/* Free the cache buffers */
191static void free_cache(io_channel channel,
192		       struct unix_private_data *data)
193{
194	struct unix_cache	*cache;
195	int			i;
196
197	data->access_time = 0;
198	for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
199		cache->block = 0;
200		cache->access_time = 0;
201		cache->dirty = 0;
202		cache->in_use = 0;
203		if (cache->buf)
204			ext2fs_free_mem((void **) &cache->buf);
205		cache->buf = 0;
206	}
207}
208
209/*
210 * Try to find a block in the cache.  If get_cache is non-zero, then
211 * if the block isn't in the cache, evict the oldest block in the
212 * cache and create a new cache entry for the requested block.
213 */
214static struct unix_cache *find_cached_block(io_channel channel,
215					    struct unix_private_data *data,
216					    unsigned long block,
217					    int get_cache)
218{
219	struct unix_cache	*cache, *unused_cache, *oldest_cache;
220	int			i;
221
222	unused_cache = oldest_cache = 0;
223	for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
224		if (!cache->in_use) {
225			unused_cache = cache;
226			continue;
227		}
228		if (cache->block == block) {
229			cache->access_time = ++data->access_time;
230			return cache;
231		}
232		if (!oldest_cache ||
233		    (cache->access_time < oldest_cache->access_time))
234			oldest_cache = cache;
235	}
236	if (!get_cache)
237		return 0;
238
239	/*
240	 * Try to allocate cache slot.
241	 */
242	if (unused_cache)
243		cache = unused_cache;
244	else {
245		cache = oldest_cache;
246		if (cache->dirty)
247			raw_write_blk(channel, data,
248				      cache->block, 1, cache->buf);
249	}
250	cache->in_use = 1;
251	cache->block = block;
252	cache->access_time = ++data->access_time;
253	return cache;
254}
255
256/*
257 * Flush all of the blocks in the cache
258 */
259static errcode_t flush_cached_blocks(io_channel channel,
260				     struct unix_private_data *data,
261				     int invalidate)
262
263{
264	struct unix_cache	*cache;
265	errcode_t		retval, retval2;
266	int			i;
267
268	retval2 = 0;
269	for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
270		if (!cache->in_use)
271			continue;
272
273		if (invalidate)
274			cache->in_use = 0;
275
276		if (!cache->dirty)
277			continue;
278
279		retval = raw_write_blk(channel, data,
280				       cache->block, 1, cache->buf);
281		if (retval)
282			retval2 = retval;
283		else
284			cache->dirty = 0;
285	}
286	return retval2;
287}
288
289
290
291static errcode_t unix_open(const char *name, int flags, io_channel *channel)
292{
293	io_channel	io = NULL;
294	struct unix_private_data *data = NULL;
295	errcode_t	retval;
296	int		open_flags;
297	struct stat	st;
298
299	if (name == 0)
300		return EXT2_ET_BAD_DEVICE_NAME;
301	retval = ext2fs_get_mem(sizeof(struct struct_io_channel),
302				(void **) &io);
303	if (retval)
304		return retval;
305	memset(io, 0, sizeof(struct struct_io_channel));
306	io->magic = EXT2_ET_MAGIC_IO_CHANNEL;
307	retval = ext2fs_get_mem(sizeof(struct unix_private_data),
308				(void **) &data);
309	if (retval)
310		goto cleanup;
311
312	io->manager = unix_io_manager;
313	retval = ext2fs_get_mem(strlen(name)+1, (void **) &io->name);
314	if (retval)
315		goto cleanup;
316
317	strcpy(io->name, name);
318	io->private_data = data;
319	io->block_size = 1024;
320	io->read_error = 0;
321	io->write_error = 0;
322	io->refcount = 1;
323
324	memset(data, 0, sizeof(struct unix_private_data));
325	data->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL;
326
327	if ((retval = alloc_cache(io, data)))
328		goto cleanup;
329
330	open_flags = (flags & IO_FLAG_RW) ? O_RDWR : O_RDONLY;
331#ifdef HAVE_OPEN64
332	data->dev = open64(name, open_flags);
333#else
334	data->dev = open(name, open_flags);
335#endif
336	if (data->dev < 0) {
337		retval = errno;
338		goto cleanup;
339	}
340	/*
341	 * Work around a bug in 2.4.10+ kernels where writes to block
342	 * devices are wrongly getting hit by the filesize limit.
343	 */
344	if ((flags & IO_FLAG_RW) &&
345	    (fstat(data->dev, &st) == 0) &&
346	    (S_ISBLK(st.st_mode))) {
347		struct rlimit	rlim;
348
349		rlim.rlim_cur = rlim.rlim_max = ((unsigned long)(~0UL));
350		setrlimit(RLIMIT_FSIZE, &rlim);
351		getrlimit(RLIMIT_FSIZE, &rlim);
352		if (((unsigned long) rlim.rlim_cur) <
353		    ((unsigned long) rlim.rlim_max)) {
354			rlim.rlim_cur = rlim.rlim_max;
355			setrlimit(RLIMIT_FSIZE, &rlim);
356		}
357	}
358	*channel = io;
359	return 0;
360
361cleanup:
362	if (data) {
363		free_cache(io, data);
364		ext2fs_free_mem((void **) &data);
365	}
366	if (io)
367		ext2fs_free_mem((void **) &io);
368	return retval;
369}
370
371static errcode_t unix_close(io_channel channel)
372{
373	struct unix_private_data *data;
374	errcode_t	retval = 0;
375
376	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
377	data = (struct unix_private_data *) channel->private_data;
378	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
379
380	if (--channel->refcount > 0)
381		return 0;
382
383	retval = flush_cached_blocks(channel, data, 0);
384
385	if (close(data->dev) < 0)
386		retval = errno;
387	free_cache(channel, data);
388
389	ext2fs_free_mem((void **) &channel->private_data);
390	if (channel->name)
391		ext2fs_free_mem((void **) &channel->name);
392	ext2fs_free_mem((void **) &channel);
393	return retval;
394}
395
396static errcode_t unix_set_blksize(io_channel channel, int blksize)
397{
398	struct unix_private_data *data;
399	errcode_t		retval;
400
401	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
402	data = (struct unix_private_data *) channel->private_data;
403	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
404
405	if (channel->block_size != blksize) {
406		if ((retval = flush_cached_blocks(channel, data, 0)))
407			return retval;
408
409		channel->block_size = blksize;
410		free_cache(channel, data);
411		if ((retval = alloc_cache(channel, data)))
412			return retval;
413	}
414	return 0;
415}
416
417
418static errcode_t unix_read_blk(io_channel channel, unsigned long block,
419			       int count, void *buf)
420{
421	struct unix_private_data *data;
422	struct unix_cache *cache;
423	errcode_t	retval;
424	char		*cp;
425	int		i, j;
426
427	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
428	data = (struct unix_private_data *) channel->private_data;
429	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
430
431	/*
432	 * If we're doing an odd-sized read, flush out the cache and
433	 * then do a direct read.
434	 */
435	if (count < 0) {
436		if ((retval = flush_cached_blocks(channel, data, 0)))
437			return retval;
438		return raw_read_blk(channel, data, block, count, buf);
439	}
440
441	cp = buf;
442	while (count > 0) {
443		/* If it's in the cache, use it! */
444		if ((cache = find_cached_block(channel, data, block, 0))) {
445#ifdef DEBUG
446			printf("Using cached block %d\n", block);
447#endif
448			memcpy(cp, cache->buf, channel->block_size);
449			count--;
450			block++;
451			cp += channel->block_size;
452			continue;
453		}
454		/*
455		 * Find the number of uncached blocks so we can do a
456		 * single read request
457		 */
458		for (i=1; i < count; i++)
459			if (find_cached_block(channel, data, block+i, 0))
460				break;
461#ifdef DEBUG
462		printf("Reading %d blocks starting at %d\n", i, block);
463#endif
464		if ((retval = raw_read_blk(channel, data, block, i, cp)))
465			return retval;
466
467		/* Save the results in the cache */
468		for (j=0; j < i; j++) {
469			count--;
470			cache = find_cached_block(channel, data, block++, 1);
471			if (cache)
472				memcpy(cache->buf, cp, channel->block_size);
473			cp += channel->block_size;
474		}
475	}
476	return 0;
477}
478
479static errcode_t unix_write_blk(io_channel channel, unsigned long block,
480				int count, const void *buf)
481{
482	struct unix_private_data *data;
483	struct unix_cache *cache;
484	errcode_t	retval = 0, retval2;
485	const char	*cp;
486	int		writethrough;
487
488	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
489	data = (struct unix_private_data *) channel->private_data;
490	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
491
492	/*
493	 * If we're doing an odd-sized write or a very large write,
494	 * flush out the cache completely and then do a direct write.
495	 */
496	if (count < 0 || count > WRITE_VIA_CACHE_SIZE) {
497		if ((retval = flush_cached_blocks(channel, data, 1)))
498			return retval;
499		return raw_write_blk(channel, data, block, count, buf);
500	}
501
502	/*
503	 * For a moderate-sized multi-block write, first force a write
504	 * if we're in write-through cache mode, and then fill the
505	 * cache with the blocks.
506	 */
507	writethrough = channel->flags & CHANNEL_FLAGS_WRITETHROUGH;
508	if (writethrough)
509		retval = raw_write_blk(channel, data, block, count, buf);
510
511	cp = buf;
512	while (count > 0) {
513		cache = find_cached_block(channel, data, block, 1);
514		if (!cache) {
515			/*
516			 * Oh shit, we couldn't get cache descriptor.
517			 * Force the write directly.
518			 */
519			if ((retval2 = raw_write_blk(channel, data, block,
520						1, cp)))
521				retval = retval2;
522		} else {
523			memcpy(cache->buf, cp, channel->block_size);
524			cache->dirty = !writethrough;
525		}
526		count--;
527		block++;
528		cp += channel->block_size;
529	}
530	return retval;
531}
532
533static errcode_t unix_write_byte(io_channel channel, unsigned long offset,
534				 int size, const void *buf)
535{
536	struct unix_private_data *data;
537	errcode_t	retval = 0;
538	size_t		actual;
539
540	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
541	data = (struct unix_private_data *) channel->private_data;
542	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
543
544	/*
545	 * Flush out the cache completely
546	 */
547	if ((retval = flush_cached_blocks(channel, data, 1)))
548		return retval;
549
550	if (lseek(data->dev, offset, SEEK_SET) < 0)
551		return errno;
552
553	actual = write(data->dev, buf, size);
554	if (actual != size)
555		return EXT2_ET_SHORT_WRITE;
556
557	return 0;
558}
559
560/*
561 * Flush data buffers to disk.
562 */
563static errcode_t unix_flush(io_channel channel)
564{
565	struct unix_private_data *data;
566	errcode_t retval = 0;
567
568	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
569	data = (struct unix_private_data *) channel->private_data;
570	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
571
572	retval = flush_cached_blocks(channel, data, 0);
573	fsync(data->dev);
574	return retval;
575}
576
577