1/*
2 * fallocate.c -- Allocate large chunks of file.
3 *
4 * Copyright (C) 2014 Oracle.
5 *
6 * %Begin-Header%
7 * This file may be redistributed under the terms of the GNU Library
8 * General Public License, version 2.
9 * %End-Header%
10 */
11
12#include "config.h"
13
14#if HAVE_SYS_TYPES_H
15#include <sys/types.h>
16#endif
17
18#include "ext2_fs.h"
19#include "ext2fs.h"
20#define min(a, b) ((a) < (b) ? (a) : (b))
21
22#undef DEBUG
23
24#ifdef DEBUG
25# define dbg_printf(f, a...)  do {printf(f, ## a); fflush(stdout); } while (0)
26#else
27# define dbg_printf(f, a...)
28#endif
29
30/*
31 * Extent-based fallocate code.
32 *
33 * Find runs of unmapped logical blocks by starting at start and walking the
34 * extents until we reach the end of the range we want.
35 *
36 * For each run of unmapped blocks, try to find the extents on either side of
37 * the range.  If there's a left extent that can grow by at least a cluster and
38 * there are lblocks between start and the next lcluster after start, see if
39 * there's an implied cluster allocation; if so, zero the blocks (if the left
40 * extent is initialized) and adjust the extent.  Ditto for the blocks between
41 * the end of the last full lcluster and end, if there's a right extent.
42 *
43 * Try to attach as much as we can to the left extent, then try to attach as
44 * much as we can to the right extent.  For the remainder, try to allocate the
45 * whole range; map in whatever we get; and repeat until we're done.
46 *
47 * To attach to a left extent, figure out the maximum amount we can add to the
48 * extent and try to allocate that much, and append if successful.  To attach
49 * to a right extent, figure out the max we can add to the extent, try to
50 * allocate that much, and prepend if successful.
51 *
52 * We need an alloc_range function that tells us how much we can allocate given
53 * a maximum length and one of a suggested start, a fixed start, or a fixed end
54 * point.
55 *
56 * Every time we modify the extent tree we also need to update the block stats.
57 *
58 * At the end, update i_blocks and i_size appropriately.
59 */
60
61static void dbg_print_extent(const char *desc EXT2FS_ATTR((unused)),
62		const struct ext2fs_extent *extent EXT2FS_ATTR((unused)))
63{
64#ifdef DEBUG
65	if (desc)
66		printf("%s: ", desc);
67	printf("extent: lblk %llu--%llu, len %u, pblk %llu, flags: ",
68	       extent->e_lblk, extent->e_lblk + extent->e_len - 1,
69	       extent->e_len, extent->e_pblk);
70	if (extent->e_flags & EXT2_EXTENT_FLAGS_LEAF)
71		fputs("LEAF ", stdout);
72	if (extent->e_flags & EXT2_EXTENT_FLAGS_UNINIT)
73		fputs("UNINIT ", stdout);
74	if (extent->e_flags & EXT2_EXTENT_FLAGS_SECOND_VISIT)
75		fputs("2ND_VISIT ", stdout);
76	if (!extent->e_flags)
77		fputs("(none)", stdout);
78	fputc('\n', stdout);
79	fflush(stdout);
80#endif
81}
82
83static errcode_t claim_range(ext2_filsys fs, struct ext2_inode *inode,
84			     blk64_t blk, blk64_t len)
85{
86	blk64_t	clusters;
87
88	clusters = (len + EXT2FS_CLUSTER_RATIO(fs) - 1) /
89		   EXT2FS_CLUSTER_RATIO(fs);
90	ext2fs_block_alloc_stats_range(fs, blk,
91			clusters * EXT2FS_CLUSTER_RATIO(fs), +1);
92	return ext2fs_iblk_add_blocks(fs, inode, clusters);
93}
94
95static errcode_t ext_falloc_helper(ext2_filsys fs,
96				   int flags,
97				   ext2_ino_t ino,
98				   struct ext2_inode *inode,
99				   ext2_extent_handle_t handle,
100				   struct ext2fs_extent *left_ext,
101				   struct ext2fs_extent *right_ext,
102				   blk64_t range_start, blk64_t range_len,
103				   blk64_t alloc_goal)
104{
105	struct ext2fs_extent	newex, ex;
106	int			op;
107	blk64_t			fillable, pblk, plen, x, y;
108	blk64_t			eof_blk = 0, cluster_fill = 0;
109	errcode_t		err;
110	blk_t			max_extent_len, max_uninit_len, max_init_len;
111
112#ifdef DEBUG
113	printf("%s: ", __func__);
114	if (left_ext)
115		printf("left_ext=%llu--%llu, ", left_ext->e_lblk,
116		       left_ext->e_lblk + left_ext->e_len - 1);
117	if (right_ext)
118		printf("right_ext=%llu--%llu, ", right_ext->e_lblk,
119		       right_ext->e_lblk + right_ext->e_len - 1);
120	printf("start=%llu len=%llu, goal=%llu\n", range_start, range_len,
121	       alloc_goal);
122	fflush(stdout);
123#endif
124	/* Can't create initialized extents past EOF? */
125	if (!(flags & EXT2_FALLOCATE_INIT_BEYOND_EOF))
126		eof_blk = EXT2_I_SIZE(inode) / fs->blocksize;
127
128	/* The allocation goal must be as far into a cluster as range_start. */
129	alloc_goal = (alloc_goal & ~EXT2FS_CLUSTER_MASK(fs)) |
130		     (range_start & EXT2FS_CLUSTER_MASK(fs));
131
132	max_uninit_len = EXT_UNINIT_MAX_LEN & ~EXT2FS_CLUSTER_MASK(fs);
133	max_init_len = EXT_INIT_MAX_LEN & ~EXT2FS_CLUSTER_MASK(fs);
134
135	/* We must lengthen the left extent to the end of the cluster */
136	if (left_ext && EXT2FS_CLUSTER_RATIO(fs) > 1) {
137		/* How many more blocks can be attached to left_ext? */
138		if (left_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT)
139			fillable = max_uninit_len - left_ext->e_len;
140		else
141			fillable = max_init_len - left_ext->e_len;
142
143		if (fillable > range_len)
144			fillable = range_len;
145		if (fillable == 0)
146			goto expand_right;
147
148		/*
149		 * If range_start isn't on a cluster boundary, try an
150		 * implied cluster allocation for left_ext.
151		 */
152		cluster_fill = EXT2FS_CLUSTER_RATIO(fs) -
153			       (range_start & EXT2FS_CLUSTER_MASK(fs));
154		cluster_fill &= EXT2FS_CLUSTER_MASK(fs);
155		if (cluster_fill == 0)
156			goto expand_right;
157
158		if (cluster_fill > fillable)
159			cluster_fill = fillable;
160
161		/* Don't expand an initialized left_ext beyond EOF */
162		if (!(flags & EXT2_FALLOCATE_INIT_BEYOND_EOF)) {
163			x = left_ext->e_lblk + left_ext->e_len - 1;
164			dbg_printf("%s: lend=%llu newlend=%llu eofblk=%llu\n",
165				   __func__, x, x + cluster_fill, eof_blk);
166			if (eof_blk >= x && eof_blk <= x + cluster_fill)
167				cluster_fill = eof_blk - x;
168			if (cluster_fill == 0)
169				goto expand_right;
170		}
171
172		err = ext2fs_extent_goto(handle, left_ext->e_lblk);
173		if (err)
174			goto expand_right;
175		left_ext->e_len += cluster_fill;
176		range_start += cluster_fill;
177		range_len -= cluster_fill;
178		alloc_goal += cluster_fill;
179
180		dbg_print_extent("ext_falloc clus left+", left_ext);
181		err = ext2fs_extent_replace(handle, 0, left_ext);
182		if (err)
183			goto out;
184		err = ext2fs_extent_fix_parents(handle);
185		if (err)
186			goto out;
187
188		/* Zero blocks */
189		if (!(left_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT)) {
190			err = ext2fs_zero_blocks2(fs, left_ext->e_pblk +
191						  left_ext->e_len -
192						  cluster_fill, cluster_fill,
193						  NULL, NULL);
194			if (err)
195				goto out;
196		}
197	}
198
199expand_right:
200	/* We must lengthen the right extent to the beginning of the cluster */
201	if (right_ext && EXT2FS_CLUSTER_RATIO(fs) > 1) {
202		/* How much can we attach to right_ext? */
203		if (right_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT)
204			fillable = max_uninit_len - right_ext->e_len;
205		else
206			fillable = max_init_len - right_ext->e_len;
207
208		if (fillable > range_len)
209			fillable = range_len;
210		if (fillable == 0)
211			goto try_merge;
212
213		/*
214		 * If range_end isn't on a cluster boundary, try an implied
215		 * cluster allocation for right_ext.
216		 */
217		cluster_fill = right_ext->e_lblk & EXT2FS_CLUSTER_MASK(fs);
218		if (cluster_fill == 0)
219			goto try_merge;
220
221		err = ext2fs_extent_goto(handle, right_ext->e_lblk);
222		if (err)
223			goto out;
224
225		if (cluster_fill > fillable)
226			cluster_fill = fillable;
227		right_ext->e_lblk -= cluster_fill;
228		right_ext->e_pblk -= cluster_fill;
229		right_ext->e_len += cluster_fill;
230		range_len -= cluster_fill;
231
232		dbg_print_extent("ext_falloc clus right+", right_ext);
233		err = ext2fs_extent_replace(handle, 0, right_ext);
234		if (err)
235			goto out;
236		err = ext2fs_extent_fix_parents(handle);
237		if (err)
238			goto out;
239
240		/* Zero blocks if necessary */
241		if (!(right_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT)) {
242			err = ext2fs_zero_blocks2(fs, right_ext->e_pblk,
243						  cluster_fill, NULL, NULL);
244			if (err)
245				goto out;
246		}
247	}
248
249try_merge:
250	/* Merge both extents together, perhaps? */
251	if (left_ext && right_ext) {
252		/* Are the two extents mergeable? */
253		if ((left_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT) !=
254		    (right_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT))
255			goto try_left;
256
257		/* User requires init/uninit but extent is uninit/init. */
258		if (((flags & EXT2_FALLOCATE_FORCE_INIT) &&
259		     (left_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT)) ||
260		    ((flags & EXT2_FALLOCATE_FORCE_UNINIT) &&
261		     !(left_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT)))
262			goto try_left;
263
264		/*
265		 * Skip initialized extent unless user wants to zero blocks
266		 * or requires init extent.
267		 */
268		if (!(left_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT) &&
269		    (!(flags & EXT2_FALLOCATE_ZERO_BLOCKS) ||
270		     !(flags & EXT2_FALLOCATE_FORCE_INIT)))
271			goto try_left;
272
273		/* Will it even fit? */
274		x = left_ext->e_len + range_len + right_ext->e_len;
275		if (x > (left_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT ?
276				max_uninit_len : max_init_len))
277			goto try_left;
278
279		err = ext2fs_extent_goto(handle, left_ext->e_lblk);
280		if (err)
281			goto try_left;
282
283		/* Allocate blocks */
284		y = left_ext->e_pblk + left_ext->e_len;
285		err = ext2fs_new_range(fs, EXT2_NEWRANGE_FIXED_GOAL |
286				       EXT2_NEWRANGE_MIN_LENGTH, y,
287				       right_ext->e_pblk - y + 1, NULL,
288				       &pblk, &plen);
289		if (err)
290			goto try_left;
291		if (pblk + plen != right_ext->e_pblk)
292			goto try_left;
293		err = claim_range(fs, inode, pblk, plen);
294		if (err)
295			goto out;
296
297		/* Modify extents */
298		left_ext->e_len = x;
299		dbg_print_extent("ext_falloc merge", left_ext);
300		err = ext2fs_extent_replace(handle, 0, left_ext);
301		if (err)
302			goto out;
303		err = ext2fs_extent_fix_parents(handle);
304		if (err)
305			goto out;
306		err = ext2fs_extent_get(handle, EXT2_EXTENT_NEXT_LEAF, &newex);
307		if (err)
308			goto out;
309		err = ext2fs_extent_delete(handle, 0);
310		if (err)
311			goto out;
312		err = ext2fs_extent_fix_parents(handle);
313		if (err)
314			goto out;
315		*right_ext = *left_ext;
316
317		/* Zero blocks */
318		if (!(left_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT) &&
319		    (flags & EXT2_FALLOCATE_ZERO_BLOCKS)) {
320			err = ext2fs_zero_blocks2(fs, range_start, range_len,
321						  NULL, NULL);
322			if (err)
323				goto out;
324		}
325
326		return 0;
327	}
328
329try_left:
330	/* Extend the left extent */
331	if (left_ext) {
332		/* How many more blocks can be attached to left_ext? */
333		if (left_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT)
334			fillable = max_uninit_len - left_ext->e_len;
335		else if (flags & EXT2_FALLOCATE_ZERO_BLOCKS)
336			fillable = max_init_len - left_ext->e_len;
337		else
338			fillable = 0;
339
340		/* User requires init/uninit but extent is uninit/init. */
341		if (((flags & EXT2_FALLOCATE_FORCE_INIT) &&
342		     (left_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT)) ||
343		    ((flags & EXT2_FALLOCATE_FORCE_UNINIT) &&
344		     !(left_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT)))
345			goto try_right;
346
347		if (fillable > range_len)
348			fillable = range_len;
349
350		/* Don't expand an initialized left_ext beyond EOF */
351		x = left_ext->e_lblk + left_ext->e_len - 1;
352		if (!(flags & EXT2_FALLOCATE_INIT_BEYOND_EOF)) {
353			dbg_printf("%s: lend=%llu newlend=%llu eofblk=%llu\n",
354				   __func__, x, x + fillable, eof_blk);
355			if (eof_blk >= x && eof_blk <= x + fillable)
356				fillable = eof_blk - x;
357		}
358
359		if (fillable == 0)
360			goto try_right;
361
362		/* Test if the right edge of the range is already mapped? */
363		if (EXT2FS_CLUSTER_RATIO(fs) > 1) {
364			err = ext2fs_map_cluster_block(fs, ino, inode,
365					x + fillable, &pblk);
366			if (err)
367				goto out;
368			if (pblk)
369				fillable -= 1 + ((x + fillable)
370						 & EXT2FS_CLUSTER_MASK(fs));
371			if (fillable == 0)
372				goto try_right;
373		}
374
375		/* Allocate range of blocks */
376		x = left_ext->e_pblk + left_ext->e_len;
377		err = ext2fs_new_range(fs, EXT2_NEWRANGE_FIXED_GOAL |
378				EXT2_NEWRANGE_MIN_LENGTH,
379				x, fillable, NULL, &pblk, &plen);
380		if (err)
381			goto try_right;
382		err = claim_range(fs, inode, pblk, plen);
383		if (err)
384			goto out;
385
386		/* Modify left_ext */
387		err = ext2fs_extent_goto(handle, left_ext->e_lblk);
388		if (err)
389			goto out;
390		range_start += plen;
391		range_len -= plen;
392		left_ext->e_len += plen;
393		dbg_print_extent("ext_falloc left+", left_ext);
394		err = ext2fs_extent_replace(handle, 0, left_ext);
395		if (err)
396			goto out;
397		err = ext2fs_extent_fix_parents(handle);
398		if (err)
399			goto out;
400
401		/* Zero blocks if necessary */
402		if (!(left_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT) &&
403		    (flags & EXT2_FALLOCATE_ZERO_BLOCKS)) {
404			err = ext2fs_zero_blocks2(fs, pblk, plen, NULL, NULL);
405			if (err)
406				goto out;
407		}
408	}
409
410try_right:
411	/* Extend the right extent */
412	if (right_ext) {
413		/* How much can we attach to right_ext? */
414		if (right_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT)
415			fillable = max_uninit_len - right_ext->e_len;
416		else if (flags & EXT2_FALLOCATE_ZERO_BLOCKS)
417			fillable = max_init_len - right_ext->e_len;
418		else
419			fillable = 0;
420
421		/* User requires init/uninit but extent is uninit/init. */
422		if (((flags & EXT2_FALLOCATE_FORCE_INIT) &&
423		     (right_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT)) ||
424		    ((flags & EXT2_FALLOCATE_FORCE_UNINIT) &&
425		     !(right_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT)))
426			goto try_anywhere;
427
428		if (fillable > range_len)
429			fillable = range_len;
430		if (fillable == 0)
431			goto try_anywhere;
432
433		/* Test if the left edge of the range is already mapped? */
434		if (EXT2FS_CLUSTER_RATIO(fs) > 1) {
435			err = ext2fs_map_cluster_block(fs, ino, inode,
436					right_ext->e_lblk - fillable, &pblk);
437			if (err)
438				goto out;
439			if (pblk)
440				fillable -= EXT2FS_CLUSTER_RATIO(fs) -
441						((right_ext->e_lblk - fillable)
442						 & EXT2FS_CLUSTER_MASK(fs));
443			if (fillable == 0)
444				goto try_anywhere;
445		}
446
447		/*
448		 * FIXME: It would be nice if we could handle allocating a
449		 * variable range from a fixed end point instead of just
450		 * skipping to the general allocator if the whole range is
451		 * unavailable.
452		 */
453		err = ext2fs_new_range(fs, EXT2_NEWRANGE_FIXED_GOAL |
454				EXT2_NEWRANGE_MIN_LENGTH,
455				right_ext->e_pblk - fillable,
456				fillable, NULL, &pblk, &plen);
457		if (err)
458			goto try_anywhere;
459		err = claim_range(fs, inode,
460			      pblk & ~EXT2FS_CLUSTER_MASK(fs),
461			      plen + (pblk & EXT2FS_CLUSTER_MASK(fs)));
462		if (err)
463			goto out;
464
465		/* Modify right_ext */
466		err = ext2fs_extent_goto(handle, right_ext->e_lblk);
467		if (err)
468			goto out;
469		range_len -= plen;
470		right_ext->e_lblk -= plen;
471		right_ext->e_pblk -= plen;
472		right_ext->e_len += plen;
473		dbg_print_extent("ext_falloc right+", right_ext);
474		err = ext2fs_extent_replace(handle, 0, right_ext);
475		if (err)
476			goto out;
477		err = ext2fs_extent_fix_parents(handle);
478		if (err)
479			goto out;
480
481		/* Zero blocks if necessary */
482		if (!(right_ext->e_flags & EXT2_EXTENT_FLAGS_UNINIT) &&
483		    (flags & EXT2_FALLOCATE_ZERO_BLOCKS)) {
484			err = ext2fs_zero_blocks2(fs, pblk,
485					plen + cluster_fill, NULL, NULL);
486			if (err)
487				goto out;
488		}
489	}
490
491try_anywhere:
492	/* Try implied cluster alloc on the left and right ends */
493	if (range_len > 0 && (range_start & EXT2FS_CLUSTER_MASK(fs))) {
494		cluster_fill = EXT2FS_CLUSTER_RATIO(fs) -
495			       (range_start & EXT2FS_CLUSTER_MASK(fs));
496		cluster_fill &= EXT2FS_CLUSTER_MASK(fs);
497		if (cluster_fill > range_len)
498			cluster_fill = range_len;
499		newex.e_lblk = range_start;
500		err = ext2fs_map_cluster_block(fs, ino, inode, newex.e_lblk,
501					       &pblk);
502		if (err)
503			goto out;
504		if (pblk == 0)
505			goto try_right_implied;
506		newex.e_pblk = pblk;
507		newex.e_len = cluster_fill;
508		newex.e_flags = (flags & EXT2_FALLOCATE_FORCE_INIT ? 0 :
509				 EXT2_EXTENT_FLAGS_UNINIT);
510		dbg_print_extent("ext_falloc iclus left+", &newex);
511		ext2fs_extent_goto(handle, newex.e_lblk);
512		err = ext2fs_extent_get(handle, EXT2_EXTENT_CURRENT,
513					&ex);
514		if (err == EXT2_ET_NO_CURRENT_NODE)
515			ex.e_lblk = 0;
516		else if (err)
517			goto out;
518
519		if (ex.e_lblk > newex.e_lblk)
520			op = 0; /* insert before */
521		else
522			op = EXT2_EXTENT_INSERT_AFTER;
523		dbg_printf("%s: inserting %s lblk %llu newex=%llu\n",
524			   __func__, op ? "after" : "before", ex.e_lblk,
525			   newex.e_lblk);
526		err = ext2fs_extent_insert(handle, op, &newex);
527		if (err)
528			goto out;
529		err = ext2fs_extent_fix_parents(handle);
530		if (err)
531			goto out;
532
533		if (!(newex.e_flags & EXT2_EXTENT_FLAGS_UNINIT) &&
534		    (flags & EXT2_FALLOCATE_ZERO_BLOCKS)) {
535			err = ext2fs_zero_blocks2(fs, newex.e_pblk,
536						  newex.e_len, NULL, NULL);
537			if (err)
538				goto out;
539		}
540
541		range_start += cluster_fill;
542		range_len -= cluster_fill;
543	}
544
545try_right_implied:
546	y = range_start + range_len;
547	if (range_len > 0 && (y & EXT2FS_CLUSTER_MASK(fs))) {
548		cluster_fill = y & EXT2FS_CLUSTER_MASK(fs);
549		if (cluster_fill > range_len)
550			cluster_fill = range_len;
551		newex.e_lblk = y & ~EXT2FS_CLUSTER_MASK(fs);
552		err = ext2fs_map_cluster_block(fs, ino, inode, newex.e_lblk,
553					       &pblk);
554		if (err)
555			goto out;
556		if (pblk == 0)
557			goto no_implied;
558		newex.e_pblk = pblk;
559		newex.e_len = cluster_fill;
560		newex.e_flags = (flags & EXT2_FALLOCATE_FORCE_INIT ? 0 :
561				 EXT2_EXTENT_FLAGS_UNINIT);
562		dbg_print_extent("ext_falloc iclus right+", &newex);
563		ext2fs_extent_goto(handle, newex.e_lblk);
564		err = ext2fs_extent_get(handle, EXT2_EXTENT_CURRENT,
565					&ex);
566		if (err == EXT2_ET_NO_CURRENT_NODE)
567			ex.e_lblk = 0;
568		else if (err)
569			goto out;
570
571		if (ex.e_lblk > newex.e_lblk)
572			op = 0; /* insert before */
573		else
574			op = EXT2_EXTENT_INSERT_AFTER;
575		dbg_printf("%s: inserting %s lblk %llu newex=%llu\n",
576			   __func__, op ? "after" : "before", ex.e_lblk,
577			   newex.e_lblk);
578		err = ext2fs_extent_insert(handle, op, &newex);
579		if (err)
580			goto out;
581		err = ext2fs_extent_fix_parents(handle);
582		if (err)
583			goto out;
584
585		if (!(newex.e_flags & EXT2_EXTENT_FLAGS_UNINIT) &&
586		    (flags & EXT2_FALLOCATE_ZERO_BLOCKS)) {
587			err = ext2fs_zero_blocks2(fs, newex.e_pblk,
588						  newex.e_len, NULL, NULL);
589			if (err)
590				goto out;
591		}
592
593		range_len -= cluster_fill;
594	}
595
596no_implied:
597	if (range_len == 0)
598		return 0;
599
600	newex.e_lblk = range_start;
601	if (flags & EXT2_FALLOCATE_FORCE_INIT) {
602		max_extent_len = max_init_len;
603		newex.e_flags = 0;
604	} else {
605		max_extent_len = max_uninit_len;
606		newex.e_flags = EXT2_EXTENT_FLAGS_UNINIT;
607	}
608	pblk = alloc_goal;
609	y = range_len;
610	for (x = 0; x < y;) {
611		cluster_fill = newex.e_lblk & EXT2FS_CLUSTER_MASK(fs);
612		fillable = min(range_len + cluster_fill, max_extent_len);
613		err = ext2fs_new_range(fs, 0, pblk & ~EXT2FS_CLUSTER_MASK(fs),
614				       fillable,
615				       NULL, &pblk, &plen);
616		if (err)
617			goto out;
618		err = claim_range(fs, inode, pblk, plen);
619		if (err)
620			goto out;
621
622		/* Create extent */
623		newex.e_pblk = pblk + cluster_fill;
624		newex.e_len = plen - cluster_fill;
625		dbg_print_extent("ext_falloc create", &newex);
626		ext2fs_extent_goto(handle, newex.e_lblk);
627		err = ext2fs_extent_get(handle, EXT2_EXTENT_CURRENT,
628					&ex);
629		if (err == EXT2_ET_NO_CURRENT_NODE)
630			ex.e_lblk = 0;
631		else if (err)
632			goto out;
633
634		if (ex.e_lblk > newex.e_lblk)
635			op = 0; /* insert before */
636		else
637			op = EXT2_EXTENT_INSERT_AFTER;
638		dbg_printf("%s: inserting %s lblk %llu newex=%llu\n",
639			   __func__, op ? "after" : "before", ex.e_lblk,
640			   newex.e_lblk);
641		err = ext2fs_extent_insert(handle, op, &newex);
642		if (err)
643			goto out;
644		err = ext2fs_extent_fix_parents(handle);
645		if (err)
646			goto out;
647
648		if (!(newex.e_flags & EXT2_EXTENT_FLAGS_UNINIT) &&
649		    (flags & EXT2_FALLOCATE_ZERO_BLOCKS)) {
650			err = ext2fs_zero_blocks2(fs, pblk, plen, NULL, NULL);
651			if (err)
652				goto out;
653		}
654
655		/* Update variables at end of loop */
656		x += plen - cluster_fill;
657		range_len -= plen - cluster_fill;
658		newex.e_lblk += plen - cluster_fill;
659		pblk += plen - cluster_fill;
660		if (pblk >= ext2fs_blocks_count(fs->super))
661			pblk = fs->super->s_first_data_block;
662	}
663
664out:
665	return err;
666}
667
668static errcode_t extent_fallocate(ext2_filsys fs, int flags, ext2_ino_t ino,
669				      struct ext2_inode *inode, blk64_t goal,
670				      blk64_t start, blk64_t len)
671{
672	ext2_extent_handle_t	handle;
673	struct ext2fs_extent	left_extent, right_extent;
674	struct ext2fs_extent	*left_adjacent, *right_adjacent;
675	errcode_t		err;
676	blk64_t			range_start, range_end = 0, end, next;
677	blk64_t			count, goal_distance;
678
679	end = start + len - 1;
680	err = ext2fs_extent_open2(fs, ino, inode, &handle);
681	if (err)
682		return err;
683
684	/*
685	 * Find the extent closest to the start of the alloc range.  We don't
686	 * check the return value because _goto() sets the current node to the
687	 * next-lowest extent if 'start' is in a hole; or the next-highest
688	 * extent if there aren't any lower ones; or doesn't set a current node
689	 * if there was a real error reading the extent tree.  In that case,
690	 * _get() will error out.
691	 */
692start_again:
693	ext2fs_extent_goto(handle, start);
694	err = ext2fs_extent_get(handle, EXT2_EXTENT_CURRENT, &left_extent);
695	if (err == EXT2_ET_NO_CURRENT_NODE) {
696		blk64_t max_blocks = ext2fs_blocks_count(fs->super);
697
698		if (goal == ~0ULL)
699			goal = ext2fs_find_inode_goal(fs, ino, inode, start);
700		err = ext2fs_find_first_zero_block_bitmap2(fs->block_map,
701						goal, max_blocks - 1, &goal);
702		goal += start;
703		err = ext_falloc_helper(fs, flags, ino, inode, handle, NULL,
704					NULL, start, len, goal);
705		goto errout;
706	} else if (err)
707		goto errout;
708
709	dbg_print_extent("ext_falloc initial", &left_extent);
710	next = left_extent.e_lblk + left_extent.e_len;
711	if (left_extent.e_lblk > start) {
712		/* The nearest extent we found was beyond start??? */
713		goal = left_extent.e_pblk - (left_extent.e_lblk - start);
714		err = ext_falloc_helper(fs, flags, ino, inode, handle, NULL,
715					&left_extent, start,
716					left_extent.e_lblk - start, goal);
717		if (err)
718			goto errout;
719
720		goto start_again;
721	} else if (next >= start) {
722		range_start = next;
723		left_adjacent = &left_extent;
724	} else {
725		range_start = start;
726		left_adjacent = NULL;
727	}
728	goal = left_extent.e_pblk + (range_start - left_extent.e_lblk);
729	goal_distance = range_start - next;
730
731	do {
732		err = ext2fs_extent_get(handle, EXT2_EXTENT_NEXT_LEAF,
733					   &right_extent);
734		dbg_printf("%s: ino=%d get next =%d\n", __func__, ino,
735			   (int)err);
736		dbg_print_extent("ext_falloc next", &right_extent);
737		/* Stop if we've seen this extent before */
738		if (!err && right_extent.e_lblk <= left_extent.e_lblk)
739			err = EXT2_ET_EXTENT_NO_NEXT;
740
741		if (err && err != EXT2_ET_EXTENT_NO_NEXT)
742			goto errout;
743		if (err == EXT2_ET_EXTENT_NO_NEXT ||
744		    right_extent.e_lblk > end + 1) {
745			range_end = end;
746			right_adjacent = NULL;
747		} else {
748			/* Handle right_extent.e_lblk <= end */
749			range_end = right_extent.e_lblk - 1;
750			right_adjacent = &right_extent;
751		}
752		if (err != EXT2_ET_EXTENT_NO_NEXT &&
753		    goal_distance > (range_end - right_extent.e_lblk)) {
754			goal = right_extent.e_pblk -
755					(right_extent.e_lblk - range_start);
756			goal_distance = range_end - right_extent.e_lblk;
757		}
758
759		dbg_printf("%s: ino=%d rstart=%llu rend=%llu\n", __func__, ino,
760			   range_start, range_end);
761		err = 0;
762		if (range_start <= range_end) {
763			count = range_end - range_start + 1;
764			err = ext_falloc_helper(fs, flags, ino, inode, handle,
765						left_adjacent, right_adjacent,
766						range_start, count, goal);
767			if (err)
768				goto errout;
769		}
770
771		if (range_end == end)
772			break;
773
774		err = ext2fs_extent_goto(handle, right_extent.e_lblk);
775		if (err)
776			goto errout;
777		next = right_extent.e_lblk + right_extent.e_len;
778		left_extent = right_extent;
779		left_adjacent = &left_extent;
780		range_start = next;
781		goal = left_extent.e_pblk + (range_start - left_extent.e_lblk);
782		goal_distance = range_start - next;
783	} while (range_end < end);
784
785errout:
786	ext2fs_extent_free(handle);
787	return err;
788}
789
790/*
791 * Map physical blocks to a range of logical blocks within a file.  The range
792 * of logical blocks are (start, start + len).  If there are already extents,
793 * the mappings will try to extend the mappings; otherwise, it will try to map
794 * start as if logical block 0 points to goal.  If goal is ~0ULL, then the goal
795 * is calculated based on the inode group.
796 *
797 * Flags:
798 * - EXT2_FALLOCATE_ZERO_BLOCKS: Zero the blocks that are allocated.
799 * - EXT2_FALLOCATE_FORCE_INIT: Create only initialized extents.
800 * - EXT2_FALLOCATE_FORCE_UNINIT: Create only uninitialized extents.
801 * - EXT2_FALLOCATE_INIT_BEYOND_EOF: Create extents beyond EOF.
802 *
803 * If neither FORCE_INIT nor FORCE_UNINIT are specified, this function will
804 * try to expand any extents it finds, zeroing blocks as necessary.
805 */
806errcode_t ext2fs_fallocate(ext2_filsys fs, int flags, ext2_ino_t ino,
807			   struct ext2_inode *inode, blk64_t goal,
808			   blk64_t start, blk64_t len)
809{
810	struct ext2_inode	inode_buf;
811	blk64_t			blk, x;
812	errcode_t		err;
813
814	if (((flags & EXT2_FALLOCATE_FORCE_INIT) &&
815	    (flags & EXT2_FALLOCATE_FORCE_UNINIT)) ||
816	   (flags & ~EXT2_FALLOCATE_ALL_FLAGS))
817		return EXT2_ET_INVALID_ARGUMENT;
818
819	if (len > ext2fs_blocks_count(fs->super))
820		return EXT2_ET_BLOCK_ALLOC_FAIL;
821	else if (len == 0)
822		return 0;
823
824	/* Read inode structure if necessary */
825	if (!inode) {
826		err = ext2fs_read_inode(fs, ino, &inode_buf);
827		if (err)
828			return err;
829		inode = &inode_buf;
830	}
831	dbg_printf("%s: ino=%d start=%llu len=%llu goal=%llu\n", __func__, ino,
832		   start, len, goal);
833
834	if (inode->i_flags & EXT4_EXTENTS_FL) {
835		err = extent_fallocate(fs, flags, ino, inode, goal, start, len);
836		goto out;
837	}
838
839	/* XXX: Allocate a bunch of blocks the slow way */
840	for (blk = start; blk < start + len; blk++) {
841		err = ext2fs_bmap2(fs, ino, inode, NULL, 0, blk, 0, &x);
842		if (err)
843			return err;
844		if (x)
845			continue;
846
847		err = ext2fs_bmap2(fs, ino, inode, NULL,
848				   BMAP_ALLOC | BMAP_UNINIT | BMAP_ZERO, blk,
849				   0, &x);
850		if (err)
851			return err;
852	}
853
854out:
855	if (inode == &inode_buf)
856		ext2fs_write_inode(fs, ino, inode);
857	return err;
858}
859