readdir.c revision 3d268c9b136f51385f9d041f3f2424501b257388
1/*
2 *
3 * Copyright (C) 2011 Novell Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published by
7 * the Free Software Foundation.
8 */
9
10#include <linux/fs.h>
11#include <linux/slab.h>
12#include <linux/namei.h>
13#include <linux/file.h>
14#include <linux/xattr.h>
15#include <linux/rbtree.h>
16#include <linux/security.h>
17#include <linux/cred.h>
18#include "overlayfs.h"
19
20struct ovl_cache_entry {
21	const char *name;
22	unsigned int len;
23	unsigned int type;
24	u64 ino;
25	bool is_whiteout;
26	struct list_head l_node;
27	struct rb_node node;
28};
29
30struct ovl_dir_cache {
31	long refcount;
32	u64 version;
33	struct list_head entries;
34};
35
36struct ovl_readdir_data {
37	struct dir_context ctx;
38	bool is_merge;
39	struct rb_root *root;
40	struct list_head *list;
41	struct list_head *middle;
42	int count;
43	int err;
44};
45
46struct ovl_dir_file {
47	bool is_real;
48	bool is_upper;
49	struct ovl_dir_cache *cache;
50	struct ovl_cache_entry cursor;
51	struct file *realfile;
52	struct file *upperfile;
53};
54
55static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
56{
57	return container_of(n, struct ovl_cache_entry, node);
58}
59
60static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
61						    const char *name, int len)
62{
63	struct rb_node *node = root->rb_node;
64	int cmp;
65
66	while (node) {
67		struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
68
69		cmp = strncmp(name, p->name, len);
70		if (cmp > 0)
71			node = p->node.rb_right;
72		else if (cmp < 0 || len < p->len)
73			node = p->node.rb_left;
74		else
75			return p;
76	}
77
78	return NULL;
79}
80
81static struct ovl_cache_entry *ovl_cache_entry_new(const char *name, int len,
82						   u64 ino, unsigned int d_type)
83{
84	struct ovl_cache_entry *p;
85
86	p = kmalloc(sizeof(*p) + len + 1, GFP_KERNEL);
87	if (p) {
88		char *name_copy = (char *) (p + 1);
89		memcpy(name_copy, name, len);
90		name_copy[len] = '\0';
91		p->name = name_copy;
92		p->len = len;
93		p->type = d_type;
94		p->ino = ino;
95		p->is_whiteout = false;
96	}
97
98	return p;
99}
100
101static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
102				  const char *name, int len, u64 ino,
103				  unsigned int d_type)
104{
105	struct rb_node **newp = &rdd->root->rb_node;
106	struct rb_node *parent = NULL;
107	struct ovl_cache_entry *p;
108
109	while (*newp) {
110		int cmp;
111		struct ovl_cache_entry *tmp;
112
113		parent = *newp;
114		tmp = ovl_cache_entry_from_node(*newp);
115		cmp = strncmp(name, tmp->name, len);
116		if (cmp > 0)
117			newp = &tmp->node.rb_right;
118		else if (cmp < 0 || len < tmp->len)
119			newp = &tmp->node.rb_left;
120		else
121			return 0;
122	}
123
124	p = ovl_cache_entry_new(name, len, ino, d_type);
125	if (p == NULL)
126		return -ENOMEM;
127
128	list_add_tail(&p->l_node, rdd->list);
129	rb_link_node(&p->node, parent, newp);
130	rb_insert_color(&p->node, rdd->root);
131
132	return 0;
133}
134
135static int ovl_fill_lower(struct ovl_readdir_data *rdd,
136			  const char *name, int namelen,
137			  loff_t offset, u64 ino, unsigned int d_type)
138{
139	struct ovl_cache_entry *p;
140
141	p = ovl_cache_entry_find(rdd->root, name, namelen);
142	if (p) {
143		list_move_tail(&p->l_node, rdd->middle);
144	} else {
145		p = ovl_cache_entry_new(name, namelen, ino, d_type);
146		if (p == NULL)
147			rdd->err = -ENOMEM;
148		else
149			list_add_tail(&p->l_node, rdd->middle);
150	}
151
152	return rdd->err;
153}
154
155void ovl_cache_free(struct list_head *list)
156{
157	struct ovl_cache_entry *p;
158	struct ovl_cache_entry *n;
159
160	list_for_each_entry_safe(p, n, list, l_node)
161		kfree(p);
162
163	INIT_LIST_HEAD(list);
164}
165
166static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry)
167{
168	struct ovl_dir_cache *cache = od->cache;
169
170	list_del(&od->cursor.l_node);
171	WARN_ON(cache->refcount <= 0);
172	cache->refcount--;
173	if (!cache->refcount) {
174		if (ovl_dir_cache(dentry) == cache)
175			ovl_set_dir_cache(dentry, NULL);
176
177		ovl_cache_free(&cache->entries);
178		kfree(cache);
179	}
180}
181
182static int ovl_fill_merge(void *buf, const char *name, int namelen,
183			  loff_t offset, u64 ino, unsigned int d_type)
184{
185	struct ovl_readdir_data *rdd = buf;
186
187	rdd->count++;
188	if (!rdd->is_merge)
189		return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
190	else
191		return ovl_fill_lower(rdd, name, namelen, offset, ino, d_type);
192}
193
194static inline int ovl_dir_read(struct path *realpath,
195			       struct ovl_readdir_data *rdd)
196{
197	struct file *realfile;
198	int err;
199
200	realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY);
201	if (IS_ERR(realfile))
202		return PTR_ERR(realfile);
203
204	rdd->ctx.pos = 0;
205	do {
206		rdd->count = 0;
207		rdd->err = 0;
208		err = iterate_dir(realfile, &rdd->ctx);
209		if (err >= 0)
210			err = rdd->err;
211	} while (!err && rdd->count);
212	fput(realfile);
213
214	return err;
215}
216
217static void ovl_dir_reset(struct file *file)
218{
219	struct ovl_dir_file *od = file->private_data;
220	struct ovl_dir_cache *cache = od->cache;
221	struct dentry *dentry = file->f_path.dentry;
222	enum ovl_path_type type = ovl_path_type(dentry);
223
224	if (cache && ovl_dentry_version_get(dentry) != cache->version) {
225		ovl_cache_put(od, dentry);
226		od->cache = NULL;
227	}
228	WARN_ON(!od->is_real && type != OVL_PATH_MERGE);
229	if (od->is_real && type == OVL_PATH_MERGE)
230		od->is_real = false;
231}
232
233static int ovl_dir_mark_whiteouts(struct dentry *dir,
234				  struct ovl_readdir_data *rdd)
235{
236	struct ovl_cache_entry *p;
237	struct dentry *dentry;
238	const struct cred *old_cred;
239	struct cred *override_cred;
240
241	override_cred = prepare_creds();
242	if (!override_cred) {
243		ovl_cache_free(rdd->list);
244		return -ENOMEM;
245	}
246
247	/*
248	 * CAP_DAC_OVERRIDE for lookup
249	 */
250	cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
251	old_cred = override_creds(override_cred);
252
253	mutex_lock(&dir->d_inode->i_mutex);
254	list_for_each_entry(p, rdd->list, l_node) {
255		if (!p->name)
256			continue;
257
258		if (p->type != DT_CHR)
259			continue;
260
261		dentry = lookup_one_len(p->name, dir, p->len);
262		if (IS_ERR(dentry))
263			continue;
264
265		p->is_whiteout = ovl_is_whiteout(dentry);
266		dput(dentry);
267	}
268	mutex_unlock(&dir->d_inode->i_mutex);
269
270	revert_creds(old_cred);
271	put_cred(override_cred);
272
273	return 0;
274}
275
276static inline int ovl_dir_read_merged(struct path *upperpath,
277				      struct path *lowerpath,
278				      struct list_head *list)
279{
280	int err;
281	struct rb_root root = RB_ROOT;
282	struct list_head middle;
283	struct ovl_readdir_data rdd = {
284		.ctx.actor = ovl_fill_merge,
285		.list = list,
286		.root = &root,
287		.is_merge = false,
288	};
289
290	if (upperpath->dentry) {
291		err = ovl_dir_read(upperpath, &rdd);
292		if (err)
293			goto out;
294
295		if (lowerpath->dentry) {
296			err = ovl_dir_mark_whiteouts(upperpath->dentry, &rdd);
297			if (err)
298				goto out;
299		}
300	}
301	if (lowerpath->dentry) {
302		/*
303		 * Insert lowerpath entries before upperpath ones, this allows
304		 * offsets to be reasonably constant
305		 */
306		list_add(&middle, rdd.list);
307		rdd.middle = &middle;
308		rdd.is_merge = true;
309		err = ovl_dir_read(lowerpath, &rdd);
310		list_del(&middle);
311	}
312out:
313	return err;
314
315}
316
317static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
318{
319	struct ovl_cache_entry *p;
320	loff_t off = 0;
321
322	list_for_each_entry(p, &od->cache->entries, l_node) {
323		if (!p->name)
324			continue;
325		if (off >= pos)
326			break;
327		off++;
328	}
329	list_move_tail(&od->cursor.l_node, &p->l_node);
330}
331
332static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
333{
334	int res;
335	struct path lowerpath;
336	struct path upperpath;
337	struct ovl_dir_cache *cache;
338
339	cache = ovl_dir_cache(dentry);
340	if (cache && ovl_dentry_version_get(dentry) == cache->version) {
341		cache->refcount++;
342		return cache;
343	}
344	ovl_set_dir_cache(dentry, NULL);
345
346	cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
347	if (!cache)
348		return ERR_PTR(-ENOMEM);
349
350	cache->refcount = 1;
351	INIT_LIST_HEAD(&cache->entries);
352
353	ovl_path_lower(dentry, &lowerpath);
354	ovl_path_upper(dentry, &upperpath);
355
356	res = ovl_dir_read_merged(&upperpath, &lowerpath, &cache->entries);
357	if (res) {
358		ovl_cache_free(&cache->entries);
359		kfree(cache);
360		return ERR_PTR(res);
361	}
362
363	cache->version = ovl_dentry_version_get(dentry);
364	ovl_set_dir_cache(dentry, cache);
365
366	return cache;
367}
368
369static int ovl_iterate(struct file *file, struct dir_context *ctx)
370{
371	struct ovl_dir_file *od = file->private_data;
372	struct dentry *dentry = file->f_path.dentry;
373
374	if (!ctx->pos)
375		ovl_dir_reset(file);
376
377	if (od->is_real)
378		return iterate_dir(od->realfile, ctx);
379
380	if (!od->cache) {
381		struct ovl_dir_cache *cache;
382
383		cache = ovl_cache_get(dentry);
384		if (IS_ERR(cache))
385			return PTR_ERR(cache);
386
387		od->cache = cache;
388		ovl_seek_cursor(od, ctx->pos);
389	}
390
391	while (od->cursor.l_node.next != &od->cache->entries) {
392		struct ovl_cache_entry *p;
393
394		p = list_entry(od->cursor.l_node.next, struct ovl_cache_entry, l_node);
395		/* Skip cursors */
396		if (p->name) {
397			if (!p->is_whiteout) {
398				if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
399					break;
400			}
401			ctx->pos++;
402		}
403		list_move(&od->cursor.l_node, &p->l_node);
404	}
405	return 0;
406}
407
408static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
409{
410	loff_t res;
411	struct ovl_dir_file *od = file->private_data;
412
413	mutex_lock(&file_inode(file)->i_mutex);
414	if (!file->f_pos)
415		ovl_dir_reset(file);
416
417	if (od->is_real) {
418		res = vfs_llseek(od->realfile, offset, origin);
419		file->f_pos = od->realfile->f_pos;
420	} else {
421		res = -EINVAL;
422
423		switch (origin) {
424		case SEEK_CUR:
425			offset += file->f_pos;
426			break;
427		case SEEK_SET:
428			break;
429		default:
430			goto out_unlock;
431		}
432		if (offset < 0)
433			goto out_unlock;
434
435		if (offset != file->f_pos) {
436			file->f_pos = offset;
437			if (od->cache)
438				ovl_seek_cursor(od, offset);
439		}
440		res = offset;
441	}
442out_unlock:
443	mutex_unlock(&file_inode(file)->i_mutex);
444
445	return res;
446}
447
448static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
449			 int datasync)
450{
451	struct ovl_dir_file *od = file->private_data;
452	struct dentry *dentry = file->f_path.dentry;
453	struct file *realfile = od->realfile;
454
455	/*
456	 * Need to check if we started out being a lower dir, but got copied up
457	 */
458	if (!od->is_upper && ovl_path_type(dentry) == OVL_PATH_MERGE) {
459		struct inode *inode = file_inode(file);
460
461		realfile = od->upperfile;
462		if (!realfile) {
463			struct path upperpath;
464
465			ovl_path_upper(dentry, &upperpath);
466			realfile = ovl_path_open(&upperpath, O_RDONLY);
467			mutex_lock(&inode->i_mutex);
468			if (!od->upperfile) {
469				if (IS_ERR(realfile)) {
470					mutex_unlock(&inode->i_mutex);
471					return PTR_ERR(realfile);
472				}
473				od->upperfile = realfile;
474			} else {
475				/* somebody has beaten us to it */
476				if (!IS_ERR(realfile))
477					fput(realfile);
478				realfile = od->upperfile;
479			}
480			mutex_unlock(&inode->i_mutex);
481		}
482	}
483
484	return vfs_fsync_range(realfile, start, end, datasync);
485}
486
487static int ovl_dir_release(struct inode *inode, struct file *file)
488{
489	struct ovl_dir_file *od = file->private_data;
490
491	if (od->cache) {
492		mutex_lock(&inode->i_mutex);
493		ovl_cache_put(od, file->f_path.dentry);
494		mutex_unlock(&inode->i_mutex);
495	}
496	fput(od->realfile);
497	if (od->upperfile)
498		fput(od->upperfile);
499	kfree(od);
500
501	return 0;
502}
503
504static int ovl_dir_open(struct inode *inode, struct file *file)
505{
506	struct path realpath;
507	struct file *realfile;
508	struct ovl_dir_file *od;
509	enum ovl_path_type type;
510
511	od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
512	if (!od)
513		return -ENOMEM;
514
515	type = ovl_path_real(file->f_path.dentry, &realpath);
516	realfile = ovl_path_open(&realpath, file->f_flags);
517	if (IS_ERR(realfile)) {
518		kfree(od);
519		return PTR_ERR(realfile);
520	}
521	INIT_LIST_HEAD(&od->cursor.l_node);
522	od->realfile = realfile;
523	od->is_real = (type != OVL_PATH_MERGE);
524	od->is_upper = (type != OVL_PATH_LOWER);
525	file->private_data = od;
526
527	return 0;
528}
529
530const struct file_operations ovl_dir_operations = {
531	.read		= generic_read_dir,
532	.open		= ovl_dir_open,
533	.iterate	= ovl_iterate,
534	.llseek		= ovl_dir_llseek,
535	.fsync		= ovl_dir_fsync,
536	.release	= ovl_dir_release,
537};
538
539int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
540{
541	int err;
542	struct path lowerpath;
543	struct path upperpath;
544	struct ovl_cache_entry *p;
545
546	ovl_path_upper(dentry, &upperpath);
547	ovl_path_lower(dentry, &lowerpath);
548
549	err = ovl_dir_read_merged(&upperpath, &lowerpath, list);
550	if (err)
551		return err;
552
553	err = 0;
554
555	list_for_each_entry(p, list, l_node) {
556		if (p->is_whiteout)
557			continue;
558
559		if (p->name[0] == '.') {
560			if (p->len == 1)
561				continue;
562			if (p->len == 2 && p->name[1] == '.')
563				continue;
564		}
565		err = -ENOTEMPTY;
566		break;
567	}
568
569	return err;
570}
571
572void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
573{
574	struct ovl_cache_entry *p;
575
576	mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_PARENT);
577	list_for_each_entry(p, list, l_node) {
578		struct dentry *dentry;
579
580		if (!p->is_whiteout)
581			continue;
582
583		dentry = lookup_one_len(p->name, upper, p->len);
584		if (IS_ERR(dentry)) {
585			pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n",
586			       upper->d_name.name, p->len, p->name,
587			       (int) PTR_ERR(dentry));
588			continue;
589		}
590		ovl_cleanup(upper->d_inode, dentry);
591		dput(dentry);
592	}
593	mutex_unlock(&upper->d_inode->i_mutex);
594}
595