xattr.c revision 010f5a21a323e7383e067009a7785462883fe5ea
1/*
2 * linux/fs/reiserfs/xattr.c
3 *
4 * Copyright (c) 2002 by Jeff Mahoney, <jeffm@suse.com>
5 *
6 */
7
8/*
9 * In order to implement EA/ACLs in a clean, backwards compatible manner,
10 * they are implemented as files in a "private" directory.
11 * Each EA is in its own file, with the directory layout like so (/ is assumed
12 * to be relative to fs root). Inside the /.reiserfs_priv/xattrs directory,
13 * directories named using the capital-hex form of the objectid and
14 * generation number are used. Inside each directory are individual files
15 * named with the name of the extended attribute.
16 *
17 * So, for objectid 12648430, we could have:
18 * /.reiserfs_priv/xattrs/C0FFEE.0/system.posix_acl_access
19 * /.reiserfs_priv/xattrs/C0FFEE.0/system.posix_acl_default
20 * /.reiserfs_priv/xattrs/C0FFEE.0/user.Content-Type
21 * .. or similar.
22 *
23 * The file contents are the text of the EA. The size is known based on the
24 * stat data describing the file.
25 *
26 * In the case of system.posix_acl_access and system.posix_acl_default, since
27 * these are special cases for filesystem ACLs, they are interpreted by the
28 * kernel, in addition, they are negatively and positively cached and attached
29 * to the inode so that unnecessary lookups are avoided.
30 */
31
32#include <linux/reiserfs_fs.h>
33#include <linux/capability.h>
34#include <linux/dcache.h>
35#include <linux/namei.h>
36#include <linux/errno.h>
37#include <linux/fs.h>
38#include <linux/file.h>
39#include <linux/pagemap.h>
40#include <linux/xattr.h>
41#include <linux/reiserfs_xattr.h>
42#include <linux/reiserfs_acl.h>
43#include <asm/uaccess.h>
44#include <net/checksum.h>
45#include <linux/smp_lock.h>
46#include <linux/stat.h>
47
/*
 * Private lookup flags.  They travel in the same "flags" argument as
 * XATTR_CREATE / XATTR_REPLACE in the helpers below.
 */
#define FL_READONLY	0x80	/* look up only; nothing is created */
#define FL_DIR_SEM_HELD	0x100	/* not referenced in the code visible here */

/* Directory names of the private tree, relative to the fs root */
#define PRIVROOT_NAME	".reiserfs_priv"
#define XAROOT_NAME	"xattrs"

/* Defined near the end of the file, next to the handler list it walks */
static struct reiserfs_xattr_handler *
find_xattr_handler_prefix(const char *prefix);
55
56/* Returns the dentry referring to the root of the extended attribute
57 * directory tree. If it has already been retrieved, it is used. If it
58 * hasn't been created and the flags indicate creation is allowed, we
59 * attempt to create it. On error, we return a pointer-encoded error.
60 */
61static struct dentry *get_xa_root(struct super_block *sb, int flags)
62{
63	struct dentry *privroot = dget(REISERFS_SB(sb)->priv_root);
64	struct dentry *xaroot;
65
66	/* This needs to be created at mount-time */
67	if (!privroot)
68		return ERR_PTR(-ENODATA);
69
70	mutex_lock_nested(&privroot->d_inode->i_mutex, I_MUTEX_XATTR);
71	if (REISERFS_SB(sb)->xattr_root) {
72		xaroot = dget(REISERFS_SB(sb)->xattr_root);
73		goto out;
74	}
75
76	xaroot = lookup_one_len(XAROOT_NAME, privroot, strlen(XAROOT_NAME));
77	if (IS_ERR(xaroot)) {
78		goto out;
79	} else if (!xaroot->d_inode) {
80		int err = -ENODATA;
81		if (flags == 0 || flags & XATTR_CREATE)
82			err = privroot->d_inode->i_op->mkdir(privroot->d_inode,
83			                                     xaroot, 0700);
84		if (err) {
85			dput(xaroot);
86			xaroot = ERR_PTR(err);
87			goto out;
88		}
89	}
90	REISERFS_SB(sb)->xattr_root = dget(xaroot);
91
92      out:
93	mutex_unlock(&privroot->d_inode->i_mutex);
94	dput(privroot);
95	return xaroot;
96}
97
98/* Opens the directory corresponding to the inode's extended attribute store.
99 * If flags allow, the tree to the directory may be created. If creation is
100 * prohibited, -ENODATA is returned. */
101static struct dentry *open_xa_dir(const struct inode *inode, int flags)
102{
103	struct dentry *xaroot, *xadir;
104	char namebuf[17];
105
106	xaroot = get_xa_root(inode->i_sb, flags);
107	if (IS_ERR(xaroot))
108		return xaroot;
109
110	/* ok, we have xaroot open */
111	snprintf(namebuf, sizeof(namebuf), "%X.%X",
112		 le32_to_cpu(INODE_PKEY(inode)->k_objectid),
113		 inode->i_generation);
114	xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf));
115	if (IS_ERR(xadir)) {
116		dput(xaroot);
117		return xadir;
118	}
119
120	if (!xadir->d_inode) {
121		int err;
122		if (flags == 0 || flags & XATTR_CREATE) {
123			/* Although there is nothing else trying to create this directory,
124			 * another directory with the same hash may be created, so we need
125			 * to protect against that */
126			err =
127			    xaroot->d_inode->i_op->mkdir(xaroot->d_inode, xadir,
128							 0700);
129			if (err) {
130				dput(xaroot);
131				dput(xadir);
132				return ERR_PTR(err);
133			}
134		}
135		if (!xadir->d_inode) {
136			dput(xaroot);
137			dput(xadir);
138			return ERR_PTR(-ENODATA);
139		}
140	}
141
142	dput(xaroot);
143	return xadir;
144}
145
146/* Returns a dentry corresponding to a specific extended attribute file
147 * for the inode. If flags allow, the file is created. Otherwise, a
148 * valid or negative dentry, or an error is returned. */
149static struct dentry *get_xa_file_dentry(const struct inode *inode,
150					 const char *name, int flags)
151{
152	struct dentry *xadir, *xafile;
153	int err = 0;
154
155	xadir = open_xa_dir(inode, flags);
156	if (IS_ERR(xadir)) {
157		return ERR_CAST(xadir);
158	} else if (!xadir->d_inode) {
159		dput(xadir);
160		return ERR_PTR(-ENODATA);
161	}
162
163	xafile = lookup_one_len(name, xadir, strlen(name));
164	if (IS_ERR(xafile)) {
165		dput(xadir);
166		return ERR_CAST(xafile);
167	}
168
169	if (xafile->d_inode) {	/* file exists */
170		if (flags & XATTR_CREATE) {
171			err = -EEXIST;
172			dput(xafile);
173			goto out;
174		}
175	} else if (flags & XATTR_REPLACE || flags & FL_READONLY) {
176		goto out;
177	} else {
178		/* inode->i_mutex is down, so nothing else can try to create
179		 * the same xattr */
180		err = xadir->d_inode->i_op->create(xadir->d_inode, xafile,
181						   0700 | S_IFREG, NULL);
182
183		if (err) {
184			dput(xafile);
185			goto out;
186		}
187	}
188
189      out:
190	dput(xadir);
191	if (err)
192		xafile = ERR_PTR(err);
193	else if (!xafile->d_inode) {
194		dput(xafile);
195		xafile = ERR_PTR(-ENODATA);
196	}
197	return xafile;
198}
199
200/*
201 * this is very similar to fs/reiserfs/dir.c:reiserfs_readdir, but
202 * we need to drop the path before calling the filldir struct.  That
203 * would be a big performance hit to the non-xattr case, so I've copied
204 * the whole thing for now. --clm
205 *
206 * the big difference is that I go backwards through the directory,
207 * and don't mess with f->f_pos, but the idea is the same.  Do some
208 * action on each and every entry in the directory.
209 *
210 * we're called with i_mutex held, so there are no worries about the directory
211 * changing underneath us.
212 */
213static int __xattr_readdir(struct inode *inode, void *dirent, filldir_t filldir)
214{
215	struct cpu_key pos_key;	/* key of current position in the directory (key of directory entry) */
216	INITIALIZE_PATH(path_to_entry);
217	struct buffer_head *bh;
218	int entry_num;
219	struct item_head *ih, tmp_ih;
220	int search_res;
221	char *local_buf;
222	loff_t next_pos;
223	char small_buf[32];	/* avoid kmalloc if we can */
224	struct reiserfs_de_head *deh;
225	int d_reclen;
226	char *d_name;
227	off_t d_off;
228	ino_t d_ino;
229	struct reiserfs_dir_entry de;
230
231	/* form key for search the next directory entry using f_pos field of
232	   file structure */
233	next_pos = max_reiserfs_offset(inode);
234
235	while (1) {
236	      research:
237		if (next_pos <= DOT_DOT_OFFSET)
238			break;
239		make_cpu_key(&pos_key, inode, next_pos, TYPE_DIRENTRY, 3);
240
241		search_res =
242		    search_by_entry_key(inode->i_sb, &pos_key, &path_to_entry,
243					&de);
244		if (search_res == IO_ERROR) {
245			// FIXME: we could just skip part of directory which could
246			// not be read
247			pathrelse(&path_to_entry);
248			return -EIO;
249		}
250
251		if (search_res == NAME_NOT_FOUND)
252			de.de_entry_num--;
253
254		set_de_name_and_namelen(&de);
255		entry_num = de.de_entry_num;
256		deh = &(de.de_deh[entry_num]);
257
258		bh = de.de_bh;
259		ih = de.de_ih;
260
261		if (!is_direntry_le_ih(ih)) {
262			reiserfs_error(inode->i_sb, "jdm-20000",
263				       "not direntry %h", ih);
264			break;
265		}
266		copy_item_head(&tmp_ih, ih);
267
268		/* we must have found item, that is item of this directory, */
269		RFALSE(COMP_SHORT_KEYS(&(ih->ih_key), &pos_key),
270		       "vs-9000: found item %h does not match to dir we readdir %K",
271		       ih, &pos_key);
272
273		if (deh_offset(deh) <= DOT_DOT_OFFSET) {
274			break;
275		}
276
277		/* look for the previous entry in the directory */
278		next_pos = deh_offset(deh) - 1;
279
280		if (!de_visible(deh))
281			/* it is hidden entry */
282			continue;
283
284		d_reclen = entry_length(bh, ih, entry_num);
285		d_name = B_I_DEH_ENTRY_FILE_NAME(bh, ih, deh);
286		d_off = deh_offset(deh);
287		d_ino = deh_objectid(deh);
288
289		if (!d_name[d_reclen - 1])
290			d_reclen = strlen(d_name);
291
292		if (d_reclen > REISERFS_MAX_NAME(inode->i_sb->s_blocksize)) {
293			/* too big to send back to VFS */
294			continue;
295		}
296
297		/* Ignore the .reiserfs_priv entry */
298		if (reiserfs_xattrs(inode->i_sb) &&
299		    !old_format_only(inode->i_sb) &&
300		    deh_objectid(deh) ==
301		    le32_to_cpu(INODE_PKEY
302				(REISERFS_SB(inode->i_sb)->priv_root->d_inode)->
303				k_objectid))
304			continue;
305
306		if (d_reclen <= 32) {
307			local_buf = small_buf;
308		} else {
309			local_buf = kmalloc(d_reclen, GFP_NOFS);
310			if (!local_buf) {
311				pathrelse(&path_to_entry);
312				return -ENOMEM;
313			}
314			if (item_moved(&tmp_ih, &path_to_entry)) {
315				kfree(local_buf);
316
317				/* sigh, must retry.  Do this same offset again */
318				next_pos = d_off;
319				goto research;
320			}
321		}
322
323		// Note, that we copy name to user space via temporary
324		// buffer (local_buf) because filldir will block if
325		// user space buffer is swapped out. At that time
326		// entry can move to somewhere else
327		memcpy(local_buf, d_name, d_reclen);
328
329		/* the filldir function might need to start transactions,
330		 * or do who knows what.  Release the path now that we've
331		 * copied all the important stuff out of the deh
332		 */
333		pathrelse(&path_to_entry);
334
335		if (filldir(dirent, local_buf, d_reclen, d_off, d_ino,
336			    DT_UNKNOWN) < 0) {
337			if (local_buf != small_buf) {
338				kfree(local_buf);
339			}
340			goto end;
341		}
342		if (local_buf != small_buf) {
343			kfree(local_buf);
344		}
345	}			/* while */
346
347      end:
348	pathrelse(&path_to_entry);
349	return 0;
350}
351
352/*
353 * this could be done with dedicated readdir ops for the xattr files,
354 * but I want to get something working asap
355 * this is stolen from vfs_readdir
356 *
357 */
358static
359int xattr_readdir(struct inode *inode, filldir_t filler, void *buf)
360{
361	int res = -ENOENT;
362	mutex_lock_nested(&inode->i_mutex, I_MUTEX_XATTR);
363	if (!IS_DEADDIR(inode)) {
364		lock_kernel();
365		res = __xattr_readdir(inode, buf, filler);
366		unlock_kernel();
367	}
368	mutex_unlock(&inode->i_mutex);
369	return res;
370}
371
/* Internal operations on file data */

/*
 * Release a page obtained from reiserfs_get_page(): drop the kernel
 * mapping first, then the page cache reference.
 */
static inline void reiserfs_put_page(struct page *page)
{
	kunmap(page);
	page_cache_release(page);
}
378
379static struct page *reiserfs_get_page(struct inode *dir, size_t n)
380{
381	struct address_space *mapping = dir->i_mapping;
382	struct page *page;
383	/* We can deadlock if we try to free dentries,
384	   and an unlink/rmdir has just occured - GFP_NOFS avoids this */
385	mapping_set_gfp_mask(mapping, GFP_NOFS);
386	page = read_mapping_page(mapping, n >> PAGE_CACHE_SHIFT, NULL);
387	if (!IS_ERR(page)) {
388		kmap(page);
389		if (PageError(page))
390			goto fail;
391	}
392	return page;
393
394      fail:
395	reiserfs_put_page(page);
396	return ERR_PTR(-EIO);
397}
398
399static inline __u32 xattr_hash(const char *msg, int len)
400{
401	return csum_partial(msg, len, 0);
402}
403
/*
 * Page write helpers used by reiserfs_xattr_set() below.  They are not
 * defined in the part of the file visible here.
 */
int reiserfs_prepare_write(struct file *f, struct page *page,
			   unsigned from, unsigned to);
int reiserfs_commit_write(struct file *f, struct page *page,
			  unsigned from, unsigned to);
408
409
410/* Generic extended attribute operations that can be used by xa plugins */
411
412/*
413 * inode->i_mutex: down
414 */
415int
416reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
417		   size_t buffer_size, int flags)
418{
419	int err = 0;
420	struct dentry *dentry;
421	struct page *page;
422	char *data;
423	size_t file_pos = 0;
424	size_t buffer_pos = 0;
425	struct iattr newattrs;
426	__u32 xahash = 0;
427
428	if (get_inode_sd_version(inode) == STAT_DATA_V1)
429		return -EOPNOTSUPP;
430
431	/* Empty xattrs are ok, they're just empty files, no hash */
432	if (buffer && buffer_size)
433		xahash = xattr_hash(buffer, buffer_size);
434
435	dentry = get_xa_file_dentry(inode, name, flags);
436	if (IS_ERR(dentry)) {
437		err = PTR_ERR(dentry);
438		goto out;
439	}
440
441	REISERFS_I(inode)->i_flags |= i_has_xattr_dir;
442
443	/* Resize it so we're ok to write there */
444	newattrs.ia_size = buffer_size;
445	newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
446	mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR);
447	err = notify_change(dentry, &newattrs);
448	if (err)
449		goto out_filp;
450
451	while (buffer_pos < buffer_size || buffer_pos == 0) {
452		size_t chunk;
453		size_t skip = 0;
454		size_t page_offset = (file_pos & (PAGE_CACHE_SIZE - 1));
455		if (buffer_size - buffer_pos > PAGE_CACHE_SIZE)
456			chunk = PAGE_CACHE_SIZE;
457		else
458			chunk = buffer_size - buffer_pos;
459
460		page = reiserfs_get_page(dentry->d_inode, file_pos);
461		if (IS_ERR(page)) {
462			err = PTR_ERR(page);
463			goto out_filp;
464		}
465
466		lock_page(page);
467		data = page_address(page);
468
469		if (file_pos == 0) {
470			struct reiserfs_xattr_header *rxh;
471			skip = file_pos = sizeof(struct reiserfs_xattr_header);
472			if (chunk + skip > PAGE_CACHE_SIZE)
473				chunk = PAGE_CACHE_SIZE - skip;
474			rxh = (struct reiserfs_xattr_header *)data;
475			rxh->h_magic = cpu_to_le32(REISERFS_XATTR_MAGIC);
476			rxh->h_hash = cpu_to_le32(xahash);
477		}
478
479		err = reiserfs_prepare_write(NULL, page, page_offset,
480					    page_offset + chunk + skip);
481		if (!err) {
482			if (buffer)
483				memcpy(data + skip, buffer + buffer_pos, chunk);
484			err = reiserfs_commit_write(NULL, page, page_offset,
485						    page_offset + chunk +
486						    skip);
487		}
488		unlock_page(page);
489		reiserfs_put_page(page);
490		buffer_pos += chunk;
491		file_pos += chunk;
492		skip = 0;
493		if (err || buffer_size == 0 || !buffer)
494			break;
495	}
496
497	/* We can't mark the inode dirty if it's not hashed. This is the case
498	 * when we're inheriting the default ACL. If we dirty it, the inode
499	 * gets marked dirty, but won't (ever) make it onto the dirty list until
500	 * it's synced explicitly to clear I_DIRTY. This is bad. */
501	if (!hlist_unhashed(&inode->i_hash)) {
502		inode->i_ctime = CURRENT_TIME_SEC;
503		mark_inode_dirty(inode);
504	}
505
506      out_filp:
507	mutex_unlock(&dentry->d_inode->i_mutex);
508	dput(dentry);
509
510      out:
511	return err;
512}
513
514/*
515 * inode->i_mutex: down
516 */
517int
518reiserfs_xattr_get(const struct inode *inode, const char *name, void *buffer,
519		   size_t buffer_size)
520{
521	ssize_t err = 0;
522	struct dentry *dentry;
523	size_t isize;
524	size_t file_pos = 0;
525	size_t buffer_pos = 0;
526	struct page *page;
527	__u32 hash = 0;
528
529	if (name == NULL)
530		return -EINVAL;
531
532	/* We can't have xattrs attached to v1 items since they don't have
533	 * generation numbers */
534	if (get_inode_sd_version(inode) == STAT_DATA_V1)
535		return -EOPNOTSUPP;
536
537	dentry = get_xa_file_dentry(inode, name, FL_READONLY);
538	if (IS_ERR(dentry)) {
539		err = PTR_ERR(dentry);
540		goto out;
541	}
542
543	isize = i_size_read(dentry->d_inode);
544	REISERFS_I(inode)->i_flags |= i_has_xattr_dir;
545
546	/* Just return the size needed */
547	if (buffer == NULL) {
548		err = isize - sizeof(struct reiserfs_xattr_header);
549		goto out_dput;
550	}
551
552	if (buffer_size < isize - sizeof(struct reiserfs_xattr_header)) {
553		err = -ERANGE;
554		goto out_dput;
555	}
556
557	while (file_pos < isize) {
558		size_t chunk;
559		char *data;
560		size_t skip = 0;
561		if (isize - file_pos > PAGE_CACHE_SIZE)
562			chunk = PAGE_CACHE_SIZE;
563		else
564			chunk = isize - file_pos;
565
566		page = reiserfs_get_page(dentry->d_inode, file_pos);
567		if (IS_ERR(page)) {
568			err = PTR_ERR(page);
569			goto out_dput;
570		}
571
572		lock_page(page);
573		data = page_address(page);
574		if (file_pos == 0) {
575			struct reiserfs_xattr_header *rxh =
576			    (struct reiserfs_xattr_header *)data;
577			skip = file_pos = sizeof(struct reiserfs_xattr_header);
578			chunk -= skip;
579			/* Magic doesn't match up.. */
580			if (rxh->h_magic != cpu_to_le32(REISERFS_XATTR_MAGIC)) {
581				unlock_page(page);
582				reiserfs_put_page(page);
583				reiserfs_warning(inode->i_sb, "jdm-20001",
584						 "Invalid magic for xattr (%s) "
585						 "associated with %k", name,
586						 INODE_PKEY(inode));
587				err = -EIO;
588				goto out_dput;
589			}
590			hash = le32_to_cpu(rxh->h_hash);
591		}
592		memcpy(buffer + buffer_pos, data + skip, chunk);
593		unlock_page(page);
594		reiserfs_put_page(page);
595		file_pos += chunk;
596		buffer_pos += chunk;
597		skip = 0;
598	}
599	err = isize - sizeof(struct reiserfs_xattr_header);
600
601	if (xattr_hash(buffer, isize - sizeof(struct reiserfs_xattr_header)) !=
602	    hash) {
603		reiserfs_warning(inode->i_sb, "jdm-20002",
604				 "Invalid hash for xattr (%s) associated "
605				 "with %k", name, INODE_PKEY(inode));
606		err = -EIO;
607	}
608
609      out_dput:
610	dput(dentry);
611
612      out:
613	return err;
614}
615
616static int
617__reiserfs_xattr_del(struct dentry *xadir, const char *name, int namelen)
618{
619	struct dentry *dentry;
620	struct inode *dir = xadir->d_inode;
621	int err = 0;
622
623	dentry = lookup_one_len(name, xadir, namelen);
624	if (IS_ERR(dentry)) {
625		err = PTR_ERR(dentry);
626		goto out;
627	} else if (!dentry->d_inode) {
628		err = -ENODATA;
629		goto out_file;
630	}
631
632	/* Skip directories.. */
633	if (S_ISDIR(dentry->d_inode->i_mode))
634		goto out_file;
635
636	if (!is_reiserfs_priv_object(dentry->d_inode)) {
637		reiserfs_error(dir->i_sb, "jdm-20003",
638			       "OID %08x [%.*s/%.*s] doesn't have "
639			       "priv flag set [parent is %sset].",
640			       le32_to_cpu(INODE_PKEY(dentry->d_inode)->
641					   k_objectid), xadir->d_name.len,
642			       xadir->d_name.name, namelen, name,
643			       is_reiserfs_priv_object(xadir->d_inode) ? "" :
644			       "not ");
645		dput(dentry);
646		return -EIO;
647	}
648
649	err = dir->i_op->unlink(dir, dentry);
650	if (!err)
651		d_delete(dentry);
652
653      out_file:
654	dput(dentry);
655
656      out:
657	return err;
658}
659
660int reiserfs_xattr_del(struct inode *inode, const char *name)
661{
662	struct dentry *dir;
663	int err;
664
665	dir = open_xa_dir(inode, FL_READONLY);
666	if (IS_ERR(dir)) {
667		err = PTR_ERR(dir);
668		goto out;
669	}
670
671	err = __reiserfs_xattr_del(dir, name, strlen(name));
672	dput(dir);
673
674	if (!err) {
675		inode->i_ctime = CURRENT_TIME_SEC;
676		mark_inode_dirty(inode);
677	}
678
679      out:
680	return err;
681}
682
683/* The following are side effects of other operations that aren't explicitly
684 * modifying extended attributes. This includes operations such as permissions
685 * or ownership changes, object deletions, etc. */
686
687static int
688reiserfs_delete_xattrs_filler(void *buf, const char *name, int namelen,
689			      loff_t offset, u64 ino, unsigned int d_type)
690{
691	struct dentry *xadir = (struct dentry *)buf;
692
693	return __reiserfs_xattr_del(xadir, name, namelen);
694
695}
696
697/* This is called w/ inode->i_mutex downed */
698int reiserfs_delete_xattrs(struct inode *inode)
699{
700	struct dentry *dir, *root;
701	int err = 0;
702
703	/* Skip out, an xattr has no xattrs associated with it */
704	if (is_reiserfs_priv_object(inode) ||
705	    get_inode_sd_version(inode) == STAT_DATA_V1 ||
706	    !reiserfs_xattrs(inode->i_sb)) {
707		return 0;
708	}
709	reiserfs_read_lock_xattrs(inode->i_sb);
710	dir = open_xa_dir(inode, FL_READONLY);
711	reiserfs_read_unlock_xattrs(inode->i_sb);
712	if (IS_ERR(dir)) {
713		err = PTR_ERR(dir);
714		goto out;
715	} else if (!dir->d_inode) {
716		dput(dir);
717		return 0;
718	}
719
720	lock_kernel();
721	err = xattr_readdir(dir->d_inode, reiserfs_delete_xattrs_filler, dir);
722	if (err) {
723		unlock_kernel();
724		goto out_dir;
725	}
726
727	/* Leftovers besides . and .. -- that's not good. */
728	if (dir->d_inode->i_nlink <= 2) {
729		root = get_xa_root(inode->i_sb, XATTR_REPLACE);
730		reiserfs_write_lock_xattrs(inode->i_sb);
731		err = vfs_rmdir(root->d_inode, dir);
732		reiserfs_write_unlock_xattrs(inode->i_sb);
733		dput(root);
734	} else {
735		reiserfs_warning(inode->i_sb, "jdm-20006",
736				 "Couldn't remove all entries in directory");
737	}
738	unlock_kernel();
739
740      out_dir:
741	dput(dir);
742
743      out:
744	if (!err)
745		REISERFS_I(inode)->i_flags =
746		    REISERFS_I(inode)->i_flags & ~i_has_xattr_dir;
747	return err;
748}
749
750struct reiserfs_chown_buf {
751	struct inode *inode;
752	struct dentry *xadir;
753	struct iattr *attrs;
754};
755
756/* XXX: If there is a better way to do this, I'd love to hear about it */
757static int
758reiserfs_chown_xattrs_filler(void *buf, const char *name, int namelen,
759			     loff_t offset, u64 ino, unsigned int d_type)
760{
761	struct reiserfs_chown_buf *chown_buf = (struct reiserfs_chown_buf *)buf;
762	struct dentry *xafile, *xadir = chown_buf->xadir;
763	struct iattr *attrs = chown_buf->attrs;
764	int err = 0;
765
766	xafile = lookup_one_len(name, xadir, namelen);
767	if (IS_ERR(xafile))
768		return PTR_ERR(xafile);
769	else if (!xafile->d_inode) {
770		dput(xafile);
771		return -ENODATA;
772	}
773
774	if (!S_ISDIR(xafile->d_inode->i_mode))
775		err = notify_change(xafile, attrs);
776	dput(xafile);
777
778	return err;
779}
780
781int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs)
782{
783	struct dentry *dir;
784	int err = 0;
785	struct reiserfs_chown_buf buf;
786	unsigned int ia_valid = attrs->ia_valid;
787
788	/* Skip out, an xattr has no xattrs associated with it */
789	if (is_reiserfs_priv_object(inode) ||
790	    get_inode_sd_version(inode) == STAT_DATA_V1 ||
791	    !reiserfs_xattrs(inode->i_sb)) {
792		return 0;
793	}
794	reiserfs_read_lock_xattrs(inode->i_sb);
795	dir = open_xa_dir(inode, FL_READONLY);
796	reiserfs_read_unlock_xattrs(inode->i_sb);
797	if (IS_ERR(dir)) {
798		if (PTR_ERR(dir) != -ENODATA)
799			err = PTR_ERR(dir);
800		goto out;
801	} else if (!dir->d_inode) {
802		dput(dir);
803		goto out;
804	}
805
806	lock_kernel();
807
808	attrs->ia_valid &= (ATTR_UID | ATTR_GID | ATTR_CTIME);
809	buf.xadir = dir;
810	buf.attrs = attrs;
811	buf.inode = inode;
812
813	err = xattr_readdir(dir->d_inode, reiserfs_chown_xattrs_filler, &buf);
814	if (err) {
815		unlock_kernel();
816		goto out_dir;
817	}
818
819	err = notify_change(dir, attrs);
820	unlock_kernel();
821
822      out_dir:
823	dput(dir);
824
825      out:
826	attrs->ia_valid = ia_valid;
827	return err;
828}
829
830/* Actual operations that are exported to VFS-land */
831
832/*
833 * Inode operation getxattr()
834 * Preliminary locking: we down dentry->d_inode->i_mutex
835 */
836ssize_t
837reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer,
838		  size_t size)
839{
840	struct reiserfs_xattr_handler *xah = find_xattr_handler_prefix(name);
841	int err;
842
843	if (!xah || !reiserfs_xattrs(dentry->d_sb) ||
844	    get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
845		return -EOPNOTSUPP;
846
847	reiserfs_read_lock_xattr_i(dentry->d_inode);
848	reiserfs_read_lock_xattrs(dentry->d_sb);
849	err = xah->get(dentry->d_inode, name, buffer, size);
850	reiserfs_read_unlock_xattrs(dentry->d_sb);
851	reiserfs_read_unlock_xattr_i(dentry->d_inode);
852	return err;
853}
854
855/*
856 * Inode operation setxattr()
857 *
858 * dentry->d_inode->i_mutex down
859 */
860int
861reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
862		  size_t size, int flags)
863{
864	struct reiserfs_xattr_handler *xah = find_xattr_handler_prefix(name);
865	int err;
866	int lock;
867
868	if (!xah || !reiserfs_xattrs(dentry->d_sb) ||
869	    get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
870		return -EOPNOTSUPP;
871
872	reiserfs_write_lock_xattr_i(dentry->d_inode);
873	lock = !has_xattr_dir(dentry->d_inode);
874	if (lock)
875		reiserfs_write_lock_xattrs(dentry->d_sb);
876	else
877		reiserfs_read_lock_xattrs(dentry->d_sb);
878	err = xah->set(dentry->d_inode, name, value, size, flags);
879	if (lock)
880		reiserfs_write_unlock_xattrs(dentry->d_sb);
881	else
882		reiserfs_read_unlock_xattrs(dentry->d_sb);
883	reiserfs_write_unlock_xattr_i(dentry->d_inode);
884	return err;
885}
886
887/*
888 * Inode operation removexattr()
889 *
890 * dentry->d_inode->i_mutex down
891 */
892int reiserfs_removexattr(struct dentry *dentry, const char *name)
893{
894	int err;
895	struct reiserfs_xattr_handler *xah = find_xattr_handler_prefix(name);
896
897	if (!xah || !reiserfs_xattrs(dentry->d_sb) ||
898	    get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
899		return -EOPNOTSUPP;
900
901	reiserfs_write_lock_xattr_i(dentry->d_inode);
902	reiserfs_read_lock_xattrs(dentry->d_sb);
903
904	/* Deletion pre-operation */
905	if (xah->del) {
906		err = xah->del(dentry->d_inode, name);
907		if (err)
908			goto out;
909	}
910
911	err = reiserfs_xattr_del(dentry->d_inode, name);
912
913	dentry->d_inode->i_ctime = CURRENT_TIME_SEC;
914	mark_inode_dirty(dentry->d_inode);
915
916      out:
917	reiserfs_read_unlock_xattrs(dentry->d_sb);
918	reiserfs_write_unlock_xattr_i(dentry->d_inode);
919	return err;
920}
921
922/* This is what filldir will use:
923 * r_pos will always contain the amount of space required for the entire
924 * list. If r_pos becomes larger than r_size, we need more space and we
925 * return an error indicating this. If r_pos is less than r_size, then we've
926 * filled the buffer successfully and we return success */
927struct reiserfs_listxattr_buf {
928	int r_pos;
929	int r_size;
930	char *r_buf;
931	struct inode *r_inode;
932};
933
934static int
935reiserfs_listxattr_filler(void *buf, const char *name, int namelen,
936			  loff_t offset, u64 ino, unsigned int d_type)
937{
938	struct reiserfs_listxattr_buf *b = (struct reiserfs_listxattr_buf *)buf;
939	int len = 0;
940	if (name[0] != '.'
941	    || (namelen != 1 && (name[1] != '.' || namelen != 2))) {
942		struct reiserfs_xattr_handler *xah =
943		    find_xattr_handler_prefix(name);
944		if (!xah)
945			return 0;	/* Unsupported xattr name, skip it */
946
947		/* We call ->list() twice because the operation isn't required to just
948		 * return the name back - we want to make sure we have enough space */
949		len += xah->list(b->r_inode, name, namelen, NULL);
950
951		if (len) {
952			if (b->r_pos + len + 1 <= b->r_size) {
953				char *p = b->r_buf + b->r_pos;
954				p += xah->list(b->r_inode, name, namelen, p);
955				*p++ = '\0';
956			}
957			b->r_pos += len + 1;
958		}
959	}
960
961	return 0;
962}
963
964/*
965 * Inode operation listxattr()
966 *
967 * Preliminary locking: we down dentry->d_inode->i_mutex
968 */
969ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
970{
971	struct dentry *dir;
972	int err = 0;
973	struct reiserfs_listxattr_buf buf;
974
975	if (!dentry->d_inode)
976		return -EINVAL;
977
978	if (!reiserfs_xattrs(dentry->d_sb) ||
979	    get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
980		return -EOPNOTSUPP;
981
982	reiserfs_read_lock_xattr_i(dentry->d_inode);
983	reiserfs_read_lock_xattrs(dentry->d_sb);
984	dir = open_xa_dir(dentry->d_inode, FL_READONLY);
985	reiserfs_read_unlock_xattrs(dentry->d_sb);
986	if (IS_ERR(dir)) {
987		err = PTR_ERR(dir);
988		if (err == -ENODATA)
989			err = 0;	/* Not an error if there aren't any xattrs */
990		goto out;
991	}
992
993	buf.r_buf = buffer;
994	buf.r_size = buffer ? size : 0;
995	buf.r_pos = 0;
996	buf.r_inode = dentry->d_inode;
997
998	REISERFS_I(dentry->d_inode)->i_flags |= i_has_xattr_dir;
999
1000	err = xattr_readdir(dir->d_inode, reiserfs_listxattr_filler, &buf);
1001	if (err)
1002		goto out_dir;
1003
1004	if (buf.r_pos > buf.r_size && buffer != NULL)
1005		err = -ERANGE;
1006	else
1007		err = buf.r_pos;
1008
1009      out_dir:
1010	dput(dir);
1011
1012      out:
1013	reiserfs_read_unlock_xattr_i(dentry->d_inode);
1014	return err;
1015}
1016
1017/* This is the implementation for the xattr plugin infrastructure */
1018static LIST_HEAD(xattr_handlers);
1019static DEFINE_RWLOCK(handler_lock);
1020
1021static struct reiserfs_xattr_handler *find_xattr_handler_prefix(const char
1022								*prefix)
1023{
1024	struct reiserfs_xattr_handler *xah = NULL;
1025	struct list_head *p;
1026
1027	read_lock(&handler_lock);
1028	list_for_each(p, &xattr_handlers) {
1029		xah = list_entry(p, struct reiserfs_xattr_handler, handlers);
1030		if (strncmp(xah->prefix, prefix, strlen(xah->prefix)) == 0)
1031			break;
1032		xah = NULL;
1033	}
1034
1035	read_unlock(&handler_lock);
1036	return xah;
1037}
1038
1039static void __unregister_handlers(void)
1040{
1041	struct reiserfs_xattr_handler *xah;
1042	struct list_head *p, *tmp;
1043
1044	list_for_each_safe(p, tmp, &xattr_handlers) {
1045		xah = list_entry(p, struct reiserfs_xattr_handler, handlers);
1046		if (xah->exit)
1047			xah->exit();
1048
1049		list_del_init(p);
1050	}
1051	INIT_LIST_HEAD(&xattr_handlers);
1052}
1053
1054int __init reiserfs_xattr_register_handlers(void)
1055{
1056	int err = 0;
1057	struct reiserfs_xattr_handler *xah;
1058	struct list_head *p;
1059
1060	write_lock(&handler_lock);
1061
1062	/* If we're already initialized, nothing to do */
1063	if (!list_empty(&xattr_handlers)) {
1064		write_unlock(&handler_lock);
1065		return 0;
1066	}
1067
1068	/* Add the handlers */
1069	list_add_tail(&user_handler.handlers, &xattr_handlers);
1070	list_add_tail(&trusted_handler.handlers, &xattr_handlers);
1071#ifdef CONFIG_REISERFS_FS_SECURITY
1072	list_add_tail(&security_handler.handlers, &xattr_handlers);
1073#endif
1074#ifdef CONFIG_REISERFS_FS_POSIX_ACL
1075	list_add_tail(&posix_acl_access_handler.handlers, &xattr_handlers);
1076	list_add_tail(&posix_acl_default_handler.handlers, &xattr_handlers);
1077#endif
1078
1079	/* Run initializers, if available */
1080	list_for_each(p, &xattr_handlers) {
1081		xah = list_entry(p, struct reiserfs_xattr_handler, handlers);
1082		if (xah->init) {
1083			err = xah->init();
1084			if (err) {
1085				list_del_init(p);
1086				break;
1087			}
1088		}
1089	}
1090
1091	/* Clean up other handlers, if any failed */
1092	if (err)
1093		__unregister_handlers();
1094
1095	write_unlock(&handler_lock);
1096	return err;
1097}
1098
1099void reiserfs_xattr_unregister_handlers(void)
1100{
1101	write_lock(&handler_lock);
1102	__unregister_handlers();
1103	write_unlock(&handler_lock);
1104}
1105
1106/* This will catch lookups from the fs root to .reiserfs_priv */
1107static int
1108xattr_lookup_poison(struct dentry *dentry, struct qstr *q1, struct qstr *name)
1109{
1110	struct dentry *priv_root = REISERFS_SB(dentry->d_sb)->priv_root;
1111	if (name->len == priv_root->d_name.len &&
1112	    name->hash == priv_root->d_name.hash &&
1113	    !memcmp(name->name, priv_root->d_name.name, name->len)) {
1114		return -ENOENT;
1115	} else if (q1->len == name->len &&
1116		   !memcmp(q1->name, name->name, name->len))
1117		return 0;
1118	return 1;
1119}
1120
1121static struct dentry_operations xattr_lookup_poison_ops = {
1122	.d_compare = xattr_lookup_poison,
1123};
1124
1125/* We need to take a copy of the mount flags since things like
1126 * MS_RDONLY don't get set until *after* we're called.
1127 * mount_flags != mount_options */
1128int reiserfs_xattr_init(struct super_block *s, int mount_flags)
1129{
1130	int err = 0;
1131
1132	/* We need generation numbers to ensure that the oid mapping is correct
1133	 * v3.5 filesystems don't have them. */
1134	if (!old_format_only(s)) {
1135		set_bit(REISERFS_XATTRS, &(REISERFS_SB(s)->s_mount_opt));
1136	} else if (reiserfs_xattrs_optional(s)) {
1137		/* Old format filesystem, but optional xattrs have been enabled
1138		 * at mount time. Error out. */
1139		reiserfs_warning(s, "jdm-20005",
1140				 "xattrs/ACLs not supported on pre v3.6 "
1141				 "format filesystem. Failing mount.");
1142		err = -EOPNOTSUPP;
1143		goto error;
1144	} else {
1145		/* Old format filesystem, but no optional xattrs have been enabled. This
1146		 * means we silently disable xattrs on the filesystem. */
1147		clear_bit(REISERFS_XATTRS, &(REISERFS_SB(s)->s_mount_opt));
1148	}
1149
1150	/* If we don't have the privroot located yet - go find it */
1151	if (reiserfs_xattrs(s) && !REISERFS_SB(s)->priv_root) {
1152		struct dentry *dentry;
1153		dentry = lookup_one_len(PRIVROOT_NAME, s->s_root,
1154					strlen(PRIVROOT_NAME));
1155		if (!IS_ERR(dentry)) {
1156			if (!(mount_flags & MS_RDONLY) && !dentry->d_inode) {
1157				struct inode *inode = dentry->d_parent->d_inode;
1158				mutex_lock_nested(&inode->i_mutex,
1159						  I_MUTEX_XATTR);
1160				err = inode->i_op->mkdir(inode, dentry, 0700);
1161				mutex_unlock(&inode->i_mutex);
1162				if (err) {
1163					dput(dentry);
1164					dentry = NULL;
1165				}
1166
1167				if (dentry && dentry->d_inode)
1168					reiserfs_info(s, "Created %s - "
1169						      "reserved for xattr "
1170						      "storage.\n",
1171						      PRIVROOT_NAME);
1172			} else if (!dentry->d_inode) {
1173				dput(dentry);
1174				dentry = NULL;
1175			}
1176		} else
1177			err = PTR_ERR(dentry);
1178
1179		if (!err && dentry) {
1180			s->s_root->d_op = &xattr_lookup_poison_ops;
1181			reiserfs_mark_inode_private(dentry->d_inode);
1182			REISERFS_SB(s)->priv_root = dentry;
1183		} else if (!(mount_flags & MS_RDONLY)) {	/* xattrs are unavailable */
1184			/* If we're read-only it just means that the dir hasn't been
1185			 * created. Not an error -- just no xattrs on the fs. We'll
1186			 * check again if we go read-write */
1187			reiserfs_warning(s, "jdm-20006",
1188					 "xattrs/ACLs enabled and couldn't "
1189					 "find/create .reiserfs_priv. "
1190					 "Failing mount.");
1191			err = -EOPNOTSUPP;
1192		}
1193	}
1194
1195      error:
1196	/* This is only nonzero if there was an error initializing the xattr
1197	 * directory or if there is a condition where we don't support them. */
1198	if (err) {
1199		clear_bit(REISERFS_XATTRS, &(REISERFS_SB(s)->s_mount_opt));
1200		clear_bit(REISERFS_XATTRS_USER, &(REISERFS_SB(s)->s_mount_opt));
1201		clear_bit(REISERFS_POSIXACL, &(REISERFS_SB(s)->s_mount_opt));
1202	}
1203
1204	/* The super_block MS_POSIXACL must mirror the (no)acl mount option. */
1205	s->s_flags = s->s_flags & ~MS_POSIXACL;
1206	if (reiserfs_posixacl(s))
1207		s->s_flags |= MS_POSIXACL;
1208
1209	return err;
1210}
1211
1212static int reiserfs_check_acl(struct inode *inode, int mask)
1213{
1214	struct posix_acl *acl;
1215	int error = -EAGAIN; /* do regular unix permission checks by default */
1216
1217	reiserfs_read_lock_xattr_i(inode);
1218	reiserfs_read_lock_xattrs(inode->i_sb);
1219
1220	acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS);
1221
1222	reiserfs_read_unlock_xattrs(inode->i_sb);
1223	reiserfs_read_unlock_xattr_i(inode);
1224
1225	if (acl) {
1226		if (!IS_ERR(acl)) {
1227			error = posix_acl_permission(inode, acl, mask);
1228			posix_acl_release(acl);
1229		} else if (PTR_ERR(acl) != -ENODATA)
1230			error = PTR_ERR(acl);
1231	}
1232
1233	return error;
1234}
1235
1236int reiserfs_permission(struct inode *inode, int mask)
1237{
1238	/*
1239	 * We don't do permission checks on the internal objects.
1240	 * Permissions are determined by the "owning" object.
1241	 */
1242	if (is_reiserfs_priv_object(inode))
1243		return 0;
1244
1245	/*
1246	 * Stat data v1 doesn't support ACLs.
1247	 */
1248	if (get_inode_sd_version(inode) == STAT_DATA_V1)
1249		return generic_permission(inode, mask, NULL);
1250	else
1251		return generic_permission(inode, mask, reiserfs_check_acl);
1252}
1253