1/*
2  FUSE: Filesystem in Userspace
3  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4
5  This program can be distributed under the terms of the GNU GPL.
6  See the file COPYING.
7*/
8
9#include "fuse_i.h"
10
11#include <linux/pagemap.h>
12#include <linux/file.h>
13#include <linux/sched.h>
14#include <linux/namei.h>
15#include <linux/slab.h>
16
17static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
18{
19	struct fuse_conn *fc = get_fuse_conn(dir);
20	struct fuse_inode *fi = get_fuse_inode(dir);
21
22	if (!fc->do_readdirplus)
23		return false;
24	if (!fc->readdirplus_auto)
25		return true;
26	if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
27		return true;
28	if (ctx->pos == 0)
29		return true;
30	return false;
31}
32
33static void fuse_advise_use_readdirplus(struct inode *dir)
34{
35	struct fuse_inode *fi = get_fuse_inode(dir);
36
37	set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
38}
39
40#if BITS_PER_LONG >= 64
41static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
42{
43	entry->d_time = time;
44}
45
46static inline u64 fuse_dentry_time(struct dentry *entry)
47{
48	return entry->d_time;
49}
50#else
51/*
52 * On 32 bit archs store the high 32 bits of time in d_fsdata
53 */
54static void fuse_dentry_settime(struct dentry *entry, u64 time)
55{
56	entry->d_time = time;
57	entry->d_fsdata = (void *) (unsigned long) (time >> 32);
58}
59
60static u64 fuse_dentry_time(struct dentry *entry)
61{
62	return (u64) entry->d_time +
63		((u64) (unsigned long) entry->d_fsdata << 32);
64}
65#endif
66
/*
 * FUSE caches dentries and attributes with separate timeouts.  The
 * time (in jiffies) until which the dentry/attributes remain valid is
 * stored in dentry->d_time and fuse_inode->i_time respectively.
 */
72
73/*
74 * Calculate the time in jiffies until a dentry/attributes are valid
75 */
76static u64 time_to_jiffies(unsigned long sec, unsigned long nsec)
77{
78	if (sec || nsec) {
79		struct timespec ts = {sec, nsec};
80		return get_jiffies_64() + timespec_to_jiffies(&ts);
81	} else
82		return 0;
83}
84
85/*
86 * Set dentry and possibly attribute timeouts from the lookup/mk*
87 * replies
88 */
89static void fuse_change_entry_timeout(struct dentry *entry,
90				      struct fuse_entry_out *o)
91{
92	fuse_dentry_settime(entry,
93		time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
94}
95
96static u64 attr_timeout(struct fuse_attr_out *o)
97{
98	return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
99}
100
101static u64 entry_attr_timeout(struct fuse_entry_out *o)
102{
103	return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
104}
105
106/*
107 * Mark the attributes as stale, so that at the next call to
108 * ->getattr() they will be fetched from userspace
109 */
110void fuse_invalidate_attr(struct inode *inode)
111{
112	get_fuse_inode(inode)->i_time = 0;
113}
114
/*
 * Mark the attributes as stale because of an atime change.  Nothing is
 * invalidated when IS_RDONLY() holds for @inode.
 */
void fuse_invalidate_atime(struct inode *inode)
{
	if (IS_RDONLY(inode))
		return;

	fuse_invalidate_attr(inode);
}
124
125/*
126 * Just mark the entry as stale, so that a next attempt to look it up
127 * will result in a new lookup call to userspace
128 *
129 * This is called when a dentry is about to become negative and the
130 * timeout is unknown (unlink, rmdir, rename and in some cases
131 * lookup)
132 */
133void fuse_invalidate_entry_cache(struct dentry *entry)
134{
135	fuse_dentry_settime(entry, 0);
136}
137
/*
 * Like fuse_invalidate_entry_cache(), but first ask the dcache to
 * invalidate the dentry via d_invalidate().
 */
static void fuse_invalidate_entry(struct dentry *entry)
{
	d_invalidate(entry);

	/* same effect as fuse_invalidate_entry_cache(): expire the timeout */
	fuse_dentry_settime(entry, 0);
}
147
148static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_req *req,
149			     u64 nodeid, struct qstr *name,
150			     struct fuse_entry_out *outarg)
151{
152	memset(outarg, 0, sizeof(struct fuse_entry_out));
153	req->in.h.opcode = FUSE_LOOKUP;
154	req->in.h.nodeid = nodeid;
155	req->in.numargs = 1;
156	req->in.args[0].size = name->len + 1;
157	req->in.args[0].value = name->name;
158	req->out.numargs = 1;
159	if (fc->minor < 9)
160		req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
161	else
162		req->out.args[0].size = sizeof(struct fuse_entry_out);
163	req->out.args[0].value = outarg;
164}
165
166u64 fuse_get_attr_version(struct fuse_conn *fc)
167{
168	u64 curr_version;
169
170	/*
171	 * The spin lock isn't actually needed on 64bit archs, but we
172	 * don't yet care too much about such optimizations.
173	 */
174	spin_lock(&fc->lock);
175	curr_version = fc->attr_version;
176	spin_unlock(&fc->lock);
177
178	return curr_version;
179}
180
181/*
182 * Check whether the dentry is still valid
183 *
184 * If the entry validity timeout has expired and the dentry is
185 * positive, try to redo the lookup.  If the lookup results in a
186 * different inode, then let the VFS invalidate the dentry and redo
187 * the lookup once more.  If the lookup results in the same inode,
188 * then refresh the attributes, timeouts and mark the dentry valid.
189 */
190static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
191{
192	struct inode *inode;
193	struct dentry *parent;
194	struct fuse_conn *fc;
195	struct fuse_inode *fi;
196	int ret;
197
198	inode = ACCESS_ONCE(entry->d_inode);
199	if (inode && is_bad_inode(inode))
200		goto invalid;
201	else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
202		 (flags & LOOKUP_REVAL)) {
203		int err;
204		struct fuse_entry_out outarg;
205		struct fuse_req *req;
206		struct fuse_forget_link *forget;
207		u64 attr_version;
208
209		/* For negative dentries, always do a fresh lookup */
210		if (!inode)
211			goto invalid;
212
213		ret = -ECHILD;
214		if (flags & LOOKUP_RCU)
215			goto out;
216
217		fc = get_fuse_conn(inode);
218		req = fuse_get_req_nopages(fc);
219		ret = PTR_ERR(req);
220		if (IS_ERR(req))
221			goto out;
222
223		forget = fuse_alloc_forget();
224		if (!forget) {
225			fuse_put_request(fc, req);
226			ret = -ENOMEM;
227			goto out;
228		}
229
230		attr_version = fuse_get_attr_version(fc);
231
232		parent = dget_parent(entry);
233		fuse_lookup_init(fc, req, get_node_id(parent->d_inode),
234				 &entry->d_name, &outarg);
235		fuse_request_send(fc, req);
236		dput(parent);
237		err = req->out.h.error;
238		fuse_put_request(fc, req);
239		/* Zero nodeid is same as -ENOENT */
240		if (!err && !outarg.nodeid)
241			err = -ENOENT;
242		if (!err) {
243			fi = get_fuse_inode(inode);
244			if (outarg.nodeid != get_node_id(inode)) {
245				fuse_queue_forget(fc, forget, outarg.nodeid, 1);
246				goto invalid;
247			}
248			spin_lock(&fc->lock);
249			fi->nlookup++;
250			spin_unlock(&fc->lock);
251		}
252		kfree(forget);
253		if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
254			goto invalid;
255
256		fuse_change_attributes(inode, &outarg.attr,
257				       entry_attr_timeout(&outarg),
258				       attr_version);
259		fuse_change_entry_timeout(entry, &outarg);
260	} else if (inode) {
261		fi = get_fuse_inode(inode);
262		if (flags & LOOKUP_RCU) {
263			if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
264				return -ECHILD;
265		} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
266			parent = dget_parent(entry);
267			fuse_advise_use_readdirplus(parent->d_inode);
268			dput(parent);
269		}
270	}
271	ret = 1;
272out:
273	return ret;
274
275invalid:
276	ret = 0;
277	goto out;
278}
279
280static int invalid_nodeid(u64 nodeid)
281{
282	return !nodeid || nodeid == FUSE_ROOT_ID;
283}
284
285const struct dentry_operations fuse_dentry_operations = {
286	.d_revalidate	= fuse_dentry_revalidate,
287};
288
289int fuse_valid_type(int m)
290{
291	return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) ||
292		S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m);
293}
294
295int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
296		     struct fuse_entry_out *outarg, struct inode **inode)
297{
298	struct fuse_conn *fc = get_fuse_conn_super(sb);
299	struct fuse_req *req;
300	struct fuse_forget_link *forget;
301	u64 attr_version;
302	int err;
303
304	*inode = NULL;
305	err = -ENAMETOOLONG;
306	if (name->len > FUSE_NAME_MAX)
307		goto out;
308
309	req = fuse_get_req_nopages(fc);
310	err = PTR_ERR(req);
311	if (IS_ERR(req))
312		goto out;
313
314	forget = fuse_alloc_forget();
315	err = -ENOMEM;
316	if (!forget) {
317		fuse_put_request(fc, req);
318		goto out;
319	}
320
321	attr_version = fuse_get_attr_version(fc);
322
323	fuse_lookup_init(fc, req, nodeid, name, outarg);
324	fuse_request_send(fc, req);
325	err = req->out.h.error;
326	fuse_put_request(fc, req);
327	/* Zero nodeid is same as -ENOENT, but with valid timeout */
328	if (err || !outarg->nodeid)
329		goto out_put_forget;
330
331	err = -EIO;
332	if (!outarg->nodeid)
333		goto out_put_forget;
334	if (!fuse_valid_type(outarg->attr.mode))
335		goto out_put_forget;
336
337	*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
338			   &outarg->attr, entry_attr_timeout(outarg),
339			   attr_version);
340	err = -ENOMEM;
341	if (!*inode) {
342		fuse_queue_forget(fc, forget, outarg->nodeid, 1);
343		goto out;
344	}
345	err = 0;
346
347 out_put_forget:
348	kfree(forget);
349 out:
350	return err;
351}
352
353static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
354				  unsigned int flags)
355{
356	int err;
357	struct fuse_entry_out outarg;
358	struct inode *inode;
359	struct dentry *newent;
360	bool outarg_valid = true;
361
362	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
363			       &outarg, &inode);
364	if (err == -ENOENT) {
365		outarg_valid = false;
366		err = 0;
367	}
368	if (err)
369		goto out_err;
370
371	err = -EIO;
372	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
373		goto out_iput;
374
375	newent = d_materialise_unique(entry, inode);
376	err = PTR_ERR(newent);
377	if (IS_ERR(newent))
378		goto out_err;
379
380	entry = newent ? newent : entry;
381	if (outarg_valid)
382		fuse_change_entry_timeout(entry, &outarg);
383	else
384		fuse_invalidate_entry_cache(entry);
385
386	fuse_advise_use_readdirplus(dir);
387	return newent;
388
389 out_iput:
390	iput(inode);
391 out_err:
392	return ERR_PTR(err);
393}
394
395/*
396 * Atomic create+open operation
397 *
398 * If the filesystem doesn't support this, then fall back to separate
399 * 'mknod' + 'open' requests.
400 */
401static int fuse_create_open(struct inode *dir, struct dentry *entry,
402			    struct file *file, unsigned flags,
403			    umode_t mode, int *opened)
404{
405	int err;
406	struct inode *inode;
407	struct fuse_conn *fc = get_fuse_conn(dir);
408	struct fuse_req *req;
409	struct fuse_forget_link *forget;
410	struct fuse_create_in inarg;
411	struct fuse_open_out outopen;
412	struct fuse_entry_out outentry;
413	struct fuse_file *ff;
414
415	/* Userspace expects S_IFREG in create mode */
416	BUG_ON((mode & S_IFMT) != S_IFREG);
417
418	forget = fuse_alloc_forget();
419	err = -ENOMEM;
420	if (!forget)
421		goto out_err;
422
423	req = fuse_get_req_nopages(fc);
424	err = PTR_ERR(req);
425	if (IS_ERR(req))
426		goto out_put_forget_req;
427
428	err = -ENOMEM;
429	ff = fuse_file_alloc(fc);
430	if (!ff)
431		goto out_put_request;
432
433	if (!fc->dont_mask)
434		mode &= ~current_umask();
435
436	flags &= ~O_NOCTTY;
437	memset(&inarg, 0, sizeof(inarg));
438	memset(&outentry, 0, sizeof(outentry));
439	inarg.flags = flags;
440	inarg.mode = mode;
441	inarg.umask = current_umask();
442	req->in.h.opcode = FUSE_CREATE;
443	req->in.h.nodeid = get_node_id(dir);
444	req->in.numargs = 2;
445	req->in.args[0].size = fc->minor < 12 ? sizeof(struct fuse_open_in) :
446						sizeof(inarg);
447	req->in.args[0].value = &inarg;
448	req->in.args[1].size = entry->d_name.len + 1;
449	req->in.args[1].value = entry->d_name.name;
450	req->out.numargs = 2;
451	if (fc->minor < 9)
452		req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
453	else
454		req->out.args[0].size = sizeof(outentry);
455	req->out.args[0].value = &outentry;
456	req->out.args[1].size = sizeof(outopen);
457	req->out.args[1].value = &outopen;
458	fuse_request_send(fc, req);
459	err = req->out.h.error;
460	if (err)
461		goto out_free_ff;
462
463	err = -EIO;
464	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid))
465		goto out_free_ff;
466
467	fuse_put_request(fc, req);
468	ff->fh = outopen.fh;
469	ff->nodeid = outentry.nodeid;
470	ff->open_flags = outopen.open_flags;
471	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
472			  &outentry.attr, entry_attr_timeout(&outentry), 0);
473	if (!inode) {
474		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
475		fuse_sync_release(ff, flags);
476		fuse_queue_forget(fc, forget, outentry.nodeid, 1);
477		err = -ENOMEM;
478		goto out_err;
479	}
480	kfree(forget);
481	d_instantiate(entry, inode);
482	fuse_change_entry_timeout(entry, &outentry);
483	fuse_invalidate_attr(dir);
484	err = finish_open(file, entry, generic_file_open, opened);
485	if (err) {
486		fuse_sync_release(ff, flags);
487	} else {
488		file->private_data = fuse_file_get(ff);
489		fuse_finish_open(inode, file);
490	}
491	return err;
492
493out_free_ff:
494	fuse_file_free(ff);
495out_put_request:
496	fuse_put_request(fc, req);
497out_put_forget_req:
498	kfree(forget);
499out_err:
500	return err;
501}
502
503static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);
504static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
505			    struct file *file, unsigned flags,
506			    umode_t mode, int *opened)
507{
508	int err;
509	struct fuse_conn *fc = get_fuse_conn(dir);
510	struct dentry *res = NULL;
511
512	if (d_unhashed(entry)) {
513		res = fuse_lookup(dir, entry, 0);
514		if (IS_ERR(res))
515			return PTR_ERR(res);
516
517		if (res)
518			entry = res;
519	}
520
521	if (!(flags & O_CREAT) || entry->d_inode)
522		goto no_open;
523
524	/* Only creates */
525	*opened |= FILE_CREATED;
526
527	if (fc->no_create)
528		goto mknod;
529
530	err = fuse_create_open(dir, entry, file, flags, mode, opened);
531	if (err == -ENOSYS) {
532		fc->no_create = 1;
533		goto mknod;
534	}
535out_dput:
536	dput(res);
537	return err;
538
539mknod:
540	err = fuse_mknod(dir, entry, mode, 0);
541	if (err)
542		goto out_dput;
543no_open:
544	return finish_no_open(file, res);
545}
546
547/*
548 * Code shared between mknod, mkdir, symlink and link
549 */
550static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
551			    struct inode *dir, struct dentry *entry,
552			    umode_t mode)
553{
554	struct fuse_entry_out outarg;
555	struct inode *inode;
556	int err;
557	struct fuse_forget_link *forget;
558
559	forget = fuse_alloc_forget();
560	if (!forget) {
561		fuse_put_request(fc, req);
562		return -ENOMEM;
563	}
564
565	memset(&outarg, 0, sizeof(outarg));
566	req->in.h.nodeid = get_node_id(dir);
567	req->out.numargs = 1;
568	if (fc->minor < 9)
569		req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
570	else
571		req->out.args[0].size = sizeof(outarg);
572	req->out.args[0].value = &outarg;
573	fuse_request_send(fc, req);
574	err = req->out.h.error;
575	fuse_put_request(fc, req);
576	if (err)
577		goto out_put_forget_req;
578
579	err = -EIO;
580	if (invalid_nodeid(outarg.nodeid))
581		goto out_put_forget_req;
582
583	if ((outarg.attr.mode ^ mode) & S_IFMT)
584		goto out_put_forget_req;
585
586	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
587			  &outarg.attr, entry_attr_timeout(&outarg), 0);
588	if (!inode) {
589		fuse_queue_forget(fc, forget, outarg.nodeid, 1);
590		return -ENOMEM;
591	}
592	kfree(forget);
593
594	err = d_instantiate_no_diralias(entry, inode);
595	if (err)
596		return err;
597
598	fuse_change_entry_timeout(entry, &outarg);
599	fuse_invalidate_attr(dir);
600	return 0;
601
602 out_put_forget_req:
603	kfree(forget);
604	return err;
605}
606
607static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
608		      dev_t rdev)
609{
610	struct fuse_mknod_in inarg;
611	struct fuse_conn *fc = get_fuse_conn(dir);
612	struct fuse_req *req = fuse_get_req_nopages(fc);
613	if (IS_ERR(req))
614		return PTR_ERR(req);
615
616	if (!fc->dont_mask)
617		mode &= ~current_umask();
618
619	memset(&inarg, 0, sizeof(inarg));
620	inarg.mode = mode;
621	inarg.rdev = new_encode_dev(rdev);
622	inarg.umask = current_umask();
623	req->in.h.opcode = FUSE_MKNOD;
624	req->in.numargs = 2;
625	req->in.args[0].size = fc->minor < 12 ? FUSE_COMPAT_MKNOD_IN_SIZE :
626						sizeof(inarg);
627	req->in.args[0].value = &inarg;
628	req->in.args[1].size = entry->d_name.len + 1;
629	req->in.args[1].value = entry->d_name.name;
630	return create_new_entry(fc, req, dir, entry, mode);
631}
632
633static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
634		       bool excl)
635{
636	return fuse_mknod(dir, entry, mode, 0);
637}
638
639static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
640{
641	struct fuse_mkdir_in inarg;
642	struct fuse_conn *fc = get_fuse_conn(dir);
643	struct fuse_req *req = fuse_get_req_nopages(fc);
644	if (IS_ERR(req))
645		return PTR_ERR(req);
646
647	if (!fc->dont_mask)
648		mode &= ~current_umask();
649
650	memset(&inarg, 0, sizeof(inarg));
651	inarg.mode = mode;
652	inarg.umask = current_umask();
653	req->in.h.opcode = FUSE_MKDIR;
654	req->in.numargs = 2;
655	req->in.args[0].size = sizeof(inarg);
656	req->in.args[0].value = &inarg;
657	req->in.args[1].size = entry->d_name.len + 1;
658	req->in.args[1].value = entry->d_name.name;
659	return create_new_entry(fc, req, dir, entry, S_IFDIR);
660}
661
662static int fuse_symlink(struct inode *dir, struct dentry *entry,
663			const char *link)
664{
665	struct fuse_conn *fc = get_fuse_conn(dir);
666	unsigned len = strlen(link) + 1;
667	struct fuse_req *req = fuse_get_req_nopages(fc);
668	if (IS_ERR(req))
669		return PTR_ERR(req);
670
671	req->in.h.opcode = FUSE_SYMLINK;
672	req->in.numargs = 2;
673	req->in.args[0].size = entry->d_name.len + 1;
674	req->in.args[0].value = entry->d_name.name;
675	req->in.args[1].size = len;
676	req->in.args[1].value = link;
677	return create_new_entry(fc, req, dir, entry, S_IFLNK);
678}
679
680static inline void fuse_update_ctime(struct inode *inode)
681{
682	if (!IS_NOCMTIME(inode)) {
683		inode->i_ctime = current_fs_time(inode->i_sb);
684		mark_inode_dirty_sync(inode);
685	}
686}
687
688static int fuse_unlink(struct inode *dir, struct dentry *entry)
689{
690	int err;
691	struct fuse_conn *fc = get_fuse_conn(dir);
692	struct fuse_req *req = fuse_get_req_nopages(fc);
693	if (IS_ERR(req))
694		return PTR_ERR(req);
695
696	req->in.h.opcode = FUSE_UNLINK;
697	req->in.h.nodeid = get_node_id(dir);
698	req->in.numargs = 1;
699	req->in.args[0].size = entry->d_name.len + 1;
700	req->in.args[0].value = entry->d_name.name;
701	fuse_request_send(fc, req);
702	err = req->out.h.error;
703	fuse_put_request(fc, req);
704	if (!err) {
705		struct inode *inode = entry->d_inode;
706		struct fuse_inode *fi = get_fuse_inode(inode);
707
708		spin_lock(&fc->lock);
709		fi->attr_version = ++fc->attr_version;
710		/*
711		 * If i_nlink == 0 then unlink doesn't make sense, yet this can
712		 * happen if userspace filesystem is careless.  It would be
713		 * difficult to enforce correct nlink usage so just ignore this
714		 * condition here
715		 */
716		if (inode->i_nlink > 0)
717			drop_nlink(inode);
718		spin_unlock(&fc->lock);
719		fuse_invalidate_attr(inode);
720		fuse_invalidate_attr(dir);
721		fuse_invalidate_entry_cache(entry);
722		fuse_update_ctime(inode);
723	} else if (err == -EINTR)
724		fuse_invalidate_entry(entry);
725	return err;
726}
727
728static int fuse_rmdir(struct inode *dir, struct dentry *entry)
729{
730	int err;
731	struct fuse_conn *fc = get_fuse_conn(dir);
732	struct fuse_req *req = fuse_get_req_nopages(fc);
733	if (IS_ERR(req))
734		return PTR_ERR(req);
735
736	req->in.h.opcode = FUSE_RMDIR;
737	req->in.h.nodeid = get_node_id(dir);
738	req->in.numargs = 1;
739	req->in.args[0].size = entry->d_name.len + 1;
740	req->in.args[0].value = entry->d_name.name;
741	fuse_request_send(fc, req);
742	err = req->out.h.error;
743	fuse_put_request(fc, req);
744	if (!err) {
745		clear_nlink(entry->d_inode);
746		fuse_invalidate_attr(dir);
747		fuse_invalidate_entry_cache(entry);
748	} else if (err == -EINTR)
749		fuse_invalidate_entry(entry);
750	return err;
751}
752
753static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
754			      struct inode *newdir, struct dentry *newent,
755			      unsigned int flags, int opcode, size_t argsize)
756{
757	int err;
758	struct fuse_rename2_in inarg;
759	struct fuse_conn *fc = get_fuse_conn(olddir);
760	struct fuse_req *req;
761
762	req = fuse_get_req_nopages(fc);
763	if (IS_ERR(req))
764		return PTR_ERR(req);
765
766	memset(&inarg, 0, argsize);
767	inarg.newdir = get_node_id(newdir);
768	inarg.flags = flags;
769	req->in.h.opcode = opcode;
770	req->in.h.nodeid = get_node_id(olddir);
771	req->in.numargs = 3;
772	req->in.args[0].size = argsize;
773	req->in.args[0].value = &inarg;
774	req->in.args[1].size = oldent->d_name.len + 1;
775	req->in.args[1].value = oldent->d_name.name;
776	req->in.args[2].size = newent->d_name.len + 1;
777	req->in.args[2].value = newent->d_name.name;
778	fuse_request_send(fc, req);
779	err = req->out.h.error;
780	fuse_put_request(fc, req);
781	if (!err) {
782		/* ctime changes */
783		fuse_invalidate_attr(oldent->d_inode);
784		fuse_update_ctime(oldent->d_inode);
785
786		if (flags & RENAME_EXCHANGE) {
787			fuse_invalidate_attr(newent->d_inode);
788			fuse_update_ctime(newent->d_inode);
789		}
790
791		fuse_invalidate_attr(olddir);
792		if (olddir != newdir)
793			fuse_invalidate_attr(newdir);
794
795		/* newent will end up negative */
796		if (!(flags & RENAME_EXCHANGE) && newent->d_inode) {
797			fuse_invalidate_attr(newent->d_inode);
798			fuse_invalidate_entry_cache(newent);
799			fuse_update_ctime(newent->d_inode);
800		}
801	} else if (err == -EINTR) {
802		/* If request was interrupted, DEITY only knows if the
803		   rename actually took place.  If the invalidation
804		   fails (e.g. some process has CWD under the renamed
805		   directory), then there can be inconsistency between
806		   the dcache and the real filesystem.  Tough luck. */
807		fuse_invalidate_entry(oldent);
808		if (newent->d_inode)
809			fuse_invalidate_entry(newent);
810	}
811
812	return err;
813}
814
815static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
816			struct inode *newdir, struct dentry *newent,
817			unsigned int flags)
818{
819	struct fuse_conn *fc = get_fuse_conn(olddir);
820	int err;
821
822	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
823		return -EINVAL;
824
825	if (flags) {
826		if (fc->no_rename2 || fc->minor < 23)
827			return -EINVAL;
828
829		err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
830					 FUSE_RENAME2,
831					 sizeof(struct fuse_rename2_in));
832		if (err == -ENOSYS) {
833			fc->no_rename2 = 1;
834			err = -EINVAL;
835		}
836	} else {
837		err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
838					 FUSE_RENAME,
839					 sizeof(struct fuse_rename_in));
840	}
841
842	return err;
843}
844
845static int fuse_link(struct dentry *entry, struct inode *newdir,
846		     struct dentry *newent)
847{
848	int err;
849	struct fuse_link_in inarg;
850	struct inode *inode = entry->d_inode;
851	struct fuse_conn *fc = get_fuse_conn(inode);
852	struct fuse_req *req = fuse_get_req_nopages(fc);
853	if (IS_ERR(req))
854		return PTR_ERR(req);
855
856	memset(&inarg, 0, sizeof(inarg));
857	inarg.oldnodeid = get_node_id(inode);
858	req->in.h.opcode = FUSE_LINK;
859	req->in.numargs = 2;
860	req->in.args[0].size = sizeof(inarg);
861	req->in.args[0].value = &inarg;
862	req->in.args[1].size = newent->d_name.len + 1;
863	req->in.args[1].value = newent->d_name.name;
864	err = create_new_entry(fc, req, newdir, newent, inode->i_mode);
865	/* Contrary to "normal" filesystems it can happen that link
866	   makes two "logical" inodes point to the same "physical"
867	   inode.  We invalidate the attributes of the old one, so it
868	   will reflect changes in the backing inode (link count,
869	   etc.)
870	*/
871	if (!err) {
872		struct fuse_inode *fi = get_fuse_inode(inode);
873
874		spin_lock(&fc->lock);
875		fi->attr_version = ++fc->attr_version;
876		inc_nlink(inode);
877		spin_unlock(&fc->lock);
878		fuse_invalidate_attr(inode);
879		fuse_update_ctime(inode);
880	} else if (err == -EINTR) {
881		fuse_invalidate_attr(inode);
882	}
883	return err;
884}
885
886static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
887			  struct kstat *stat)
888{
889	unsigned int blkbits;
890	struct fuse_conn *fc = get_fuse_conn(inode);
891
892	/* see the comment in fuse_change_attributes() */
893	if (fc->writeback_cache && S_ISREG(inode->i_mode)) {
894		attr->size = i_size_read(inode);
895		attr->mtime = inode->i_mtime.tv_sec;
896		attr->mtimensec = inode->i_mtime.tv_nsec;
897		attr->ctime = inode->i_ctime.tv_sec;
898		attr->ctimensec = inode->i_ctime.tv_nsec;
899	}
900
901	stat->dev = inode->i_sb->s_dev;
902	stat->ino = attr->ino;
903	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
904	stat->nlink = attr->nlink;
905	stat->uid = make_kuid(&init_user_ns, attr->uid);
906	stat->gid = make_kgid(&init_user_ns, attr->gid);
907	stat->rdev = inode->i_rdev;
908	stat->atime.tv_sec = attr->atime;
909	stat->atime.tv_nsec = attr->atimensec;
910	stat->mtime.tv_sec = attr->mtime;
911	stat->mtime.tv_nsec = attr->mtimensec;
912	stat->ctime.tv_sec = attr->ctime;
913	stat->ctime.tv_nsec = attr->ctimensec;
914	stat->size = attr->size;
915	stat->blocks = attr->blocks;
916
917	if (attr->blksize != 0)
918		blkbits = ilog2(attr->blksize);
919	else
920		blkbits = inode->i_sb->s_blocksize_bits;
921
922	stat->blksize = 1 << blkbits;
923}
924
925static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
926			   struct file *file)
927{
928	int err;
929	struct fuse_getattr_in inarg;
930	struct fuse_attr_out outarg;
931	struct fuse_conn *fc = get_fuse_conn(inode);
932	struct fuse_req *req;
933	u64 attr_version;
934
935	req = fuse_get_req_nopages(fc);
936	if (IS_ERR(req))
937		return PTR_ERR(req);
938
939	attr_version = fuse_get_attr_version(fc);
940
941	memset(&inarg, 0, sizeof(inarg));
942	memset(&outarg, 0, sizeof(outarg));
943	/* Directories have separate file-handle space */
944	if (file && S_ISREG(inode->i_mode)) {
945		struct fuse_file *ff = file->private_data;
946
947		inarg.getattr_flags |= FUSE_GETATTR_FH;
948		inarg.fh = ff->fh;
949	}
950	req->in.h.opcode = FUSE_GETATTR;
951	req->in.h.nodeid = get_node_id(inode);
952	req->in.numargs = 1;
953	req->in.args[0].size = sizeof(inarg);
954	req->in.args[0].value = &inarg;
955	req->out.numargs = 1;
956	if (fc->minor < 9)
957		req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
958	else
959		req->out.args[0].size = sizeof(outarg);
960	req->out.args[0].value = &outarg;
961	fuse_request_send(fc, req);
962	err = req->out.h.error;
963	fuse_put_request(fc, req);
964	if (!err) {
965		if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
966			make_bad_inode(inode);
967			err = -EIO;
968		} else {
969			fuse_change_attributes(inode, &outarg.attr,
970					       attr_timeout(&outarg),
971					       attr_version);
972			if (stat)
973				fuse_fillattr(inode, &outarg.attr, stat);
974		}
975	}
976	return err;
977}
978
979int fuse_update_attributes(struct inode *inode, struct kstat *stat,
980			   struct file *file, bool *refreshed)
981{
982	struct fuse_inode *fi = get_fuse_inode(inode);
983	int err;
984	bool r;
985
986	if (time_before64(fi->i_time, get_jiffies_64())) {
987		r = true;
988		err = fuse_do_getattr(inode, stat, file);
989	} else {
990		r = false;
991		err = 0;
992		if (stat) {
993			generic_fillattr(inode, stat);
994			stat->mode = fi->orig_i_mode;
995			stat->ino = fi->orig_ino;
996		}
997	}
998
999	if (refreshed != NULL)
1000		*refreshed = r;
1001
1002	return err;
1003}
1004
1005int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
1006			     u64 child_nodeid, struct qstr *name)
1007{
1008	int err = -ENOTDIR;
1009	struct inode *parent;
1010	struct dentry *dir;
1011	struct dentry *entry;
1012
1013	parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid);
1014	if (!parent)
1015		return -ENOENT;
1016
1017	mutex_lock(&parent->i_mutex);
1018	if (!S_ISDIR(parent->i_mode))
1019		goto unlock;
1020
1021	err = -ENOENT;
1022	dir = d_find_alias(parent);
1023	if (!dir)
1024		goto unlock;
1025
1026	entry = d_lookup(dir, name);
1027	dput(dir);
1028	if (!entry)
1029		goto unlock;
1030
1031	fuse_invalidate_attr(parent);
1032	fuse_invalidate_entry(entry);
1033
1034	if (child_nodeid != 0 && entry->d_inode) {
1035		mutex_lock(&entry->d_inode->i_mutex);
1036		if (get_node_id(entry->d_inode) != child_nodeid) {
1037			err = -ENOENT;
1038			goto badentry;
1039		}
1040		if (d_mountpoint(entry)) {
1041			err = -EBUSY;
1042			goto badentry;
1043		}
1044		if (S_ISDIR(entry->d_inode->i_mode)) {
1045			shrink_dcache_parent(entry);
1046			if (!simple_empty(entry)) {
1047				err = -ENOTEMPTY;
1048				goto badentry;
1049			}
1050			entry->d_inode->i_flags |= S_DEAD;
1051		}
1052		dont_mount(entry);
1053		clear_nlink(entry->d_inode);
1054		err = 0;
1055 badentry:
1056		mutex_unlock(&entry->d_inode->i_mutex);
1057		if (!err)
1058			d_delete(entry);
1059	} else {
1060		err = 0;
1061	}
1062	dput(entry);
1063
1064 unlock:
1065	mutex_unlock(&parent->i_mutex);
1066	iput(parent);
1067	return err;
1068}
1069
1070/*
1071 * Calling into a user-controlled filesystem gives the filesystem
1072 * daemon ptrace-like capabilities over the current process.  This
1073 * means, that the filesystem daemon is able to record the exact
1074 * filesystem operations performed, and can also control the behavior
1075 * of the requester process in otherwise impossible ways.  For example
1076 * it can delay the operation for arbitrary length of time allowing
1077 * DoS against the requester.
1078 *
1079 * For this reason only those processes can call into the filesystem,
1080 * for which the owner of the mount has ptrace privilege.  This
1081 * excludes processes started by other users, suid or sgid processes.
1082 */
1083int fuse_allow_current_process(struct fuse_conn *fc)
1084{
1085	const struct cred *cred;
1086
1087	if (fc->flags & FUSE_ALLOW_OTHER)
1088		return 1;
1089
1090	cred = current_cred();
1091	if (uid_eq(cred->euid, fc->user_id) &&
1092	    uid_eq(cred->suid, fc->user_id) &&
1093	    uid_eq(cred->uid,  fc->user_id) &&
1094	    gid_eq(cred->egid, fc->group_id) &&
1095	    gid_eq(cred->sgid, fc->group_id) &&
1096	    gid_eq(cred->gid,  fc->group_id))
1097		return 1;
1098
1099	return 0;
1100}
1101
1102static int fuse_access(struct inode *inode, int mask)
1103{
1104	struct fuse_conn *fc = get_fuse_conn(inode);
1105	struct fuse_req *req;
1106	struct fuse_access_in inarg;
1107	int err;
1108
1109	BUG_ON(mask & MAY_NOT_BLOCK);
1110
1111	if (fc->no_access)
1112		return 0;
1113
1114	req = fuse_get_req_nopages(fc);
1115	if (IS_ERR(req))
1116		return PTR_ERR(req);
1117
1118	memset(&inarg, 0, sizeof(inarg));
1119	inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1120	req->in.h.opcode = FUSE_ACCESS;
1121	req->in.h.nodeid = get_node_id(inode);
1122	req->in.numargs = 1;
1123	req->in.args[0].size = sizeof(inarg);
1124	req->in.args[0].value = &inarg;
1125	fuse_request_send(fc, req);
1126	err = req->out.h.error;
1127	fuse_put_request(fc, req);
1128	if (err == -ENOSYS) {
1129		fc->no_access = 1;
1130		err = 0;
1131	}
1132	return err;
1133}
1134
1135static int fuse_perm_getattr(struct inode *inode, int mask)
1136{
1137	if (mask & MAY_NOT_BLOCK)
1138		return -ECHILD;
1139
1140	return fuse_do_getattr(inode, NULL, NULL);
1141}
1142
1143/*
1144 * Check permission.  The two basic access models of FUSE are:
1145 *
 * 1) Local access checking ('default_permissions' mount option) based
 * on file mode.  This is the plain old disk filesystem permission
 * model.
1149 *
1150 * 2) "Remote" access checking, where server is responsible for
1151 * checking permission in each inode operation.  An exception to this
1152 * is if ->permission() was invoked from sys_access() in which case an
1153 * access request is sent.  Execute permission is still checked
1154 * locally based on file mode.
1155 */
1156static int fuse_permission(struct inode *inode, int mask)
1157{
1158	struct fuse_conn *fc = get_fuse_conn(inode);
1159	bool refreshed = false;
1160	int err = 0;
1161
1162	if (!fuse_allow_current_process(fc))
1163		return -EACCES;
1164
1165	/*
1166	 * If attributes are needed, refresh them before proceeding
1167	 */
1168	if ((fc->flags & FUSE_DEFAULT_PERMISSIONS) ||
1169	    ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
1170		struct fuse_inode *fi = get_fuse_inode(inode);
1171
1172		if (time_before64(fi->i_time, get_jiffies_64())) {
1173			refreshed = true;
1174
1175			err = fuse_perm_getattr(inode, mask);
1176			if (err)
1177				return err;
1178		}
1179	}
1180
1181	if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
1182		err = generic_permission(inode, mask);
1183
1184		/* If permission is denied, try to refresh file
1185		   attributes.  This is also needed, because the root
1186		   node will at first have no permissions */
1187		if (err == -EACCES && !refreshed) {
1188			err = fuse_perm_getattr(inode, mask);
1189			if (!err)
1190				err = generic_permission(inode, mask);
1191		}
1192
1193		/* Note: the opposite of the above test does not
1194		   exist.  So if permissions are revoked this won't be
1195		   noticed immediately, only after the attribute
1196		   timeout has expired */
1197	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
1198		err = fuse_access(inode, mask);
1199	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
1200		if (!(inode->i_mode & S_IXUGO)) {
1201			if (refreshed)
1202				return -EACCES;
1203
1204			err = fuse_perm_getattr(inode, mask);
1205			if (!err && !(inode->i_mode & S_IXUGO))
1206				return -EACCES;
1207		}
1208	}
1209	return err;
1210}
1211
1212static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
1213			 struct dir_context *ctx)
1214{
1215	while (nbytes >= FUSE_NAME_OFFSET) {
1216		struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
1217		size_t reclen = FUSE_DIRENT_SIZE(dirent);
1218		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
1219			return -EIO;
1220		if (reclen > nbytes)
1221			break;
1222		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
1223			return -EIO;
1224
1225		if (!dir_emit(ctx, dirent->name, dirent->namelen,
1226			       dirent->ino, dirent->type))
1227			break;
1228
1229		buf += reclen;
1230		nbytes -= reclen;
1231		ctx->pos = dirent->off;
1232	}
1233
1234	return 0;
1235}
1236
1237static int fuse_direntplus_link(struct file *file,
1238				struct fuse_direntplus *direntplus,
1239				u64 attr_version)
1240{
1241	int err;
1242	struct fuse_entry_out *o = &direntplus->entry_out;
1243	struct fuse_dirent *dirent = &direntplus->dirent;
1244	struct dentry *parent = file->f_path.dentry;
1245	struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
1246	struct dentry *dentry;
1247	struct dentry *alias;
1248	struct inode *dir = parent->d_inode;
1249	struct fuse_conn *fc;
1250	struct inode *inode;
1251
1252	if (!o->nodeid) {
1253		/*
1254		 * Unlike in the case of fuse_lookup, zero nodeid does not mean
1255		 * ENOENT. Instead, it only means the userspace filesystem did
1256		 * not want to return attributes/handle for this entry.
1257		 *
1258		 * So do nothing.
1259		 */
1260		return 0;
1261	}
1262
1263	if (name.name[0] == '.') {
1264		/*
1265		 * We could potentially refresh the attributes of the directory
1266		 * and its parent?
1267		 */
1268		if (name.len == 1)
1269			return 0;
1270		if (name.name[1] == '.' && name.len == 2)
1271			return 0;
1272	}
1273
1274	if (invalid_nodeid(o->nodeid))
1275		return -EIO;
1276	if (!fuse_valid_type(o->attr.mode))
1277		return -EIO;
1278
1279	fc = get_fuse_conn(dir);
1280
1281	name.hash = full_name_hash(name.name, name.len);
1282	dentry = d_lookup(parent, &name);
1283	if (dentry) {
1284		inode = dentry->d_inode;
1285		if (!inode) {
1286			d_drop(dentry);
1287		} else if (get_node_id(inode) != o->nodeid ||
1288			   ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
1289			d_invalidate(dentry);
1290		} else if (is_bad_inode(inode)) {
1291			err = -EIO;
1292			goto out;
1293		} else {
1294			struct fuse_inode *fi;
1295			fi = get_fuse_inode(inode);
1296			spin_lock(&fc->lock);
1297			fi->nlookup++;
1298			spin_unlock(&fc->lock);
1299
1300			fuse_change_attributes(inode, &o->attr,
1301					       entry_attr_timeout(o),
1302					       attr_version);
1303
1304			/*
1305			 * The other branch to 'found' comes via fuse_iget()
1306			 * which bumps nlookup inside
1307			 */
1308			goto found;
1309		}
1310		dput(dentry);
1311	}
1312
1313	dentry = d_alloc(parent, &name);
1314	err = -ENOMEM;
1315	if (!dentry)
1316		goto out;
1317
1318	inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
1319			  &o->attr, entry_attr_timeout(o), attr_version);
1320	if (!inode)
1321		goto out;
1322
1323	alias = d_materialise_unique(dentry, inode);
1324	err = PTR_ERR(alias);
1325	if (IS_ERR(alias))
1326		goto out;
1327
1328	if (alias) {
1329		dput(dentry);
1330		dentry = alias;
1331	}
1332
1333found:
1334	if (fc->readdirplus_auto)
1335		set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
1336	fuse_change_entry_timeout(dentry, o);
1337
1338	err = 0;
1339out:
1340	dput(dentry);
1341	return err;
1342}
1343
1344static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
1345			     struct dir_context *ctx, u64 attr_version)
1346{
1347	struct fuse_direntplus *direntplus;
1348	struct fuse_dirent *dirent;
1349	size_t reclen;
1350	int over = 0;
1351	int ret;
1352
1353	while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
1354		direntplus = (struct fuse_direntplus *) buf;
1355		dirent = &direntplus->dirent;
1356		reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
1357
1358		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
1359			return -EIO;
1360		if (reclen > nbytes)
1361			break;
1362		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
1363			return -EIO;
1364
1365		if (!over) {
1366			/* We fill entries into dstbuf only as much as
1367			   it can hold. But we still continue iterating
1368			   over remaining entries to link them. If not,
1369			   we need to send a FORGET for each of those
1370			   which we did not link.
1371			*/
1372			over = !dir_emit(ctx, dirent->name, dirent->namelen,
1373				       dirent->ino, dirent->type);
1374			ctx->pos = dirent->off;
1375		}
1376
1377		buf += reclen;
1378		nbytes -= reclen;
1379
1380		ret = fuse_direntplus_link(file, direntplus, attr_version);
1381		if (ret)
1382			fuse_force_forget(file, direntplus->entry_out.nodeid);
1383	}
1384
1385	return 0;
1386}
1387
1388static int fuse_readdir(struct file *file, struct dir_context *ctx)
1389{
1390	int plus, err;
1391	size_t nbytes;
1392	struct page *page;
1393	struct inode *inode = file_inode(file);
1394	struct fuse_conn *fc = get_fuse_conn(inode);
1395	struct fuse_req *req;
1396	u64 attr_version = 0;
1397
1398	if (is_bad_inode(inode))
1399		return -EIO;
1400
1401	req = fuse_get_req(fc, 1);
1402	if (IS_ERR(req))
1403		return PTR_ERR(req);
1404
1405	page = alloc_page(GFP_KERNEL);
1406	if (!page) {
1407		fuse_put_request(fc, req);
1408		return -ENOMEM;
1409	}
1410
1411	plus = fuse_use_readdirplus(inode, ctx);
1412	req->out.argpages = 1;
1413	req->num_pages = 1;
1414	req->pages[0] = page;
1415	req->page_descs[0].length = PAGE_SIZE;
1416	if (plus) {
1417		attr_version = fuse_get_attr_version(fc);
1418		fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
1419			       FUSE_READDIRPLUS);
1420	} else {
1421		fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
1422			       FUSE_READDIR);
1423	}
1424	fuse_request_send(fc, req);
1425	nbytes = req->out.args[0].size;
1426	err = req->out.h.error;
1427	fuse_put_request(fc, req);
1428	if (!err) {
1429		if (plus) {
1430			err = parse_dirplusfile(page_address(page), nbytes,
1431						file, ctx,
1432						attr_version);
1433		} else {
1434			err = parse_dirfile(page_address(page), nbytes, file,
1435					    ctx);
1436		}
1437	}
1438
1439	__free_page(page);
1440	fuse_invalidate_atime(inode);
1441	return err;
1442}
1443
1444static char *read_link(struct dentry *dentry)
1445{
1446	struct inode *inode = dentry->d_inode;
1447	struct fuse_conn *fc = get_fuse_conn(inode);
1448	struct fuse_req *req = fuse_get_req_nopages(fc);
1449	char *link;
1450
1451	if (IS_ERR(req))
1452		return ERR_CAST(req);
1453
1454	link = (char *) __get_free_page(GFP_KERNEL);
1455	if (!link) {
1456		link = ERR_PTR(-ENOMEM);
1457		goto out;
1458	}
1459	req->in.h.opcode = FUSE_READLINK;
1460	req->in.h.nodeid = get_node_id(inode);
1461	req->out.argvar = 1;
1462	req->out.numargs = 1;
1463	req->out.args[0].size = PAGE_SIZE - 1;
1464	req->out.args[0].value = link;
1465	fuse_request_send(fc, req);
1466	if (req->out.h.error) {
1467		free_page((unsigned long) link);
1468		link = ERR_PTR(req->out.h.error);
1469	} else
1470		link[req->out.args[0].size] = '\0';
1471 out:
1472	fuse_put_request(fc, req);
1473	fuse_invalidate_atime(inode);
1474	return link;
1475}
1476
/*
 * Release a link string returned by read_link().  An ERR_PTR() value
 * is ignored.
 */
static void free_link(char *link)
{
	if (IS_ERR(link))
		return;

	free_page((unsigned long) link);
}
1482
1483static void *fuse_follow_link(struct dentry *dentry, struct nameidata *nd)
1484{
1485	nd_set_link(nd, read_link(dentry));
1486	return NULL;
1487}
1488
/*
 * ->put_link(): release the link string that is stored in @nd.
 * @dentry and @c are not used.
 */
static void fuse_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
{
	char *target = nd_get_link(nd);

	free_link(target);
}
1493
1494static int fuse_dir_open(struct inode *inode, struct file *file)
1495{
1496	return fuse_open_common(inode, file, true);
1497}
1498
1499static int fuse_dir_release(struct inode *inode, struct file *file)
1500{
1501	fuse_release_common(file, FUSE_RELEASEDIR);
1502
1503	return 0;
1504}
1505
1506static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
1507			  int datasync)
1508{
1509	return fuse_fsync_common(file, start, end, datasync, 1);
1510}
1511
1512static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
1513			    unsigned long arg)
1514{
1515	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1516
1517	/* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
1518	if (fc->minor < 18)
1519		return -ENOTTY;
1520
1521	return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
1522}
1523
1524static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
1525				   unsigned long arg)
1526{
1527	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1528
1529	if (fc->minor < 18)
1530		return -ENOTTY;
1531
1532	return fuse_ioctl_common(file, cmd, arg,
1533				 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
1534}
1535
1536static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
1537{
1538	/* Always update if mtime is explicitly set  */
1539	if (ivalid & ATTR_MTIME_SET)
1540		return true;
1541
1542	/* Or if kernel i_mtime is the official one */
1543	if (trust_local_mtime)
1544		return true;
1545
1546	/* If it's an open(O_TRUNC) or an ftruncate(), don't update */
1547	if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
1548		return false;
1549
1550	/* In all other cases update */
1551	return true;
1552}
1553
1554static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg,
1555			   bool trust_local_cmtime)
1556{
1557	unsigned ivalid = iattr->ia_valid;
1558
1559	if (ivalid & ATTR_MODE)
1560		arg->valid |= FATTR_MODE,   arg->mode = iattr->ia_mode;
1561	if (ivalid & ATTR_UID)
1562		arg->valid |= FATTR_UID,    arg->uid = from_kuid(&init_user_ns, iattr->ia_uid);
1563	if (ivalid & ATTR_GID)
1564		arg->valid |= FATTR_GID,    arg->gid = from_kgid(&init_user_ns, iattr->ia_gid);
1565	if (ivalid & ATTR_SIZE)
1566		arg->valid |= FATTR_SIZE,   arg->size = iattr->ia_size;
1567	if (ivalid & ATTR_ATIME) {
1568		arg->valid |= FATTR_ATIME;
1569		arg->atime = iattr->ia_atime.tv_sec;
1570		arg->atimensec = iattr->ia_atime.tv_nsec;
1571		if (!(ivalid & ATTR_ATIME_SET))
1572			arg->valid |= FATTR_ATIME_NOW;
1573	}
1574	if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
1575		arg->valid |= FATTR_MTIME;
1576		arg->mtime = iattr->ia_mtime.tv_sec;
1577		arg->mtimensec = iattr->ia_mtime.tv_nsec;
1578		if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
1579			arg->valid |= FATTR_MTIME_NOW;
1580	}
1581	if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
1582		arg->valid |= FATTR_CTIME;
1583		arg->ctime = iattr->ia_ctime.tv_sec;
1584		arg->ctimensec = iattr->ia_ctime.tv_nsec;
1585	}
1586}
1587
1588/*
1589 * Prevent concurrent writepages on inode
1590 *
1591 * This is done by adding a negative bias to the inode write counter
1592 * and waiting for all pending writes to finish.
1593 */
1594void fuse_set_nowrite(struct inode *inode)
1595{
1596	struct fuse_conn *fc = get_fuse_conn(inode);
1597	struct fuse_inode *fi = get_fuse_inode(inode);
1598
1599	BUG_ON(!mutex_is_locked(&inode->i_mutex));
1600
1601	spin_lock(&fc->lock);
1602	BUG_ON(fi->writectr < 0);
1603	fi->writectr += FUSE_NOWRITE;
1604	spin_unlock(&fc->lock);
1605	wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
1606}
1607
1608/*
1609 * Allow writepages on inode
1610 *
1611 * Remove the bias from the writecounter and send any queued
1612 * writepages.
1613 */
1614static void __fuse_release_nowrite(struct inode *inode)
1615{
1616	struct fuse_inode *fi = get_fuse_inode(inode);
1617
1618	BUG_ON(fi->writectr != FUSE_NOWRITE);
1619	fi->writectr = 0;
1620	fuse_flush_writepages(inode);
1621}
1622
1623void fuse_release_nowrite(struct inode *inode)
1624{
1625	struct fuse_conn *fc = get_fuse_conn(inode);
1626
1627	spin_lock(&fc->lock);
1628	__fuse_release_nowrite(inode);
1629	spin_unlock(&fc->lock);
1630}
1631
1632static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_req *req,
1633			      struct inode *inode,
1634			      struct fuse_setattr_in *inarg_p,
1635			      struct fuse_attr_out *outarg_p)
1636{
1637	req->in.h.opcode = FUSE_SETATTR;
1638	req->in.h.nodeid = get_node_id(inode);
1639	req->in.numargs = 1;
1640	req->in.args[0].size = sizeof(*inarg_p);
1641	req->in.args[0].value = inarg_p;
1642	req->out.numargs = 1;
1643	if (fc->minor < 9)
1644		req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
1645	else
1646		req->out.args[0].size = sizeof(*outarg_p);
1647	req->out.args[0].value = outarg_p;
1648}
1649
1650/*
1651 * Flush inode->i_mtime to the server
1652 */
1653int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1654{
1655	struct fuse_conn *fc = get_fuse_conn(inode);
1656	struct fuse_req *req;
1657	struct fuse_setattr_in inarg;
1658	struct fuse_attr_out outarg;
1659	int err;
1660
1661	req = fuse_get_req_nopages(fc);
1662	if (IS_ERR(req))
1663		return PTR_ERR(req);
1664
1665	memset(&inarg, 0, sizeof(inarg));
1666	memset(&outarg, 0, sizeof(outarg));
1667
1668	inarg.valid = FATTR_MTIME;
1669	inarg.mtime = inode->i_mtime.tv_sec;
1670	inarg.mtimensec = inode->i_mtime.tv_nsec;
1671	if (fc->minor >= 23) {
1672		inarg.valid |= FATTR_CTIME;
1673		inarg.ctime = inode->i_ctime.tv_sec;
1674		inarg.ctimensec = inode->i_ctime.tv_nsec;
1675	}
1676	if (ff) {
1677		inarg.valid |= FATTR_FH;
1678		inarg.fh = ff->fh;
1679	}
1680	fuse_setattr_fill(fc, req, inode, &inarg, &outarg);
1681	fuse_request_send(fc, req);
1682	err = req->out.h.error;
1683	fuse_put_request(fc, req);
1684
1685	return err;
1686}
1687
1688/*
1689 * Set attributes, and at the same time refresh them.
1690 *
1691 * Truncation is slightly complicated, because the 'truncate' request
1692 * may fail, in which case we don't want to touch the mapping.
1693 * vmtruncate() doesn't allow for this case, so do the rlimit checking
1694 * and the actual truncation by hand.
1695 */
1696int fuse_do_setattr(struct inode *inode, struct iattr *attr,
1697		    struct file *file)
1698{
1699	struct fuse_conn *fc = get_fuse_conn(inode);
1700	struct fuse_inode *fi = get_fuse_inode(inode);
1701	struct fuse_req *req;
1702	struct fuse_setattr_in inarg;
1703	struct fuse_attr_out outarg;
1704	bool is_truncate = false;
1705	bool is_wb = fc->writeback_cache;
1706	loff_t oldsize;
1707	int err;
1708	bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode);
1709
1710	if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
1711		attr->ia_valid |= ATTR_FORCE;
1712
1713	err = inode_change_ok(inode, attr);
1714	if (err)
1715		return err;
1716
1717	if (attr->ia_valid & ATTR_OPEN) {
1718		if (fc->atomic_o_trunc)
1719			return 0;
1720		file = NULL;
1721	}
1722
1723	if (attr->ia_valid & ATTR_SIZE)
1724		is_truncate = true;
1725
1726	req = fuse_get_req_nopages(fc);
1727	if (IS_ERR(req))
1728		return PTR_ERR(req);
1729
1730	if (is_truncate) {
1731		fuse_set_nowrite(inode);
1732		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1733		if (trust_local_cmtime && attr->ia_size != inode->i_size)
1734			attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
1735	}
1736
1737	memset(&inarg, 0, sizeof(inarg));
1738	memset(&outarg, 0, sizeof(outarg));
1739	iattr_to_fattr(attr, &inarg, trust_local_cmtime);
1740	if (file) {
1741		struct fuse_file *ff = file->private_data;
1742		inarg.valid |= FATTR_FH;
1743		inarg.fh = ff->fh;
1744	}
1745	if (attr->ia_valid & ATTR_SIZE) {
1746		/* For mandatory locking in truncate */
1747		inarg.valid |= FATTR_LOCKOWNER;
1748		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
1749	}
1750	fuse_setattr_fill(fc, req, inode, &inarg, &outarg);
1751	fuse_request_send(fc, req);
1752	err = req->out.h.error;
1753	fuse_put_request(fc, req);
1754	if (err) {
1755		if (err == -EINTR)
1756			fuse_invalidate_attr(inode);
1757		goto error;
1758	}
1759
1760	if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
1761		make_bad_inode(inode);
1762		err = -EIO;
1763		goto error;
1764	}
1765
1766	spin_lock(&fc->lock);
1767	/* the kernel maintains i_mtime locally */
1768	if (trust_local_cmtime) {
1769		if (attr->ia_valid & ATTR_MTIME)
1770			inode->i_mtime = attr->ia_mtime;
1771		if (attr->ia_valid & ATTR_CTIME)
1772			inode->i_ctime = attr->ia_ctime;
1773		/* FIXME: clear I_DIRTY_SYNC? */
1774	}
1775
1776	fuse_change_attributes_common(inode, &outarg.attr,
1777				      attr_timeout(&outarg));
1778	oldsize = inode->i_size;
1779	/* see the comment in fuse_change_attributes() */
1780	if (!is_wb || is_truncate || !S_ISREG(inode->i_mode))
1781		i_size_write(inode, outarg.attr.size);
1782
1783	if (is_truncate) {
1784		/* NOTE: this may release/reacquire fc->lock */
1785		__fuse_release_nowrite(inode);
1786	}
1787	spin_unlock(&fc->lock);
1788
1789	/*
1790	 * Only call invalidate_inode_pages2() after removing
1791	 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
1792	 */
1793	if ((is_truncate || !is_wb) &&
1794	    S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
1795		truncate_pagecache(inode, outarg.attr.size);
1796		invalidate_inode_pages2(inode->i_mapping);
1797	}
1798
1799	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1800	return 0;
1801
1802error:
1803	if (is_truncate)
1804		fuse_release_nowrite(inode);
1805
1806	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1807	return err;
1808}
1809
1810static int fuse_setattr(struct dentry *entry, struct iattr *attr)
1811{
1812	struct inode *inode = entry->d_inode;
1813
1814	if (!fuse_allow_current_process(get_fuse_conn(inode)))
1815		return -EACCES;
1816
1817	if (attr->ia_valid & ATTR_FILE)
1818		return fuse_do_setattr(inode, attr, attr->ia_file);
1819	else
1820		return fuse_do_setattr(inode, attr, NULL);
1821}
1822
1823static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
1824			struct kstat *stat)
1825{
1826	struct inode *inode = entry->d_inode;
1827	struct fuse_conn *fc = get_fuse_conn(inode);
1828
1829	if (!fuse_allow_current_process(fc))
1830		return -EACCES;
1831
1832	return fuse_update_attributes(inode, stat, NULL, NULL);
1833}
1834
1835static int fuse_setxattr(struct dentry *entry, const char *name,
1836			 const void *value, size_t size, int flags)
1837{
1838	struct inode *inode = entry->d_inode;
1839	struct fuse_conn *fc = get_fuse_conn(inode);
1840	struct fuse_req *req;
1841	struct fuse_setxattr_in inarg;
1842	int err;
1843
1844	if (fc->no_setxattr)
1845		return -EOPNOTSUPP;
1846
1847	req = fuse_get_req_nopages(fc);
1848	if (IS_ERR(req))
1849		return PTR_ERR(req);
1850
1851	memset(&inarg, 0, sizeof(inarg));
1852	inarg.size = size;
1853	inarg.flags = flags;
1854	req->in.h.opcode = FUSE_SETXATTR;
1855	req->in.h.nodeid = get_node_id(inode);
1856	req->in.numargs = 3;
1857	req->in.args[0].size = sizeof(inarg);
1858	req->in.args[0].value = &inarg;
1859	req->in.args[1].size = strlen(name) + 1;
1860	req->in.args[1].value = name;
1861	req->in.args[2].size = size;
1862	req->in.args[2].value = value;
1863	fuse_request_send(fc, req);
1864	err = req->out.h.error;
1865	fuse_put_request(fc, req);
1866	if (err == -ENOSYS) {
1867		fc->no_setxattr = 1;
1868		err = -EOPNOTSUPP;
1869	}
1870	if (!err) {
1871		fuse_invalidate_attr(inode);
1872		fuse_update_ctime(inode);
1873	}
1874	return err;
1875}
1876
1877static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
1878			     void *value, size_t size)
1879{
1880	struct inode *inode = entry->d_inode;
1881	struct fuse_conn *fc = get_fuse_conn(inode);
1882	struct fuse_req *req;
1883	struct fuse_getxattr_in inarg;
1884	struct fuse_getxattr_out outarg;
1885	ssize_t ret;
1886
1887	if (fc->no_getxattr)
1888		return -EOPNOTSUPP;
1889
1890	req = fuse_get_req_nopages(fc);
1891	if (IS_ERR(req))
1892		return PTR_ERR(req);
1893
1894	memset(&inarg, 0, sizeof(inarg));
1895	inarg.size = size;
1896	req->in.h.opcode = FUSE_GETXATTR;
1897	req->in.h.nodeid = get_node_id(inode);
1898	req->in.numargs = 2;
1899	req->in.args[0].size = sizeof(inarg);
1900	req->in.args[0].value = &inarg;
1901	req->in.args[1].size = strlen(name) + 1;
1902	req->in.args[1].value = name;
1903	/* This is really two different operations rolled into one */
1904	req->out.numargs = 1;
1905	if (size) {
1906		req->out.argvar = 1;
1907		req->out.args[0].size = size;
1908		req->out.args[0].value = value;
1909	} else {
1910		req->out.args[0].size = sizeof(outarg);
1911		req->out.args[0].value = &outarg;
1912	}
1913	fuse_request_send(fc, req);
1914	ret = req->out.h.error;
1915	if (!ret)
1916		ret = size ? req->out.args[0].size : outarg.size;
1917	else {
1918		if (ret == -ENOSYS) {
1919			fc->no_getxattr = 1;
1920			ret = -EOPNOTSUPP;
1921		}
1922	}
1923	fuse_put_request(fc, req);
1924	return ret;
1925}
1926
1927static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
1928{
1929	struct inode *inode = entry->d_inode;
1930	struct fuse_conn *fc = get_fuse_conn(inode);
1931	struct fuse_req *req;
1932	struct fuse_getxattr_in inarg;
1933	struct fuse_getxattr_out outarg;
1934	ssize_t ret;
1935
1936	if (!fuse_allow_current_process(fc))
1937		return -EACCES;
1938
1939	if (fc->no_listxattr)
1940		return -EOPNOTSUPP;
1941
1942	req = fuse_get_req_nopages(fc);
1943	if (IS_ERR(req))
1944		return PTR_ERR(req);
1945
1946	memset(&inarg, 0, sizeof(inarg));
1947	inarg.size = size;
1948	req->in.h.opcode = FUSE_LISTXATTR;
1949	req->in.h.nodeid = get_node_id(inode);
1950	req->in.numargs = 1;
1951	req->in.args[0].size = sizeof(inarg);
1952	req->in.args[0].value = &inarg;
1953	/* This is really two different operations rolled into one */
1954	req->out.numargs = 1;
1955	if (size) {
1956		req->out.argvar = 1;
1957		req->out.args[0].size = size;
1958		req->out.args[0].value = list;
1959	} else {
1960		req->out.args[0].size = sizeof(outarg);
1961		req->out.args[0].value = &outarg;
1962	}
1963	fuse_request_send(fc, req);
1964	ret = req->out.h.error;
1965	if (!ret)
1966		ret = size ? req->out.args[0].size : outarg.size;
1967	else {
1968		if (ret == -ENOSYS) {
1969			fc->no_listxattr = 1;
1970			ret = -EOPNOTSUPP;
1971		}
1972	}
1973	fuse_put_request(fc, req);
1974	return ret;
1975}
1976
1977static int fuse_removexattr(struct dentry *entry, const char *name)
1978{
1979	struct inode *inode = entry->d_inode;
1980	struct fuse_conn *fc = get_fuse_conn(inode);
1981	struct fuse_req *req;
1982	int err;
1983
1984	if (fc->no_removexattr)
1985		return -EOPNOTSUPP;
1986
1987	req = fuse_get_req_nopages(fc);
1988	if (IS_ERR(req))
1989		return PTR_ERR(req);
1990
1991	req->in.h.opcode = FUSE_REMOVEXATTR;
1992	req->in.h.nodeid = get_node_id(inode);
1993	req->in.numargs = 1;
1994	req->in.args[0].size = strlen(name) + 1;
1995	req->in.args[0].value = name;
1996	fuse_request_send(fc, req);
1997	err = req->out.h.error;
1998	fuse_put_request(fc, req);
1999	if (err == -ENOSYS) {
2000		fc->no_removexattr = 1;
2001		err = -EOPNOTSUPP;
2002	}
2003	if (!err) {
2004		fuse_invalidate_attr(inode);
2005		fuse_update_ctime(inode);
2006	}
2007	return err;
2008}
2009
2010static const struct inode_operations fuse_dir_inode_operations = {
2011	.lookup		= fuse_lookup,
2012	.mkdir		= fuse_mkdir,
2013	.symlink	= fuse_symlink,
2014	.unlink		= fuse_unlink,
2015	.rmdir		= fuse_rmdir,
2016	.rename2	= fuse_rename2,
2017	.link		= fuse_link,
2018	.setattr	= fuse_setattr,
2019	.create		= fuse_create,
2020	.atomic_open	= fuse_atomic_open,
2021	.mknod		= fuse_mknod,
2022	.permission	= fuse_permission,
2023	.getattr	= fuse_getattr,
2024	.setxattr	= fuse_setxattr,
2025	.getxattr	= fuse_getxattr,
2026	.listxattr	= fuse_listxattr,
2027	.removexattr	= fuse_removexattr,
2028};
2029
2030static const struct file_operations fuse_dir_operations = {
2031	.llseek		= generic_file_llseek,
2032	.read		= generic_read_dir,
2033	.iterate	= fuse_readdir,
2034	.open		= fuse_dir_open,
2035	.release	= fuse_dir_release,
2036	.fsync		= fuse_dir_fsync,
2037	.unlocked_ioctl	= fuse_dir_ioctl,
2038	.compat_ioctl	= fuse_dir_compat_ioctl,
2039};
2040
2041static const struct inode_operations fuse_common_inode_operations = {
2042	.setattr	= fuse_setattr,
2043	.permission	= fuse_permission,
2044	.getattr	= fuse_getattr,
2045	.setxattr	= fuse_setxattr,
2046	.getxattr	= fuse_getxattr,
2047	.listxattr	= fuse_listxattr,
2048	.removexattr	= fuse_removexattr,
2049};
2050
2051static const struct inode_operations fuse_symlink_inode_operations = {
2052	.setattr	= fuse_setattr,
2053	.follow_link	= fuse_follow_link,
2054	.put_link	= fuse_put_link,
2055	.readlink	= generic_readlink,
2056	.getattr	= fuse_getattr,
2057	.setxattr	= fuse_setxattr,
2058	.getxattr	= fuse_getxattr,
2059	.listxattr	= fuse_listxattr,
2060	.removexattr	= fuse_removexattr,
2061};
2062
2063void fuse_init_common(struct inode *inode)
2064{
2065	inode->i_op = &fuse_common_inode_operations;
2066}
2067
2068void fuse_init_dir(struct inode *inode)
2069{
2070	inode->i_op = &fuse_dir_inode_operations;
2071	inode->i_fop = &fuse_dir_operations;
2072}
2073
2074void fuse_init_symlink(struct inode *inode)
2075{
2076	inode->i_op = &fuse_symlink_inode_operations;
2077}
2078