dir.c revision 1ae06819c77cff1ea2833c94f8c093fe8a5c79db
1/* 2 * fs/kernfs/dir.c - kernfs directory implementation 3 * 4 * Copyright (c) 2001-3 Patrick Mochel 5 * Copyright (c) 2007 SUSE Linux Products GmbH 6 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> 7 * 8 * This file is released under the GPLv2. 9 */ 10 11#include <linux/sched.h> 12#include <linux/fs.h> 13#include <linux/namei.h> 14#include <linux/idr.h> 15#include <linux/slab.h> 16#include <linux/security.h> 17#include <linux/hash.h> 18 19#include "kernfs-internal.h" 20 21DEFINE_MUTEX(kernfs_mutex); 22 23#define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb) 24 25static bool kernfs_lockdep(struct kernfs_node *kn) 26{ 27#ifdef CONFIG_DEBUG_LOCK_ALLOC 28 return kn->flags & KERNFS_LOCKDEP; 29#else 30 return false; 31#endif 32} 33 34/** 35 * kernfs_name_hash 36 * @name: Null terminated string to hash 37 * @ns: Namespace tag to hash 38 * 39 * Returns 31 bit hash of ns + name (so it fits in an off_t ) 40 */ 41static unsigned int kernfs_name_hash(const char *name, const void *ns) 42{ 43 unsigned long hash = init_name_hash(); 44 unsigned int len = strlen(name); 45 while (len--) 46 hash = partial_name_hash(*name++, hash); 47 hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31)); 48 hash &= 0x7fffffffU; 49 /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */ 50 if (hash < 1) 51 hash += 2; 52 if (hash >= INT_MAX) 53 hash = INT_MAX - 1; 54 return hash; 55} 56 57static int kernfs_name_compare(unsigned int hash, const char *name, 58 const void *ns, const struct kernfs_node *kn) 59{ 60 if (hash != kn->hash) 61 return hash - kn->hash; 62 if (ns != kn->ns) 63 return ns - kn->ns; 64 return strcmp(name, kn->name); 65} 66 67static int kernfs_sd_compare(const struct kernfs_node *left, 68 const struct kernfs_node *right) 69{ 70 return kernfs_name_compare(left->hash, left->name, left->ns, right); 71} 72 73/** 74 * kernfs_link_sibling - link kernfs_node into sibling rbtree 75 * @kn: kernfs_node of interest 76 * 77 * Link @kn into its sibling rbtree which 
starts from 78 * @kn->parent->dir.children. 79 * 80 * Locking: 81 * mutex_lock(kernfs_mutex) 82 * 83 * RETURNS: 84 * 0 on susccess -EEXIST on failure. 85 */ 86static int kernfs_link_sibling(struct kernfs_node *kn) 87{ 88 struct rb_node **node = &kn->parent->dir.children.rb_node; 89 struct rb_node *parent = NULL; 90 91 if (kernfs_type(kn) == KERNFS_DIR) 92 kn->parent->dir.subdirs++; 93 94 while (*node) { 95 struct kernfs_node *pos; 96 int result; 97 98 pos = rb_to_kn(*node); 99 parent = *node; 100 result = kernfs_sd_compare(kn, pos); 101 if (result < 0) 102 node = &pos->rb.rb_left; 103 else if (result > 0) 104 node = &pos->rb.rb_right; 105 else 106 return -EEXIST; 107 } 108 /* add new node and rebalance the tree */ 109 rb_link_node(&kn->rb, parent, node); 110 rb_insert_color(&kn->rb, &kn->parent->dir.children); 111 return 0; 112} 113 114/** 115 * kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree 116 * @kn: kernfs_node of interest 117 * 118 * Unlink @kn from its sibling rbtree which starts from 119 * kn->parent->dir.children. 120 * 121 * Locking: 122 * mutex_lock(kernfs_mutex) 123 */ 124static bool kernfs_unlink_sibling(struct kernfs_node *kn) 125{ 126 if (RB_EMPTY_NODE(&kn->rb)) 127 return false; 128 129 if (kernfs_type(kn) == KERNFS_DIR) 130 kn->parent->dir.subdirs--; 131 132 rb_erase(&kn->rb, &kn->parent->dir.children); 133 RB_CLEAR_NODE(&kn->rb); 134 return true; 135} 136 137/** 138 * kernfs_get_active - get an active reference to kernfs_node 139 * @kn: kernfs_node to get an active reference to 140 * 141 * Get an active reference of @kn. This function is noop if @kn 142 * is NULL. 143 * 144 * RETURNS: 145 * Pointer to @kn on success, NULL on failure. 146 */ 147struct kernfs_node *kernfs_get_active(struct kernfs_node *kn) 148{ 149 if (unlikely(!kn)) 150 return NULL; 151 152 if (kernfs_lockdep(kn)) 153 rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_); 154 155 /* 156 * Try to obtain an active ref. 
If @kn is deactivated, we block 157 * till either it's reactivated or killed. 158 */ 159 do { 160 if (atomic_inc_unless_negative(&kn->active)) 161 return kn; 162 163 wait_event(kernfs_root(kn)->deactivate_waitq, 164 atomic_read(&kn->active) >= 0 || 165 RB_EMPTY_NODE(&kn->rb)); 166 } while (!RB_EMPTY_NODE(&kn->rb)); 167 168 if (kernfs_lockdep(kn)) 169 rwsem_release(&kn->dep_map, 1, _RET_IP_); 170 return NULL; 171} 172 173/** 174 * kernfs_put_active - put an active reference to kernfs_node 175 * @kn: kernfs_node to put an active reference to 176 * 177 * Put an active reference to @kn. This function is noop if @kn 178 * is NULL. 179 */ 180void kernfs_put_active(struct kernfs_node *kn) 181{ 182 struct kernfs_root *root = kernfs_root(kn); 183 int v; 184 185 if (unlikely(!kn)) 186 return; 187 188 if (kernfs_lockdep(kn)) 189 rwsem_release(&kn->dep_map, 1, _RET_IP_); 190 v = atomic_dec_return(&kn->active); 191 if (likely(v != KN_DEACTIVATED_BIAS)) 192 return; 193 194 wake_up_all(&root->deactivate_waitq); 195} 196 197/** 198 * kernfs_drain - drain kernfs_node 199 * @kn: kernfs_node to drain 200 * 201 * Drain existing usages of @kn. Mutiple removers may invoke this function 202 * concurrently on @kn and all will return after draining is complete. 203 * Returns %true if drain is performed and kernfs_mutex was temporarily 204 * released. %false if @kn was already drained and no operation was 205 * necessary. 206 * 207 * The caller is responsible for ensuring @kn stays pinned while this 208 * function is in progress even if it gets removed by someone else. 
 */
static bool kernfs_drain(struct kernfs_node *kn)
        __releases(&kernfs_mutex) __acquires(&kernfs_mutex)
{
        struct kernfs_root *root = kernfs_root(kn);

        lockdep_assert_held(&kernfs_mutex);
        /* caller must have deactivated @kn already */
        WARN_ON_ONCE(atomic_read(&kn->active) >= 0);

        /*
         * We want to go through the active ref lockdep annotation at least
         * once for all node removals, but the lockdep annotation can't be
         * nested inside kernfs_mutex and deactivation can't make forward
         * progress if we keep dropping the mutex.  Use
         * KERNFS_JUST_DEACTIVATED to force the slow path once for each
         * deactivation if lockdep is enabled.
         */
        if ((!kernfs_lockdep(kn) || !(kn->flags & KERNFS_JUST_DEACTIVATED)) &&
            atomic_read(&kn->active) == KN_DEACTIVATED_BIAS)
                return false;

        kn->flags &= ~KERNFS_JUST_DEACTIVATED;
        /* drop the mutex so active-ref holders can finish and release */
        mutex_unlock(&kernfs_mutex);

        if (kernfs_lockdep(kn)) {
                rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_);
                if (atomic_read(&kn->active) != KN_DEACTIVATED_BIAS)
                        lock_contended(&kn->dep_map, _RET_IP_);
        }

        /* block until every outstanding active ref has been put */
        wait_event(root->deactivate_waitq,
                   atomic_read(&kn->active) == KN_DEACTIVATED_BIAS);

        if (kernfs_lockdep(kn)) {
                lock_acquired(&kn->dep_map, _RET_IP_);
                rwsem_release(&kn->dep_map, 1, _RET_IP_);
        }

        mutex_lock(&kernfs_mutex);
        return true;
}

/**
 * kernfs_get - get a reference count on a kernfs_node
 * @kn: the target kernfs_node
 */
void kernfs_get(struct kernfs_node *kn)
{
        if (kn) {
                WARN_ON(!atomic_read(&kn->count));
                atomic_inc(&kn->count);
        }
}
EXPORT_SYMBOL_GPL(kernfs_get);

/**
 * kernfs_put - put a reference count on a kernfs_node
 * @kn: the target kernfs_node
 *
 * Put a reference count of @kn and destroy it if it reached zero.
 * Dropping the last ref on a node also drops a ref on its parent,
 * which is handled iteratively via the repeat label below.
 */
void kernfs_put(struct kernfs_node *kn)
{
        struct kernfs_node *parent;
        struct kernfs_root *root;

        if (!kn || !atomic_dec_and_test(&kn->count))
                return;
        root = kernfs_root(kn);
 repeat:
        /*
         * Moving/renaming is always done while holding reference.
         * kn->parent won't change beneath us.
         */
        parent = kn->parent;

        WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS,
                  "kernfs_put: %s/%s: released with incorrect active_ref %d\n",
                  parent ? parent->name : "", kn->name, atomic_read(&kn->active));

        if (kernfs_type(kn) == KERNFS_LINK)
                kernfs_put(kn->symlink.target_kn);
        /* the name is only owned (and freed) when it was kstrdup'd */
        if (!(kn->flags & KERNFS_STATIC_NAME))
                kfree(kn->name);
        if (kn->iattr) {
                if (kn->iattr->ia_secdata)
                        security_release_secctx(kn->iattr->ia_secdata,
                                                kn->iattr->ia_secdata_len);
                simple_xattrs_free(&kn->iattr->xattrs);
        }
        kfree(kn->iattr);
        ida_simple_remove(&root->ino_ida, kn->ino);
        kmem_cache_free(kernfs_node_cache, kn);

        kn = parent;
        if (kn) {
                if (atomic_dec_and_test(&kn->count))
                        goto repeat;
        } else {
                /* just released the root kn, free @root too */
                ida_destroy(&root->ino_ida);
                kfree(root);
        }
}
EXPORT_SYMBOL_GPL(kernfs_put);

/* d_revalidate: decide whether a cached dentry still matches its node */
static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
{
        struct kernfs_node *kn;

        if (flags & LOOKUP_RCU)
                return -ECHILD;

        /* Always perform fresh lookup for negatives */
        if (!dentry->d_inode)
                goto out_bad_unlocked;

        kn = dentry->d_fsdata;
        mutex_lock(&kernfs_mutex);

        /* Force fresh lookup if removed */
        if (kn->parent && RB_EMPTY_NODE(&kn->rb))
                goto out_bad;

        /* The kernfs node has been moved? */
        if (dentry->d_parent->d_fsdata != kn->parent)
                goto out_bad;

        /* The kernfs node has been renamed */
        if (strcmp(dentry->d_name.name, kn->name) != 0)
                goto out_bad;

        /* The kernfs node has been moved to a different namespace */
        if (kn->parent && kernfs_ns_enabled(kn->parent) &&
            kernfs_info(dentry->d_sb)->ns != kn->ns)
                goto out_bad;

        mutex_unlock(&kernfs_mutex);
out_valid:
        return 1;
out_bad:
        mutex_unlock(&kernfs_mutex);
out_bad_unlocked:
        /*
         * @dentry doesn't match the underlying kernfs node, drop the
         * dentry and force lookup.  If we have submounts we must allow the
         * vfs caches to lie about the state of the filesystem to prevent
         * leaks and other nasty things, so use check_submounts_and_drop()
         * instead of d_drop().
         */
        if (check_submounts_and_drop(dentry) != 0)
                goto out_valid;

        return 0;
}

static void kernfs_dop_release(struct dentry *dentry)
{
        kernfs_put(dentry->d_fsdata);
}

const struct dentry_operations kernfs_dops = {
        .d_revalidate   = kernfs_dop_revalidate,
        .d_release      = kernfs_dop_release,
};

/*
 * Allocate and minimally initialize a new node.  The node starts
 * deactivated (KN_DEACTIVATED_BIAS, deact_depth 1) and unlinked; it is
 * activated when added to a parent by kernfs_add_one().  Unless
 * KERNFS_STATIC_NAME is set, @name is duplicated and owned by the node.
 */
struct kernfs_node *kernfs_new_node(struct kernfs_root *root, const char *name,
                                    umode_t mode, unsigned flags)
{
        char *dup_name = NULL;
        struct kernfs_node *kn;
        int ret;

        if (!(flags & KERNFS_STATIC_NAME)) {
                name = dup_name = kstrdup(name, GFP_KERNEL);
                if (!name)
                        return NULL;
        }

        kn = kmem_cache_zalloc(kernfs_node_cache, GFP_KERNEL);
        if (!kn)
                goto err_out1;

        ret = ida_simple_get(&root->ino_ida, 1, 0, GFP_KERNEL);
        if (ret < 0)
                goto err_out2;
        kn->ino = ret;

        atomic_set(&kn->count, 1);
        atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
        kn->deact_depth = 1;
        RB_CLEAR_NODE(&kn->rb);

        kn->name = name;
        kn->mode = mode;
        kn->flags = flags;

        return kn;

 err_out2:
        kmem_cache_free(kernfs_node_cache, kn);
 err_out1:
        /* kfree(NULL) is a no-op when the name was static */
        kfree(dup_name);
        return NULL;
}

/**
 * kernfs_add_one - add kernfs_node to parent without warning
 * @kn: kernfs_node to be added
 * @parent: the parent kernfs_node to add @kn to
 *
 * Get @parent and set @kn->parent to it and increment nlink of the
 * parent inode if @kn is a directory and link into the children list
 * of the parent.
 *
 * RETURNS:
 * 0 on success, -EEXIST if entry with the given name already
 * exists.
 */
int kernfs_add_one(struct kernfs_node *kn, struct kernfs_node *parent)
{
        struct kernfs_iattrs *ps_iattr;
        bool has_ns;
        int ret;

        /* hold an active ref on @parent so it can't be removed under us */
        if (!kernfs_get_active(parent))
                return -ENOENT;

        mutex_lock(&kernfs_mutex);

        ret = -EINVAL;
        has_ns = kernfs_ns_enabled(parent);
        if (WARN(has_ns != (bool)kn->ns, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
                 has_ns ? "required" : "invalid", parent->name, kn->name))
                goto out_unlock;

        if (kernfs_type(parent) != KERNFS_DIR)
                goto out_unlock;

        kn->hash = kernfs_name_hash(kn->name, kn->ns);
        kn->parent = parent;
        kernfs_get(parent);

        ret = kernfs_link_sibling(kn);
        if (ret)
                goto out_unlock;

        /* Update timestamps on the parent */
        ps_iattr = parent->iattr;
        if (ps_iattr) {
                struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
                ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
        }

        /* Mark the entry added into directory tree (activate the node) */
        atomic_sub(KN_DEACTIVATED_BIAS, &kn->active);
        kn->deact_depth--;
        ret = 0;
out_unlock:
        mutex_unlock(&kernfs_mutex);
        kernfs_put_active(parent);
        return ret;
}

/**
 * kernfs_find_ns - find kernfs_node with the given name
 * @parent: kernfs_node to search under
 * @name: name to look for
 * @ns: the namespace tag to use
 *
 * Look for kernfs_node with name @name under @parent.  Returns pointer to
 * the found kernfs_node on success, %NULL on failure.
481 */ 482static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent, 483 const unsigned char *name, 484 const void *ns) 485{ 486 struct rb_node *node = parent->dir.children.rb_node; 487 bool has_ns = kernfs_ns_enabled(parent); 488 unsigned int hash; 489 490 lockdep_assert_held(&kernfs_mutex); 491 492 if (has_ns != (bool)ns) { 493 WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n", 494 has_ns ? "required" : "invalid", parent->name, name); 495 return NULL; 496 } 497 498 hash = kernfs_name_hash(name, ns); 499 while (node) { 500 struct kernfs_node *kn; 501 int result; 502 503 kn = rb_to_kn(node); 504 result = kernfs_name_compare(hash, name, ns, kn); 505 if (result < 0) 506 node = node->rb_left; 507 else if (result > 0) 508 node = node->rb_right; 509 else 510 return kn; 511 } 512 return NULL; 513} 514 515/** 516 * kernfs_find_and_get_ns - find and get kernfs_node with the given name 517 * @parent: kernfs_node to search under 518 * @name: name to look for 519 * @ns: the namespace tag to use 520 * 521 * Look for kernfs_node with name @name under @parent and get a reference 522 * if found. This function may sleep and returns pointer to the found 523 * kernfs_node on success, %NULL on failure. 524 */ 525struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent, 526 const char *name, const void *ns) 527{ 528 struct kernfs_node *kn; 529 530 mutex_lock(&kernfs_mutex); 531 kn = kernfs_find_ns(parent, name, ns); 532 kernfs_get(kn); 533 mutex_unlock(&kernfs_mutex); 534 535 return kn; 536} 537EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns); 538 539/** 540 * kernfs_create_root - create a new kernfs hierarchy 541 * @kdops: optional directory syscall operations for the hierarchy 542 * @priv: opaque data associated with the new directory 543 * 544 * Returns the root of the new hierarchy on success, ERR_PTR() value on 545 * failure. 
546 */ 547struct kernfs_root *kernfs_create_root(struct kernfs_dir_ops *kdops, void *priv) 548{ 549 struct kernfs_root *root; 550 struct kernfs_node *kn; 551 552 root = kzalloc(sizeof(*root), GFP_KERNEL); 553 if (!root) 554 return ERR_PTR(-ENOMEM); 555 556 ida_init(&root->ino_ida); 557 558 kn = kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO, KERNFS_DIR); 559 if (!kn) { 560 ida_destroy(&root->ino_ida); 561 kfree(root); 562 return ERR_PTR(-ENOMEM); 563 } 564 565 atomic_sub(KN_DEACTIVATED_BIAS, &kn->active); 566 kn->deact_depth--; 567 kn->priv = priv; 568 kn->dir.root = root; 569 570 root->dir_ops = kdops; 571 root->kn = kn; 572 init_waitqueue_head(&root->deactivate_waitq); 573 574 return root; 575} 576 577/** 578 * kernfs_destroy_root - destroy a kernfs hierarchy 579 * @root: root of the hierarchy to destroy 580 * 581 * Destroy the hierarchy anchored at @root by removing all existing 582 * directories and destroying @root. 583 */ 584void kernfs_destroy_root(struct kernfs_root *root) 585{ 586 kernfs_remove(root->kn); /* will also free @root */ 587} 588 589/** 590 * kernfs_create_dir_ns - create a directory 591 * @parent: parent in which to create a new directory 592 * @name: name of the new directory 593 * @mode: mode of the new directory 594 * @priv: opaque data associated with the new directory 595 * @ns: optional namespace tag of the directory 596 * 597 * Returns the created node on success, ERR_PTR() value on failure. 
598 */ 599struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, 600 const char *name, umode_t mode, 601 void *priv, const void *ns) 602{ 603 struct kernfs_node *kn; 604 int rc; 605 606 /* allocate */ 607 kn = kernfs_new_node(kernfs_root(parent), name, mode | S_IFDIR, 608 KERNFS_DIR); 609 if (!kn) 610 return ERR_PTR(-ENOMEM); 611 612 kn->dir.root = parent->dir.root; 613 kn->ns = ns; 614 kn->priv = priv; 615 616 /* link in */ 617 rc = kernfs_add_one(kn, parent); 618 if (!rc) 619 return kn; 620 621 kernfs_put(kn); 622 return ERR_PTR(rc); 623} 624 625static struct dentry *kernfs_iop_lookup(struct inode *dir, 626 struct dentry *dentry, 627 unsigned int flags) 628{ 629 struct dentry *ret; 630 struct kernfs_node *parent = dentry->d_parent->d_fsdata; 631 struct kernfs_node *kn; 632 struct inode *inode; 633 const void *ns = NULL; 634 635 mutex_lock(&kernfs_mutex); 636 637 if (kernfs_ns_enabled(parent)) 638 ns = kernfs_info(dir->i_sb)->ns; 639 640 kn = kernfs_find_ns(parent, dentry->d_name.name, ns); 641 642 /* no such entry */ 643 if (!kn) { 644 ret = NULL; 645 goto out_unlock; 646 } 647 kernfs_get(kn); 648 dentry->d_fsdata = kn; 649 650 /* attach dentry and inode */ 651 inode = kernfs_get_inode(dir->i_sb, kn); 652 if (!inode) { 653 ret = ERR_PTR(-ENOMEM); 654 goto out_unlock; 655 } 656 657 /* instantiate and hash dentry */ 658 ret = d_materialise_unique(dentry, inode); 659 out_unlock: 660 mutex_unlock(&kernfs_mutex); 661 return ret; 662} 663 664static int kernfs_iop_mkdir(struct inode *dir, struct dentry *dentry, 665 umode_t mode) 666{ 667 struct kernfs_node *parent = dir->i_private; 668 struct kernfs_dir_ops *kdops = kernfs_root(parent)->dir_ops; 669 670 if (!kdops || !kdops->mkdir) 671 return -EPERM; 672 673 return kdops->mkdir(parent, dentry->d_name.name, mode); 674} 675 676static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry) 677{ 678 struct kernfs_node *kn = dentry->d_fsdata; 679 struct kernfs_dir_ops *kdops = kernfs_root(kn)->dir_ops; 
680 681 if (!kdops || !kdops->rmdir) 682 return -EPERM; 683 684 return kdops->rmdir(kn); 685} 686 687static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry, 688 struct inode *new_dir, struct dentry *new_dentry) 689{ 690 struct kernfs_node *kn = old_dentry->d_fsdata; 691 struct kernfs_node *new_parent = new_dir->i_private; 692 struct kernfs_dir_ops *kdops = kernfs_root(kn)->dir_ops; 693 694 if (!kdops || !kdops->rename) 695 return -EPERM; 696 697 return kdops->rename(kn, new_parent, new_dentry->d_name.name); 698} 699 700const struct inode_operations kernfs_dir_iops = { 701 .lookup = kernfs_iop_lookup, 702 .permission = kernfs_iop_permission, 703 .setattr = kernfs_iop_setattr, 704 .getattr = kernfs_iop_getattr, 705 .setxattr = kernfs_iop_setxattr, 706 .removexattr = kernfs_iop_removexattr, 707 .getxattr = kernfs_iop_getxattr, 708 .listxattr = kernfs_iop_listxattr, 709 710 .mkdir = kernfs_iop_mkdir, 711 .rmdir = kernfs_iop_rmdir, 712 .rename = kernfs_iop_rename, 713}; 714 715static struct kernfs_node *kernfs_leftmost_descendant(struct kernfs_node *pos) 716{ 717 struct kernfs_node *last; 718 719 while (true) { 720 struct rb_node *rbn; 721 722 last = pos; 723 724 if (kernfs_type(pos) != KERNFS_DIR) 725 break; 726 727 rbn = rb_first(&pos->dir.children); 728 if (!rbn) 729 break; 730 731 pos = rb_to_kn(rbn); 732 } 733 734 return last; 735} 736 737/** 738 * kernfs_next_descendant_post - find the next descendant for post-order walk 739 * @pos: the current position (%NULL to initiate traversal) 740 * @root: kernfs_node whose descendants to walk 741 * 742 * Find the next descendant to visit for post-order traversal of @root's 743 * descendants. @root is included in the iteration and the last node to be 744 * visited. 
 */
static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
                                                       struct kernfs_node *root)
{
        struct rb_node *rbn;

        lockdep_assert_held(&kernfs_mutex);

        /* if first iteration, visit leftmost descendant which may be root */
        if (!pos)
                return kernfs_leftmost_descendant(root);

        /* if we visited @root, we're done */
        if (pos == root)
                return NULL;

        /* if there's an unvisited sibling, visit its leftmost descendant */
        rbn = rb_next(&pos->rb);
        if (rbn)
                return kernfs_leftmost_descendant(rb_to_kn(rbn));

        /* no sibling left, visit parent */
        return pos->parent;
}

/*
 * Deactivate every node in @kn's subtree (including @kn) and wait for
 * all in-flight active references to drain.  deact_depth counts nested
 * deactivations so only the outermost one applies the bias.
 */
static void __kernfs_deactivate(struct kernfs_node *kn)
{
        struct kernfs_node *pos;

        lockdep_assert_held(&kernfs_mutex);

        /* prevent any new usage under @kn by deactivating all nodes */
        pos = NULL;
        while ((pos = kernfs_next_descendant_post(pos, kn))) {
                if (!pos->deact_depth++) {
                        WARN_ON_ONCE(atomic_read(&pos->active) < 0);
                        atomic_add(KN_DEACTIVATED_BIAS, &pos->active);
                        pos->flags |= KERNFS_JUST_DEACTIVATED;
                }
        }

        /*
         * Drain the subtree.  If kernfs_drain() blocked to drain, which is
         * indicated by %true return, it temporarily released kernfs_mutex
         * and the rbtree might have been modified in between breaking our
         * future walk.  Restart the walk after each %true return.
         */
        pos = NULL;
        while ((pos = kernfs_next_descendant_post(pos, kn))) {
                bool drained;

                /* pin @pos across the drain which may drop kernfs_mutex */
                kernfs_get(pos);
                drained = kernfs_drain(pos);
                kernfs_put(pos);
                if (drained)
                        pos = NULL;
        }
}

/*
 * Undo __kernfs_deactivate() on @kn's subtree.  Only the outermost
 * reactivation (deact_depth reaching zero) removes the bias.
 */
static void __kernfs_reactivate(struct kernfs_node *kn)
{
        struct kernfs_node *pos;

        lockdep_assert_held(&kernfs_mutex);

        pos = NULL;
        while ((pos = kernfs_next_descendant_post(pos, kn))) {
                if (!--pos->deact_depth) {
                        WARN_ON_ONCE(atomic_read(&pos->active) >= 0);
                        atomic_sub(KN_DEACTIVATED_BIAS, &pos->active);
                }
                /* unbalanced deactivate/reactivate pairing */
                WARN_ON_ONCE(pos->deact_depth < 0);
        }

        /* some nodes reactivated, kick get_active waiters */
        wake_up_all(&kernfs_root(kn)->deactivate_waitq);
}

static void __kernfs_deactivate_self(struct kernfs_node *kn)
{
        /*
         * Take ourself out of the active ref dependency chain and
         * deactivate.  If we're called without an active ref, lockdep will
         * complain.
         */
        kernfs_put_active(kn);
        __kernfs_deactivate(kn);
}

static void __kernfs_reactivate_self(struct kernfs_node *kn)
{
        __kernfs_reactivate(kn);
        /*
         * Restore active ref dropped by deactivate_self() so that it's
         * balanced on return.  put_active() will soon be called on @kn, so
         * this can't break anything regardless of @kn's state.
         */
        atomic_inc(&kn->active);
        if (kernfs_lockdep(kn))
                rwsem_acquire(&kn->dep_map, 0, 1, _RET_IP_);
}

/**
 * kernfs_deactivate - deactivate subtree of a node
 * @kn: kernfs_node to deactivate subtree of
 *
 * Deactivate the subtree of @kn.  On return, there's no active operation
 * going on under @kn and creation or renaming of a node under @kn is
 * blocked until @kn is reactivated or removed.  This function can be
 * called multiple times and nests properly.  Each invocation should be
 * paired with kernfs_reactivate().
856 * 857 * For a kernfs user which uses simple locking, the subsystem lock would 858 * nest inside active reference. This becomes problematic if the user 859 * tries to remove nodes while holding the subystem lock as it would create 860 * a reverse locking dependency from the subsystem lock to active ref. 861 * This function can be used to break such reverse dependency. The user 862 * can call this function outside the subsystem lock and then proceed to 863 * invoke kernfs_remove() while holding the subsystem lock without 864 * introducing such reverse dependency. 865 */ 866void kernfs_deactivate(struct kernfs_node *kn) 867{ 868 mutex_lock(&kernfs_mutex); 869 __kernfs_deactivate(kn); 870 mutex_unlock(&kernfs_mutex); 871} 872 873/** 874 * kernfs_reactivate - reactivate subtree of a node 875 * @kn: kernfs_node to reactivate subtree of 876 * 877 * Undo kernfs_deactivate(). 878 */ 879void kernfs_reactivate(struct kernfs_node *kn) 880{ 881 mutex_lock(&kernfs_mutex); 882 __kernfs_reactivate(kn); 883 mutex_unlock(&kernfs_mutex); 884} 885 886/** 887 * kernfs_deactivate_self - deactivate subtree of a node from its own method 888 * @kn: the self kernfs_node to deactivate subtree of 889 * 890 * The caller must be running off of a kernfs operation which is invoked 891 * with an active reference - e.g. one of kernfs_ops. Once this function 892 * is called, @kn may be removed by someone else while the enclosing method 893 * is in progress. Other than that, this function is equivalent to 894 * kernfs_deactivate() and should be paired with kernfs_reactivate_self(). 895 */ 896void kernfs_deactivate_self(struct kernfs_node *kn) 897{ 898 mutex_lock(&kernfs_mutex); 899 __kernfs_deactivate_self(kn); 900 mutex_unlock(&kernfs_mutex); 901} 902 903/** 904 * kernfs_reactivate_self - reactivate subtree of a node from its own method 905 * @kn: the self kernfs_node to reactivate subtree of 906 * 907 * Undo kernfs_deactivate_self(). 
908 */ 909void kernfs_reactivate_self(struct kernfs_node *kn) 910{ 911 mutex_lock(&kernfs_mutex); 912 __kernfs_reactivate_self(kn); 913 mutex_unlock(&kernfs_mutex); 914} 915 916static void __kernfs_remove(struct kernfs_node *kn) 917{ 918 struct kernfs_root *root = kernfs_root(kn); 919 struct kernfs_node *pos; 920 921 lockdep_assert_held(&kernfs_mutex); 922 923 if (!kn) 924 return; 925 926 pr_debug("kernfs %s: removing\n", kn->name); 927 928 __kernfs_deactivate(kn); 929 930 /* unlink the subtree node-by-node */ 931 do { 932 pos = kernfs_leftmost_descendant(kn); 933 934 /* 935 * We're gonna release kernfs_mutex to unmap bin files, 936 * Make sure @pos doesn't go away inbetween. 937 */ 938 kernfs_get(pos); 939 940 /* 941 * This must be come before unlinking; otherwise, when 942 * there are multiple removers, some may finish before 943 * unmapping is complete. 944 */ 945 if (pos->flags & KERNFS_HAS_MMAP) { 946 mutex_unlock(&kernfs_mutex); 947 kernfs_unmap_file(pos); 948 mutex_lock(&kernfs_mutex); 949 } 950 951 /* 952 * kernfs_unlink_sibling() succeeds once per node. Use it 953 * to decide who's responsible for cleanups. 954 */ 955 if (!pos->parent || kernfs_unlink_sibling(pos)) { 956 struct kernfs_iattrs *ps_iattr = 957 pos->parent ? pos->parent->iattr : NULL; 958 959 /* update timestamps on the parent */ 960 if (ps_iattr) { 961 ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME; 962 ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME; 963 } 964 965 kernfs_put(pos); 966 } 967 968 kernfs_put(pos); 969 } while (pos != kn); 970 971 /* some nodes killed, kick get_active waiters */ 972 wake_up_all(&root->deactivate_waitq); 973} 974 975/** 976 * kernfs_remove - remove a kernfs_node recursively 977 * @kn: the kernfs_node to remove 978 * 979 * Remove @kn along with all its subdirectories and files. 
 */
void kernfs_remove(struct kernfs_node *kn)
{
        mutex_lock(&kernfs_mutex);
        __kernfs_remove(kn);
        mutex_unlock(&kernfs_mutex);
}

/**
 * kernfs_remove_self - remove a kernfs_node from its own method
 * @kn: the self kernfs_node to remove
 *
 * The caller must be running off of a kernfs operation which is invoked
 * with an active reference - e.g. one of kernfs_ops.  This can be used to
 * implement a file operation which deletes itself.
 *
 * For example, the "delete" file for a sysfs device directory can be
 * implemented by invoking kernfs_remove_self() on the "delete" file
 * itself.  This function breaks the circular dependency of trying to
 * deactivate self while holding an active ref itself.  It isn't necessary
 * to modify the usual removal path to use kernfs_remove_self().  The
 * "delete" implementation can simply invoke kernfs_remove_self() on self
 * before proceeding with the usual removal path.  kernfs will ignore later
 * kernfs_remove() on self.
 *
 * kernfs_remove_self() can be called multiple times concurrently on the
 * same kernfs_node.  Only the first one actually performs removal and
 * returns %true.  All others will wait until the kernfs operation which
 * won self-removal finishes and return %false.  Note that the losers wait
 * for the completion of not only the winning kernfs_remove_self() but also
 * the whole kernfs_ops which won the arbitration.  This can be used to
 * guarantee, for example, all concurrent writes to a "delete" file to
 * finish only after the whole operation is complete.
 */
bool kernfs_remove_self(struct kernfs_node *kn)
{
        bool ret;

        mutex_lock(&kernfs_mutex);
        __kernfs_deactivate_self(kn);

        /*
         * SUICIDAL is used to arbitrate among competing invocations.  Only
         * the first one will actually perform removal.  When the removal
         * is complete, SUICIDED is set and the active ref is restored
         * while holding kernfs_mutex.  The ones which lost arbitration
         * waits for SUICIDED && drained which can happen only after the
         * enclosing kernfs operation which executed the winning instance
         * of kernfs_remove_self() finished.
         */
        if (!(kn->flags & KERNFS_SUICIDAL)) {
                kn->flags |= KERNFS_SUICIDAL;
                __kernfs_remove(kn);
                kn->flags |= KERNFS_SUICIDED;
                ret = true;
        } else {
                wait_queue_head_t *waitq = &kernfs_root(kn)->deactivate_waitq;
                DEFINE_WAIT(wait);

                /* loser: wait for the winner's whole operation to finish */
                while (true) {
                        prepare_to_wait(waitq, &wait, TASK_UNINTERRUPTIBLE);

                        if ((kn->flags & KERNFS_SUICIDED) &&
                            atomic_read(&kn->active) == KN_DEACTIVATED_BIAS)
                                break;

                        /* drop the mutex so the winner can make progress */
                        mutex_unlock(&kernfs_mutex);
                        schedule();
                        mutex_lock(&kernfs_mutex);
                }
                finish_wait(waitq, &wait);
                WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb));
                ret = false;
        }

        /*
         * This must be done while holding kernfs_mutex; otherwise, waiting
         * for SUICIDED && deactivated could finish prematurely.
         */
        __kernfs_reactivate_self(kn);
        mutex_unlock(&kernfs_mutex);
        return ret;
}

/**
 * kernfs_remove_by_name_ns - find a kernfs_node by name and remove it
 * @parent: parent of the target
 * @name: name of the kernfs_node to remove
 * @ns: namespace tag of the kernfs_node to remove
 *
 * Look for the kernfs_node with @name and @ns under @parent and remove it.
 * Returns 0 on success, -ENOENT if such entry doesn't exist.
 */
int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
                             const void *ns)
{
        struct kernfs_node *kn;

        if (!parent) {
                WARN(1, KERN_WARNING "kernfs: can not remove '%s', no directory\n",
                     name);
                return -ENOENT;
        }

        mutex_lock(&kernfs_mutex);

        kn = kernfs_find_ns(parent, name, ns);
        if (kn)
                __kernfs_remove(kn);

        mutex_unlock(&kernfs_mutex);

        if (kn)
                return 0;
        else
                return -ENOENT;
}

/**
 * kernfs_rename_ns - move and rename a kernfs_node
 * @kn: target node
 * @new_parent: new parent to put @kn under
 * @new_name: new name
 * @new_ns: new namespace tag
 *
 * Returns 0 on success, -ENOENT if either node is being removed,
 * -EEXIST if the target name already exists under @new_parent, or
 * -ENOMEM if duplicating @new_name fails.
 */
int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
                     const char *new_name, const void *new_ns)
{
        int error;

        /* pin both nodes active so neither can be drained under us */
        error = -ENOENT;
        if (!kernfs_get_active(new_parent))
                goto out;
        if (!kernfs_get_active(kn))
                goto out_put_new_parent;

        mutex_lock(&kernfs_mutex);

        error = 0;
        if ((kn->parent == new_parent) && (kn->ns == new_ns) &&
            (strcmp(kn->name, new_name) == 0))
                goto out_unlock;        /* nothing to rename */

        error = -EEXIST;
        if (kernfs_find_ns(new_parent, new_name, new_ns))
                goto out_unlock;

        /* rename kernfs_node */
        if (strcmp(kn->name, new_name) != 0) {
                error = -ENOMEM;
                new_name = kstrdup(new_name, GFP_KERNEL);
                if (!new_name)
                        goto out_unlock;

                /* a static name is never freed; the dup'd one now is owned */
                if (kn->flags & KERNFS_STATIC_NAME)
                        kn->flags &= ~KERNFS_STATIC_NAME;
                else
                        kfree(kn->name);

                kn->name = new_name;
        }

        /*
         * Move to the appropriate place in the appropriate directories
         * rbtree.  Re-insertion cannot collide: the destination slot was
         * verified free above under the same kernfs_mutex hold.
         */
        kernfs_unlink_sibling(kn);
        kernfs_get(new_parent);
        kernfs_put(kn->parent);
        kn->ns = new_ns;
        kn->hash = kernfs_name_hash(kn->name, kn->ns);
        kn->parent = new_parent;
        kernfs_link_sibling(kn);

        error = 0;
out_unlock:
        mutex_unlock(&kernfs_mutex);
        kernfs_put_active(kn);
out_put_new_parent:
        kernfs_put_active(new_parent);
out:
        return error;
}

/* Relationship between s_mode and the DT_xxx types */
static inline unsigned char dt_type(struct kernfs_node *kn)
{
        return (kn->mode >> 12) & 15;
}

static int kernfs_dir_fop_release(struct inode *inode, struct file *filp)
{
        /* drop the cursor node pinned by readdir, if any */
        kernfs_put(filp->private_data);
        return 0;
}

/*
 * Revalidate (or re-find by hash) the readdir cursor @pos and skip to the
 * first entry at/after it that belongs to namespace @ns.  Consumes the
 * reference held on @pos.
 */
static struct kernfs_node *kernfs_dir_pos(const void *ns,
        struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos)
{
        if (pos) {
                int valid = pos->parent == parent && hash == pos->hash;
                kernfs_put(pos);
                if (!valid)
                        pos = NULL;
        }
        /* hashes 0, 1 and INT_MAX are reserved for . .. and EOF */
        if (!pos && (hash > 1) && (hash < INT_MAX)) {
                struct rb_node *node = parent->dir.children.rb_node;
                while (node) {
                        pos = rb_to_kn(node);

                        if (hash < pos->hash)
                                node = node->rb_left;
                        else if (hash > pos->hash)
                                node = node->rb_right;
                        else
                                break;
                }
        }
        /* Skip over entries in the wrong namespace */
        while (pos && pos->ns != ns) {
                struct rb_node *node = rb_next(&pos->rb);
                if (!node)
                        pos = NULL;
                else
                        pos = rb_to_kn(node);
        }
        return pos;
}

/*
 * Advance the readdir cursor past @pos to the next same-namespace entry.
 * NOTE(review): the @ino parameter actually carries the readdir hash
 * position (ctx->pos), not an inode number — name kept for token fidelity.
 */
static struct kernfs_node *kernfs_dir_next_pos(const void *ns,
        struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos)
{
        pos = kernfs_dir_pos(ns, parent, ino, pos);
        if (pos)
                do {
                        struct rb_node *node = rb_next(&pos->rb);
                        if (!node)
                                pos = NULL;
                        else
                                pos = rb_to_kn(node);
                } while (pos && pos->ns != ns);
        return pos;
}

/* ->iterate: emit directory entries, dropping kernfs_mutex around dir_emit */
static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
{
        struct dentry *dentry = file->f_path.dentry;
        struct kernfs_node *parent = dentry->d_fsdata;
        struct kernfs_node *pos = file->private_data;
        const void *ns = NULL;

        if (!dir_emit_dots(file, ctx))
                return 0;
        mutex_lock(&kernfs_mutex);

        if (kernfs_ns_enabled(parent))
                ns = kernfs_info(dentry->d_sb)->ns;

        for (pos = kernfs_dir_pos(ns, parent, ctx->pos, pos);
             pos;
             pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) {
                const char *name = pos->name;
                unsigned int type = dt_type(pos);
                int len = strlen(name);
                ino_t ino = pos->ino;

                ctx->pos = pos->hash;
                file->private_data = pos;
                /* pin the cursor while kernfs_mutex is dropped below */
                kernfs_get(pos);

                mutex_unlock(&kernfs_mutex);
                if (!dir_emit(ctx, name, len, ino, type))
                        return 0;
                mutex_lock(&kernfs_mutex);
        }
        mutex_unlock(&kernfs_mutex);
        file->private_data = NULL;
        ctx->pos = INT_MAX;
        return 0;
}

static loff_t kernfs_dir_fop_llseek(struct file *file, loff_t offset,
                                    int whence)
{
        struct inode *inode = file_inode(file);
        loff_t ret;

        mutex_lock(&inode->i_mutex);
        ret = generic_file_llseek(file, offset, whence);
        mutex_unlock(&inode->i_mutex);

        return ret;
}

const struct file_operations kernfs_dir_fops = {
        .read           = generic_read_dir,
        .iterate        = kernfs_fop_readdir,
        .release        = kernfs_dir_fop_release,
        .llseek         = kernfs_dir_fop_llseek,
};