/*
 * In-kernel transcendent memory (generic implementation)
 *
 * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp.
 *
 * The primary purpose of Transcedent Memory ("tmem") is to map object-oriented
 * "handles" (triples containing a pool id, and object id, and an index), to
 * pages in a page-accessible memory (PAM).  Tmem references the PAM pages via
 * an abstract "pampd" (PAM page-descriptor), which can be operated on by a
 * set of functions (pamops).  Each pampd contains some representation of
 * PAGE_SIZE bytes worth of data. Tmem must support potentially millions of
 * pages and must be able to insert, find, and delete these pages at a
 * potential frequency of thousands per second concurrently across many CPUs,
 * (and, if used with KVM, across many vcpus across many guests).
 * Tmem is tracked with a hierarchy of data structures, organized by
 * the elements in a handle-tuple: pool_id, object_id, and page index.
 * One or more "clients" (e.g. guests) each provide one or more tmem_pools.
 * Each pool, contains a hash table of rb_trees of tmem_objs.  Each
 * tmem_obj contains a radix-tree-like tree of pointers, with intermediate
 * nodes called tmem_objnodes.  Each leaf pointer in this tree points to
 * a pampd, which is accessible only through a small set of callbacks
 * registered by the PAM implementation (see tmem_register_pamops). Tmem
 * does all memory allocation via a set of callbacks registered by the tmem
 * host implementation (e.g. see tmem_register_hostops).
 */

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>

#include "tmem.h"

/* data structure sentinels used for debugging... see tmem.h */
#define POOL_SENTINEL 0x87658765
#define OBJ_SENTINEL 0x12345678
#define OBJNODE_SENTINEL 0xfedcba09

/*
 * A tmem host implementation must use this function to register callbacks
 * for memory allocation.
 */
static struct tmem_hostops tmem_hostops;

static void tmem_objnode_tree_init(void);

/*
 * Copy the host's callback table by value and set up the static
 * height-to-max-index table used by the objnode radix trees.  Called once
 * by the host before any other tmem entry point is used.
 */
void tmem_register_hostops(struct tmem_hostops *m)
{
	tmem_objnode_tree_init();
	tmem_hostops = *m;
}

/*
 * A tmem host implementation must use this function to register
 * callbacks for a page-accessible memory (PAM) implementation
 */
static struct tmem_pamops tmem_pamops;

/* copy the PAM callback table by value; caller retains ownership of *m */
void tmem_register_pamops(struct tmem_pamops *m)
{
	tmem_pamops = *m;
}

/*
 * Oid's are potentially very sparse and tmem_objs may have an indeterminately
 * short life, being added and deleted at a relatively high frequency.
 * So an rb_tree is an ideal data structure to manage tmem_objs.  But because
 * of the potentially huge number of tmem_objs, each pool manages a hashtable
 * of rb_trees to reduce search, insert, delete, and rebalancing time.
 * Each hashbucket also has a lock to manage concurrent access.
 *
 * The following routines manage tmem_objs.  When any tmem_obj is accessed,
 * the hashbucket lock must be held.
73 */ 74 75/* searches for object==oid in pool, returns locked object if found */ 76static struct tmem_obj *tmem_obj_find(struct tmem_hashbucket *hb, 77 struct tmem_oid *oidp) 78{ 79 struct rb_node *rbnode; 80 struct tmem_obj *obj; 81 82 rbnode = hb->obj_rb_root.rb_node; 83 while (rbnode) { 84 BUG_ON(RB_EMPTY_NODE(rbnode)); 85 obj = rb_entry(rbnode, struct tmem_obj, rb_tree_node); 86 switch (tmem_oid_compare(oidp, &obj->oid)) { 87 case 0: /* equal */ 88 goto out; 89 case -1: 90 rbnode = rbnode->rb_left; 91 break; 92 case 1: 93 rbnode = rbnode->rb_right; 94 break; 95 } 96 } 97 obj = NULL; 98out: 99 return obj; 100} 101 102static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *); 103 104/* free an object that has no more pampds in it */ 105static void tmem_obj_free(struct tmem_obj *obj, struct tmem_hashbucket *hb) 106{ 107 struct tmem_pool *pool; 108 109 BUG_ON(obj == NULL); 110 ASSERT_SENTINEL(obj, OBJ); 111 BUG_ON(obj->pampd_count > 0); 112 pool = obj->pool; 113 BUG_ON(pool == NULL); 114 if (obj->objnode_tree_root != NULL) /* may be "stump" with no leaves */ 115 tmem_pampd_destroy_all_in_obj(obj); 116 BUG_ON(obj->objnode_tree_root != NULL); 117 BUG_ON((long)obj->objnode_count != 0); 118 atomic_dec(&pool->obj_count); 119 BUG_ON(atomic_read(&pool->obj_count) < 0); 120 INVERT_SENTINEL(obj, OBJ); 121 obj->pool = NULL; 122 tmem_oid_set_invalid(&obj->oid); 123 rb_erase(&obj->rb_tree_node, &hb->obj_rb_root); 124} 125 126/* 127 * initialize, and insert an tmem_object_root (called only if find failed) 128 */ 129static void tmem_obj_init(struct tmem_obj *obj, struct tmem_hashbucket *hb, 130 struct tmem_pool *pool, 131 struct tmem_oid *oidp) 132{ 133 struct rb_root *root = &hb->obj_rb_root; 134 struct rb_node **new = &(root->rb_node), *parent = NULL; 135 struct tmem_obj *this; 136 137 BUG_ON(pool == NULL); 138 atomic_inc(&pool->obj_count); 139 obj->objnode_tree_height = 0; 140 obj->objnode_tree_root = NULL; 141 obj->pool = pool; 142 obj->oid = *oidp; 143 
obj->objnode_count = 0; 144 obj->pampd_count = 0; 145 (*tmem_pamops.new_obj)(obj); 146 SET_SENTINEL(obj, OBJ); 147 while (*new) { 148 BUG_ON(RB_EMPTY_NODE(*new)); 149 this = rb_entry(*new, struct tmem_obj, rb_tree_node); 150 parent = *new; 151 switch (tmem_oid_compare(oidp, &this->oid)) { 152 case 0: 153 BUG(); /* already present; should never happen! */ 154 break; 155 case -1: 156 new = &(*new)->rb_left; 157 break; 158 case 1: 159 new = &(*new)->rb_right; 160 break; 161 } 162 } 163 rb_link_node(&obj->rb_tree_node, parent, new); 164 rb_insert_color(&obj->rb_tree_node, root); 165} 166 167/* 168 * Tmem is managed as a set of tmem_pools with certain attributes, such as 169 * "ephemeral" vs "persistent". These attributes apply to all tmem_objs 170 * and all pampds that belong to a tmem_pool. A tmem_pool is created 171 * or deleted relatively rarely (for example, when a filesystem is 172 * mounted or unmounted. 173 */ 174 175/* flush all data from a pool and, optionally, free it */ 176static void tmem_pool_flush(struct tmem_pool *pool, bool destroy) 177{ 178 struct rb_node *rbnode; 179 struct tmem_obj *obj; 180 struct tmem_hashbucket *hb = &pool->hashbucket[0]; 181 int i; 182 183 BUG_ON(pool == NULL); 184 for (i = 0; i < TMEM_HASH_BUCKETS; i++, hb++) { 185 spin_lock(&hb->lock); 186 rbnode = rb_first(&hb->obj_rb_root); 187 while (rbnode != NULL) { 188 obj = rb_entry(rbnode, struct tmem_obj, rb_tree_node); 189 rbnode = rb_next(rbnode); 190 tmem_pampd_destroy_all_in_obj(obj); 191 tmem_obj_free(obj, hb); 192 (*tmem_hostops.obj_free)(obj, pool); 193 } 194 spin_unlock(&hb->lock); 195 } 196 if (destroy) 197 list_del(&pool->pool_list); 198} 199 200/* 201 * A tmem_obj contains a radix-tree-like tree in which the intermediate 202 * nodes are called tmem_objnodes. 
 (The kernel lib/radix-tree.c implementation
 * is very specialized and tuned for specific uses and is not particularly
 * suited for use from this code, though some code from the core algorithms has
 * been reused, thus the copyright notices below).  Each tmem_objnode contains
 * a set of pointers which point to either a set of intermediate tmem_objnodes
 * or a set of of pampds.
 *
 * Portions Copyright (C) 2001 Momchil Velikov
 * Portions Copyright (C) 2001 Christoph Hellwig
 * Portions Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com>
 */

/* one step of a root-to-leaf walk, recorded so deletion can walk back up */
struct tmem_objnode_tree_path {
	struct tmem_objnode *objnode;
	int offset;
};

/* objnode height_to_maxindex translation */
static unsigned long tmem_objnode_tree_h2max[OBJNODE_TREE_MAX_PATH + 1];

/*
 * Precompute, for each possible tree height, the maximum index that a tree
 * of that height can hold.  Called once from tmem_register_hostops().
 */
static void tmem_objnode_tree_init(void)
{
	unsigned int ht, tmp;

	for (ht = 0; ht < ARRAY_SIZE(tmem_objnode_tree_h2max); ht++) {
		tmp = ht * OBJNODE_TREE_MAP_SHIFT;
		if (tmp >= OBJNODE_TREE_INDEX_BITS)
			tmem_objnode_tree_h2max[ht] = ~0UL;
		else
			tmem_objnode_tree_h2max[ht] =
			    (~0UL >> (OBJNODE_TREE_INDEX_BITS - tmp - 1)) >> 1;
	}
}

/*
 * Allocate an empty objnode via the host callback and account it against
 * the owning object.  Returns NULL if the host allocation fails.
 */
static struct tmem_objnode *tmem_objnode_alloc(struct tmem_obj *obj)
{
	struct tmem_objnode *objnode;

	ASSERT_SENTINEL(obj, OBJ);
	BUG_ON(obj->pool == NULL);
	ASSERT_SENTINEL(obj->pool, POOL);
	objnode = (*tmem_hostops.objnode_alloc)(obj->pool);
	if (unlikely(objnode == NULL))
		goto out;
	objnode->obj = obj;
	SET_SENTINEL(objnode, OBJNODE);
	memset(&objnode->slots, 0, sizeof(objnode->slots));
	objnode->slots_in_use = 0;
	obj->objnode_count++;
out:
	return objnode;
}

/*
 * Free an objnode whose slots are all empty (BUG otherwise), updating the
 * owning object's objnode count and returning the memory to the host.
 */
static void tmem_objnode_free(struct tmem_objnode *objnode)
{
	struct tmem_pool *pool;
	int i;

	BUG_ON(objnode == NULL);
	for (i = 0; i < OBJNODE_TREE_MAP_SIZE; i++)
		BUG_ON(objnode->slots[i] != NULL);
	ASSERT_SENTINEL(objnode, OBJNODE);
	INVERT_SENTINEL(objnode, OBJNODE);	/* poison to catch reuse */
	BUG_ON(objnode->obj == NULL);
	ASSERT_SENTINEL(objnode->obj, OBJ);
	pool = objnode->obj->pool;
	BUG_ON(pool == NULL);
	ASSERT_SENTINEL(pool, POOL);
	objnode->obj->objnode_count--;
	objnode->obj = NULL;
	(*tmem_hostops.objnode_free)(objnode, pool);
}

/*
 * lookup index in object and return associated pampd (or NULL if not found)
 *
 * Returns a pointer to the slot holding the pampd (so callers can replace
 * in place), not the pampd itself; NULL if index exceeds the tree's reach
 * or the path is absent.
 */
static void **__tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index)
{
	unsigned int height, shift;
	struct tmem_objnode **slot = NULL;

	BUG_ON(obj == NULL);
	ASSERT_SENTINEL(obj, OBJ);
	BUG_ON(obj->pool == NULL);
	ASSERT_SENTINEL(obj->pool, POOL);

	height = obj->objnode_tree_height;
	if (index > tmem_objnode_tree_h2max[obj->objnode_tree_height])
		goto out;
	/* height 0: the root pointer itself is the (single) pampd slot */
	if (height == 0 && obj->objnode_tree_root) {
		slot = &obj->objnode_tree_root;
		goto out;
	}
	shift = (height-1) * OBJNODE_TREE_MAP_SHIFT;
	slot = &obj->objnode_tree_root;
	while (height > 0) {
		if (*slot == NULL)
			goto out;
		slot = (struct tmem_objnode **)
			((*slot)->slots +
			 ((index >> shift) & OBJNODE_TREE_MAP_MASK));
		shift -= OBJNODE_TREE_MAP_SHIFT;
		height--;
	}
out:
	return slot != NULL ? (void **)slot : NULL;
}

/* as __tmem_pampd_lookup_in_obj but dereferences the slot: the pampd or NULL */
static void *tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index)
{
	struct tmem_objnode **slot;

	slot = (struct tmem_objnode **)__tmem_pampd_lookup_in_obj(obj, index);
	return slot != NULL ? *slot : NULL;
}

/*
 * Replace the pampd at index with new_pampd, freeing the old one via pamops.
 * Returns new_pampd on success, NULL if no pampd was present at index.
 */
static void *tmem_pampd_replace_in_obj(struct tmem_obj *obj, uint32_t index,
					void *new_pampd)
{
	struct tmem_objnode **slot;
	void *ret = NULL;

	slot = (struct tmem_objnode **)__tmem_pampd_lookup_in_obj(obj, index);
	if ((slot != NULL) && (*slot != NULL)) {
		void *old_pampd = *(void **)slot;
		*(void **)slot = new_pampd;
		(*tmem_pamops.free)(old_pampd, obj->pool, NULL, 0);
		ret = new_pampd;
	}
	return ret;
}

/*
 * Insert pampd at index, growing the tree upward if index is beyond the
 * current height's reach and allocating intermediate objnodes on the way
 * down.  Returns 0 or -ENOMEM; on -ENOMEM a partially-built path ("stump")
 * may remain and must be cleaned up by the caller (see tmem_put).
 */
static int tmem_pampd_add_to_obj(struct tmem_obj *obj, uint32_t index,
					void *pampd)
{
	int ret = 0;
	struct tmem_objnode *objnode = NULL, *newnode, *slot;
	unsigned int height, shift;
	int offset = 0;

	/* if necessary, extend the tree to be higher */
	if (index > tmem_objnode_tree_h2max[obj->objnode_tree_height]) {
		height = obj->objnode_tree_height + 1;
		if (index > tmem_objnode_tree_h2max[height])
			while (index > tmem_objnode_tree_h2max[height])
				height++;
		if (obj->objnode_tree_root == NULL) {
			/* empty tree: just record the height, no nodes yet */
			obj->objnode_tree_height = height;
			goto insert;
		}
		/* push the old root down under a chain of new root nodes */
		do {
			newnode = tmem_objnode_alloc(obj);
			if (!newnode) {
				ret = -ENOMEM;
				goto out;
			}
			newnode->slots[0] = obj->objnode_tree_root;
			newnode->slots_in_use = 1;
			obj->objnode_tree_root = newnode;
			obj->objnode_tree_height++;
		} while (height > obj->objnode_tree_height);
	}
insert:
	slot = obj->objnode_tree_root;
	height = obj->objnode_tree_height;
	shift = (height-1) * OBJNODE_TREE_MAP_SHIFT;
	while (height > 0) {
		if (slot == NULL) {
			/* add a child objnode.  */
			slot = tmem_objnode_alloc(obj);
			if (!slot) {
				ret = -ENOMEM;
				goto out;
			}
			if (objnode) {

				objnode->slots[offset] = slot;
				objnode->slots_in_use++;
			} else
				obj->objnode_tree_root = slot;
		}
		/* go down a level */
		offset = (index >> shift) & OBJNODE_TREE_MAP_MASK;
		objnode = slot;
		slot = objnode->slots[offset];
		shift -= OBJNODE_TREE_MAP_SHIFT;
		height--;
	}
	BUG_ON(slot != NULL);	/* caller guarantees index is not occupied */
	if (objnode) {
		objnode->slots_in_use++;
		objnode->slots[offset] = pampd;
	} else
		obj->objnode_tree_root = pampd;	/* height-0 tree: root is leaf */
	obj->pampd_count++;
out:
	return ret;
}

/*
 * Remove and return the pampd at index (NULL if absent).  The root-to-leaf
 * path is recorded in path[] on the way down; on the way back up, objnodes
 * whose last slot was cleared are freed, and a chain of single-slot root
 * nodes is collapsed to shrink the tree height.
 */
static void *tmem_pampd_delete_from_obj(struct tmem_obj *obj, uint32_t index)
{
	struct tmem_objnode_tree_path path[OBJNODE_TREE_MAX_PATH + 1];
	struct tmem_objnode_tree_path *pathp = path;
	struct tmem_objnode *slot = NULL;
	unsigned int height, shift;
	int offset;

	BUG_ON(obj == NULL);
	ASSERT_SENTINEL(obj, OBJ);
	BUG_ON(obj->pool == NULL);
	ASSERT_SENTINEL(obj->pool, POOL);
	height = obj->objnode_tree_height;
	if (index > tmem_objnode_tree_h2max[height])
		goto out;
	slot = obj->objnode_tree_root;
	/* height 0: the root pointer is the leaf itself */
	if (height == 0 && obj->objnode_tree_root) {
		obj->objnode_tree_root = NULL;
		goto out;
	}
	shift = (height - 1) * OBJNODE_TREE_MAP_SHIFT;
	pathp->objnode = NULL;	/* sentinel terminating the walk-back loop */
	do {
		if (slot == NULL)
			goto out;
		pathp++;
		offset = (index >> shift) & OBJNODE_TREE_MAP_MASK;
		pathp->offset = offset;
		pathp->objnode = slot;
		slot = slot->slots[offset];
		shift -= OBJNODE_TREE_MAP_SHIFT;
		height--;
	} while (height > 0);
	if (slot == NULL)
		goto out;
	/* walk back up, clearing the slot and pruning empty objnodes */
	while (pathp->objnode) {
		pathp->objnode->slots[pathp->offset] = NULL;
		pathp->objnode->slots_in_use--;
		if (pathp->objnode->slots_in_use) {
			if (pathp->objnode == obj->objnode_tree_root) {
				/* collapse single-slot roots to shrink height */
				while (obj->objnode_tree_height > 0 &&
					obj->objnode_tree_root->slots_in_use == 1 &&
					obj->objnode_tree_root->slots[0]) {
					struct tmem_objnode *to_free =
						obj->objnode_tree_root;

					obj->objnode_tree_root =
							to_free->slots[0];
					obj->objnode_tree_height--;
					to_free->slots[0] = NULL;
					to_free->slots_in_use = 0;
					tmem_objnode_free(to_free);
				}
			}
			goto out;
		}
		tmem_objnode_free(pathp->objnode); /* 0 slots used, free it */
		pathp--;
	}
	/* whole path freed: tree is now empty */
	obj->objnode_tree_height = 0;
	obj->objnode_tree_root = NULL;

out:
	if (slot != NULL)
		obj->pampd_count--;
	BUG_ON(obj->pampd_count < 0);
	return slot;
}

/* recursively walk the objnode_tree destroying pampds and objnodes */
static void tmem_objnode_node_destroy(struct tmem_obj *obj,
					struct tmem_objnode *objnode,
					unsigned int ht)
{
	int i;

	if (ht == 0)
		return;
	for (i = 0; i < OBJNODE_TREE_MAP_SIZE; i++) {
		if (objnode->slots[i]) {
			if (ht == 1) {
				/* slots at height 1 are leaves (pampds) */
				obj->pampd_count--;
				(*tmem_pamops.free)(objnode->slots[i],
						obj->pool, NULL, 0);
				objnode->slots[i] = NULL;
				continue;
			}
			tmem_objnode_node_destroy(obj, objnode->slots[i], ht-1);
			tmem_objnode_free(objnode->slots[i]);
			objnode->slots[i] = NULL;
		}
	}
}

/*
 * Destroy every pampd and objnode in the object's tree, leaving the tree
 * empty, then notify the PAM implementation via free_obj.
 */
static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *obj)
{
	if (obj->objnode_tree_root == NULL)
		return;
	if (obj->objnode_tree_height == 0) {
		/* height-0 tree: the root pointer is the single pampd */
		obj->pampd_count--;
		(*tmem_pamops.free)(obj->objnode_tree_root, obj->pool, NULL, 0);
	} else {
		tmem_objnode_node_destroy(obj, obj->objnode_tree_root,
						obj->objnode_tree_height);
		tmem_objnode_free(obj->objnode_tree_root);
		obj->objnode_tree_height = 0;
	}
	obj->objnode_tree_root = NULL;
	(*tmem_pamops.free_obj)(obj->pool, obj);
}

/*
 * Tmem is operated on by a set of well-defined actions:
 * "put", "get", "flush", "flush_object", "new pool" and "destroy pool".
 * (The tmem ABI allows for subpages and exchanges but these operations
 * are not included in this implementation.)
 *
 * These "tmem core" operations are implemented in the following functions.
 */

/*
 * "Put" a page, e.g. copy a page from the kernel into newly allocated
 * PAM space (if such space is available).  Tmem_put is complicated by
 * a corner case: What if a page with matching handle already exists in
 * tmem?  To guarantee coherency, one of two actions is necessary: Either
 * the data for the page must be overwritten, or the page must be
 * "flushed" so that the data is not accessible to a subsequent "get".
 * Since these "duplicate puts" are relatively rare, this implementation
 * always flushes for simplicity.
 *
 * Returns 0 on success, -ENOMEM if the object or pampd could not be
 * allocated/inserted.
 */
int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
		char *data, size_t size, bool raw, bool ephemeral)
{
	/*
	 * Invariant maintained below: exactly one of objfound (pre-existing
	 * object kept) or objnew (object that must be freed on error) is
	 * non-NULL once obj is set — checked by the BUG_ON after the if/else.
	 */
	struct tmem_obj *obj = NULL, *objfound = NULL, *objnew = NULL;
	void *pampd = NULL, *pampd_del = NULL;
	int ret = -ENOMEM;
	struct tmem_hashbucket *hb;

	hb = &pool->hashbucket[tmem_oid_hash(oidp)];
	spin_lock(&hb->lock);
	obj = objfound = tmem_obj_find(hb, oidp);
	if (obj != NULL) {
		pampd = tmem_pampd_lookup_in_obj(objfound, index);
		if (pampd != NULL) {
			/* if found, is a dup put, flush the old one */
			pampd_del = tmem_pampd_delete_from_obj(obj, index);
			BUG_ON(pampd_del != pampd);
			(*tmem_pamops.free)(pampd, pool, oidp, index);
			if (obj->pampd_count == 0) {
				/* now-empty object: treat as new so it is
				 * freed if the re-insert below fails */
				objnew = obj;
				objfound = NULL;
			}
			pampd = NULL;
		}
	} else {
		obj = objnew = (*tmem_hostops.obj_alloc)(pool);
		if (unlikely(obj == NULL)) {
			ret = -ENOMEM;
			goto out;
		}
		tmem_obj_init(obj, hb, pool, oidp);
	}
	BUG_ON(obj == NULL);
	BUG_ON(((objnew != obj) && (objfound != obj)) || (objnew == objfound));
	pampd = (*tmem_pamops.create)(data, size, raw, ephemeral,
					obj->pool, &obj->oid, index);
	if (unlikely(pampd == NULL))
		goto free;
	ret = tmem_pampd_add_to_obj(obj, index, pampd);
	if (unlikely(ret == -ENOMEM))
		/* may have partially built objnode tree ("stump") */
		goto delete_and_free;
	goto out;

delete_and_free:
	/* roll back the partial insert before freeing the new pampd */
	(void)tmem_pampd_delete_from_obj(obj, index);
free:
	if (pampd)
		(*tmem_pamops.free)(pampd, pool, NULL, 0);
	if (objnew) {
		tmem_obj_free(objnew, hb);
		(*tmem_hostops.obj_free)(objnew, pool);
	}
out:
	spin_unlock(&hb->lock);
	return ret;
}

/*
 * "Get" a page, e.g. if one can be found, copy the tmem page with the
 * matching handle from PAM space to the kernel.  By tmem definition,
 * when a "get" is successful on an ephemeral page, the page is "flushed",
 * and when a "get" is successful on a persistent page, the page is retained
 * in tmem.  Note that to preserve
 * coherency, "get" can never be skipped if tmem contains the data.
 * That is, if a get is done with a certain handle and fails, any
 * subsequent "get" must also fail (unless of course there is a
 * "put" done with the same handle).
 */
/*
 * get_and_free selects the flush policy: 1 = always delete the pampd,
 * 0 = delete only if the pool is ephemeral, other = never delete.
 * Returns 0 on success, -1 (or a negative pamops error) on failure.
 */
int tmem_get(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
		char *data, size_t *size, bool raw, int get_and_free)
{
	struct tmem_obj *obj;
	void *pampd;
	bool ephemeral = is_ephemeral(pool);
	int ret = -1;
	struct tmem_hashbucket *hb;
	bool free = (get_and_free == 1) || ((get_and_free == 0) && ephemeral);
	bool lock_held = false;

	hb = &pool->hashbucket[tmem_oid_hash(oidp)];
	spin_lock(&hb->lock);
	lock_held = true;
	obj = tmem_obj_find(hb, oidp);
	if (obj == NULL)
		goto out;
	if (free)
		pampd = tmem_pampd_delete_from_obj(obj, index);
	else
		pampd = tmem_pampd_lookup_in_obj(obj, index);
	if (pampd == NULL)
		goto out;
	if (free) {
		if (obj->pampd_count == 0) {
			tmem_obj_free(obj, hb);
			(*tmem_hostops.obj_free)(obj, pool);
			obj = NULL;	/* must not be touched past this point */
		}
	}
	/*
	 * For remote pampds the data copy may block, so the bucket lock is
	 * dropped first; the pampd was already unlinked above when freeing.
	 */
	if (tmem_pamops.is_remote(pampd)) {
		lock_held = false;
		spin_unlock(&hb->lock);
	}
	if (free)
		ret = (*tmem_pamops.get_data_and_free)(
				data, size, raw, pampd, pool, oidp, index);
	else
		ret = (*tmem_pamops.get_data)(
				data, size, raw, pampd, pool, oidp, index);
	if (ret < 0)
		goto out;
	ret = 0;
out:
	if (lock_held)
		spin_unlock(&hb->lock);
	return ret;
}

/*
 * If a page in tmem matches the handle, "flush" this page from tmem such
 * that any subsequent "get" does not succeed (unless, of course, there
 * was another "put" with the same handle).
654 */ 655int tmem_flush_page(struct tmem_pool *pool, 656 struct tmem_oid *oidp, uint32_t index) 657{ 658 struct tmem_obj *obj; 659 void *pampd; 660 int ret = -1; 661 struct tmem_hashbucket *hb; 662 663 hb = &pool->hashbucket[tmem_oid_hash(oidp)]; 664 spin_lock(&hb->lock); 665 obj = tmem_obj_find(hb, oidp); 666 if (obj == NULL) 667 goto out; 668 pampd = tmem_pampd_delete_from_obj(obj, index); 669 if (pampd == NULL) 670 goto out; 671 (*tmem_pamops.free)(pampd, pool, oidp, index); 672 if (obj->pampd_count == 0) { 673 tmem_obj_free(obj, hb); 674 (*tmem_hostops.obj_free)(obj, pool); 675 } 676 ret = 0; 677 678out: 679 spin_unlock(&hb->lock); 680 return ret; 681} 682 683/* 684 * If a page in tmem matches the handle, replace the page so that any 685 * subsequent "get" gets the new page. Returns 0 if 686 * there was a page to replace, else returns -1. 687 */ 688int tmem_replace(struct tmem_pool *pool, struct tmem_oid *oidp, 689 uint32_t index, void *new_pampd) 690{ 691 struct tmem_obj *obj; 692 int ret = -1; 693 struct tmem_hashbucket *hb; 694 695 hb = &pool->hashbucket[tmem_oid_hash(oidp)]; 696 spin_lock(&hb->lock); 697 obj = tmem_obj_find(hb, oidp); 698 if (obj == NULL) 699 goto out; 700 new_pampd = tmem_pampd_replace_in_obj(obj, index, new_pampd); 701 ret = (*tmem_pamops.replace_in_obj)(new_pampd, obj); 702out: 703 spin_unlock(&hb->lock); 704 return ret; 705} 706 707/* 708 * "Flush" all pages in tmem matching this oid. 
709 */ 710int tmem_flush_object(struct tmem_pool *pool, struct tmem_oid *oidp) 711{ 712 struct tmem_obj *obj; 713 struct tmem_hashbucket *hb; 714 int ret = -1; 715 716 hb = &pool->hashbucket[tmem_oid_hash(oidp)]; 717 spin_lock(&hb->lock); 718 obj = tmem_obj_find(hb, oidp); 719 if (obj == NULL) 720 goto out; 721 tmem_pampd_destroy_all_in_obj(obj); 722 tmem_obj_free(obj, hb); 723 (*tmem_hostops.obj_free)(obj, pool); 724 ret = 0; 725 726out: 727 spin_unlock(&hb->lock); 728 return ret; 729} 730 731/* 732 * "Flush" all pages (and tmem_objs) from this tmem_pool and disable 733 * all subsequent access to this tmem_pool. 734 */ 735int tmem_destroy_pool(struct tmem_pool *pool) 736{ 737 int ret = -1; 738 739 if (pool == NULL) 740 goto out; 741 tmem_pool_flush(pool, 1); 742 ret = 0; 743out: 744 return ret; 745} 746 747static LIST_HEAD(tmem_global_pool_list); 748 749/* 750 * Create a new tmem_pool with the provided flag and return 751 * a pool id provided by the tmem host implementation. 752 */ 753void tmem_new_pool(struct tmem_pool *pool, uint32_t flags) 754{ 755 int persistent = flags & TMEM_POOL_PERSIST; 756 int shared = flags & TMEM_POOL_SHARED; 757 struct tmem_hashbucket *hb = &pool->hashbucket[0]; 758 int i; 759 760 for (i = 0; i < TMEM_HASH_BUCKETS; i++, hb++) { 761 hb->obj_rb_root = RB_ROOT; 762 spin_lock_init(&hb->lock); 763 } 764 INIT_LIST_HEAD(&pool->pool_list); 765 atomic_set(&pool->obj_count, 0); 766 SET_SENTINEL(pool, POOL); 767 list_add_tail(&pool->pool_list, &tmem_global_pool_list); 768 pool->persistent = persistent; 769 pool->shared = shared; 770} 771