1/* 2 * tmem.h 3 * 4 * Transcendent memory 5 * 6 * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp. 7 */ 8 9#ifndef _TMEM_H_ 10#define _TMEM_H_ 11 12#include <linux/highmem.h> 13#include <linux/hash.h> 14#include <linux/atomic.h> 15 16/* 17 * These are pre-defined by the Xen<->Linux ABI 18 */ 19#define TMEM_PUT_PAGE 4 20#define TMEM_GET_PAGE 5 21#define TMEM_FLUSH_PAGE 6 22#define TMEM_FLUSH_OBJECT 7 23#define TMEM_POOL_PERSIST 1 24#define TMEM_POOL_SHARED 2 25#define TMEM_POOL_PRECOMPRESSED 4 26#define TMEM_POOL_PAGESIZE_SHIFT 4 27#define TMEM_POOL_PAGESIZE_MASK 0xf 28#define TMEM_POOL_RESERVED_BITS 0x00ffff00 29 30/* 31 * sentinels have proven very useful for debugging but can be removed 32 * or disabled before final merge. 33 */ 34#define SENTINELS 35#ifdef SENTINELS 36#define DECL_SENTINEL uint32_t sentinel; 37#define SET_SENTINEL(_x, _y) (_x->sentinel = _y##_SENTINEL) 38#define INVERT_SENTINEL(_x, _y) (_x->sentinel = ~_y##_SENTINEL) 39#define ASSERT_SENTINEL(_x, _y) WARN_ON(_x->sentinel != _y##_SENTINEL) 40#define ASSERT_INVERTED_SENTINEL(_x, _y) WARN_ON(_x->sentinel != ~_y##_SENTINEL) 41#else 42#define DECL_SENTINEL 43#define SET_SENTINEL(_x, _y) do { } while (0) 44#define INVERT_SENTINEL(_x, _y) do { } while (0) 45#define ASSERT_SENTINEL(_x, _y) do { } while (0) 46#define ASSERT_INVERTED_SENTINEL(_x, _y) do { } while (0) 47#endif 48 49#define ASSERT_SPINLOCK(_l) WARN_ON(!spin_is_locked(_l)) 50 51/* 52 * A pool is the highest-level data structure managed by tmem and 53 * usually corresponds to a large independent set of pages such as 54 * a filesystem. Each pool has an id, and certain attributes and counters. 55 * It also contains a set of hash buckets, each of which contains an rbtree 56 * of objects and a lock to manage concurrency within the pool. 57 */ 58 59#define TMEM_HASH_BUCKET_BITS 8 60#define TMEM_HASH_BUCKETS (1<<TMEM_HASH_BUCKET_BITS) 61 62struct tmem_hashbucket { 63 struct rb_root obj_rb_root; 64 spinlock_t lock; 65}; 66 67struct tmem_pool { 68 void *client; /* "up" for some clients, avoids table lookup */ 69 struct list_head pool_list; 70 uint32_t pool_id; 71 bool persistent; 72 bool shared; 73 atomic_t obj_count; 74 atomic_t refcount; 75 struct tmem_hashbucket hashbucket[TMEM_HASH_BUCKETS]; 76 DECL_SENTINEL 77}; 78 79#define is_persistent(_p) (_p->persistent) 80#define is_ephemeral(_p) (!(_p->persistent)) 81 82/* 83 * An object id ("oid") is large: 192-bits (to ensure, for example, files 84 * in a modern filesystem can be uniquely identified). 85 */ 86 87struct tmem_oid { 88 uint64_t oid[3]; 89}; 90 91struct tmem_xhandle { 92 uint8_t client_id; 93 uint8_t xh_data_cksum; 94 uint16_t xh_data_size; 95 uint16_t pool_id; 96 struct tmem_oid oid; 97 uint32_t index; 98 void *extra; 99}; 100 101static inline struct tmem_xhandle tmem_xhandle_fill(uint16_t client_id, 102 struct tmem_pool *pool, 103 struct tmem_oid *oidp, 104 uint32_t index) 105{ 106 struct tmem_xhandle xh; 107 xh.client_id = client_id; 108 xh.xh_data_cksum = (uint8_t)-1; 109 xh.xh_data_size = (uint16_t)-1; 110 xh.pool_id = pool->pool_id; 111 xh.oid = *oidp; 112 xh.index = index; 113 return xh; 114} 115 116static inline void tmem_oid_set_invalid(struct tmem_oid *oidp) 117{ 118 oidp->oid[0] = oidp->oid[1] = oidp->oid[2] = -1UL; 119} 120 121static inline bool tmem_oid_valid(struct tmem_oid *oidp) 122{ 123 return oidp->oid[0] != -1UL || oidp->oid[1] != -1UL || 124 oidp->oid[2] != -1UL; 125} 126 127static inline int tmem_oid_compare(struct tmem_oid *left, 128 struct tmem_oid *right) 129{ 130 int ret; 131 132 if (left->oid[2] == right->oid[2]) { 133 if (left->oid[1] == right->oid[1]) { 134 if (left->oid[0] == right->oid[0]) 135 ret = 0; 136 else if (left->oid[0] < right->oid[0]) 137 ret = -1; 138 else 139 return 1; 140 } else if (left->oid[1] < right->oid[1]) 141 ret = -1; 142 else 143 ret = 1; 144 } else if (left->oid[2] < right->oid[2]) 145 ret = -1; 146 else 147 ret = 1; 148 return ret; 149} 150 151static inline unsigned tmem_oid_hash(struct tmem_oid *oidp) 152{ 153 return hash_long(oidp->oid[0] ^ oidp->oid[1] ^ oidp->oid[2], 154 TMEM_HASH_BUCKET_BITS); 155} 156 157/* 158 * A tmem_obj contains an identifier (oid), pointers to the parent 159 * pool and the rb_tree to which it belongs, counters, and an ordered 160 * set of pampds, structured in a radix-tree-like tree. The intermediate 161 * nodes of the tree are called tmem_objnodes. 162 */ 163 164struct tmem_objnode; 165 166struct tmem_obj { 167 struct tmem_oid oid; 168 struct tmem_pool *pool; 169 struct rb_node rb_tree_node; 170 struct tmem_objnode *objnode_tree_root; 171 unsigned int objnode_tree_height; 172 unsigned long objnode_count; 173 long pampd_count; 174 /* for current design of ramster, all pages belonging to 175 * an object reside on the same remotenode and extra is 176 * used to record the number of the remotenode so a 177 * flush-object operation can specify it */ 178 void *extra; /* for use by pampd implementation */ 179 DECL_SENTINEL 180}; 181 182#define OBJNODE_TREE_MAP_SHIFT 6 183#define OBJNODE_TREE_MAP_SIZE (1UL << OBJNODE_TREE_MAP_SHIFT) 184#define OBJNODE_TREE_MAP_MASK (OBJNODE_TREE_MAP_SIZE-1) 185#define OBJNODE_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) 186#define OBJNODE_TREE_MAX_PATH \ 187 (OBJNODE_TREE_INDEX_BITS/OBJNODE_TREE_MAP_SHIFT + 2) 188 189struct tmem_objnode { 190 struct tmem_obj *obj; 191 DECL_SENTINEL 192 void *slots[OBJNODE_TREE_MAP_SIZE]; 193 unsigned int slots_in_use; 194}; 195 196/* pampd abstract datatype methods provided by the PAM implementation */ 197struct tmem_pamops { 198 void *(*create)(char *, size_t, bool, int, 199 struct tmem_pool *, struct tmem_oid *, uint32_t); 200 int (*get_data)(char *, size_t *, bool, void *, struct tmem_pool *, 201 struct tmem_oid *, uint32_t); 202 int (*get_data_and_free)(char *, size_t *, bool, void *, 203 struct tmem_pool *, struct tmem_oid *, 204 uint32_t); 205 void (*free)(void *, struct tmem_pool *, 206 struct tmem_oid *, uint32_t, bool); 207 void (*free_obj)(struct tmem_pool *, struct tmem_obj *); 208 bool (*is_remote)(void *); 209 void *(*repatriate_preload)(void *, struct tmem_pool *, 210 struct tmem_oid *, uint32_t, bool *); 211 int (*repatriate)(void *, void *, struct tmem_pool *, 212 struct tmem_oid *, uint32_t, bool, void *); 213 void (*new_obj)(struct tmem_obj *); 214 int (*replace_in_obj)(void *, struct tmem_obj *); 215}; 216extern void tmem_register_pamops(struct tmem_pamops *m); 217 218/* memory allocation methods provided by the host implementation */ 219struct tmem_hostops { 220 struct tmem_obj *(*obj_alloc)(struct tmem_pool *); 221 void (*obj_free)(struct tmem_obj *, struct tmem_pool *); 222 struct tmem_objnode *(*objnode_alloc)(struct tmem_pool *); 223 void (*objnode_free)(struct tmem_objnode *, struct tmem_pool *); 224}; 225extern void tmem_register_hostops(struct tmem_hostops *m); 226 227/* core tmem accessor functions */ 228extern int tmem_put(struct tmem_pool *, struct tmem_oid *, uint32_t index, 229 char *, size_t, bool, int); 230extern int tmem_get(struct tmem_pool *, struct tmem_oid *, uint32_t index, 231 char *, size_t *, bool, int); 232extern int tmem_replace(struct tmem_pool *, struct tmem_oid *, uint32_t index, 233 void *); 234extern void *tmem_localify_get_pampd(struct tmem_pool *, struct tmem_oid *, 235 uint32_t index, struct tmem_obj **, 236 void **); 237extern void tmem_localify_finish(struct tmem_obj *, uint32_t index, 238 void *, void *, bool); 239extern int tmem_flush_page(struct tmem_pool *, struct tmem_oid *, 240 uint32_t index); 241extern int tmem_flush_object(struct tmem_pool *, struct tmem_oid *); 242extern int tmem_destroy_pool(struct tmem_pool *); 243extern void tmem_new_pool(struct tmem_pool *, uint32_t); 244#endif /* _TMEM_H */ 245