1/*
2 * tmem.h
3 *
4 * Transcendent memory
5 *
6 * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp.
7 */
8
9#ifndef _TMEM_H_
10#define _TMEM_H_
11
12#include <linux/highmem.h>
13#include <linux/hash.h>
14#include <linux/atomic.h>
15
16/*
17 * These are pre-defined by the Xen<->Linux ABI
18 */
19#define TMEM_PUT_PAGE			4
20#define TMEM_GET_PAGE			5
21#define TMEM_FLUSH_PAGE			6
22#define TMEM_FLUSH_OBJECT		7
23#define TMEM_POOL_PERSIST		1
24#define TMEM_POOL_SHARED		2
25#define TMEM_POOL_PRECOMPRESSED		4
26#define TMEM_POOL_PAGESIZE_SHIFT	4
27#define TMEM_POOL_PAGESIZE_MASK		0xf
28#define TMEM_POOL_RESERVED_BITS		0x00ffff00
29
30/*
31 * sentinels have proven very useful for debugging but can be removed
32 * or disabled before final merge.
33 */
34#define SENTINELS
35#ifdef SENTINELS
36#define DECL_SENTINEL uint32_t sentinel;
37#define SET_SENTINEL(_x, _y) (_x->sentinel = _y##_SENTINEL)
38#define INVERT_SENTINEL(_x, _y) (_x->sentinel = ~_y##_SENTINEL)
39#define ASSERT_SENTINEL(_x, _y) WARN_ON(_x->sentinel != _y##_SENTINEL)
40#define ASSERT_INVERTED_SENTINEL(_x, _y) WARN_ON(_x->sentinel != ~_y##_SENTINEL)
41#else
42#define DECL_SENTINEL
43#define SET_SENTINEL(_x, _y) do { } while (0)
44#define INVERT_SENTINEL(_x, _y) do { } while (0)
45#define ASSERT_SENTINEL(_x, _y) do { } while (0)
46#define ASSERT_INVERTED_SENTINEL(_x, _y) do { } while (0)
47#endif
48
/* Warn (do not crash) if the given spinlock is not currently held. */
#define ASSERT_SPINLOCK(_l)	WARN_ON(!spin_is_locked(_l))
50
51/*
52 * A pool is the highest-level data structure managed by tmem and
53 * usually corresponds to a large independent set of pages such as
54 * a filesystem.  Each pool has an id, and certain attributes and counters.
55 * It also contains a set of hash buckets, each of which contains an rbtree
56 * of objects and a lock to manage concurrency within the pool.
57 */
58
59#define TMEM_HASH_BUCKET_BITS	8
60#define TMEM_HASH_BUCKETS	(1<<TMEM_HASH_BUCKET_BITS)
61
62struct tmem_hashbucket {
63	struct rb_root obj_rb_root;
64	spinlock_t lock;
65};
66
/* See the comment above struct tmem_hashbucket for the pool concept. */
struct tmem_pool {
	void *client; /* "up" for some clients, avoids table lookup */
	struct list_head pool_list;	/* presumably per-client pool list linkage -- confirm in .c */
	uint32_t pool_id;
	bool persistent;	/* see TMEM_POOL_PERSIST */
	bool shared;		/* see TMEM_POOL_SHARED */
	atomic_t obj_count;	/* live tmem_objs in this pool */
	atomic_t refcount;
	struct tmem_hashbucket hashbucket[TMEM_HASH_BUCKETS];
	DECL_SENTINEL
};
78
/*
 * Pool attribute tests.  The argument is parenthesized so any
 * pointer-valued expression (e.g. &pools[i]) works as an argument.
 */
#define is_persistent(_p)  ((_p)->persistent)
#define is_ephemeral(_p)   (!((_p)->persistent))
81
82/*
83 * An object id ("oid") is large: 192-bits (to ensure, for example, files
84 * in a modern filesystem can be uniquely identified).
85 */
86
87struct tmem_oid {
88	uint64_t oid[3];
89};
90
91struct tmem_xhandle {
92	uint8_t client_id;
93	uint8_t xh_data_cksum;
94	uint16_t xh_data_size;
95	uint16_t pool_id;
96	struct tmem_oid oid;
97	uint32_t index;
98	void *extra;
99};
100
101static inline struct tmem_xhandle tmem_xhandle_fill(uint16_t client_id,
102					struct tmem_pool *pool,
103					struct tmem_oid *oidp,
104					uint32_t index)
105{
106	struct tmem_xhandle xh;
107	xh.client_id = client_id;
108	xh.xh_data_cksum = (uint8_t)-1;
109	xh.xh_data_size = (uint16_t)-1;
110	xh.pool_id = pool->pool_id;
111	xh.oid = *oidp;
112	xh.index = index;
113	return xh;
114}
115
116static inline void tmem_oid_set_invalid(struct tmem_oid *oidp)
117{
118	oidp->oid[0] = oidp->oid[1] = oidp->oid[2] = -1UL;
119}
120
121static inline bool tmem_oid_valid(struct tmem_oid *oidp)
122{
123	return oidp->oid[0] != -1UL || oidp->oid[1] != -1UL ||
124		oidp->oid[2] != -1UL;
125}
126
127static inline int tmem_oid_compare(struct tmem_oid *left,
128					struct tmem_oid *right)
129{
130	int ret;
131
132	if (left->oid[2] == right->oid[2]) {
133		if (left->oid[1] == right->oid[1]) {
134			if (left->oid[0] == right->oid[0])
135				ret = 0;
136			else if (left->oid[0] < right->oid[0])
137				ret = -1;
138			else
139				return 1;
140		} else if (left->oid[1] < right->oid[1])
141			ret = -1;
142		else
143			ret = 1;
144	} else if (left->oid[2] < right->oid[2])
145		ret = -1;
146	else
147		ret = 1;
148	return ret;
149}
150
151static inline unsigned tmem_oid_hash(struct tmem_oid *oidp)
152{
153	return hash_long(oidp->oid[0] ^ oidp->oid[1] ^ oidp->oid[2],
154				TMEM_HASH_BUCKET_BITS);
155}
156
157/*
158 * A tmem_obj contains an identifier (oid), pointers to the parent
159 * pool and the rb_tree to which it belongs, counters, and an ordered
160 * set of pampds, structured in a radix-tree-like tree.  The intermediate
161 * nodes of the tree are called tmem_objnodes.
162 */
163
164struct tmem_objnode;
165
166struct tmem_obj {
167	struct tmem_oid oid;
168	struct tmem_pool *pool;
169	struct rb_node rb_tree_node;
170	struct tmem_objnode *objnode_tree_root;
171	unsigned int objnode_tree_height;
172	unsigned long objnode_count;
173	long pampd_count;
174	/* for current design of ramster, all pages belonging to
175	 * an object reside on the same remotenode and extra is
176	 * used to record the number of the remotenode so a
177	 * flush-object operation can specify it */
178	void *extra; /* for use by pampd implementation */
179	DECL_SENTINEL
180};
181
/*
 * Radix-tree geometry for the pampd tree rooted in each tmem_obj:
 * 2^6 = 64 slots per node, i.e. 6 bits of the page index are consumed
 * per tree level.
 */
#define OBJNODE_TREE_MAP_SHIFT 6
#define OBJNODE_TREE_MAP_SIZE (1UL << OBJNODE_TREE_MAP_SHIFT)
#define OBJNODE_TREE_MAP_MASK (OBJNODE_TREE_MAP_SIZE-1)
#define OBJNODE_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long))
/* deepest possible path from root to leaf, plus slack */
#define OBJNODE_TREE_MAX_PATH \
		(OBJNODE_TREE_INDEX_BITS/OBJNODE_TREE_MAP_SHIFT + 2)

/* One interior node of the radix-tree-like pampd tree. */
struct tmem_objnode {
	struct tmem_obj *obj;	/* owning object */
	DECL_SENTINEL
	void *slots[OBJNODE_TREE_MAP_SIZE];	/* children or leaf pampds */
	unsigned int slots_in_use;	/* non-NULL entries in slots[] */
};
195
/*
 * pampd abstract datatype methods provided by the PAM implementation.
 * Parameters are unnamed here; the (char *, size_t) pairs appear to
 * carry page data and length, and the (pool, oid, index) triple
 * addresses one page -- confirm against the implementing .c file.
 */
struct tmem_pamops {
	/* create a new pampd to hold a page of data */
	void *(*create)(char *, size_t, bool, int,
			struct tmem_pool *, struct tmem_oid *, uint32_t);
	/* copy a pampd's data back out */
	int (*get_data)(char *, size_t *, bool, void *, struct tmem_pool *,
				struct tmem_oid *, uint32_t);
	/* as get_data, but the pampd is freed afterwards */
	int (*get_data_and_free)(char *, size_t *, bool, void *,
				struct tmem_pool *, struct tmem_oid *,
				uint32_t);
	/* release a single pampd */
	void (*free)(void *, struct tmem_pool *,
				struct tmem_oid *, uint32_t, bool);
	/* per-object teardown hook */
	void (*free_obj)(struct tmem_pool *, struct tmem_obj *);
	/* true if the pampd's data lives on a remote node (ramster) */
	bool (*is_remote)(void *);
	/* prepare to bring a remote pampd's data local; see repatriate */
	void *(*repatriate_preload)(void *, struct tmem_pool *,
					struct tmem_oid *, uint32_t, bool *);
	/* fetch a remote pampd's data back to this node */
	int (*repatriate)(void *, void *, struct tmem_pool *,
				struct tmem_oid *, uint32_t, bool, void *);
	/* per-object initialization hook */
	void (*new_obj)(struct tmem_obj *);
	/* swap the pampd already stored in obj; cf. tmem_replace() */
	int (*replace_in_obj)(void *, struct tmem_obj *);
};
/* Register the PAM implementation's method table with the tmem core. */
extern void tmem_register_pamops(struct tmem_pamops *m);
217
/*
 * memory allocation methods provided by the host implementation;
 * the tmem core uses these to obtain and release tmem_obj and
 * tmem_objnode structures rather than allocating directly.
 */
struct tmem_hostops {
	struct tmem_obj *(*obj_alloc)(struct tmem_pool *);
	void (*obj_free)(struct tmem_obj *, struct tmem_pool *);
	struct tmem_objnode *(*objnode_alloc)(struct tmem_pool *);
	void (*objnode_free)(struct tmem_objnode *, struct tmem_pool *);
};
/* Register the host's allocator table with the tmem core. */
extern void tmem_register_hostops(struct tmem_hostops *m);
226
/* core tmem accessor functions */

/* store a page of data at (pool, oid, index) */
extern int tmem_put(struct tmem_pool *, struct tmem_oid *, uint32_t index,
			char *, size_t, bool, int);
/* retrieve the page of data stored at (pool, oid, index) */
extern int tmem_get(struct tmem_pool *, struct tmem_oid *, uint32_t index,
			char *, size_t *, bool, int);
/* replace the pampd at (pool, oid, index); cf. pamops.replace_in_obj */
extern int tmem_replace(struct tmem_pool *, struct tmem_oid *, uint32_t index,
			void *);
/* look up a pampd for localification (ramster); see tmem_localify_finish */
extern void *tmem_localify_get_pampd(struct tmem_pool *, struct tmem_oid *,
				   uint32_t index, struct tmem_obj **,
				   void **);
extern void tmem_localify_finish(struct tmem_obj *, uint32_t index,
				 void *, void *, bool);
/* remove one page from tmem */
extern int tmem_flush_page(struct tmem_pool *, struct tmem_oid *,
			uint32_t index);
/* remove an object and all of its pages */
extern int tmem_flush_object(struct tmem_pool *, struct tmem_oid *);
/* tear down an entire pool */
extern int tmem_destroy_pool(struct tmem_pool *);
/* initialize a pool with the given flags (TMEM_POOL_*) */
extern void tmem_new_pool(struct tmem_pool *, uint32_t);
#endif /* _TMEM_H_ */
245