fld_cache.c revision 0a3bdb00710bf253ba8ba8f645645f22297c7a04
/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2012, 2013, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * lustre/fld/fld_cache.c
 *
 * FLD (Fids Location Database)
 *
 * Author: Pravin Shelar <pravin.shelar@sun.com>
 * Author: Yury Umanets <umka@clusterfs.com>
 */

#define DEBUG_SUBSYSTEM S_FLD

#include <linux/libcfs/libcfs.h>
#include <linux/module.h>
#include <asm/div64.h>

#include <obd.h>
#include <obd_class.h>
#include <lustre_ver.h>
#include <obd_support.h>
#include <lprocfs_status.h>

#include <dt_object.h>
#include <md_object.h>
#include <lustre_req_layout.h>
#include <lustre_fld.h>
#include "fld_internal.h"

/**
 * create fld cache.
 */
struct fld_cache *fld_cache_init(const char *name,
				 int cache_size, int cache_threshold)
{
	struct fld_cache *cache;

	LASSERT(name != NULL);
	LASSERT(cache_threshold < cache_size);

	OBD_ALLOC_PTR(cache);
	if (cache == NULL)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&cache->fci_entries_head);
	INIT_LIST_HEAD(&cache->fci_lru);

	cache->fci_cache_count = 0;
	rwlock_init(&cache->fci_lock);

	strlcpy(cache->fci_name, name,
		sizeof(cache->fci_name));

	cache->fci_cache_size = cache_size;
	cache->fci_threshold = cache_threshold;

	/* Init fld cache info. */
	memset(&cache->fci_stat, 0, sizeof(cache->fci_stat));

	CDEBUG(D_INFO, "%s: FLD cache - Size: %d, Threshold: %d\n",
	       cache->fci_name, cache_size, cache_threshold);

	return cache;
}
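
/*
 * Minimal usage sketch for the init/fini pair, assuming a caller that owns
 * the cache for its whole lifetime; the name and the size/threshold limits
 * below are arbitrary examples, not values mandated by this file:
 *
 *	struct fld_cache *cache;
 *
 *	cache = fld_cache_init("example_fld_cache", 64, 8);
 *	if (IS_ERR(cache))
 *		return PTR_ERR(cache);
 *
 *	... use the cache ...
 *
 *	fld_cache_fini(cache);
 */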

/**
 * destroy fld cache.
 */
void fld_cache_fini(struct fld_cache *cache)
{
	__u64 pct;

	LASSERT(cache != NULL);
	fld_cache_flush(cache);

	if (cache->fci_stat.fst_count > 0) {
		pct = cache->fci_stat.fst_cache * 100;
		do_div(pct, cache->fci_stat.fst_count);
	} else {
		pct = 0;
	}

	CDEBUG(D_INFO, "FLD cache statistics (%s):\n", cache->fci_name);
	CDEBUG(D_INFO, "  Total reqs: "LPU64"\n", cache->fci_stat.fst_count);
	CDEBUG(D_INFO, "  Cache reqs: "LPU64"\n", cache->fci_stat.fst_cache);
	CDEBUG(D_INFO, "  Cache hits: "LPU64"%%\n", pct);

	OBD_FREE_PTR(cache);
}

/**
 * delete given node from list.
 */
void fld_cache_entry_delete(struct fld_cache *cache,
			    struct fld_cache_entry *node)
{
	list_del(&node->fce_list);
	list_del(&node->fce_lru);
	cache->fci_cache_count--;
	OBD_FREE_PTR(node);
}

/**
 * Fix list by checking each entry against the NEXT entry in order:
 * merge adjacent ranges, resolve overlaps and drop duplicates.
 */
static void fld_fix_new_list(struct fld_cache *cache)
{
	struct fld_cache_entry *f_curr;
	struct fld_cache_entry *f_next;
	struct lu_seq_range *c_range;
	struct lu_seq_range *n_range;
	struct list_head *head = &cache->fci_entries_head;

restart_fixup:

	list_for_each_entry_safe(f_curr, f_next, head, fce_list) {
		c_range = &f_curr->fce_range;
		n_range = &f_next->fce_range;

		LASSERT(range_is_sane(c_range));
		if (&f_next->fce_list == head)
			break;

		if (c_range->lsr_flags != n_range->lsr_flags)
			continue;

		LASSERTF(c_range->lsr_start <= n_range->lsr_start,
			 "cur lsr_start "DRANGE" next lsr_start "DRANGE"\n",
			 PRANGE(c_range), PRANGE(n_range));

		/* check merge possibility with next range */
		if (c_range->lsr_end == n_range->lsr_start) {
			if (c_range->lsr_index != n_range->lsr_index)
				continue;
			n_range->lsr_start = c_range->lsr_start;
			fld_cache_entry_delete(cache, f_curr);
			continue;
		}

		/* check if current range overlaps with next range. */
		if (n_range->lsr_start < c_range->lsr_end) {
			if (c_range->lsr_index == n_range->lsr_index) {
				n_range->lsr_start = c_range->lsr_start;
				n_range->lsr_end = max(c_range->lsr_end,
						       n_range->lsr_end);
				fld_cache_entry_delete(cache, f_curr);
			} else {
				if (n_range->lsr_end <= c_range->lsr_end) {
					*n_range = *c_range;
					fld_cache_entry_delete(cache, f_curr);
				} else
					n_range->lsr_start = c_range->lsr_end;
			}

			/* we could have an overlap with the next range
			 * too, so better restart. */
			goto restart_fixup;
		}

		/* kill duplicates */
		if (c_range->lsr_start == n_range->lsr_start &&
		    c_range->lsr_end == n_range->lsr_end)
			fld_cache_entry_delete(cache, f_curr);
	}
}
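
/*
 * A concrete sketch of what the fixup above does, with made-up numbers:
 * after inserting [0x100, 0x200) for index 0 next to an existing
 * [0x200, 0x300) for index 0, the two adjacent entries collapse into a
 * single [0x100, 0x300) entry; had the second range belonged to a
 * different index, both entries would have been kept unchanged.
 */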

/**
 * add node to fld cache
 */
static inline void fld_cache_entry_add(struct fld_cache *cache,
				       struct fld_cache_entry *f_new,
				       struct list_head *pos)
{
	list_add(&f_new->fce_list, pos);
	list_add(&f_new->fce_lru, &cache->fci_lru);

	cache->fci_cache_count++;
	fld_fix_new_list(cache);
}

/**
 * Check if cache needs to be shrunk. If so, remove entries from the
 * tail of the LRU list until the cache is small enough again.
 */
static int fld_cache_shrink(struct fld_cache *cache)
{
	struct fld_cache_entry *flde;
	struct list_head *curr;
	int num = 0;

	LASSERT(cache != NULL);

	if (cache->fci_cache_count < cache->fci_cache_size)
		return 0;

	curr = cache->fci_lru.prev;

	while (cache->fci_cache_count + cache->fci_threshold >
	       cache->fci_cache_size && curr != &cache->fci_lru) {
		flde = list_entry(curr, struct fld_cache_entry, fce_lru);
		curr = curr->prev;
		fld_cache_entry_delete(cache, flde);
		num++;
	}

	CDEBUG(D_INFO, "%s: FLD cache - Shrunk by %d entries\n",
	       cache->fci_name, num);

	return 0;
}
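
/*
 * Worked example of the shrink arithmetic above, with made-up limits:
 * with fci_cache_size = 64 and fci_threshold = 8, shrinking starts once
 * fci_cache_count reaches 64 and keeps evicting entries from the LRU
 * tail while count + 8 > 64, i.e. until only 56 entries remain.
 */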

/**
 * kill all fld cache entries.
 */
void fld_cache_flush(struct fld_cache *cache)
{
	write_lock(&cache->fci_lock);
	cache->fci_cache_size = 0;
	fld_cache_shrink(cache);
	write_unlock(&cache->fci_lock);
}

/**
 * punch hole in existing range. divide this range and add new
 * entry accordingly.
 */
void fld_cache_punch_hole(struct fld_cache *cache,
			  struct fld_cache_entry *f_curr,
			  struct fld_cache_entry *f_new)
{
	const struct lu_seq_range *range = &f_new->fce_range;
	const seqno_t new_start  = range->lsr_start;
	const seqno_t new_end  = range->lsr_end;
	struct fld_cache_entry *fldt;

	OBD_ALLOC_GFP(fldt, sizeof(*fldt), GFP_ATOMIC);
	if (!fldt) {
		OBD_FREE_PTR(f_new);
		/* overlap is not allowed, so don't mess up list. */
		return;
	}
	/* break f_curr RANGE into three RANGES:
	 *	f_curr, f_new, fldt
	 */

	/* f_new = *range */

	/* fldt */
	fldt->fce_range.lsr_start = new_end;
	fldt->fce_range.lsr_end = f_curr->fce_range.lsr_end;
	fldt->fce_range.lsr_index = f_curr->fce_range.lsr_index;

	/* f_curr */
	f_curr->fce_range.lsr_end = new_start;

	/* add these two entries to list */
	fld_cache_entry_add(cache, f_new, &f_curr->fce_list);
	fld_cache_entry_add(cache, fldt, &f_new->fce_list);

	/* no need to fixup */
}
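
/*
 * Sketch of the split above with made-up numbers: punching
 * [0x150, 0x180) belonging to index 1 into an existing [0x100, 0x200)
 * belonging to index 0 leaves three entries in the list:
 *
 *	f_curr: [0x100, 0x150) -> index 0
 *	f_new:  [0x150, 0x180) -> index 1
 *	fldt:   [0x180, 0x200) -> index 0
 */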

/**
 * handle range overlap in fld cache.
 */
static void fld_cache_overlap_handle(struct fld_cache *cache,
				struct fld_cache_entry *f_curr,
				struct fld_cache_entry *f_new)
{
	const struct lu_seq_range *range = &f_new->fce_range;
	const seqno_t new_start  = range->lsr_start;
	const seqno_t new_end  = range->lsr_end;
	const mdsno_t mdt = range->lsr_index;

	/* this is the overlap case; it only checks overlap with the
	 * previous range, the fixup will handle overlap with the next
	 * range. */

	if (f_curr->fce_range.lsr_index == mdt) {
		f_curr->fce_range.lsr_start = min(f_curr->fce_range.lsr_start,
						  new_start);

		f_curr->fce_range.lsr_end = max(f_curr->fce_range.lsr_end,
						new_end);

		OBD_FREE_PTR(f_new);
		fld_fix_new_list(cache);

	} else if (new_start <= f_curr->fce_range.lsr_start &&
			f_curr->fce_range.lsr_end <= new_end) {
		/* case 1: new range completely overshadows existing range.
		 *	 e.g. whole range migrated. update fld cache entry */

		f_curr->fce_range = *range;
		OBD_FREE_PTR(f_new);
		fld_fix_new_list(cache);

	} else if (f_curr->fce_range.lsr_start < new_start &&
			new_end < f_curr->fce_range.lsr_end) {
		/* case 2: new range fits within the existing range. */

		fld_cache_punch_hole(cache, f_curr, f_new);

	} else if (new_end <= f_curr->fce_range.lsr_end) {
		/* case 3: overlap:
		 *	 [new_start [c_start  new_end)  c_end)
		 */

		LASSERT(new_start <= f_curr->fce_range.lsr_start);

		f_curr->fce_range.lsr_start = new_end;
		fld_cache_entry_add(cache, f_new, f_curr->fce_list.prev);

	} else if (f_curr->fce_range.lsr_start <= new_start) {
		/* case 4: overlap:
		 *	 [c_start [new_start c_end) new_end)
		 */

		LASSERT(f_curr->fce_range.lsr_end <= new_end);

		f_curr->fce_range.lsr_end = new_start;
		fld_cache_entry_add(cache, f_new, &f_curr->fce_list);
	} else
		CERROR("NEW range = "DRANGE" curr = "DRANGE"\n",
		       PRANGE(range), PRANGE(&f_curr->fce_range));
}
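
/*
 * Worked example for the partial-overlap cases above, with made-up numbers
 * and an existing entry [0x100, 0x200) (the new range belongs to a
 * different index, otherwise the ranges would simply be unioned):
 *
 *	case 3: inserting [0x080, 0x180) trims the old entry to
 *		[0x180, 0x200) and adds the new one before it;
 *	case 4: inserting [0x180, 0x280) trims the old entry to
 *		[0x100, 0x180) and adds the new one after it.
 */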

struct fld_cache_entry
*fld_cache_entry_create(const struct lu_seq_range *range)
{
	struct fld_cache_entry *f_new;

	LASSERT(range_is_sane(range));

	OBD_ALLOC_PTR(f_new);
	if (!f_new)
		return ERR_PTR(-ENOMEM);

	f_new->fce_range = *range;
	return f_new;
}

/**
 * Insert FLD entry in FLD cache.
 *
 * This function handles all cases of merging and breaking up of
 * ranges.
 */
int fld_cache_insert_nolock(struct fld_cache *cache,
			    struct fld_cache_entry *f_new)
{
	struct fld_cache_entry *f_curr;
	struct fld_cache_entry *n;
	struct list_head *head;
	struct list_head *prev = NULL;
	const seqno_t new_start  = f_new->fce_range.lsr_start;
	const seqno_t new_end  = f_new->fce_range.lsr_end;
	__u32 new_flags  = f_new->fce_range.lsr_flags;

	/*
	 * Duplicate entries are eliminated in insert op.
	 * So we don't need to search new entry before starting
	 * insertion loop.
	 */

	if (!cache->fci_no_shrink)
		fld_cache_shrink(cache);

	head = &cache->fci_entries_head;

	list_for_each_entry_safe(f_curr, n, head, fce_list) {
		/* stop once the new range ends before this entry starts */
		if (new_end < f_curr->fce_range.lsr_start ||
		   (new_end == f_curr->fce_range.lsr_start &&
		    new_flags != f_curr->fce_range.lsr_flags))
			break;

		prev = &f_curr->fce_list;
		/* check if the new range overlaps this entry. */
		if (new_start < f_curr->fce_range.lsr_end &&
		    new_flags == f_curr->fce_range.lsr_flags) {
			fld_cache_overlap_handle(cache, f_curr, f_new);
			goto out;
		}
	}

	if (prev == NULL)
		prev = head;

	CDEBUG(D_INFO, "insert range "DRANGE"\n", PRANGE(&f_new->fce_range));
	/* Add new entry to cache and lru list. */
	fld_cache_entry_add(cache, f_new, prev);
out:
	return 0;
}

int fld_cache_insert(struct fld_cache *cache,
		     const struct lu_seq_range *range)
{
	struct fld_cache_entry *flde;
	int rc;

	flde = fld_cache_entry_create(range);
	if (IS_ERR(flde))
		return PTR_ERR(flde);

	write_lock(&cache->fci_lock);
	rc = fld_cache_insert_nolock(cache, flde);
	write_unlock(&cache->fci_lock);
	if (rc)
		OBD_FREE_PTR(flde);

	return rc;
}
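
/*
 * Minimal usage sketch for the insert path, assuming the caller already
 * holds a valid cache and knows where the sequence range lives; the range
 * values are arbitrary examples, and LU_SEQ_RANGE_MDT is assumed to be the
 * MDT range flag defined elsewhere in the tree:
 *
 *	struct lu_seq_range range = {
 *		.lsr_start = 0x400,
 *		.lsr_end   = 0x500,
 *		.lsr_index = 0,
 *		.lsr_flags = LU_SEQ_RANGE_MDT,
 *	};
 *	int rc;
 *
 *	rc = fld_cache_insert(cache, &range);
 *	if (rc != 0)
 *		CDEBUG(D_INFO, "example: insert failed: rc = %d\n", rc);
 */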

void fld_cache_delete_nolock(struct fld_cache *cache,
		      const struct lu_seq_range *range)
{
	struct fld_cache_entry *flde;
	struct fld_cache_entry *tmp;
	struct list_head *head;

	head = &cache->fci_entries_head;
	list_for_each_entry_safe(flde, tmp, head, fce_list) {
		/* delete the first entry matching the given range */
		if (range->lsr_start == flde->fce_range.lsr_start ||
		   (range->lsr_end == flde->fce_range.lsr_end &&
		    range->lsr_flags == flde->fce_range.lsr_flags)) {
			fld_cache_entry_delete(cache, flde);
			break;
		}
	}
}

/**
 * Delete FLD entry in FLD cache.
 */
void fld_cache_delete(struct fld_cache *cache,
		      const struct lu_seq_range *range)
{
	write_lock(&cache->fci_lock);
	fld_cache_delete_nolock(cache, range);
	write_unlock(&cache->fci_lock);
}

struct fld_cache_entry
*fld_cache_entry_lookup_nolock(struct fld_cache *cache,
			      struct lu_seq_range *range)
{
	struct fld_cache_entry *flde;
	struct fld_cache_entry *got = NULL;
	struct list_head *head;

	head = &cache->fci_entries_head;
	list_for_each_entry(flde, head, fce_list) {
		if (range->lsr_start == flde->fce_range.lsr_start ||
		   (range->lsr_end == flde->fce_range.lsr_end &&
		    range->lsr_flags == flde->fce_range.lsr_flags)) {
			got = flde;
			break;
		}
	}

	return got;
}

/**
 * lookup \a range in fld cache.
 */
struct fld_cache_entry
*fld_cache_entry_lookup(struct fld_cache *cache, struct lu_seq_range *range)
{
	struct fld_cache_entry *got = NULL;

	read_lock(&cache->fci_lock);
	got = fld_cache_entry_lookup_nolock(cache, range);
	read_unlock(&cache->fci_lock);
	return got;
}

/**
 * lookup \a seq sequence in fld cache and return its range.
 */
int fld_cache_lookup(struct fld_cache *cache,
		     const seqno_t seq, struct lu_seq_range *range)
{
	struct fld_cache_entry *flde;
	struct fld_cache_entry *prev = NULL;
	struct list_head *head;

	read_lock(&cache->fci_lock);
	head = &cache->fci_entries_head;

	cache->fci_stat.fst_count++;
	list_for_each_entry(flde, head, fce_list) {
		if (flde->fce_range.lsr_start > seq) {
			if (prev != NULL)
				*range = prev->fce_range;
			break;
		}

		prev = flde;
		if (range_within(&flde->fce_range, seq)) {
			*range = flde->fce_range;

			cache->fci_stat.fst_cache++;
			read_unlock(&cache->fci_lock);
			return 0;
		}
	}
	read_unlock(&cache->fci_lock);
	return -ENOENT;
}
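
/*
 * Minimal lookup sketch, assuming the caller wants to know which index
 * serves a given sequence; the sequence number is an arbitrary example:
 *
 *	struct lu_seq_range range;
 *	int rc;
 *
 *	rc = fld_cache_lookup(cache, 0x480ULL, &range);
 *	if (rc == 0)
 *		CDEBUG(D_INFO, "seq 0x480 is served by index %u\n",
 *		       (unsigned int)range.lsr_index);
 *	else
 *		CDEBUG(D_INFO, "cache miss, caller asks the FLD server\n");
 */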
548