prio_tree.c revision 268bb0ce3e87872cb9290c322b0d35bce230d88f
1/*
2 * mm/prio_tree.c - priority search tree for mapping->i_mmap
3 *
4 * Copyright (C) 2004, Rajesh Venkatasubramanian <vrajesh@umich.edu>
5 *
6 * This file is released under the GPL v2.
7 *
8 * Based on the radix priority search tree proposed by Edward M. McCreight
9 * SIAM Journal of Computing, vol. 14, no.2, pages 257-276, May 1985
10 *
11 * 02Feb2004	Initial version
12 */
13
14#include <linux/mm.h>
15#include <linux/prio_tree.h>
16#include <linux/prefetch.h>
17
18/*
19 * See lib/prio_tree.c for details on the general radix priority search tree
20 * code.
21 */
22
23/*
24 * The following #defines are mirrored from lib/prio_tree.c. They're only used
25 * for debugging, and should be removed (along with the debugging code using
26 * them) when switching also VMAs to the regular prio_tree code.
27 */
28
29#define RADIX_INDEX(vma)  ((vma)->vm_pgoff)
30#define VMA_SIZE(vma)	  (((vma)->vm_end - (vma)->vm_start) >> PAGE_SHIFT)
31/* avoid overflow */
32#define HEAP_INDEX(vma)   ((vma)->vm_pgoff + (VMA_SIZE(vma) - 1))
33
34/*
35 * Radix priority search tree for address_space->i_mmap
36 *
37 * For each vma that map a unique set of file pages i.e., unique [radix_index,
38 * heap_index] value, we have a corresponding priority search tree node. If
39 * multiple vmas have identical [radix_index, heap_index] value, then one of
40 * them is used as a tree node and others are stored in a vm_set list. The tree
41 * node points to the first vma (head) of the list using vm_set.head.
42 *
43 * prio_tree_root
44 *      |
45 *      A       vm_set.head
46 *     / \      /
47 *    L   R -> H-I-J-K-M-N-O-P-Q-S
48 *    ^   ^    <-- vm_set.list -->
49 *  tree nodes
50 *
51 * We need some way to identify whether a vma is a tree node, head of a vm_set
52 * list, or just a member of a vm_set list. We cannot use vm_flags to store
53 * such information. The reason is, in the above figure, it is possible that
54 * vm_flags' of R and H are covered by the different mmap_sems. When R is
55 * removed under R->mmap_sem, H replaces R as a tree node. Since we do not hold
56 * H->mmap_sem, we cannot use H->vm_flags for marking that H is a tree node now.
57 * That's why some trick involving shared.vm_set.parent is used for identifying
58 * tree nodes and list head nodes.
59 *
60 * vma radix priority search tree node rules:
61 *
62 * vma->shared.vm_set.parent != NULL    ==> a tree node
63 *      vma->shared.vm_set.head != NULL ==> list of others mapping same range
64 *      vma->shared.vm_set.head == NULL ==> no others map the same range
65 *
66 * vma->shared.vm_set.parent == NULL
67 * 	vma->shared.vm_set.head != NULL ==> list head of vmas mapping same range
68 * 	vma->shared.vm_set.head == NULL ==> a list node
69 */
70
71/*
72 * Add a new vma known to map the same set of pages as the old vma:
73 * useful for fork's dup_mmap as well as vma_prio_tree_insert below.
74 * Note that it just happens to work correctly on i_mmap_nonlinear too.
75 */
76void vma_prio_tree_add(struct vm_area_struct *vma, struct vm_area_struct *old)
77{
78	/* Leave these BUG_ONs till prio_tree patch stabilizes */
79	BUG_ON(RADIX_INDEX(vma) != RADIX_INDEX(old));
80	BUG_ON(HEAP_INDEX(vma) != HEAP_INDEX(old));
81
82	vma->shared.vm_set.head = NULL;
83	vma->shared.vm_set.parent = NULL;
84
85	if (!old->shared.vm_set.parent)
86		list_add(&vma->shared.vm_set.list,
87				&old->shared.vm_set.list);
88	else if (old->shared.vm_set.head)
89		list_add_tail(&vma->shared.vm_set.list,
90				&old->shared.vm_set.head->shared.vm_set.list);
91	else {
92		INIT_LIST_HEAD(&vma->shared.vm_set.list);
93		vma->shared.vm_set.head = old;
94		old->shared.vm_set.head = vma;
95	}
96}
97
98void vma_prio_tree_insert(struct vm_area_struct *vma,
99			  struct prio_tree_root *root)
100{
101	struct prio_tree_node *ptr;
102	struct vm_area_struct *old;
103
104	vma->shared.vm_set.head = NULL;
105
106	ptr = raw_prio_tree_insert(root, &vma->shared.prio_tree_node);
107	if (ptr != (struct prio_tree_node *) &vma->shared.prio_tree_node) {
108		old = prio_tree_entry(ptr, struct vm_area_struct,
109					shared.prio_tree_node);
110		vma_prio_tree_add(vma, old);
111	}
112}
113
114void vma_prio_tree_remove(struct vm_area_struct *vma,
115			  struct prio_tree_root *root)
116{
117	struct vm_area_struct *node, *head, *new_head;
118
119	if (!vma->shared.vm_set.head) {
120		if (!vma->shared.vm_set.parent)
121			list_del_init(&vma->shared.vm_set.list);
122		else
123			raw_prio_tree_remove(root, &vma->shared.prio_tree_node);
124	} else {
125		/* Leave this BUG_ON till prio_tree patch stabilizes */
126		BUG_ON(vma->shared.vm_set.head->shared.vm_set.head != vma);
127		if (vma->shared.vm_set.parent) {
128			head = vma->shared.vm_set.head;
129			if (!list_empty(&head->shared.vm_set.list)) {
130				new_head = list_entry(
131					head->shared.vm_set.list.next,
132					struct vm_area_struct,
133					shared.vm_set.list);
134				list_del_init(&head->shared.vm_set.list);
135			} else
136				new_head = NULL;
137
138			raw_prio_tree_replace(root, &vma->shared.prio_tree_node,
139					&head->shared.prio_tree_node);
140			head->shared.vm_set.head = new_head;
141			if (new_head)
142				new_head->shared.vm_set.head = head;
143
144		} else {
145			node = vma->shared.vm_set.head;
146			if (!list_empty(&vma->shared.vm_set.list)) {
147				new_head = list_entry(
148					vma->shared.vm_set.list.next,
149					struct vm_area_struct,
150					shared.vm_set.list);
151				list_del_init(&vma->shared.vm_set.list);
152				node->shared.vm_set.head = new_head;
153				new_head->shared.vm_set.head = node;
154			} else
155				node->shared.vm_set.head = NULL;
156		}
157	}
158}
159
160/*
161 * Helper function to enumerate vmas that map a given file page or a set of
162 * contiguous file pages. The function returns vmas that at least map a single
163 * page in the given range of contiguous file pages.
164 */
165struct vm_area_struct *vma_prio_tree_next(struct vm_area_struct *vma,
166					struct prio_tree_iter *iter)
167{
168	struct prio_tree_node *ptr;
169	struct vm_area_struct *next;
170
171	if (!vma) {
172		/*
173		 * First call is with NULL vma
174		 */
175		ptr = prio_tree_next(iter);
176		if (ptr) {
177			next = prio_tree_entry(ptr, struct vm_area_struct,
178						shared.prio_tree_node);
179			prefetch(next->shared.vm_set.head);
180			return next;
181		} else
182			return NULL;
183	}
184
185	if (vma->shared.vm_set.parent) {
186		if (vma->shared.vm_set.head) {
187			next = vma->shared.vm_set.head;
188			prefetch(next->shared.vm_set.list.next);
189			return next;
190		}
191	} else {
192		next = list_entry(vma->shared.vm_set.list.next,
193				struct vm_area_struct, shared.vm_set.list);
194		if (!next->shared.vm_set.head) {
195			prefetch(next->shared.vm_set.list.next);
196			return next;
197		}
198	}
199
200	ptr = prio_tree_next(iter);
201	if (ptr) {
202		next = prio_tree_entry(ptr, struct vm_area_struct,
203					shared.prio_tree_node);
204		prefetch(next->shared.vm_set.head);
205		return next;
206	} else
207		return NULL;
208}
209