hugetlb.c revision 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2
1/* 2 * Generic hugetlb support. 3 * (C) William Irwin, April 2004 4 */ 5#include <linux/gfp.h> 6#include <linux/list.h> 7#include <linux/init.h> 8#include <linux/module.h> 9#include <linux/mm.h> 10#include <linux/hugetlb.h> 11#include <linux/sysctl.h> 12#include <linux/highmem.h> 13#include <linux/nodemask.h> 14 15const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; 16static unsigned long nr_huge_pages, free_huge_pages; 17unsigned long max_huge_pages; 18static struct list_head hugepage_freelists[MAX_NUMNODES]; 19static unsigned int nr_huge_pages_node[MAX_NUMNODES]; 20static unsigned int free_huge_pages_node[MAX_NUMNODES]; 21static DEFINE_SPINLOCK(hugetlb_lock); 22 23static void enqueue_huge_page(struct page *page) 24{ 25 int nid = page_to_nid(page); 26 list_add(&page->lru, &hugepage_freelists[nid]); 27 free_huge_pages++; 28 free_huge_pages_node[nid]++; 29} 30 31static struct page *dequeue_huge_page(void) 32{ 33 int nid = numa_node_id(); 34 struct page *page = NULL; 35 36 if (list_empty(&hugepage_freelists[nid])) { 37 for (nid = 0; nid < MAX_NUMNODES; ++nid) 38 if (!list_empty(&hugepage_freelists[nid])) 39 break; 40 } 41 if (nid >= 0 && nid < MAX_NUMNODES && 42 !list_empty(&hugepage_freelists[nid])) { 43 page = list_entry(hugepage_freelists[nid].next, 44 struct page, lru); 45 list_del(&page->lru); 46 free_huge_pages--; 47 free_huge_pages_node[nid]--; 48 } 49 return page; 50} 51 52static struct page *alloc_fresh_huge_page(void) 53{ 54 static int nid = 0; 55 struct page *page; 56 page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP|__GFP_NOWARN, 57 HUGETLB_PAGE_ORDER); 58 nid = (nid + 1) % num_online_nodes(); 59 if (page) { 60 nr_huge_pages++; 61 nr_huge_pages_node[page_to_nid(page)]++; 62 } 63 return page; 64} 65 66void free_huge_page(struct page *page) 67{ 68 BUG_ON(page_count(page)); 69 70 INIT_LIST_HEAD(&page->lru); 71 page[1].mapping = NULL; 72 73 spin_lock(&hugetlb_lock); 74 enqueue_huge_page(page); 75 spin_unlock(&hugetlb_lock); 76} 77 78struct page *alloc_huge_page(void) 79{ 80 struct page *page; 81 int i; 82 83 spin_lock(&hugetlb_lock); 84 page = dequeue_huge_page(); 85 if (!page) { 86 spin_unlock(&hugetlb_lock); 87 return NULL; 88 } 89 spin_unlock(&hugetlb_lock); 90 set_page_count(page, 1); 91 page[1].mapping = (void *)free_huge_page; 92 for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); ++i) 93 clear_highpage(&page[i]); 94 return page; 95} 96 97static int __init hugetlb_init(void) 98{ 99 unsigned long i; 100 struct page *page; 101 102 for (i = 0; i < MAX_NUMNODES; ++i) 103 INIT_LIST_HEAD(&hugepage_freelists[i]); 104 105 for (i = 0; i < max_huge_pages; ++i) { 106 page = alloc_fresh_huge_page(); 107 if (!page) 108 break; 109 spin_lock(&hugetlb_lock); 110 enqueue_huge_page(page); 111 spin_unlock(&hugetlb_lock); 112 } 113 max_huge_pages = free_huge_pages = nr_huge_pages = i; 114 printk("Total HugeTLB memory allocated, %ld\n", free_huge_pages); 115 return 0; 116} 117module_init(hugetlb_init); 118 119static int __init hugetlb_setup(char *s) 120{ 121 if (sscanf(s, "%lu", &max_huge_pages) <= 0) 122 max_huge_pages = 0; 123 return 1; 124} 125__setup("hugepages=", hugetlb_setup); 126 127#ifdef CONFIG_SYSCTL 128static void update_and_free_page(struct page *page) 129{ 130 int i; 131 nr_huge_pages--; 132 nr_huge_pages_node[page_zone(page)->zone_pgdat->node_id]--; 133 for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) { 134 page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced | 135 1 << PG_dirty | 1 << PG_active | 1 << PG_reserved | 136 1 << PG_private | 1<< PG_writeback); 137 set_page_count(&page[i], 0); 138 } 139 set_page_count(page, 1); 140 __free_pages(page, HUGETLB_PAGE_ORDER); 141} 142 143#ifdef CONFIG_HIGHMEM 144static void try_to_free_low(unsigned long count) 145{ 146 int i, nid; 147 for (i = 0; i < MAX_NUMNODES; ++i) { 148 struct page *page, *next; 149 list_for_each_entry_safe(page, next, &hugepage_freelists[i], lru) { 150 if (PageHighMem(page)) 151 continue; 152 list_del(&page->lru); 153 update_and_free_page(page); 154 nid = page_zone(page)->zone_pgdat->node_id; 155 free_huge_pages--; 156 free_huge_pages_node[nid]--; 157 if (count >= nr_huge_pages) 158 return; 159 } 160 } 161} 162#else 163static inline void try_to_free_low(unsigned long count) 164{ 165} 166#endif 167 168static unsigned long set_max_huge_pages(unsigned long count) 169{ 170 while (count > nr_huge_pages) { 171 struct page *page = alloc_fresh_huge_page(); 172 if (!page) 173 return nr_huge_pages; 174 spin_lock(&hugetlb_lock); 175 enqueue_huge_page(page); 176 spin_unlock(&hugetlb_lock); 177 } 178 if (count >= nr_huge_pages) 179 return nr_huge_pages; 180 181 spin_lock(&hugetlb_lock); 182 try_to_free_low(count); 183 while (count < nr_huge_pages) { 184 struct page *page = dequeue_huge_page(); 185 if (!page) 186 break; 187 update_and_free_page(page); 188 } 189 spin_unlock(&hugetlb_lock); 190 return nr_huge_pages; 191} 192 193int hugetlb_sysctl_handler(struct ctl_table *table, int write, 194 struct file *file, void __user *buffer, 195 size_t *length, loff_t *ppos) 196{ 197 proc_doulongvec_minmax(table, write, file, buffer, length, ppos); 198 max_huge_pages = set_max_huge_pages(max_huge_pages); 199 return 0; 200} 201#endif /* CONFIG_SYSCTL */ 202 203int hugetlb_report_meminfo(char *buf) 204{ 205 return sprintf(buf, 206 "HugePages_Total: %5lu\n" 207 "HugePages_Free: %5lu\n" 208 "Hugepagesize: %5lu kB\n", 209 nr_huge_pages, 210 free_huge_pages, 211 HPAGE_SIZE/1024); 212} 213 214int hugetlb_report_node_meminfo(int nid, char *buf) 215{ 216 return sprintf(buf, 217 "Node %d HugePages_Total: %5u\n" 218 "Node %d HugePages_Free: %5u\n", 219 nid, nr_huge_pages_node[nid], 220 nid, free_huge_pages_node[nid]); 221} 222 223int is_hugepage_mem_enough(size_t size) 224{ 225 return (size + ~HPAGE_MASK)/HPAGE_SIZE <= free_huge_pages; 226} 227 228/* Return the number pages of memory we physically have, in PAGE_SIZE units. */ 229unsigned long hugetlb_total_pages(void) 230{ 231 return nr_huge_pages * (HPAGE_SIZE / PAGE_SIZE); 232} 233EXPORT_SYMBOL(hugetlb_total_pages); 234 235/* 236 * We cannot handle pagefaults against hugetlb pages at all. They cause 237 * handle_mm_fault() to try to instantiate regular-sized pages in the 238 * hugegpage VMA. do_page_fault() is supposed to trap this, so BUG is we get 239 * this far. 240 */ 241static struct page *hugetlb_nopage(struct vm_area_struct *vma, 242 unsigned long address, int *unused) 243{ 244 BUG(); 245 return NULL; 246} 247 248struct vm_operations_struct hugetlb_vm_ops = { 249 .nopage = hugetlb_nopage, 250}; 251 252void zap_hugepage_range(struct vm_area_struct *vma, 253 unsigned long start, unsigned long length) 254{ 255 struct mm_struct *mm = vma->vm_mm; 256 257 spin_lock(&mm->page_table_lock); 258 unmap_hugepage_range(vma, start, start + length); 259 spin_unlock(&mm->page_table_lock); 260} 261