balloon.c revision 06f521d5d67a23d91add6f1beb73edc6f2b70d0c
1/****************************************************************************** 2 * balloon.c 3 * 4 * Xen balloon driver - enables returning/claiming memory to/from Xen. 5 * 6 * Copyright (c) 2003, B Dragovic 7 * Copyright (c) 2003-2004, M Williamson, K Fraser 8 * Copyright (c) 2005 Dan M. Smith, IBM Corporation 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License version 2 12 * as published by the Free Software Foundation; or, when distributed 13 * separately from the Linux kernel or incorporated into other 14 * software packages, subject to the following license: 15 * 16 * Permission is hereby granted, free of charge, to any person obtaining a copy 17 * of this source file (the "Software"), to deal in the Software without 18 * restriction, including without limitation the rights to use, copy, modify, 19 * merge, publish, distribute, sublicense, and/or sell copies of the Software, 20 * and to permit persons to whom the Software is furnished to do so, subject to 21 * the following conditions: 22 * 23 * The above copyright notice and this permission notice shall be included in 24 * all copies or substantial portions of the Software. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 27 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 28 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 29 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 30 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 31 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 32 * IN THE SOFTWARE. 33 */ 34 35#include <linux/kernel.h> 36#include <linux/module.h> 37#include <linux/sched.h> 38#include <linux/errno.h> 39#include <linux/mm.h> 40#include <linux/bootmem.h> 41#include <linux/pagemap.h> 42#include <linux/highmem.h> 43#include <linux/mutex.h> 44#include <linux/list.h> 45#include <linux/sysdev.h> 46#include <linux/gfp.h> 47 48#include <asm/page.h> 49#include <asm/pgalloc.h> 50#include <asm/pgtable.h> 51#include <asm/uaccess.h> 52#include <asm/tlb.h> 53#include <asm/e820.h> 54 55#include <asm/xen/hypervisor.h> 56#include <asm/xen/hypercall.h> 57 58#include <xen/xen.h> 59#include <xen/interface/xen.h> 60#include <xen/interface/memory.h> 61#include <xen/xenbus.h> 62#include <xen/features.h> 63#include <xen/page.h> 64 65#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) 66 67#define BALLOON_CLASS_NAME "xen_memory" 68 69struct balloon_stats { 70 /* We aim for 'current allocation' == 'target allocation'. */ 71 unsigned long current_pages; 72 unsigned long target_pages; 73 /* Number of pages in high- and low-memory balloons. */ 74 unsigned long balloon_low; 75 unsigned long balloon_high; 76}; 77 78static DEFINE_MUTEX(balloon_mutex); 79 80static struct sys_device balloon_sysdev; 81 82static int register_balloon(struct sys_device *sysdev); 83 84static struct balloon_stats balloon_stats; 85 86/* We increase/decrease in batches which fit in a page */ 87static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; 88 89#ifdef CONFIG_HIGHMEM 90#define inc_totalhigh_pages() (totalhigh_pages++) 91#define dec_totalhigh_pages() (totalhigh_pages--) 92#else 93#define inc_totalhigh_pages() do {} while(0) 94#define dec_totalhigh_pages() do {} while(0) 95#endif 96 97/* List of ballooned pages, threaded through the mem_map array. */ 98static LIST_HEAD(ballooned_pages); 99 100/* Main work function, always executed in process context. */ 101static void balloon_process(struct work_struct *work); 102static DECLARE_WORK(balloon_worker, balloon_process); 103static struct timer_list balloon_timer; 104 105/* When ballooning out (allocating memory to return to Xen) we don't really 106 want the kernel to try too hard since that can trigger the oom killer. */ 107#define GFP_BALLOON \ 108 (GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC) 109 110static void scrub_page(struct page *page) 111{ 112#ifdef CONFIG_XEN_SCRUB_PAGES 113 clear_highpage(page); 114#endif 115} 116 117/* balloon_append: add the given page to the balloon. */ 118static void __balloon_append(struct page *page) 119{ 120 /* Lowmem is re-populated first, so highmem pages go at list tail. */ 121 if (PageHighMem(page)) { 122 list_add_tail(&page->lru, &ballooned_pages); 123 balloon_stats.balloon_high++; 124 dec_totalhigh_pages(); 125 } else { 126 list_add(&page->lru, &ballooned_pages); 127 balloon_stats.balloon_low++; 128 } 129} 130 131static void balloon_append(struct page *page) 132{ 133 __balloon_append(page); 134 totalram_pages--; 135} 136 137/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */ 138static struct page *balloon_retrieve(void) 139{ 140 struct page *page; 141 142 if (list_empty(&ballooned_pages)) 143 return NULL; 144 145 page = list_entry(ballooned_pages.next, struct page, lru); 146 list_del(&page->lru); 147 148 if (PageHighMem(page)) { 149 balloon_stats.balloon_high--; 150 inc_totalhigh_pages(); 151 } 152 else 153 balloon_stats.balloon_low--; 154 155 totalram_pages++; 156 157 return page; 158} 159 160static struct page *balloon_first_page(void) 161{ 162 if (list_empty(&ballooned_pages)) 163 return NULL; 164 return list_entry(ballooned_pages.next, struct page, lru); 165} 166 167static struct page *balloon_next_page(struct page *page) 168{ 169 struct list_head *next = page->lru.next; 170 if (next == &ballooned_pages) 171 return NULL; 172 return list_entry(next, struct page, lru); 173} 174 175static void balloon_alarm(unsigned long unused) 176{ 177 schedule_work(&balloon_worker); 178} 179 180static unsigned long current_target(void) 181{ 182 unsigned long target = balloon_stats.target_pages; 183 184 target = min(target, 185 balloon_stats.current_pages + 186 balloon_stats.balloon_low + 187 balloon_stats.balloon_high); 188 189 return target; 190} 191 192static int increase_reservation(unsigned long nr_pages) 193{ 194 unsigned long pfn, i; 195 struct page *page; 196 long rc; 197 struct xen_memory_reservation reservation = { 198 .address_bits = 0, 199 .extent_order = 0, 200 .domid = DOMID_SELF 201 }; 202 203 if (nr_pages > ARRAY_SIZE(frame_list)) 204 nr_pages = ARRAY_SIZE(frame_list); 205 206 page = balloon_first_page(); 207 for (i = 0; i < nr_pages; i++) { 208 BUG_ON(page == NULL); 209 frame_list[i] = page_to_pfn(page); 210 page = balloon_next_page(page); 211 } 212 213 set_xen_guest_handle(reservation.extent_start, frame_list); 214 reservation.nr_extents = nr_pages; 215 rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); 216 if (rc < 0) 217 goto out; 218 219 for (i = 0; i < rc; i++) { 220 page = balloon_retrieve(); 221 BUG_ON(page == NULL); 222 223 pfn = page_to_pfn(page); 224 BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) && 225 phys_to_machine_mapping_valid(pfn)); 226 227 set_phys_to_machine(pfn, frame_list[i]); 228 229 /* Link back into the page tables if not highmem. */ 230 if (pfn < max_low_pfn) { 231 int ret; 232 ret = HYPERVISOR_update_va_mapping( 233 (unsigned long)__va(pfn << PAGE_SHIFT), 234 mfn_pte(frame_list[i], PAGE_KERNEL), 235 0); 236 BUG_ON(ret); 237 } 238 239 /* Relinquish the page back to the allocator. */ 240 ClearPageReserved(page); 241 init_page_count(page); 242 __free_page(page); 243 } 244 245 balloon_stats.current_pages += rc; 246 247 out: 248 return rc < 0 ? rc : rc != nr_pages; 249} 250 251static int decrease_reservation(unsigned long nr_pages) 252{ 253 unsigned long pfn, i; 254 struct page *page; 255 int need_sleep = 0; 256 int ret; 257 struct xen_memory_reservation reservation = { 258 .address_bits = 0, 259 .extent_order = 0, 260 .domid = DOMID_SELF 261 }; 262 263 if (nr_pages > ARRAY_SIZE(frame_list)) 264 nr_pages = ARRAY_SIZE(frame_list); 265 266 for (i = 0; i < nr_pages; i++) { 267 if ((page = alloc_page(GFP_BALLOON)) == NULL) { 268 nr_pages = i; 269 need_sleep = 1; 270 break; 271 } 272 273 pfn = page_to_pfn(page); 274 frame_list[i] = pfn_to_mfn(pfn); 275 276 scrub_page(page); 277 278 if (!PageHighMem(page)) { 279 ret = HYPERVISOR_update_va_mapping( 280 (unsigned long)__va(pfn << PAGE_SHIFT), 281 __pte_ma(0), 0); 282 BUG_ON(ret); 283 } 284 285 } 286 287 /* Ensure that ballooned highmem pages don't have kmaps. */ 288 kmap_flush_unused(); 289 flush_tlb_all(); 290 291 /* No more mappings: invalidate P2M and add to balloon. */ 292 for (i = 0; i < nr_pages; i++) { 293 pfn = mfn_to_pfn(frame_list[i]); 294 set_phys_to_machine(pfn, INVALID_P2M_ENTRY); 295 balloon_append(pfn_to_page(pfn)); 296 } 297 298 set_xen_guest_handle(reservation.extent_start, frame_list); 299 reservation.nr_extents = nr_pages; 300 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); 301 BUG_ON(ret != nr_pages); 302 303 balloon_stats.current_pages -= nr_pages; 304 305 return need_sleep; 306} 307 308/* 309 * We avoid multiple worker processes conflicting via the balloon mutex. 310 * We may of course race updates of the target counts (which are protected 311 * by the balloon lock), or with changes to the Xen hard limit, but we will 312 * recover from these in time. 313 */ 314static void balloon_process(struct work_struct *work) 315{ 316 int need_sleep = 0; 317 long credit; 318 319 mutex_lock(&balloon_mutex); 320 321 do { 322 credit = current_target() - balloon_stats.current_pages; 323 if (credit > 0) 324 need_sleep = (increase_reservation(credit) != 0); 325 if (credit < 0) 326 need_sleep = (decrease_reservation(-credit) != 0); 327 328#ifndef CONFIG_PREEMPT 329 if (need_resched()) 330 schedule(); 331#endif 332 } while ((credit != 0) && !need_sleep); 333 334 /* Schedule more work if there is some still to be done. */ 335 if (current_target() != balloon_stats.current_pages) 336 mod_timer(&balloon_timer, jiffies + HZ); 337 338 mutex_unlock(&balloon_mutex); 339} 340 341/* Resets the Xen limit, sets new target, and kicks off processing. */ 342static void balloon_set_new_target(unsigned long target) 343{ 344 /* No need for lock. Not read-modify-write updates. */ 345 balloon_stats.target_pages = target; 346 schedule_work(&balloon_worker); 347} 348 349static struct xenbus_watch target_watch = 350{ 351 .node = "memory/target" 352}; 353 354/* React to a change in the target key */ 355static void watch_target(struct xenbus_watch *watch, 356 const char **vec, unsigned int len) 357{ 358 unsigned long long new_target; 359 int err; 360 361 err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target); 362 if (err != 1) { 363 /* This is ok (for domain0 at least) - so just return */ 364 return; 365 } 366 367 /* The given memory/target value is in KiB, so it needs converting to 368 * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. 369 */ 370 balloon_set_new_target(new_target >> (PAGE_SHIFT - 10)); 371} 372 373static int balloon_init_watcher(struct notifier_block *notifier, 374 unsigned long event, 375 void *data) 376{ 377 int err; 378 379 err = register_xenbus_watch(&target_watch); 380 if (err) 381 printk(KERN_ERR "Failed to set balloon watcher\n"); 382 383 return NOTIFY_DONE; 384} 385 386static struct notifier_block xenstore_notifier; 387 388static int __init balloon_init(void) 389{ 390 unsigned long pfn, extra_pfn_end; 391 struct page *page; 392 393 if (!xen_pv_domain()) 394 return -ENODEV; 395 396 pr_info("xen_balloon: Initialising balloon driver.\n"); 397 398 balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn); 399 balloon_stats.target_pages = balloon_stats.current_pages; 400 balloon_stats.balloon_low = 0; 401 balloon_stats.balloon_high = 0; 402 403 init_timer(&balloon_timer); 404 balloon_timer.data = 0; 405 balloon_timer.function = balloon_alarm; 406 407 register_balloon(&balloon_sysdev); 408 409 /* 410 * Initialise the balloon with excess memory space. We need 411 * to make sure we don't add memory which doesn't exist or 412 * logically exist. The E820 map can be trimmed to be smaller 413 * than the amount of physical memory due to the mem= command 414 * line parameter. And if this is a 32-bit non-HIGHMEM kernel 415 * on a system with memory which requires highmem to access, 416 * don't try to use it. 417 */ 418 extra_pfn_end = min(min(max_pfn, e820_end_of_ram_pfn()), 419 (unsigned long)PFN_DOWN(xen_extra_mem_start + xen_extra_mem_size)); 420 for (pfn = PFN_UP(xen_extra_mem_start); 421 pfn < extra_pfn_end; 422 pfn++) { 423 page = pfn_to_page(pfn); 424 /* totalram_pages doesn't include the boot-time 425 balloon extension, so don't subtract from it. */ 426 __balloon_append(page); 427 } 428 429 target_watch.callback = watch_target; 430 xenstore_notifier.notifier_call = balloon_init_watcher; 431 432 register_xenstore_notifier(&xenstore_notifier); 433 434 return 0; 435} 436 437subsys_initcall(balloon_init); 438 439static void balloon_exit(void) 440{ 441 /* XXX - release balloon here */ 442 return; 443} 444 445module_exit(balloon_exit); 446 447#define BALLOON_SHOW(name, format, args...) \ 448 static ssize_t show_##name(struct sys_device *dev, \ 449 struct sysdev_attribute *attr, \ 450 char *buf) \ 451 { \ 452 return sprintf(buf, format, ##args); \ 453 } \ 454 static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL) 455 456BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages)); 457BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low)); 458BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high)); 459 460static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr, 461 char *buf) 462{ 463 return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages)); 464} 465 466static ssize_t store_target_kb(struct sys_device *dev, 467 struct sysdev_attribute *attr, 468 const char *buf, 469 size_t count) 470{ 471 char *endchar; 472 unsigned long long target_bytes; 473 474 if (!capable(CAP_SYS_ADMIN)) 475 return -EPERM; 476 477 target_bytes = simple_strtoull(buf, &endchar, 0) * 1024; 478 479 balloon_set_new_target(target_bytes >> PAGE_SHIFT); 480 481 return count; 482} 483 484static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR, 485 show_target_kb, store_target_kb); 486 487 488static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr, 489 char *buf) 490{ 491 return sprintf(buf, "%llu\n", 492 (unsigned long long)balloon_stats.target_pages 493 << PAGE_SHIFT); 494} 495 496static ssize_t store_target(struct sys_device *dev, 497 struct sysdev_attribute *attr, 498 const char *buf, 499 size_t count) 500{ 501 char *endchar; 502 unsigned long long target_bytes; 503 504 if (!capable(CAP_SYS_ADMIN)) 505 return -EPERM; 506 507 target_bytes = memparse(buf, &endchar); 508 509 balloon_set_new_target(target_bytes >> PAGE_SHIFT); 510 511 return count; 512} 513 514static SYSDEV_ATTR(target, S_IRUGO | S_IWUSR, 515 show_target, store_target); 516 517 518static struct sysdev_attribute *balloon_attrs[] = { 519 &attr_target_kb, 520 &attr_target, 521}; 522 523static struct attribute *balloon_info_attrs[] = { 524 &attr_current_kb.attr, 525 &attr_low_kb.attr, 526 &attr_high_kb.attr, 527 NULL 528}; 529 530static struct attribute_group balloon_info_group = { 531 .name = "info", 532 .attrs = balloon_info_attrs, 533}; 534 535static struct sysdev_class balloon_sysdev_class = { 536 .name = BALLOON_CLASS_NAME, 537}; 538 539static int register_balloon(struct sys_device *sysdev) 540{ 541 int i, error; 542 543 error = sysdev_class_register(&balloon_sysdev_class); 544 if (error) 545 return error; 546 547 sysdev->id = 0; 548 sysdev->cls = &balloon_sysdev_class; 549 550 error = sysdev_register(sysdev); 551 if (error) { 552 sysdev_class_unregister(&balloon_sysdev_class); 553 return error; 554 } 555 556 for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) { 557 error = sysdev_create_file(sysdev, balloon_attrs[i]); 558 if (error) 559 goto fail; 560 } 561 562 error = sysfs_create_group(&sysdev->kobj, &balloon_info_group); 563 if (error) 564 goto fail; 565 566 return 0; 567 568 fail: 569 while (--i >= 0) 570 sysdev_remove_file(sysdev, balloon_attrs[i]); 571 sysdev_unregister(sysdev); 572 sysdev_class_unregister(&balloon_sysdev_class); 573 return error; 574} 575 576MODULE_LICENSE("GPL"); 577