vmstat.c revision bab1846a0582f627f5ec22aa2dc5f4f3e82e8176
/*
 * linux/mm/vmstat.c
 *
 * Manages VM statistics
 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
 *
 * zoned VM statistics
 * Copyright (C) 2006 Silicon Graphics, Inc.,
 *	Christoph Lameter <christoph@lameter.com>
 */

#include <linux/config.h>
#include <linux/mm.h>
#include <linux/module.h>

/*
 * Accumulate the page_state information across all CPUs.
 * The result is unavoidably approximate - it can change
 * during and after execution of this function.
 */
DEFINE_PER_CPU(struct page_state, page_states) = {0};

static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
{
	unsigned cpu;

	memset(ret, 0, nr * sizeof(unsigned long));
	cpus_and(*cpumask, *cpumask, cpu_online_map);

	for_each_cpu_mask(cpu, *cpumask) {
		unsigned long *in;
		unsigned long *out;
		unsigned off;
		unsigned next_cpu;

		in = (unsigned long *)&per_cpu(page_states, cpu);

		next_cpu = next_cpu(cpu, *cpumask);
		if (likely(next_cpu < NR_CPUS))
			prefetch(&per_cpu(page_states, next_cpu));

		out = (unsigned long *)ret;
		for (off = 0; off < nr; off++)
			*out++ += *in++;
	}
}

void get_full_page_state(struct page_state *ret)
{
	cpumask_t mask = CPU_MASK_ALL;

	__get_page_state(ret, sizeof(*ret) / sizeof(unsigned long), &mask);
}

void __mod_page_state_offset(unsigned long offset, unsigned long delta)
{
	void *ptr;

	ptr = &__get_cpu_var(page_states);
	*(unsigned long *)(ptr + offset) += delta;
}
EXPORT_SYMBOL(__mod_page_state_offset);

void mod_page_state_offset(unsigned long offset, unsigned long delta)
{
	unsigned long flags;
	void *ptr;

	local_irq_save(flags);
	ptr = &__get_cpu_var(page_states);
	*(unsigned long *)(ptr + offset) += delta;
	local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_page_state_offset);

void __get_zone_counts(unsigned long *active, unsigned long *inactive,
			unsigned long *free, struct pglist_data *pgdat)
{
	struct zone *zones = pgdat->node_zones;
	int i;

	*active = 0;
	*inactive = 0;
	*free = 0;
	for (i = 0; i < MAX_NR_ZONES; i++) {
		*active += zones[i].nr_active;
		*inactive += zones[i].nr_inactive;
		*free += zones[i].free_pages;
	}
}

void get_zone_counts(unsigned long *active,
		unsigned long *inactive, unsigned long *free)
{
	struct pglist_data *pgdat;

	*active = 0;
	*inactive = 0;
	*free = 0;
	for_each_online_pgdat(pgdat) {
		unsigned long l, m, n;
		__get_zone_counts(&l, &m, &n, pgdat);
		*active += l;
		*inactive += m;
		*free += n;
	}
}

/*
 * Manage combined zone based / global counters
 *
 * vm_stat contains the global counters
 */
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
EXPORT_SYMBOL(vm_stat);

#ifdef CONFIG_SMP

#define STAT_THRESHOLD 32

/*
 * Determine pointer to currently valid differential byte given a zone and
 * the item number.
 *
 * Preemption must be off
 */
static inline s8 *diff_pointer(struct zone *zone, enum zone_stat_item item)
{
	return &zone_pcp(zone, smp_processor_id())->vm_stat_diff[item];
}
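
/*
 * Note on the update scheme implemented below: each per-CPU pageset keeps a
 * small signed byte differential per counter (vm_stat_diff[]).  Updates are
 * applied to that byte and only folded into the zone and global counters (via
 * zone_page_state_add()) once the differential would leave the range
 * [-STAT_THRESHOLD, STAT_THRESHOLD].  For example, a CPU doing 33 single page
 * increments updates the shared counters only once, on the 33rd increment.
 */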

/*
 * For use when we know that interrupts are disabled.
 */
void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
				int delta)
{
	s8 *p;
	long x;

	p = diff_pointer(zone, item);
	x = delta + *p;

	if (unlikely(x > STAT_THRESHOLD || x < -STAT_THRESHOLD)) {
		zone_page_state_add(x, zone, item);
		x = 0;
	}

	*p = x;
}
EXPORT_SYMBOL(__mod_zone_page_state);

/*
 * For an unknown interrupt state
 */
void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
				int delta)
{
	unsigned long flags;

	local_irq_save(flags);
	__mod_zone_page_state(zone, item, delta);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_zone_page_state);

/*
 * Optimized increment and decrement functions.
 *
 * These are only for a single page and therefore can take a struct page *
 * argument instead of struct zone *. This allows the inclusion of the code
 * generated for page_zone(page) into the optimized functions.
 *
 * No overflow check is necessary and therefore the differential can be
 * incremented or decremented in place which may allow the compilers to
 * generate better code.
 *
 * The increment or decrement is known and therefore one boundary check can
 * be omitted.
 *
 * Some processors have inc/dec instructions that are atomic vs an interrupt.
 * However, the code must first determine the differential location in a zone
 * based on the processor number and then inc/dec the counter. There is no
 * guarantee without disabling preemption that the processor will not change
 * in between and therefore the atomicity vs. interrupt cannot be exploited
 * in a useful way here.
 */
void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	struct zone *zone = page_zone(page);
	s8 *p = diff_pointer(zone, item);

	(*p)++;

	if (unlikely(*p > STAT_THRESHOLD)) {
		zone_page_state_add(*p, zone, item);
		*p = 0;
	}
}
EXPORT_SYMBOL(__inc_zone_page_state);

void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	struct zone *zone = page_zone(page);
	s8 *p = diff_pointer(zone, item);

	(*p)--;

	if (unlikely(*p < -STAT_THRESHOLD)) {
		zone_page_state_add(*p, zone, item);
		*p = 0;
	}
}
EXPORT_SYMBOL(__dec_zone_page_state);

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	unsigned long flags;
	struct zone *zone;
	s8 *p;

	zone = page_zone(page);
	local_irq_save(flags);
	p = diff_pointer(zone, item);

	(*p)++;

	if (unlikely(*p > STAT_THRESHOLD)) {
		zone_page_state_add(*p, zone, item);
		*p = 0;
	}
	local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	unsigned long flags;
	struct zone *zone;
	s8 *p;

	zone = page_zone(page);
	local_irq_save(flags);
	p = diff_pointer(zone, item);

	(*p)--;

	if (unlikely(*p < -STAT_THRESHOLD)) {
		zone_page_state_add(*p, zone, item);
		*p = 0;
	}
	local_irq_restore(flags);
}
EXPORT_SYMBOL(dec_zone_page_state);
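
/*
 * Typical usage of the helpers above (callers live outside this file): a path
 * that already runs with interrupts disabled, for example the page cache
 * insertion code, can use the cheaper __inc_zone_page_state(page,
 * NR_FILE_PAGES), while code with an unknown interrupt state uses
 * inc_zone_page_state()/dec_zone_page_state(), or mod_zone_page_state() when
 * the delta is larger than one page.
 */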

/*
 * Update the zone counters for one cpu.
 */
void refresh_cpu_vm_stats(int cpu)
{
	struct zone *zone;
	int i;
	unsigned long flags;

	for_each_zone(zone) {
		struct per_cpu_pageset *pcp;

		pcp = zone_pcp(zone, cpu);

		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
			if (pcp->vm_stat_diff[i]) {
				local_irq_save(flags);
				zone_page_state_add(pcp->vm_stat_diff[i],
					zone, i);
				pcp->vm_stat_diff[i] = 0;
				local_irq_restore(flags);
			}
	}
}

static void __refresh_cpu_vm_stats(void *dummy)
{
	refresh_cpu_vm_stats(smp_processor_id());
}

/*
 * Consolidate all counters.
 *
 * Note that the result is less inaccurate but still inaccurate
 * if concurrent processes are allowed to run.
 */
void refresh_vm_stats(void)
{
	on_each_cpu(__refresh_cpu_vm_stats, NULL, 0, 1);
}
EXPORT_SYMBOL(refresh_vm_stats);

#endif

#ifdef CONFIG_PROC_FS

#include <linux/seq_file.h>

static void *frag_start(struct seq_file *m, loff_t *pos)
{
	pg_data_t *pgdat;
	loff_t node = *pos;
	for (pgdat = first_online_pgdat();
	     pgdat && node;
	     pgdat = next_online_pgdat(pgdat))
		--node;

	return pgdat;
}

static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	(*pos)++;
	return next_online_pgdat(pgdat);
}

static void frag_stop(struct seq_file *m, void *arg)
{
}

/*
 * This walks the free areas for each zone.
 */
static int frag_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;
	struct zone *zone;
	struct zone *node_zones = pgdat->node_zones;
	unsigned long flags;
	int order;

	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
		if (!populated_zone(zone))
			continue;

		spin_lock_irqsave(&zone->lock, flags);
		seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
		for (order = 0; order < MAX_ORDER; ++order)
			seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
		spin_unlock_irqrestore(&zone->lock, flags);
		seq_putc(m, '\n');
	}
	return 0;
}

struct seq_operations fragmentation_op = {
	.start = frag_start,
	.next = frag_next,
	.stop = frag_stop,
	.show = frag_show,
};

static char *vmstat_text[] = {
	/* Zoned VM counters */
	"nr_anon_pages",
	"nr_mapped",
	"nr_file_pages",
	"nr_slab",
	"nr_page_table_pages",
	"nr_dirty",
	"nr_writeback",
	"nr_unstable",
	"nr_bounce",

	/* Event counters */
	"pgpgin",
	"pgpgout",
	"pswpin",
	"pswpout",

	"pgalloc_high",
	"pgalloc_normal",
	"pgalloc_dma32",
	"pgalloc_dma",

	"pgfree",
	"pgactivate",
	"pgdeactivate",

	"pgfault",
	"pgmajfault",

	"pgrefill_high",
	"pgrefill_normal",
	"pgrefill_dma32",
	"pgrefill_dma",

	"pgsteal_high",
	"pgsteal_normal",
	"pgsteal_dma32",
	"pgsteal_dma",

	"pgscan_kswapd_high",
	"pgscan_kswapd_normal",
	"pgscan_kswapd_dma32",
	"pgscan_kswapd_dma",

	"pgscan_direct_high",
	"pgscan_direct_normal",
	"pgscan_direct_dma32",
	"pgscan_direct_dma",

	"pginodesteal",
	"slabs_scanned",
	"kswapd_steal",
	"kswapd_inodesteal",
	"pageoutrun",
	"allocstall",

	"pgrotated",
};
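
/*
 * The first NR_VM_ZONE_STAT_ITEMS names above must stay in the same order as
 * enum zone_stat_item; the remaining names follow the member layout of struct
 * page_state.  vmstat_start() below builds its output array in exactly that
 * order, and zoneinfo_show() indexes the zoned counters by the same enum.
 */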

/*
 * Output information about zones in @pgdat.
 */
static int zoneinfo_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = arg;
	struct zone *zone;
	struct zone *node_zones = pgdat->node_zones;
	unsigned long flags;

	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; zone++) {
		int i;

		if (!populated_zone(zone))
			continue;

		spin_lock_irqsave(&zone->lock, flags);
		seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
		seq_printf(m,
			   "\n pages free %lu"
			   "\n min %lu"
			   "\n low %lu"
			   "\n high %lu"
			   "\n active %lu"
			   "\n inactive %lu"
			   "\n scanned %lu (a: %lu i: %lu)"
			   "\n spanned %lu"
			   "\n present %lu",
			   zone->free_pages,
			   zone->pages_min,
			   zone->pages_low,
			   zone->pages_high,
			   zone->nr_active,
			   zone->nr_inactive,
			   zone->pages_scanned,
			   zone->nr_scan_active, zone->nr_scan_inactive,
			   zone->spanned_pages,
			   zone->present_pages);

		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
			seq_printf(m, "\n %-12s %lu", vmstat_text[i],
					zone_page_state(zone, i));

		seq_printf(m,
			   "\n protection: (%lu",
			   zone->lowmem_reserve[0]);
		for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
			seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
		seq_printf(m,
			   ")"
			   "\n pagesets");
		for_each_online_cpu(i) {
			struct per_cpu_pageset *pageset;
			int j;

			pageset = zone_pcp(zone, i);
			for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
				if (pageset->pcp[j].count)
					break;
			}
			if (j == ARRAY_SIZE(pageset->pcp))
				continue;
			for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
				seq_printf(m,
					   "\n cpu: %i pcp: %i"
					   "\n count: %i"
					   "\n high: %i"
					   "\n batch: %i",
					   i, j,
					   pageset->pcp[j].count,
					   pageset->pcp[j].high,
					   pageset->pcp[j].batch);
			}
#ifdef CONFIG_NUMA
			seq_printf(m,
				   "\n numa_hit: %lu"
				   "\n numa_miss: %lu"
				   "\n numa_foreign: %lu"
				   "\n interleave_hit: %lu"
				   "\n local_node: %lu"
				   "\n other_node: %lu",
				   pageset->numa_hit,
				   pageset->numa_miss,
				   pageset->numa_foreign,
				   pageset->interleave_hit,
				   pageset->local_node,
				   pageset->other_node);
#endif
		}
		seq_printf(m,
			   "\n all_unreclaimable: %u"
			   "\n prev_priority: %i"
			   "\n temp_priority: %i"
			   "\n start_pfn: %lu",
			   zone->all_unreclaimable,
			   zone->prev_priority,
			   zone->temp_priority,
			   zone->zone_start_pfn);
		spin_unlock_irqrestore(&zone->lock, flags);
		seq_putc(m, '\n');
	}
	return 0;
}
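
/*
 * zoneinfo_op drives the seq_file interface behind /proc/zoneinfo; the proc
 * entry itself is registered in the proc filesystem code, not here.
 */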

struct seq_operations zoneinfo_op = {
	.start = frag_start, /* iterate over all zones. The same as in
			      * fragmentation. */
	.next = frag_next,
	.stop = frag_stop,
	.show = zoneinfo_show,
};

static void *vmstat_start(struct seq_file *m, loff_t *pos)
{
	unsigned long *v;
	struct page_state *ps;
	int i;

	if (*pos >= ARRAY_SIZE(vmstat_text))
		return NULL;

	v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long)
			+ sizeof(*ps), GFP_KERNEL);
	m->private = v;
	if (!v)
		return ERR_PTR(-ENOMEM);
	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		v[i] = global_page_state(i);
	ps = (struct page_state *)(v + NR_VM_ZONE_STAT_ITEMS);
	get_full_page_state(ps);
	ps->pgpgin /= 2;	/* sectors -> kbytes */
	ps->pgpgout /= 2;
	return v + *pos;
}

static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
{
	(*pos)++;
	if (*pos >= ARRAY_SIZE(vmstat_text))
		return NULL;
	return (unsigned long *)m->private + *pos;
}

static int vmstat_show(struct seq_file *m, void *arg)
{
	unsigned long *l = arg;
	unsigned long off = l - (unsigned long *)m->private;

	seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
	return 0;
}

static void vmstat_stop(struct seq_file *m, void *arg)
{
	kfree(m->private);
	m->private = NULL;
}

struct seq_operations vmstat_op = {
	.start = vmstat_start,
	.next = vmstat_next,
	.stop = vmstat_stop,
	.show = vmstat_show,
};

#endif /* CONFIG_PROC_FS */
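
The following is a small, self-contained userspace sketch, not part of vmstat.c, of the fold-on-threshold counter scheme that __mod_zone_page_state() and friends implement above. Every demo_* name is hypothetical: demo_global stands in for a vm_stat[] entry, demo_diff[] for the per-CPU vm_stat_diff[] bytes, and DEMO_THRESHOLD mirrors STAT_THRESHOLD.

#include <stdio.h>

#define DEMO_NR_CPUS	4
#define DEMO_THRESHOLD	32		/* mirrors STAT_THRESHOLD above */

static long demo_global;			/* stands in for vm_stat[item] */
static signed char demo_diff[DEMO_NR_CPUS];	/* stands in for vm_stat_diff[item] */

/* Same logic as __mod_zone_page_state(), for one hypothetical counter. */
static void demo_mod_state(int cpu, int delta)
{
	long x = delta + demo_diff[cpu];

	if (x > DEMO_THRESHOLD || x < -DEMO_THRESHOLD) {
		demo_global += x;	/* fold into the shared counter */
		x = 0;
	}
	demo_diff[cpu] = x;
}

int main(void)
{
	int cpu, i;

	/* 100 single-page increments per simulated CPU. */
	for (cpu = 0; cpu < DEMO_NR_CPUS; cpu++)
		for (i = 0; i < 100; i++)
			demo_mod_state(cpu, 1);

	/* The shared counter lags the true total by at most the threshold per CPU. */
	printf("global=%ld (true total 400, per-cpu residue at most %d)\n",
	       demo_global, DEMO_THRESHOLD);
	return 0;
}

Built with any C compiler, this prints global=396 for a true total of 400: each simulated CPU folds its differential on every 33rd increment and keeps the small remainder local, which is exactly why readers of the consolidated counters see an approximate value until refresh_cpu_vm_stats() folds the residues back in.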