1/* 2 * GPL HEADER START 3 * 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 2 only, 8 * as published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, but 11 * WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * General Public License version 2 for more details (a copy is included 14 * in the LICENSE file that accompanied this code). 15 * 16 * You should have received a copy of the GNU General Public License 17 * version 2 along with this program; If not, see 18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf 19 * 20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 21 * CA 95054 USA or visit www.sun.com if you need additional information or 22 * have any questions. 23 * 24 * GPL HEADER END 25 */ 26/* 27 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 28 * Use is subject to license terms. 29 * 30 * Copyright (c) 2012, Intel Corporation. 31 */ 32/* 33 * This file is part of Lustre, http://www.lustre.org/ 34 * Lustre is a trademark of Sun Microsystems, Inc. 35 * 36 * libcfs/libcfs/tracefile.c 37 * 38 * Author: Zach Brown <zab@clusterfs.com> 39 * Author: Phil Schwan <phil@clusterfs.com> 40 */ 41 42 43#define DEBUG_SUBSYSTEM S_LNET 44#define LUSTRE_TRACEFILE_PRIVATE 45#include "tracefile.h" 46 47#include "../../include/linux/libcfs/libcfs.h" 48 49/* XXX move things up to the top, comment */ 50union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[NR_CPUS] __cacheline_aligned; 51 52char cfs_tracefile[TRACEFILE_NAME_SIZE]; 53long long cfs_tracefile_size = CFS_TRACEFILE_SIZE; 54static struct tracefiled_ctl trace_tctl; 55struct mutex cfs_trace_thread_mutex; 56static int thread_running = 0; 57 58atomic_t cfs_tage_allocated = ATOMIC_INIT(0); 59 60static void put_pages_on_tcd_daemon_list(struct page_collection *pc, 61 struct cfs_trace_cpu_data *tcd); 62 63static inline struct cfs_trace_page * 64cfs_tage_from_list(struct list_head *list) 65{ 66 return list_entry(list, struct cfs_trace_page, linkage); 67} 68 69static struct cfs_trace_page *cfs_tage_alloc(gfp_t gfp) 70{ 71 struct page *page; 72 struct cfs_trace_page *tage; 73 74 /* My caller is trying to free memory */ 75 if (!in_interrupt() && memory_pressure_get()) 76 return NULL; 77 78 /* 79 * Don't spam console with allocation failures: they will be reported 80 * by upper layer anyway. 81 */ 82 gfp |= __GFP_NOWARN; 83 page = alloc_page(gfp); 84 if (page == NULL) 85 return NULL; 86 87 tage = kmalloc(sizeof(*tage), gfp); 88 if (tage == NULL) { 89 __free_page(page); 90 return NULL; 91 } 92 93 tage->page = page; 94 atomic_inc(&cfs_tage_allocated); 95 return tage; 96} 97 98static void cfs_tage_free(struct cfs_trace_page *tage) 99{ 100 __LASSERT(tage != NULL); 101 __LASSERT(tage->page != NULL); 102 103 __free_page(tage->page); 104 kfree(tage); 105 atomic_dec(&cfs_tage_allocated); 106} 107 108static void cfs_tage_to_tail(struct cfs_trace_page *tage, 109 struct list_head *queue) 110{ 111 __LASSERT(tage != NULL); 112 __LASSERT(queue != NULL); 113 114 list_move_tail(&tage->linkage, queue); 115} 116 117int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, gfp_t gfp, 118 struct list_head *stock) 119{ 120 int i; 121 122 /* 123 * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) 124 * from here: this will lead to infinite recursion. 125 */ 126 127 for (i = 0; i + tcd->tcd_cur_stock_pages < TCD_STOCK_PAGES ; ++ i) { 128 struct cfs_trace_page *tage; 129 130 tage = cfs_tage_alloc(gfp); 131 if (tage == NULL) 132 break; 133 list_add_tail(&tage->linkage, stock); 134 } 135 return i; 136} 137 138/* return a page that has 'len' bytes left at the end */ 139static struct cfs_trace_page * 140cfs_trace_get_tage_try(struct cfs_trace_cpu_data *tcd, unsigned long len) 141{ 142 struct cfs_trace_page *tage; 143 144 if (tcd->tcd_cur_pages > 0) { 145 __LASSERT(!list_empty(&tcd->tcd_pages)); 146 tage = cfs_tage_from_list(tcd->tcd_pages.prev); 147 if (tage->used + len <= PAGE_CACHE_SIZE) 148 return tage; 149 } 150 151 if (tcd->tcd_cur_pages < tcd->tcd_max_pages) { 152 if (tcd->tcd_cur_stock_pages > 0) { 153 tage = cfs_tage_from_list(tcd->tcd_stock_pages.prev); 154 --tcd->tcd_cur_stock_pages; 155 list_del_init(&tage->linkage); 156 } else { 157 tage = cfs_tage_alloc(GFP_ATOMIC); 158 if (unlikely(tage == NULL)) { 159 if ((!memory_pressure_get() || 160 in_interrupt()) && printk_ratelimit()) 161 printk(KERN_WARNING 162 "cannot allocate a tage (%ld)\n", 163 tcd->tcd_cur_pages); 164 return NULL; 165 } 166 } 167 168 tage->used = 0; 169 tage->cpu = smp_processor_id(); 170 tage->type = tcd->tcd_type; 171 list_add_tail(&tage->linkage, &tcd->tcd_pages); 172 tcd->tcd_cur_pages++; 173 174 if (tcd->tcd_cur_pages > 8 && thread_running) { 175 struct tracefiled_ctl *tctl = &trace_tctl; 176 /* 177 * wake up tracefiled to process some pages. 178 */ 179 wake_up(&tctl->tctl_waitq); 180 } 181 return tage; 182 } 183 return NULL; 184} 185 186static void cfs_tcd_shrink(struct cfs_trace_cpu_data *tcd) 187{ 188 int pgcount = tcd->tcd_cur_pages / 10; 189 struct page_collection pc; 190 struct cfs_trace_page *tage; 191 struct cfs_trace_page *tmp; 192 193 /* 194 * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) 195 * from here: this will lead to infinite recursion. 196 */ 197 198 if (printk_ratelimit()) 199 printk(KERN_WARNING "debug daemon buffer overflowed; " 200 "discarding 10%% of pages (%d of %ld)\n", 201 pgcount + 1, tcd->tcd_cur_pages); 202 203 INIT_LIST_HEAD(&pc.pc_pages); 204 spin_lock_init(&pc.pc_lock); 205 206 list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) { 207 if (pgcount-- == 0) 208 break; 209 210 list_move_tail(&tage->linkage, &pc.pc_pages); 211 tcd->tcd_cur_pages--; 212 } 213 put_pages_on_tcd_daemon_list(&pc, tcd); 214} 215 216/* return a page that has 'len' bytes left at the end */ 217static struct cfs_trace_page *cfs_trace_get_tage(struct cfs_trace_cpu_data *tcd, 218 unsigned long len) 219{ 220 struct cfs_trace_page *tage; 221 222 /* 223 * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) 224 * from here: this will lead to infinite recursion. 225 */ 226 227 if (len > PAGE_CACHE_SIZE) { 228 printk(KERN_ERR 229 "cowardly refusing to write %lu bytes in a page\n", len); 230 return NULL; 231 } 232 233 tage = cfs_trace_get_tage_try(tcd, len); 234 if (tage != NULL) 235 return tage; 236 if (thread_running) 237 cfs_tcd_shrink(tcd); 238 if (tcd->tcd_cur_pages > 0) { 239 tage = cfs_tage_from_list(tcd->tcd_pages.next); 240 tage->used = 0; 241 cfs_tage_to_tail(tage, &tcd->tcd_pages); 242 } 243 return tage; 244} 245 246int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata, 247 const char *format, ...) 248{ 249 va_list args; 250 int rc; 251 252 va_start(args, format); 253 rc = libcfs_debug_vmsg2(msgdata, format, args, NULL); 254 va_end(args); 255 256 return rc; 257} 258EXPORT_SYMBOL(libcfs_debug_msg); 259 260int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata, 261 const char *format1, va_list args, 262 const char *format2, ...) 263{ 264 struct cfs_trace_cpu_data *tcd = NULL; 265 struct ptldebug_header header = {0}; 266 struct cfs_trace_page *tage; 267 /* string_buf is used only if tcd != NULL, and is always set then */ 268 char *string_buf = NULL; 269 char *debug_buf; 270 int known_size; 271 int needed = 85; /* average message length */ 272 int max_nob; 273 va_list ap; 274 int depth; 275 int i; 276 int remain; 277 int mask = msgdata->msg_mask; 278 const char *file = kbasename(msgdata->msg_file); 279 struct cfs_debug_limit_state *cdls = msgdata->msg_cdls; 280 281 tcd = cfs_trace_get_tcd(); 282 283 /* cfs_trace_get_tcd() grabs a lock, which disables preemption and 284 * pins us to a particular CPU. This avoids an smp_processor_id() 285 * warning on Linux when debugging is enabled. */ 286 cfs_set_ptldebug_header(&header, msgdata, CDEBUG_STACK()); 287 288 if (tcd == NULL) /* arch may not log in IRQ context */ 289 goto console; 290 291 if (tcd->tcd_cur_pages == 0) 292 header.ph_flags |= PH_FLAG_FIRST_RECORD; 293 294 if (tcd->tcd_shutting_down) { 295 cfs_trace_put_tcd(tcd); 296 tcd = NULL; 297 goto console; 298 } 299 300 depth = __current_nesting_level(); 301 known_size = strlen(file) + 1 + depth; 302 if (msgdata->msg_fn) 303 known_size += strlen(msgdata->msg_fn) + 1; 304 305 if (libcfs_debug_binary) 306 known_size += sizeof(header); 307 308 /*/ 309 * '2' used because vsnprintf return real size required for output 310 * _without_ terminating NULL. 311 * if needed is to small for this format. 312 */ 313 for (i = 0; i < 2; i++) { 314 tage = cfs_trace_get_tage(tcd, needed + known_size + 1); 315 if (tage == NULL) { 316 if (needed + known_size > PAGE_CACHE_SIZE) 317 mask |= D_ERROR; 318 319 cfs_trace_put_tcd(tcd); 320 tcd = NULL; 321 goto console; 322 } 323 324 string_buf = (char *)page_address(tage->page) + 325 tage->used + known_size; 326 327 max_nob = PAGE_CACHE_SIZE - tage->used - known_size; 328 if (max_nob <= 0) { 329 printk(KERN_EMERG "negative max_nob: %d\n", 330 max_nob); 331 mask |= D_ERROR; 332 cfs_trace_put_tcd(tcd); 333 tcd = NULL; 334 goto console; 335 } 336 337 needed = 0; 338 if (format1) { 339 va_copy(ap, args); 340 needed = vsnprintf(string_buf, max_nob, format1, ap); 341 va_end(ap); 342 } 343 344 if (format2) { 345 remain = max_nob - needed; 346 if (remain < 0) 347 remain = 0; 348 349 va_start(ap, format2); 350 needed += vsnprintf(string_buf + needed, remain, 351 format2, ap); 352 va_end(ap); 353 } 354 355 if (needed < max_nob) /* well. printing ok.. */ 356 break; 357 } 358 359 if (*(string_buf+needed-1) != '\n') 360 printk(KERN_INFO "format at %s:%d:%s doesn't end in " 361 "newline\n", file, msgdata->msg_line, msgdata->msg_fn); 362 363 header.ph_len = known_size + needed; 364 debug_buf = (char *)page_address(tage->page) + tage->used; 365 366 if (libcfs_debug_binary) { 367 memcpy(debug_buf, &header, sizeof(header)); 368 tage->used += sizeof(header); 369 debug_buf += sizeof(header); 370 } 371 372 /* indent message according to the nesting level */ 373 while (depth-- > 0) { 374 *(debug_buf++) = '.'; 375 ++ tage->used; 376 } 377 378 strcpy(debug_buf, file); 379 tage->used += strlen(file) + 1; 380 debug_buf += strlen(file) + 1; 381 382 if (msgdata->msg_fn) { 383 strcpy(debug_buf, msgdata->msg_fn); 384 tage->used += strlen(msgdata->msg_fn) + 1; 385 debug_buf += strlen(msgdata->msg_fn) + 1; 386 } 387 388 __LASSERT(debug_buf == string_buf); 389 390 tage->used += needed; 391 __LASSERT (tage->used <= PAGE_CACHE_SIZE); 392 393console: 394 if ((mask & libcfs_printk) == 0) { 395 /* no console output requested */ 396 if (tcd != NULL) 397 cfs_trace_put_tcd(tcd); 398 return 1; 399 } 400 401 if (cdls != NULL) { 402 if (libcfs_console_ratelimit && 403 cdls->cdls_next != 0 && /* not first time ever */ 404 !cfs_time_after(cfs_time_current(), cdls->cdls_next)) { 405 /* skipping a console message */ 406 cdls->cdls_count++; 407 if (tcd != NULL) 408 cfs_trace_put_tcd(tcd); 409 return 1; 410 } 411 412 if (cfs_time_after(cfs_time_current(), cdls->cdls_next + 413 libcfs_console_max_delay 414 + cfs_time_seconds(10))) { 415 /* last timeout was a long time ago */ 416 cdls->cdls_delay /= libcfs_console_backoff * 4; 417 } else { 418 cdls->cdls_delay *= libcfs_console_backoff; 419 } 420 421 if (cdls->cdls_delay < libcfs_console_min_delay) 422 cdls->cdls_delay = libcfs_console_min_delay; 423 else if (cdls->cdls_delay > libcfs_console_max_delay) 424 cdls->cdls_delay = libcfs_console_max_delay; 425 426 /* ensure cdls_next is never zero after it's been seen */ 427 cdls->cdls_next = (cfs_time_current() + cdls->cdls_delay) | 1; 428 } 429 430 if (tcd != NULL) { 431 cfs_print_to_console(&header, mask, string_buf, needed, file, 432 msgdata->msg_fn); 433 cfs_trace_put_tcd(tcd); 434 } else { 435 string_buf = cfs_trace_get_console_buffer(); 436 437 needed = 0; 438 if (format1 != NULL) { 439 va_copy(ap, args); 440 needed = vsnprintf(string_buf, 441 CFS_TRACE_CONSOLE_BUFFER_SIZE, 442 format1, ap); 443 va_end(ap); 444 } 445 if (format2 != NULL) { 446 remain = CFS_TRACE_CONSOLE_BUFFER_SIZE - needed; 447 if (remain > 0) { 448 va_start(ap, format2); 449 needed += vsnprintf(string_buf+needed, remain, 450 format2, ap); 451 va_end(ap); 452 } 453 } 454 cfs_print_to_console(&header, mask, 455 string_buf, needed, file, msgdata->msg_fn); 456 457 cfs_trace_put_console_buffer(string_buf); 458 } 459 460 if (cdls != NULL && cdls->cdls_count != 0) { 461 string_buf = cfs_trace_get_console_buffer(); 462 463 needed = snprintf(string_buf, CFS_TRACE_CONSOLE_BUFFER_SIZE, 464 "Skipped %d previous similar message%s\n", 465 cdls->cdls_count, 466 (cdls->cdls_count > 1) ? "s" : ""); 467 468 cfs_print_to_console(&header, mask, 469 string_buf, needed, file, msgdata->msg_fn); 470 471 cfs_trace_put_console_buffer(string_buf); 472 cdls->cdls_count = 0; 473 } 474 475 return 0; 476} 477EXPORT_SYMBOL(libcfs_debug_vmsg2); 478 479void 480cfs_trace_assertion_failed(const char *str, 481 struct libcfs_debug_msg_data *msgdata) 482{ 483 struct ptldebug_header hdr; 484 485 libcfs_panic_in_progress = 1; 486 libcfs_catastrophe = 1; 487 mb(); 488 489 cfs_set_ptldebug_header(&hdr, msgdata, CDEBUG_STACK()); 490 491 cfs_print_to_console(&hdr, D_EMERG, str, strlen(str), 492 msgdata->msg_file, msgdata->msg_fn); 493 494 panic("Lustre debug assertion failure\n"); 495 496 /* not reached */ 497} 498 499static void 500panic_collect_pages(struct page_collection *pc) 501{ 502 /* Do the collect_pages job on a single CPU: assumes that all other 503 * CPUs have been stopped during a panic. If this isn't true for some 504 * arch, this will have to be implemented separately in each arch. */ 505 int i; 506 int j; 507 struct cfs_trace_cpu_data *tcd; 508 509 INIT_LIST_HEAD(&pc->pc_pages); 510 511 cfs_tcd_for_each(tcd, i, j) { 512 list_splice_init(&tcd->tcd_pages, &pc->pc_pages); 513 tcd->tcd_cur_pages = 0; 514 515 if (pc->pc_want_daemon_pages) { 516 list_splice_init(&tcd->tcd_daemon_pages, 517 &pc->pc_pages); 518 tcd->tcd_cur_daemon_pages = 0; 519 } 520 } 521} 522 523static void collect_pages_on_all_cpus(struct page_collection *pc) 524{ 525 struct cfs_trace_cpu_data *tcd; 526 int i, cpu; 527 528 spin_lock(&pc->pc_lock); 529 for_each_possible_cpu(cpu) { 530 cfs_tcd_for_each_type_lock(tcd, i, cpu) { 531 list_splice_init(&tcd->tcd_pages, &pc->pc_pages); 532 tcd->tcd_cur_pages = 0; 533 if (pc->pc_want_daemon_pages) { 534 list_splice_init(&tcd->tcd_daemon_pages, 535 &pc->pc_pages); 536 tcd->tcd_cur_daemon_pages = 0; 537 } 538 } 539 } 540 spin_unlock(&pc->pc_lock); 541} 542 543static void collect_pages(struct page_collection *pc) 544{ 545 INIT_LIST_HEAD(&pc->pc_pages); 546 547 if (libcfs_panic_in_progress) 548 panic_collect_pages(pc); 549 else 550 collect_pages_on_all_cpus(pc); 551} 552 553static void put_pages_back_on_all_cpus(struct page_collection *pc) 554{ 555 struct cfs_trace_cpu_data *tcd; 556 struct list_head *cur_head; 557 struct cfs_trace_page *tage; 558 struct cfs_trace_page *tmp; 559 int i, cpu; 560 561 spin_lock(&pc->pc_lock); 562 for_each_possible_cpu(cpu) { 563 cfs_tcd_for_each_type_lock(tcd, i, cpu) { 564 cur_head = tcd->tcd_pages.next; 565 566 list_for_each_entry_safe(tage, tmp, &pc->pc_pages, 567 linkage) { 568 569 __LASSERT_TAGE_INVARIANT(tage); 570 571 if (tage->cpu != cpu || tage->type != i) 572 continue; 573 574 cfs_tage_to_tail(tage, cur_head); 575 tcd->tcd_cur_pages++; 576 } 577 } 578 } 579 spin_unlock(&pc->pc_lock); 580} 581 582static void put_pages_back(struct page_collection *pc) 583{ 584 if (!libcfs_panic_in_progress) 585 put_pages_back_on_all_cpus(pc); 586} 587 588/* Add pages to a per-cpu debug daemon ringbuffer. This buffer makes sure that 589 * we have a good amount of data at all times for dumping during an LBUG, even 590 * if we have been steadily writing (and otherwise discarding) pages via the 591 * debug daemon. */ 592static void put_pages_on_tcd_daemon_list(struct page_collection *pc, 593 struct cfs_trace_cpu_data *tcd) 594{ 595 struct cfs_trace_page *tage; 596 struct cfs_trace_page *tmp; 597 598 spin_lock(&pc->pc_lock); 599 list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) { 600 601 __LASSERT_TAGE_INVARIANT(tage); 602 603 if (tage->cpu != tcd->tcd_cpu || tage->type != tcd->tcd_type) 604 continue; 605 606 cfs_tage_to_tail(tage, &tcd->tcd_daemon_pages); 607 tcd->tcd_cur_daemon_pages++; 608 609 if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) { 610 struct cfs_trace_page *victim; 611 612 __LASSERT(!list_empty(&tcd->tcd_daemon_pages)); 613 victim = cfs_tage_from_list(tcd->tcd_daemon_pages.next); 614 615 __LASSERT_TAGE_INVARIANT(victim); 616 617 list_del(&victim->linkage); 618 cfs_tage_free(victim); 619 tcd->tcd_cur_daemon_pages--; 620 } 621 } 622 spin_unlock(&pc->pc_lock); 623} 624 625static void put_pages_on_daemon_list(struct page_collection *pc) 626{ 627 struct cfs_trace_cpu_data *tcd; 628 int i, cpu; 629 630 for_each_possible_cpu(cpu) { 631 cfs_tcd_for_each_type_lock(tcd, i, cpu) 632 put_pages_on_tcd_daemon_list(pc, tcd); 633 } 634} 635 636void cfs_trace_debug_print(void) 637{ 638 struct page_collection pc; 639 struct cfs_trace_page *tage; 640 struct cfs_trace_page *tmp; 641 642 spin_lock_init(&pc.pc_lock); 643 644 pc.pc_want_daemon_pages = 1; 645 collect_pages(&pc); 646 list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { 647 char *p, *file, *fn; 648 struct page *page; 649 650 __LASSERT_TAGE_INVARIANT(tage); 651 652 page = tage->page; 653 p = page_address(page); 654 while (p < ((char *)page_address(page) + tage->used)) { 655 struct ptldebug_header *hdr; 656 int len; 657 hdr = (void *)p; 658 p += sizeof(*hdr); 659 file = p; 660 p += strlen(file) + 1; 661 fn = p; 662 p += strlen(fn) + 1; 663 len = hdr->ph_len - (int)(p - (char *)hdr); 664 665 cfs_print_to_console(hdr, D_EMERG, p, len, file, fn); 666 667 p += len; 668 } 669 670 list_del(&tage->linkage); 671 cfs_tage_free(tage); 672 } 673} 674 675int cfs_tracefile_dump_all_pages(char *filename) 676{ 677 struct page_collection pc; 678 struct file *filp; 679 struct cfs_trace_page *tage; 680 struct cfs_trace_page *tmp; 681 char *buf; 682 int rc; 683 684 DECL_MMSPACE; 685 686 cfs_tracefile_write_lock(); 687 688 filp = filp_open(filename, O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600); 689 if (IS_ERR(filp)) { 690 rc = PTR_ERR(filp); 691 filp = NULL; 692 printk(KERN_ERR "LustreError: can't open %s for dump: rc %d\n", 693 filename, rc); 694 goto out; 695 } 696 697 spin_lock_init(&pc.pc_lock); 698 pc.pc_want_daemon_pages = 1; 699 collect_pages(&pc); 700 if (list_empty(&pc.pc_pages)) { 701 rc = 0; 702 goto close; 703 } 704 705 /* ok, for now, just write the pages. in the future we'll be building 706 * iobufs with the pages and calling generic_direct_IO */ 707 MMSPACE_OPEN; 708 list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { 709 710 __LASSERT_TAGE_INVARIANT(tage); 711 712 buf = kmap(tage->page); 713 rc = vfs_write(filp, (__force const char __user *)buf, 714 tage->used, &filp->f_pos); 715 kunmap(tage->page); 716 717 if (rc != (int)tage->used) { 718 printk(KERN_WARNING "wanted to write %u but wrote " 719 "%d\n", tage->used, rc); 720 put_pages_back(&pc); 721 __LASSERT(list_empty(&pc.pc_pages)); 722 break; 723 } 724 list_del(&tage->linkage); 725 cfs_tage_free(tage); 726 } 727 MMSPACE_CLOSE; 728 rc = vfs_fsync(filp, 1); 729 if (rc) 730 printk(KERN_ERR "sync returns %d\n", rc); 731close: 732 filp_close(filp, NULL); 733out: 734 cfs_tracefile_write_unlock(); 735 return rc; 736} 737 738void cfs_trace_flush_pages(void) 739{ 740 struct page_collection pc; 741 struct cfs_trace_page *tage; 742 struct cfs_trace_page *tmp; 743 744 spin_lock_init(&pc.pc_lock); 745 746 pc.pc_want_daemon_pages = 1; 747 collect_pages(&pc); 748 list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { 749 750 __LASSERT_TAGE_INVARIANT(tage); 751 752 list_del(&tage->linkage); 753 cfs_tage_free(tage); 754 } 755} 756 757int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob, 758 const char __user *usr_buffer, int usr_buffer_nob) 759{ 760 int nob; 761 762 if (usr_buffer_nob > knl_buffer_nob) 763 return -EOVERFLOW; 764 765 if (copy_from_user((void *)knl_buffer, 766 usr_buffer, usr_buffer_nob)) 767 return -EFAULT; 768 769 nob = strnlen(knl_buffer, usr_buffer_nob); 770 while (nob-- >= 0) /* strip trailing whitespace */ 771 if (!isspace(knl_buffer[nob])) 772 break; 773 774 if (nob < 0) /* empty string */ 775 return -EINVAL; 776 777 if (nob == knl_buffer_nob) /* no space to terminate */ 778 return -EOVERFLOW; 779 780 knl_buffer[nob + 1] = 0; /* terminate */ 781 return 0; 782} 783EXPORT_SYMBOL(cfs_trace_copyin_string); 784 785int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob, 786 const char *knl_buffer, char *append) 787{ 788 /* NB if 'append' != NULL, it's a single character to append to the 789 * copied out string - usually "\n", for /proc entries and "" (i.e. a 790 * terminating zero byte) for sysctl entries */ 791 int nob = strlen(knl_buffer); 792 793 if (nob > usr_buffer_nob) 794 nob = usr_buffer_nob; 795 796 if (copy_to_user(usr_buffer, knl_buffer, nob)) 797 return -EFAULT; 798 799 if (append != NULL && nob < usr_buffer_nob) { 800 if (copy_to_user(usr_buffer + nob, append, 1)) 801 return -EFAULT; 802 803 nob++; 804 } 805 806 return nob; 807} 808EXPORT_SYMBOL(cfs_trace_copyout_string); 809 810int cfs_trace_allocate_string_buffer(char **str, int nob) 811{ 812 if (nob > 2 * PAGE_CACHE_SIZE) /* string must be "sensible" */ 813 return -EINVAL; 814 815 *str = kmalloc(nob, GFP_IOFS | __GFP_ZERO); 816 if (*str == NULL) 817 return -ENOMEM; 818 819 return 0; 820} 821 822void cfs_trace_free_string_buffer(char *str, int nob) 823{ 824 kfree(str); 825} 826 827int cfs_trace_dump_debug_buffer_usrstr(void __user *usr_str, int usr_str_nob) 828{ 829 char *str; 830 int rc; 831 832 rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1); 833 if (rc != 0) 834 return rc; 835 836 rc = cfs_trace_copyin_string(str, usr_str_nob + 1, 837 usr_str, usr_str_nob); 838 if (rc != 0) 839 goto out; 840 841 if (str[0] != '/') { 842 rc = -EINVAL; 843 goto out; 844 } 845 rc = cfs_tracefile_dump_all_pages(str); 846out: 847 cfs_trace_free_string_buffer(str, usr_str_nob + 1); 848 return rc; 849} 850 851int cfs_trace_daemon_command(char *str) 852{ 853 int rc = 0; 854 855 cfs_tracefile_write_lock(); 856 857 if (strcmp(str, "stop") == 0) { 858 cfs_tracefile_write_unlock(); 859 cfs_trace_stop_thread(); 860 cfs_tracefile_write_lock(); 861 memset(cfs_tracefile, 0, sizeof(cfs_tracefile)); 862 863 } else if (strncmp(str, "size=", 5) == 0) { 864 cfs_tracefile_size = simple_strtoul(str + 5, NULL, 0); 865 if (cfs_tracefile_size < 10 || cfs_tracefile_size > 20480) 866 cfs_tracefile_size = CFS_TRACEFILE_SIZE; 867 else 868 cfs_tracefile_size <<= 20; 869 870 } else if (strlen(str) >= sizeof(cfs_tracefile)) { 871 rc = -ENAMETOOLONG; 872 } else if (str[0] != '/') { 873 rc = -EINVAL; 874 } else { 875 strcpy(cfs_tracefile, str); 876 877 printk(KERN_INFO 878 "Lustre: debug daemon will attempt to start writing " 879 "to %s (%lukB max)\n", cfs_tracefile, 880 (long)(cfs_tracefile_size >> 10)); 881 882 cfs_trace_start_thread(); 883 } 884 885 cfs_tracefile_write_unlock(); 886 return rc; 887} 888 889int cfs_trace_daemon_command_usrstr(void __user *usr_str, int usr_str_nob) 890{ 891 char *str; 892 int rc; 893 894 rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1); 895 if (rc != 0) 896 return rc; 897 898 rc = cfs_trace_copyin_string(str, usr_str_nob + 1, 899 usr_str, usr_str_nob); 900 if (rc == 0) 901 rc = cfs_trace_daemon_command(str); 902 903 cfs_trace_free_string_buffer(str, usr_str_nob + 1); 904 return rc; 905} 906 907int cfs_trace_set_debug_mb(int mb) 908{ 909 int i; 910 int j; 911 int pages; 912 int limit = cfs_trace_max_debug_mb(); 913 struct cfs_trace_cpu_data *tcd; 914 915 if (mb < num_possible_cpus()) { 916 printk(KERN_WARNING 917 "Lustre: %d MB is too small for debug buffer size, " 918 "setting it to %d MB.\n", mb, num_possible_cpus()); 919 mb = num_possible_cpus(); 920 } 921 922 if (mb > limit) { 923 printk(KERN_WARNING 924 "Lustre: %d MB is too large for debug buffer size, " 925 "setting it to %d MB.\n", mb, limit); 926 mb = limit; 927 } 928 929 mb /= num_possible_cpus(); 930 pages = mb << (20 - PAGE_CACHE_SHIFT); 931 932 cfs_tracefile_write_lock(); 933 934 cfs_tcd_for_each(tcd, i, j) 935 tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100; 936 937 cfs_tracefile_write_unlock(); 938 939 return 0; 940} 941 942int cfs_trace_set_debug_mb_usrstr(void __user *usr_str, int usr_str_nob) 943{ 944 char str[32]; 945 int rc; 946 947 rc = cfs_trace_copyin_string(str, sizeof(str), usr_str, usr_str_nob); 948 if (rc < 0) 949 return rc; 950 951 return cfs_trace_set_debug_mb(simple_strtoul(str, NULL, 0)); 952} 953 954int cfs_trace_get_debug_mb(void) 955{ 956 int i; 957 int j; 958 struct cfs_trace_cpu_data *tcd; 959 int total_pages = 0; 960 961 cfs_tracefile_read_lock(); 962 963 cfs_tcd_for_each(tcd, i, j) 964 total_pages += tcd->tcd_max_pages; 965 966 cfs_tracefile_read_unlock(); 967 968 return (total_pages >> (20 - PAGE_CACHE_SHIFT)) + 1; 969} 970 971static int tracefiled(void *arg) 972{ 973 struct page_collection pc; 974 struct tracefiled_ctl *tctl = arg; 975 struct cfs_trace_page *tage; 976 struct cfs_trace_page *tmp; 977 struct file *filp; 978 char *buf; 979 int last_loop = 0; 980 int rc; 981 982 DECL_MMSPACE; 983 984 /* we're started late enough that we pick up init's fs context */ 985 /* this is so broken in uml? what on earth is going on? */ 986 987 spin_lock_init(&pc.pc_lock); 988 complete(&tctl->tctl_start); 989 990 while (1) { 991 wait_queue_t __wait; 992 993 pc.pc_want_daemon_pages = 0; 994 collect_pages(&pc); 995 if (list_empty(&pc.pc_pages)) 996 goto end_loop; 997 998 filp = NULL; 999 cfs_tracefile_read_lock(); 1000 if (cfs_tracefile[0] != 0) { 1001 filp = filp_open(cfs_tracefile, 1002 O_CREAT | O_RDWR | O_LARGEFILE, 1003 0600); 1004 if (IS_ERR(filp)) { 1005 rc = PTR_ERR(filp); 1006 filp = NULL; 1007 printk(KERN_WARNING "couldn't open %s: " 1008 "%d\n", cfs_tracefile, rc); 1009 } 1010 } 1011 cfs_tracefile_read_unlock(); 1012 if (filp == NULL) { 1013 put_pages_on_daemon_list(&pc); 1014 __LASSERT(list_empty(&pc.pc_pages)); 1015 goto end_loop; 1016 } 1017 1018 MMSPACE_OPEN; 1019 1020 list_for_each_entry_safe(tage, tmp, &pc.pc_pages, 1021 linkage) { 1022 static loff_t f_pos; 1023 1024 __LASSERT_TAGE_INVARIANT(tage); 1025 1026 if (f_pos >= (off_t)cfs_tracefile_size) 1027 f_pos = 0; 1028 else if (f_pos > i_size_read(filp->f_dentry->d_inode)) 1029 f_pos = i_size_read(filp->f_dentry->d_inode); 1030 1031 buf = kmap(tage->page); 1032 rc = vfs_write(filp, (__force const char __user *)buf, 1033 tage->used, &f_pos); 1034 kunmap(tage->page); 1035 1036 if (rc != (int)tage->used) { 1037 printk(KERN_WARNING "wanted to write %u " 1038 "but wrote %d\n", tage->used, rc); 1039 put_pages_back(&pc); 1040 __LASSERT(list_empty(&pc.pc_pages)); 1041 } 1042 } 1043 MMSPACE_CLOSE; 1044 1045 filp_close(filp, NULL); 1046 put_pages_on_daemon_list(&pc); 1047 if (!list_empty(&pc.pc_pages)) { 1048 int i; 1049 1050 printk(KERN_ALERT "Lustre: trace pages aren't " 1051 " empty\n"); 1052 printk(KERN_ERR "total cpus(%d): ", 1053 num_possible_cpus()); 1054 for (i = 0; i < num_possible_cpus(); i++) 1055 if (cpu_online(i)) 1056 printk(KERN_ERR "%d(on) ", i); 1057 else 1058 printk(KERN_ERR "%d(off) ", i); 1059 printk(KERN_ERR "\n"); 1060 1061 i = 0; 1062 list_for_each_entry_safe(tage, tmp, &pc.pc_pages, 1063 linkage) 1064 printk(KERN_ERR "page %d belongs to cpu " 1065 "%d\n", ++i, tage->cpu); 1066 printk(KERN_ERR "There are %d pages unwritten\n", 1067 i); 1068 } 1069 __LASSERT(list_empty(&pc.pc_pages)); 1070end_loop: 1071 if (atomic_read(&tctl->tctl_shutdown)) { 1072 if (last_loop == 0) { 1073 last_loop = 1; 1074 continue; 1075 } else { 1076 break; 1077 } 1078 } 1079 init_waitqueue_entry(&__wait, current); 1080 add_wait_queue(&tctl->tctl_waitq, &__wait); 1081 set_current_state(TASK_INTERRUPTIBLE); 1082 schedule_timeout(cfs_time_seconds(1)); 1083 remove_wait_queue(&tctl->tctl_waitq, &__wait); 1084 } 1085 complete(&tctl->tctl_stop); 1086 return 0; 1087} 1088 1089int cfs_trace_start_thread(void) 1090{ 1091 struct tracefiled_ctl *tctl = &trace_tctl; 1092 int rc = 0; 1093 1094 mutex_lock(&cfs_trace_thread_mutex); 1095 if (thread_running) 1096 goto out; 1097 1098 init_completion(&tctl->tctl_start); 1099 init_completion(&tctl->tctl_stop); 1100 init_waitqueue_head(&tctl->tctl_waitq); 1101 atomic_set(&tctl->tctl_shutdown, 0); 1102 1103 if (IS_ERR(kthread_run(tracefiled, tctl, "ktracefiled"))) { 1104 rc = -ECHILD; 1105 goto out; 1106 } 1107 1108 wait_for_completion(&tctl->tctl_start); 1109 thread_running = 1; 1110out: 1111 mutex_unlock(&cfs_trace_thread_mutex); 1112 return rc; 1113} 1114 1115void cfs_trace_stop_thread(void) 1116{ 1117 struct tracefiled_ctl *tctl = &trace_tctl; 1118 1119 mutex_lock(&cfs_trace_thread_mutex); 1120 if (thread_running) { 1121 printk(KERN_INFO 1122 "Lustre: shutting down debug daemon thread...\n"); 1123 atomic_set(&tctl->tctl_shutdown, 1); 1124 wait_for_completion(&tctl->tctl_stop); 1125 thread_running = 0; 1126 } 1127 mutex_unlock(&cfs_trace_thread_mutex); 1128} 1129 1130int cfs_tracefile_init(int max_pages) 1131{ 1132 struct cfs_trace_cpu_data *tcd; 1133 int i; 1134 int j; 1135 int rc; 1136 int factor; 1137 1138 rc = cfs_tracefile_init_arch(); 1139 if (rc != 0) 1140 return rc; 1141 1142 cfs_tcd_for_each(tcd, i, j) { 1143 /* tcd_pages_factor is initialized int tracefile_init_arch. */ 1144 factor = tcd->tcd_pages_factor; 1145 INIT_LIST_HEAD(&tcd->tcd_pages); 1146 INIT_LIST_HEAD(&tcd->tcd_stock_pages); 1147 INIT_LIST_HEAD(&tcd->tcd_daemon_pages); 1148 tcd->tcd_cur_pages = 0; 1149 tcd->tcd_cur_stock_pages = 0; 1150 tcd->tcd_cur_daemon_pages = 0; 1151 tcd->tcd_max_pages = (max_pages * factor) / 100; 1152 LASSERT(tcd->tcd_max_pages > 0); 1153 tcd->tcd_shutting_down = 0; 1154 } 1155 1156 return 0; 1157} 1158 1159static void trace_cleanup_on_all_cpus(void) 1160{ 1161 struct cfs_trace_cpu_data *tcd; 1162 struct cfs_trace_page *tage; 1163 struct cfs_trace_page *tmp; 1164 int i, cpu; 1165 1166 for_each_possible_cpu(cpu) { 1167 cfs_tcd_for_each_type_lock(tcd, i, cpu) { 1168 tcd->tcd_shutting_down = 1; 1169 1170 list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, 1171 linkage) { 1172 __LASSERT_TAGE_INVARIANT(tage); 1173 1174 list_del(&tage->linkage); 1175 cfs_tage_free(tage); 1176 } 1177 1178 tcd->tcd_cur_pages = 0; 1179 } 1180 } 1181} 1182 1183static void cfs_trace_cleanup(void) 1184{ 1185 struct page_collection pc; 1186 1187 INIT_LIST_HEAD(&pc.pc_pages); 1188 spin_lock_init(&pc.pc_lock); 1189 1190 trace_cleanup_on_all_cpus(); 1191 1192 cfs_tracefile_fini_arch(); 1193} 1194 1195void cfs_tracefile_exit(void) 1196{ 1197 cfs_trace_stop_thread(); 1198 cfs_trace_cleanup(); 1199} 1200