crash.c revision 66b15db69c2553036cc25f6e2e74fe7e3aa2761e
/*
 * Architecture specific (PPC64) functions for kexec based crash dumps.
 *
 * Copyright (C) 2005, IBM Corp.
 *
 * Created by: Haren Myneni
 *
 * This source code is licensed under the GNU General Public License,
 * Version 2. See the file COPYING for more details.
 *
 */

#undef DEBUG

#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/reboot.h>
#include <linux/kexec.h>
#include <linux/bootmem.h>
#include <linux/export.h>
#include <linux/crash_dump.h>
#include <linux/delay.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/irq.h>
#include <linux/types.h>
#include <linux/memblock.h>

#include <asm/processor.h>
#include <asm/machdep.h>
#include <asm/kexec.h>
#include <asm/kdump.h>
#include <asm/prom.h>
#include <asm/firmware.h>
#include <asm/smp.h>
#include <asm/system.h>
#include <asm/setjmp.h>

#ifdef DEBUG
#include <asm/udbg.h>
#define DBG(fmt...) udbg_printf(fmt)
#else
#define DBG(fmt...)
#endif

/* This keeps a track of which one is crashing cpu. */
int crashing_cpu = -1;
/* CPUs that have saved their registers and are parked for the kdump boot. */
static cpumask_t cpus_in_crash = CPU_MASK_NONE;
/* CPUs expected to (re-)enter the crash path via operator soft-reset. */
cpumask_t cpus_in_sr = CPU_MASK_NONE;

#define CRASH_HANDLER_MAX 3
/* NULL terminated list of shutdown handles */
static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX+1];
static DEFINE_SPINLOCK(crash_handlers_lock);

#ifdef CONFIG_SMP
/* Count of secondary CPUs that entered the crash path via soft-reset. */
static atomic_t enter_on_soft_reset = ATOMIC_INIT(0);

/*
 * Entered on every non-crashing CPU, either via the crash IPI sent by
 * crash_kexec_prepare_cpus() or via soft-reset.  Saves this CPU's
 * register state once, marks it captured in cpus_in_crash, then spins
 * until the crashing CPU checks in and finally parks in
 * kexec_smp_wait() - this function does not return.
 */
void crash_ipi_callback(struct pt_regs *regs)
{
	int cpu = smp_processor_id();

	if (!cpu_online(cpu))
		return;

	hard_irq_disable();
	/* Save regs only on first entry (may be re-entered via soft-reset). */
	if (!cpumask_test_cpu(cpu, &cpus_in_crash))
		crash_save_cpu(regs, cpu);
	cpumask_set_cpu(cpu, &cpus_in_crash);

	/*
	 * Entered via soft-reset - could be the kdump
	 * process is invoked using soft-reset or user activated
	 * it if some CPU did not respond to an IPI.
	 * For soft-reset, the secondary CPU can enter this func
	 * twice. 1 - using IPI, and 2. soft-reset.
	 * Tell the kexec CPU that entered via soft-reset and ready
	 * to go down.
	 */
	if (cpumask_test_cpu(cpu, &cpus_in_sr)) {
		cpumask_clear_cpu(cpu, &cpus_in_sr);
		atomic_inc(&enter_on_soft_reset);
	}

	/*
	 * Starting the kdump boot.
	 * This barrier is needed to make sure that all CPUs are stopped.
	 * If not, soft-reset will be invoked to bring other CPUs.
	 */
	while (!cpumask_test_cpu(crashing_cpu, &cpus_in_crash))
		cpu_relax();

	if (ppc_md.kexec_cpu_down)
		ppc_md.kexec_cpu_down(1, 1);

#ifdef CONFIG_PPC64
	kexec_smp_wait();
#else
	for (;;);	/* FIXME */
#endif

	/* NOTREACHED */
}

/*
 * Wait until all CPUs are entered via soft-reset.
 */
static void crash_soft_reset_check(int cpu)
{
	unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */

	cpumask_clear_cpu(cpu, &cpus_in_sr);
	while (atomic_read(&enter_on_soft_reset) != ncpus)
		cpu_relax();
}


/*
 * Runs on the crashing CPU: IPI all other online CPUs into
 * crash_ipi_callback() and poll (up to ~10s) for them to check in,
 * falling back to an operator-activated soft-reset for any stragglers.
 */
static void crash_kexec_prepare_cpus(int cpu)
{
	unsigned int msecs;

	unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */

	crash_send_ipi(crash_ipi_callback);
	smp_wmb();

	/*
	 * FIXME: Until we will have the way to stop other CPUs reliably,
	 * the crash CPU will send an IPI and wait for other CPUs to
	 * respond.
	 * Delay of at least 10 seconds.
	 */
	printk(KERN_EMERG "Sending IPI to other cpus...\n");
	msecs = 10000;
	while ((cpumask_weight(&cpus_in_crash) < ncpus) && (--msecs > 0)) {
		cpu_relax();
		mdelay(1);
	}

	/* Would it be better to replace the trap vector here? */

	/*
	 * FIXME: In case if we do not get all CPUs, one possibility: ask the
	 * user to do soft reset such that we get all.
	 * Soft-reset will be used until better mechanism is implemented.
	 */
	if (cpumask_weight(&cpus_in_crash) < ncpus) {
		printk(KERN_EMERG "done waiting: %d cpu(s) not responding\n",
			ncpus - cpumask_weight(&cpus_in_crash));
		printk(KERN_EMERG "Activate soft-reset to stop other cpu(s)\n");
		cpumask_clear(&cpus_in_sr);
		atomic_set(&enter_on_soft_reset, 0);
		/* Spin forever: the operator's soft-reset brings the rest in. */
		while (cpumask_weight(&cpus_in_crash) < ncpus)
			cpu_relax();
	}
	/*
	 * Make sure all CPUs are entered via soft-reset if the kdump is
	 * invoked using soft-reset.
	 */
	if (cpumask_test_cpu(cpu, &cpus_in_sr))
		crash_soft_reset_check(cpu);
	/* Leave the IPI callback set */
}

/*
 * This function will be called by secondary cpus or by kexec cpu
 * if soft-reset is activated to stop some CPUs.
 */
void crash_kexec_secondary(struct pt_regs *regs)
{
	int cpu = smp_processor_id();
	unsigned long flags;
	int msecs = 5;

	local_irq_save(flags);
	/* Wait 5ms if the kexec CPU is not entered yet. */
	while (crashing_cpu < 0) {
		if (--msecs < 0) {
			/*
			 * Either kdump image is not loaded or
			 * kdump process is not started - Probably xmon
			 * exited using 'x'(exit and recover) or
			 * kexec_should_crash() failed for all running tasks.
			 */
			cpumask_clear_cpu(cpu, &cpus_in_sr);
			local_irq_restore(flags);
			return;
		}
		mdelay(1);
		cpu_relax();
	}
	if (cpu == crashing_cpu) {
		/*
		 * Panic CPU will enter this func only via soft-reset.
		 * Wait until all secondary CPUs entered and
		 * then start kexec boot.
		 */
		crash_soft_reset_check(cpu);
		cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
		if (ppc_md.kexec_cpu_down)
			ppc_md.kexec_cpu_down(1, 0);
		machine_kexec(kexec_crash_image);
		/* NOTREACHED */
	}
	crash_ipi_callback(regs);
}

#else	/* ! CONFIG_SMP */

static void crash_kexec_prepare_cpus(int cpu)
{
	/*
	 * move the secondaries to us so that we can copy
	 * the new kernel 0-0x100 safely
	 *
	 * do this if kexec in setup.c ?
	 */
#ifdef CONFIG_PPC64
	smp_release_cpus();
#else
	/* FIXME */
#endif
}

void crash_kexec_secondary(struct pt_regs *regs)
{
	cpumask_clear(&cpus_in_sr);
}
#endif	/* CONFIG_SMP */

/* wait for all the CPUs to hit real mode but timeout if they don't come in */
#if defined(CONFIG_SMP) && defined(CONFIG_PPC_STD_MMU_64)
static void crash_kexec_wait_realmode(int cpu)
{
	unsigned int msecs;
	int i;

	msecs = 10000;
	/* Poll each other CPU's paca kexec_state; msecs is a shared budget. */
	for (i=0; i < nr_cpu_ids && msecs > 0; i++) {
		if (i == cpu)
			continue;

		while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) {
			barrier();
			if (!cpu_possible(i) || !cpu_online(i) || (msecs <= 0))
				break;
			msecs--;
			mdelay(1);
		}
	}
	mb();
}
#else
static inline void crash_kexec_wait_realmode(int cpu) {}
#endif	/* CONFIG_SMP && CONFIG_PPC_STD_MMU_64 */

/*
 * Register a function to be called on shutdown.  Only use this if you
 * can't reset your device in the second kernel.
 * Returns 0 on success, 1 if the handler table is already full.
 */
int crash_shutdown_register(crash_shutdown_t handler)
{
	unsigned int i, rc;

	spin_lock(&crash_handlers_lock);
	for (i = 0 ; i < CRASH_HANDLER_MAX; i++)
		if (!crash_shutdown_handles[i]) {
			/* Insert handle at first empty entry */
			crash_shutdown_handles[i] = handler;
			rc = 0;
			break;
		}

	if (i == CRASH_HANDLER_MAX) {
		printk(KERN_ERR "Crash shutdown handles full, "
		       "not registered.\n");
		rc = 1;
	}

	spin_unlock(&crash_handlers_lock);
	return rc;
}
EXPORT_SYMBOL(crash_shutdown_register);

/*
 * Remove a previously registered shutdown handler.
 * Returns 0 on success, 1 if the handler was not found.
 */
int crash_shutdown_unregister(crash_shutdown_t handler)
{
	unsigned int i, rc;

	spin_lock(&crash_handlers_lock);
	for (i = 0 ; i < CRASH_HANDLER_MAX; i++)
		if (crash_shutdown_handles[i] == handler)
			break;

	if (i == CRASH_HANDLER_MAX) {
		printk(KERN_ERR "Crash shutdown handle not found\n");
		rc = 1;
	} else {
		/* Shift handles down, keeping the list NULL terminated. */
		for (; crash_shutdown_handles[i]; i++)
			crash_shutdown_handles[i] =
				crash_shutdown_handles[i+1];
		rc = 0;
	}

	spin_unlock(&crash_handlers_lock);
	return rc;
}
EXPORT_SYMBOL(crash_shutdown_unregister);

/* setjmp context used to recover from a fault inside a shutdown handler. */
static unsigned long crash_shutdown_buf[JMP_BUF_LEN];
/* CPU currently running the shutdown handlers, or -1 when none is. */
static int crash_shutdown_cpu = -1;

/*
 * Debugger fault handler installed while the shutdown handlers run:
 * if the crashing CPU faults inside a handler, longjmp back to the
 * setjmp in default_machine_crash_shutdown() so the remaining
 * handlers still get a chance to run.
 */
static int handle_fault(struct pt_regs *regs)
{
	if (crash_shutdown_cpu == smp_processor_id())
		longjmp(crash_shutdown_buf, 1);
	return 0;
}

void default_machine_crash_shutdown(struct pt_regs *regs)
{
	unsigned int i;
	int (*old_handler)(struct pt_regs *regs);


	/*
	 * This function is only called after the system
	 * has panicked or is otherwise in a critical state.
	 * The minimum amount of code to allow a kexec'd kernel
	 * to run successfully needs to happen here.
	 *
	 * In practice this means stopping other cpus in
	 * an SMP system.
	 * The kernel is broken so disable interrupts.
	 */
	hard_irq_disable();

	/*
	 * Make a note of crashing cpu. Will be used in machine_kexec
	 * such that another IPI will not be sent.
	 */
	crashing_cpu = smp_processor_id();
	crash_save_cpu(regs, crashing_cpu);
	crash_kexec_prepare_cpus(crashing_cpu);
	cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
	crash_kexec_wait_realmode(crashing_cpu);

	machine_kexec_mask_interrupts();

	/*
	 * Call registered shutdown routines safely.  Swap out
	 * __debugger_fault_handler, and replace on exit.
	 */
	old_handler = __debugger_fault_handler;
	__debugger_fault_handler = handle_fault;
	crash_shutdown_cpu = smp_processor_id();
	for (i = 0; crash_shutdown_handles[i]; i++) {
		if (setjmp(crash_shutdown_buf) == 0) {
			/*
			 * Insert syncs and delay to ensure
			 * instructions in the dangerous region don't
			 * leak away from this protected region.
			 */
			asm volatile("sync; isync");
			/* dangerous region */
			crash_shutdown_handles[i]();
			asm volatile("sync; isync");
		}
	}
	crash_shutdown_cpu = -1;
	__debugger_fault_handler = old_handler;

	if (ppc_md.kexec_cpu_down)
		ppc_md.kexec_cpu_down(1, 0);
}