/* sn2_smp.c — revision c1902aae322952f8726469a6657df7b9d5c794fe */
1/* 2 * SN2 Platform specific SMP Support 3 * 4 * This file is subject to the terms and conditions of the GNU General Public 5 * License. See the file "COPYING" in the main directory of this archive 6 * for more details. 7 * 8 * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved. 9 */ 10 11#include <linux/init.h> 12#include <linux/kernel.h> 13#include <linux/spinlock.h> 14#include <linux/threads.h> 15#include <linux/sched.h> 16#include <linux/smp.h> 17#include <linux/interrupt.h> 18#include <linux/irq.h> 19#include <linux/mmzone.h> 20#include <linux/module.h> 21#include <linux/bitops.h> 22#include <linux/nodemask.h> 23#include <linux/proc_fs.h> 24#include <linux/seq_file.h> 25 26#include <asm/processor.h> 27#include <asm/irq.h> 28#include <asm/sal.h> 29#include <asm/system.h> 30#include <asm/delay.h> 31#include <asm/io.h> 32#include <asm/smp.h> 33#include <asm/tlb.h> 34#include <asm/numa.h> 35#include <asm/hw_irq.h> 36#include <asm/current.h> 37#include <asm/sn/sn_cpuid.h> 38#include <asm/sn/sn_sal.h> 39#include <asm/sn/addrs.h> 40#include <asm/sn/shub_mmr.h> 41#include <asm/sn/nodepda.h> 42#include <asm/sn/rw_mmr.h> 43 44DEFINE_PER_CPU(struct ptc_stats, ptcstats); 45DECLARE_PER_CPU(struct ptc_stats, ptcstats); 46 47static __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock); 48 49void sn2_ptc_deadlock_recovery(short *, short, int, volatile unsigned long *, unsigned long data0, 50 volatile unsigned long *, unsigned long data1); 51 52#ifdef DEBUG_PTC 53/* 54 * ptctest: 55 * 56 * xyz - 3 digit hex number: 57 * x - Force PTC purges to use shub: 58 * 0 - no force 59 * 1 - force 60 * y - interupt enable 61 * 0 - disable interrupts 62 * 1 - leave interuupts enabled 63 * z - type of lock: 64 * 0 - global lock 65 * 1 - node local lock 66 * 2 - no lock 67 * 68 * Note: on shub1, only ptctest == 0 is supported. Don't try other values! 
69 */ 70 71static unsigned int sn2_ptctest = 0; 72 73static int __init ptc_test(char *str) 74{ 75 get_option(&str, &sn2_ptctest); 76 return 1; 77} 78__setup("ptctest=", ptc_test); 79 80static inline int ptc_lock(unsigned long *flagp) 81{ 82 unsigned long opt = sn2_ptctest & 255; 83 84 switch (opt) { 85 case 0x00: 86 spin_lock_irqsave(&sn2_global_ptc_lock, *flagp); 87 break; 88 case 0x01: 89 spin_lock_irqsave(&sn_nodepda->ptc_lock, *flagp); 90 break; 91 case 0x02: 92 local_irq_save(*flagp); 93 break; 94 case 0x10: 95 spin_lock(&sn2_global_ptc_lock); 96 break; 97 case 0x11: 98 spin_lock(&sn_nodepda->ptc_lock); 99 break; 100 case 0x12: 101 break; 102 default: 103 BUG(); 104 } 105 return opt; 106} 107 108static inline void ptc_unlock(unsigned long flags, int opt) 109{ 110 switch (opt) { 111 case 0x00: 112 spin_unlock_irqrestore(&sn2_global_ptc_lock, flags); 113 break; 114 case 0x01: 115 spin_unlock_irqrestore(&sn_nodepda->ptc_lock, flags); 116 break; 117 case 0x02: 118 local_irq_restore(flags); 119 break; 120 case 0x10: 121 spin_unlock(&sn2_global_ptc_lock); 122 break; 123 case 0x11: 124 spin_unlock(&sn_nodepda->ptc_lock); 125 break; 126 case 0x12: 127 break; 128 default: 129 BUG(); 130 } 131} 132#else 133 134#define sn2_ptctest 0 135 136static inline int ptc_lock(unsigned long *flagp) 137{ 138 spin_lock_irqsave(&sn2_global_ptc_lock, *flagp); 139 return 0; 140} 141 142static inline void ptc_unlock(unsigned long flags, int opt) 143{ 144 spin_unlock_irqrestore(&sn2_global_ptc_lock, flags); 145} 146#endif 147 148struct ptc_stats { 149 unsigned long ptc_l; 150 unsigned long change_rid; 151 unsigned long shub_ptc_flushes; 152 unsigned long nodes_flushed; 153 unsigned long deadlocks; 154 unsigned long lock_itc_clocks; 155 unsigned long shub_itc_clocks; 156 unsigned long shub_itc_clocks_max; 157}; 158 159static inline unsigned long wait_piowc(void) 160{ 161 volatile unsigned long *piows, zeroval; 162 unsigned long ws; 163 164 piows = pda->pio_write_status_addr; 165 zeroval = 
pda->pio_write_status_val; 166 do { 167 cpu_relax(); 168 } while (((ws = *piows) & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) != zeroval); 169 return ws; 170} 171 172void sn_tlb_migrate_finish(struct mm_struct *mm) 173{ 174 if (mm == current->mm) 175 flush_tlb_mm(mm); 176} 177 178/** 179 * sn2_global_tlb_purge - globally purge translation cache of virtual address range 180 * @mm: mm_struct containing virtual address range 181 * @start: start of virtual address range 182 * @end: end of virtual address range 183 * @nbits: specifies number of bytes to purge per instruction (num = 1<<(nbits & 0xfc)) 184 * 185 * Purges the translation caches of all processors of the given virtual address 186 * range. 187 * 188 * Note: 189 * - cpu_vm_mask is a bit mask that indicates which cpus have loaded the context. 190 * - cpu_vm_mask is converted into a nodemask of the nodes containing the 191 * cpus in cpu_vm_mask. 192 * - if only one bit is set in cpu_vm_mask & it is the current cpu & the 193 * process is purging its own virtual address range, then only the 194 * local TLB needs to be flushed. This flushing can be done using 195 * ptc.l. This is the common case & avoids the global spinlock. 196 * - if multiple cpus have loaded the context, then flushing has to be 197 * done with ptc.g/MMRs under protection of the global ptc_lock. 
198 */ 199 200void 201sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, 202 unsigned long end, unsigned long nbits) 203{ 204 int i, opt, shub1, cnode, mynasid, cpu, lcpu = 0, nasid, flushed = 0; 205 int mymm = (mm == current->active_mm); 206 volatile unsigned long *ptc0, *ptc1; 207 unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value; 208 short nasids[MAX_NUMNODES], nix; 209 nodemask_t nodes_flushed; 210 211 nodes_clear(nodes_flushed); 212 i = 0; 213 214 for_each_cpu_mask(cpu, mm->cpu_vm_mask) { 215 cnode = cpu_to_node(cpu); 216 node_set(cnode, nodes_flushed); 217 lcpu = cpu; 218 i++; 219 } 220 221 if (i == 0) 222 return; 223 224 preempt_disable(); 225 226 if (likely(i == 1 && lcpu == smp_processor_id() && mymm)) { 227 do { 228 ia64_ptcl(start, nbits << 2); 229 start += (1UL << nbits); 230 } while (start < end); 231 ia64_srlz_i(); 232 __get_cpu_var(ptcstats).ptc_l++; 233 preempt_enable(); 234 return; 235 } 236 237 if (atomic_read(&mm->mm_users) == 1 && mymm) { 238 flush_tlb_mm(mm); 239 __get_cpu_var(ptcstats).change_rid++; 240 preempt_enable(); 241 return; 242 } 243 244 itc = ia64_get_itc(); 245 nix = 0; 246 for_each_node_mask(cnode, nodes_flushed) 247 nasids[nix++] = cnodeid_to_nasid(cnode); 248 249 rr_value = (mm->context << 3) | REGION_NUMBER(start); 250 251 shub1 = is_shub1(); 252 if (shub1) { 253 data0 = (1UL << SH1_PTC_0_A_SHFT) | 254 (nbits << SH1_PTC_0_PS_SHFT) | 255 (rr_value << SH1_PTC_0_RID_SHFT) | 256 (1UL << SH1_PTC_0_START_SHFT); 257 ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_0); 258 ptc1 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_1); 259 } else { 260 data0 = (1UL << SH2_PTC_A_SHFT) | 261 (nbits << SH2_PTC_PS_SHFT) | 262 (1UL << SH2_PTC_START_SHFT); 263 ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH2_PTC + 264 (rr_value << SH2_PTC_RID_SHFT)); 265 ptc1 = NULL; 266 } 267 268 269 mynasid = get_nasid(); 270 271 itc = ia64_get_itc(); 272 opt = ptc_lock(&flags); 273 itc2 = ia64_get_itc(); 274 __get_cpu_var(ptcstats).lock_itc_clocks += 
itc2 - itc; 275 __get_cpu_var(ptcstats).shub_ptc_flushes++; 276 __get_cpu_var(ptcstats).nodes_flushed += nix; 277 278 do { 279 if (shub1) 280 data1 = start | (1UL << SH1_PTC_1_START_SHFT); 281 else 282 data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK); 283 for (i = 0; i < nix; i++) { 284 nasid = nasids[i]; 285 if ((!(sn2_ptctest & 3)) && unlikely(nasid == mynasid && mymm)) { 286 ia64_ptcga(start, nbits << 2); 287 ia64_srlz_i(); 288 } else { 289 ptc0 = CHANGE_NASID(nasid, ptc0); 290 if (ptc1) 291 ptc1 = CHANGE_NASID(nasid, ptc1); 292 pio_atomic_phys_write_mmrs(ptc0, data0, ptc1, 293 data1); 294 flushed = 1; 295 } 296 } 297 if (flushed 298 && (wait_piowc() & 299 (SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK))) { 300 sn2_ptc_deadlock_recovery(nasids, nix, mynasid, ptc0, data0, ptc1, data1); 301 } 302 303 start += (1UL << nbits); 304 305 } while (start < end); 306 307 itc2 = ia64_get_itc() - itc2; 308 __get_cpu_var(ptcstats).shub_itc_clocks += itc2; 309 if (itc2 > __get_cpu_var(ptcstats).shub_itc_clocks_max) 310 __get_cpu_var(ptcstats).shub_itc_clocks_max = itc2; 311 312 ptc_unlock(flags, opt); 313 314 preempt_enable(); 315} 316 317/* 318 * sn2_ptc_deadlock_recovery 319 * 320 * Recover from PTC deadlocks conditions. Recovery requires stepping thru each 321 * TLB flush transaction. The recovery sequence is somewhat tricky & is 322 * coded in assembly language. 
323 */ 324void sn2_ptc_deadlock_recovery(short *nasids, short nix, int mynasid, volatile unsigned long *ptc0, unsigned long data0, 325 volatile unsigned long *ptc1, unsigned long data1) 326{ 327 extern void sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long, 328 volatile unsigned long *, unsigned long, volatile unsigned long *, unsigned long); 329 short nasid, i; 330 unsigned long *piows, zeroval; 331 332 __get_cpu_var(ptcstats).deadlocks++; 333 334 piows = (unsigned long *) pda->pio_write_status_addr; 335 zeroval = pda->pio_write_status_val; 336 337 for (i=0; i < nix; i++) { 338 nasid = nasids[i]; 339 if (!(sn2_ptctest & 3) && nasid == mynasid) 340 continue; 341 ptc0 = CHANGE_NASID(nasid, ptc0); 342 if (ptc1) 343 ptc1 = CHANGE_NASID(nasid, ptc1); 344 sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval); 345 } 346 347} 348 349/** 350 * sn_send_IPI_phys - send an IPI to a Nasid and slice 351 * @nasid: nasid to receive the interrupt (may be outside partition) 352 * @physid: physical cpuid to receive the interrupt. 
353 * @vector: command to send 354 * @delivery_mode: delivery mechanism 355 * 356 * Sends an IPI (interprocessor interrupt) to the processor specified by 357 * @physid 358 * 359 * @delivery_mode can be one of the following 360 * 361 * %IA64_IPI_DM_INT - pend an interrupt 362 * %IA64_IPI_DM_PMI - pend a PMI 363 * %IA64_IPI_DM_NMI - pend an NMI 364 * %IA64_IPI_DM_INIT - pend an INIT interrupt 365 */ 366void sn_send_IPI_phys(int nasid, long physid, int vector, int delivery_mode) 367{ 368 long val; 369 unsigned long flags = 0; 370 volatile long *p; 371 372 p = (long *)GLOBAL_MMR_PHYS_ADDR(nasid, SH_IPI_INT); 373 val = (1UL << SH_IPI_INT_SEND_SHFT) | 374 (physid << SH_IPI_INT_PID_SHFT) | 375 ((long)delivery_mode << SH_IPI_INT_TYPE_SHFT) | 376 ((long)vector << SH_IPI_INT_IDX_SHFT) | 377 (0x000feeUL << SH_IPI_INT_BASE_SHFT); 378 379 mb(); 380 if (enable_shub_wars_1_1()) { 381 spin_lock_irqsave(&sn2_global_ptc_lock, flags); 382 } 383 pio_phys_write_mmr(p, val); 384 if (enable_shub_wars_1_1()) { 385 wait_piowc(); 386 spin_unlock_irqrestore(&sn2_global_ptc_lock, flags); 387 } 388 389} 390 391EXPORT_SYMBOL(sn_send_IPI_phys); 392 393/** 394 * sn2_send_IPI - send an IPI to a processor 395 * @cpuid: target of the IPI 396 * @vector: command to send 397 * @delivery_mode: delivery mechanism 398 * @redirect: redirect the IPI? 399 * 400 * Sends an IPI (InterProcessor Interrupt) to the processor specified by 401 * @cpuid. 
@vector specifies the command to send, while @delivery_mode can 402 * be one of the following 403 * 404 * %IA64_IPI_DM_INT - pend an interrupt 405 * %IA64_IPI_DM_PMI - pend a PMI 406 * %IA64_IPI_DM_NMI - pend an NMI 407 * %IA64_IPI_DM_INIT - pend an INIT interrupt 408 */ 409void sn2_send_IPI(int cpuid, int vector, int delivery_mode, int redirect) 410{ 411 long physid; 412 int nasid; 413 414 physid = cpu_physical_id(cpuid); 415 nasid = cpuid_to_nasid(cpuid); 416 417 /* the following is used only when starting cpus at boot time */ 418 if (unlikely(nasid == -1)) 419 ia64_sn_get_sapic_info(physid, &nasid, NULL, NULL); 420 421 sn_send_IPI_phys(nasid, physid, vector, delivery_mode); 422} 423 424#ifdef CONFIG_PROC_FS 425 426#define PTC_BASENAME "sgi_sn/ptc_statistics" 427 428static void *sn2_ptc_seq_start(struct seq_file *file, loff_t * offset) 429{ 430 if (*offset < NR_CPUS) 431 return offset; 432 return NULL; 433} 434 435static void *sn2_ptc_seq_next(struct seq_file *file, void *data, loff_t * offset) 436{ 437 (*offset)++; 438 if (*offset < NR_CPUS) 439 return offset; 440 return NULL; 441} 442 443static void sn2_ptc_seq_stop(struct seq_file *file, void *data) 444{ 445} 446 447static int sn2_ptc_seq_show(struct seq_file *file, void *data) 448{ 449 struct ptc_stats *stat; 450 int cpu; 451 452 cpu = *(loff_t *) data; 453 454 if (!cpu) { 455 seq_printf(file, "# ptc_l change_rid shub_ptc_flushes shub_nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max\n"); 456 seq_printf(file, "# ptctest %d\n", sn2_ptctest); 457 } 458 459 if (cpu < NR_CPUS && cpu_online(cpu)) { 460 stat = &per_cpu(ptcstats, cpu); 461 seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l, 462 stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed, 463 stat->deadlocks, 464 1000 * stat->lock_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec, 465 1000 * stat->shub_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec, 466 1000 * stat->shub_itc_clocks_max / per_cpu(cpu_info, 
cpu).cyc_per_usec); 467 } 468 469 return 0; 470} 471 472static struct seq_operations sn2_ptc_seq_ops = { 473 .start = sn2_ptc_seq_start, 474 .next = sn2_ptc_seq_next, 475 .stop = sn2_ptc_seq_stop, 476 .show = sn2_ptc_seq_show 477}; 478 479int sn2_ptc_proc_open(struct inode *inode, struct file *file) 480{ 481 return seq_open(file, &sn2_ptc_seq_ops); 482} 483 484static struct file_operations proc_sn2_ptc_operations = { 485 .open = sn2_ptc_proc_open, 486 .read = seq_read, 487 .llseek = seq_lseek, 488 .release = seq_release, 489}; 490 491static struct proc_dir_entry *proc_sn2_ptc; 492 493static int __init sn2_ptc_init(void) 494{ 495 if (!(proc_sn2_ptc = create_proc_entry(PTC_BASENAME, 0444, NULL))) { 496 printk(KERN_ERR "unable to create %s proc entry", PTC_BASENAME); 497 return -EINVAL; 498 } 499 proc_sn2_ptc->proc_fops = &proc_sn2_ptc_operations; 500 spin_lock_init(&sn2_global_ptc_lock); 501 return 0; 502} 503 504static void __exit sn2_ptc_exit(void) 505{ 506 remove_proc_entry(PTC_BASENAME, NULL); 507} 508 509module_init(sn2_ptc_init); 510module_exit(sn2_ptc_exit); 511#endif /* CONFIG_PROC_FS */ 512 513