/*
 * Thermal throttle event support code (such as syslog messaging and rate
 * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
 *
 * This allows consistent reporting of CPU thermal throttle events.
 *
 * Maintains a counter in /sys that keeps track of the number of thermal
 * events, such that the user knows how bad the thermal problem might be
 * (since the logging to syslog and mcelog is rate limited).
 *
 * Author: Dmitriy Zavin (dmitriyz@google.com)
 *
 * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
 *          Inspired by Ross Biro's and Al Borchers' counter code.
 */
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/cpu.h>

#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/msr.h>

/* How long to wait between reporting thermal events */
#define CHECK_INTERVAL		(300 * HZ)

#define THERMAL_THROTTLING_EVENT	0
#define POWER_LIMIT_EVENT		1

/*
 * Current thermal event state:
 */
struct _thermal_state {
	bool			new_event;
	int			event;
	u64			next_check;
	unsigned long		count;
	unsigned long		last_count;
};

struct thermal_state {
	struct _thermal_state core_throttle;
	struct _thermal_state core_power_limit;
	struct _thermal_state package_throttle;
	struct _thermal_state package_power_limit;
	struct _thermal_state core_thresh0;
	struct _thermal_state core_thresh1;
};

/* Callback to handle core threshold interrupts */
int (*platform_thermal_notify)(__u64 msr_val);
EXPORT_SYMBOL(platform_thermal_notify);

static DEFINE_PER_CPU(struct thermal_state, thermal_state);

static atomic_t therm_throt_en = ATOMIC_INIT(0);

static u32 lvtthmr_init __read_mostly;

#ifdef CONFIG_SYSFS
#define define_therm_throt_device_one_ro(_name)				\
	static DEVICE_ATTR(_name, 0444,					\
			   therm_throt_device_show_##_name,		\
			   NULL)

#define define_therm_throt_device_show_func(event, name)		\
									\
static ssize_t therm_throt_device_show_##event##_##name(		\
			struct device *dev,				\
			struct device_attribute *attr,			\
			char *buf)					\
{									\
	unsigned int cpu = dev->id;					\
	ssize_t ret;							\
									\
	preempt_disable();	/* CPU hotplug */			\
	if (cpu_online(cpu)) {						\
		ret = sprintf(buf, "%lu\n",				\
			      per_cpu(thermal_state, cpu).event.name);	\
	} else								\
		ret = 0;						\
	preempt_enable();						\
									\
	return ret;							\
}

define_therm_throt_device_show_func(core_throttle, count);
define_therm_throt_device_one_ro(core_throttle_count);

define_therm_throt_device_show_func(core_power_limit, count);
define_therm_throt_device_one_ro(core_power_limit_count);

define_therm_throt_device_show_func(package_throttle, count);
define_therm_throt_device_one_ro(package_throttle_count);

define_therm_throt_device_show_func(package_power_limit, count);
define_therm_throt_device_one_ro(package_power_limit_count);
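
/*
 * For orientation (expansion sketch, not compiled code): each pair of
 * macro invocations above pastes tokens into one show function plus one
 * read-only device attribute, so the core_throttle pair expands roughly to:
 *
 *	static ssize_t therm_throt_device_show_core_throttle_count(
 *			struct device *dev, struct device_attribute *attr,
 *			char *buf)
 *	{
 *		return sprintf(buf, "%lu\n",
 *			per_cpu(thermal_state, dev->id).core_throttle.count);
 *	}
 *	static DEVICE_ATTR(core_throttle_count, 0444,
 *			   therm_throt_device_show_core_throttle_count, NULL);
 *
 * (the online-CPU check under preempt_disable() is omitted from this sketch).
 */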

static struct attribute *thermal_throttle_attrs[] = {
	&dev_attr_core_throttle_count.attr,
	NULL
};

static struct attribute_group thermal_attr_group = {
	.attrs	= thermal_throttle_attrs,
	.name	= "thermal_throttle"
};
#endif /* CONFIG_SYSFS */

#define CORE_LEVEL	0
#define PACKAGE_LEVEL	1

/**
 * therm_throt_process - Process thermal throttling event from interrupt
 * @new_event: Whether the condition is currently asserted (boolean), since
 *             the thermal interrupt normally gets called both when the
 *             thermal event begins and once the event has ended.
 * @event:     THERMAL_THROTTLING_EVENT or POWER_LIMIT_EVENT
 * @level:     CORE_LEVEL or PACKAGE_LEVEL
 *
 * This function is called by the thermal interrupt after the
 * IRQ has been acknowledged.
 *
 * It will take care of rate limiting and printing messages to the syslog.
 *
 * Returns: 0 : Event should NOT be further logged, i.e. still in
 *              "timeout" from previous log message.
 *          1 : Event should be logged further, and a message has been
 *              printed to the syslog.
 */
static int therm_throt_process(bool new_event, int event, int level)
{
	struct _thermal_state *state;
	unsigned int this_cpu = smp_processor_id();
	bool old_event;
	u64 now;
	struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);

	now = get_jiffies_64();
	if (level == CORE_LEVEL) {
		if (event == THERMAL_THROTTLING_EVENT)
			state = &pstate->core_throttle;
		else if (event == POWER_LIMIT_EVENT)
			state = &pstate->core_power_limit;
		else
			return 0;
	} else if (level == PACKAGE_LEVEL) {
		if (event == THERMAL_THROTTLING_EVENT)
			state = &pstate->package_throttle;
		else if (event == POWER_LIMIT_EVENT)
			state = &pstate->package_power_limit;
		else
			return 0;
	} else
		return 0;

	old_event = state->new_event;
	state->new_event = new_event;

	if (new_event)
		state->count++;

	if (time_before64(now, state->next_check) &&
			state->count != state->last_count)
		return 0;

	state->next_check = now + CHECK_INTERVAL;
	state->last_count = state->count;

	/* if we just entered the thermal event */
	if (new_event) {
		if (event == THERMAL_THROTTLING_EVENT)
			printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n",
				this_cpu,
				level == CORE_LEVEL ? "Core" : "Package",
				state->count);
		else
			printk(KERN_CRIT "CPU%d: %s power limit notification (total events = %lu)\n",
				this_cpu,
				level == CORE_LEVEL ? "Core" : "Package",
				state->count);
		return 1;
	}
	if (old_event) {
		if (event == THERMAL_THROTTLING_EVENT)
			printk(KERN_INFO "CPU%d: %s temperature/speed normal\n",
				this_cpu,
				level == CORE_LEVEL ? "Core" : "Package");
		else
			printk(KERN_INFO "CPU%d: %s power limit normal\n",
				this_cpu,
				level == CORE_LEVEL ? "Core" : "Package");
		return 1;
	}

	return 0;
}
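
/*
 * Illustration (example output, not captured from a real system): when a
 * core first trips PROCHOT, therm_throt_process() logs
 *
 *	CPU2: Core temperature above threshold, cpu clock throttled (total events = 1)
 *
 * and further assertions within CHECK_INTERVAL (5 minutes) only bump the
 * counter without logging; when the event ends, an informational line
 * follows:
 *
 *	CPU2: Core temperature/speed normal
 */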

static int thresh_event_valid(int event)
{
	struct _thermal_state *state;
	unsigned int this_cpu = smp_processor_id();
	struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
	u64 now = get_jiffies_64();

	state = (event == 0) ? &pstate->core_thresh0 : &pstate->core_thresh1;

	if (time_before64(now, state->next_check))
		return 0;

	state->next_check = now + CHECK_INTERVAL;
	return 1;
}

#ifdef CONFIG_SYSFS
/* Add/Remove thermal_throttle interface for CPU device: */
static __cpuinit int thermal_throttle_add_dev(struct device *dev,
					      unsigned int cpu)
{
	int err;
	struct cpuinfo_x86 *c = &cpu_data(cpu);

	err = sysfs_create_group(&dev->kobj, &thermal_attr_group);
	if (err)
		return err;

	if (cpu_has(c, X86_FEATURE_PLN))
		err = sysfs_add_file_to_group(&dev->kobj,
					      &dev_attr_core_power_limit_count.attr,
					      thermal_attr_group.name);
	if (cpu_has(c, X86_FEATURE_PTS)) {
		err = sysfs_add_file_to_group(&dev->kobj,
					      &dev_attr_package_throttle_count.attr,
					      thermal_attr_group.name);
		if (cpu_has(c, X86_FEATURE_PLN))
			err = sysfs_add_file_to_group(&dev->kobj,
					&dev_attr_package_power_limit_count.attr,
					thermal_attr_group.name);
	}

	return err;
}

static __cpuinit void thermal_throttle_remove_dev(struct device *dev)
{
	sysfs_remove_group(&dev->kobj, &thermal_attr_group);
}

/* Mutex protecting device creation against CPU hotplug: */
static DEFINE_MUTEX(therm_cpu_lock);

/* Get notified when a cpu comes on/off. Be hotplug friendly. */
static __cpuinit int
thermal_throttle_cpu_callback(struct notifier_block *nfb,
			      unsigned long action,
			      void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct device *dev;
	int err = 0;

	dev = get_cpu_device(cpu);

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		mutex_lock(&therm_cpu_lock);
		err = thermal_throttle_add_dev(dev, cpu);
		mutex_unlock(&therm_cpu_lock);
		WARN_ON(err);
		break;
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		mutex_lock(&therm_cpu_lock);
		thermal_throttle_remove_dev(dev);
		mutex_unlock(&therm_cpu_lock);
		break;
	}
	return notifier_from_errno(err);
}

static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata =
{
	.notifier_call = thermal_throttle_cpu_callback,
};

static __init int thermal_throttle_init_device(void)
{
	unsigned int cpu = 0;
	int err;

	if (!atomic_read(&therm_throt_en))
		return 0;

	register_hotcpu_notifier(&thermal_throttle_cpu_notifier);

#ifdef CONFIG_HOTPLUG_CPU
	mutex_lock(&therm_cpu_lock);
#endif
	/* connect live CPUs to sysfs */
	for_each_online_cpu(cpu) {
		err = thermal_throttle_add_dev(get_cpu_device(cpu), cpu);
		WARN_ON(err);
	}
#ifdef CONFIG_HOTPLUG_CPU
	mutex_unlock(&therm_cpu_lock);
#endif

	return 0;
}
device_initcall(thermal_throttle_init_device);

#endif /* CONFIG_SYSFS */

static void notify_thresholds(__u64 msr_val)
{
	/* check whether the interrupt handler is defined;
	 * otherwise simply return
	 */
	if (!platform_thermal_notify)
		return;

	/* lower threshold reached */
	if ((msr_val & THERM_LOG_THRESHOLD0) && thresh_event_valid(0))
		platform_thermal_notify(msr_val);
	/* higher threshold reached */
	if ((msr_val & THERM_LOG_THRESHOLD1) && thresh_event_valid(1))
		platform_thermal_notify(msr_val);
}
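
/*
 * Usage sketch (hypothetical consumer, not part of this file): a platform
 * thermal driver can claim the threshold callback declared above:
 *
 *	static int my_thermal_notify(__u64 msr_val)
 *	{
 *		return my_driver_handle_thresholds(msr_val);
 *	}
 *	...
 *	platform_thermal_notify = my_thermal_notify;
 *
 * notify_thresholds() then invokes it from the thermal interrupt,
 * rate-limited per threshold bit by thresh_event_valid().
 */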

/* Thermal transition interrupt handler */
static void intel_thermal_interrupt(void)
{
	__u64 msr_val;

	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);

	/* Check for violation of core thermal thresholds */
	notify_thresholds(msr_val);

	if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
				THERMAL_THROTTLING_EVENT,
				CORE_LEVEL) != 0)
		mce_log_therm_throt_event(msr_val);

	if (this_cpu_has(X86_FEATURE_PLN))
		therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
					POWER_LIMIT_EVENT,
					CORE_LEVEL);

	if (this_cpu_has(X86_FEATURE_PTS)) {
		rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
		therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
					THERMAL_THROTTLING_EVENT,
					PACKAGE_LEVEL);
		if (this_cpu_has(X86_FEATURE_PLN))
			therm_throt_process(msr_val &
					PACKAGE_THERM_STATUS_POWER_LIMIT,
					POWER_LIMIT_EVENT,
					PACKAGE_LEVEL);
	}
}

static void unexpected_thermal_interrupt(void)
{
	printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n",
	       smp_processor_id());
}

static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;

asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
{
	irq_enter();
	exit_idle();
	inc_irq_stat(irq_thermal_count);
	smp_thermal_vector();
	irq_exit();
	/* Ack only at the end to avoid potential reentry */
	ack_APIC_irq();
}

/* Thermal monitoring depends on APIC, ACPI and clock modulation */
static int intel_thermal_supported(struct cpuinfo_x86 *c)
{
	if (!cpu_has_apic)
		return 0;
	if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
		return 0;
	return 1;
}

void __init mcheck_intel_therm_init(void)
{
	/*
	 * This function is only called on the boot CPU. Save the initial
	 * thermal LVT value on the BSP and use it later to restore the
	 * BIOS-programmed thermal LVT entry on the APs.
	 */
	if (intel_thermal_supported(&boot_cpu_data))
		lvtthmr_init = apic_read(APIC_LVTTHMR);
}
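
/*
 * Flow summary (restating the surrounding code): mcheck_intel_therm_init()
 * runs once on the BSP and snapshots APIC_LVTTHMR before the kernel touches
 * it; intel_init_thermal() below then runs on every CPU, uses that snapshot
 * to tell whether BIOS/SMM already owns the thermal interrupt, and installs
 * THERMAL_APIC_VECTOR only when it does not.
 */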

void intel_init_thermal(struct cpuinfo_x86 *c)
{
	unsigned int cpu = smp_processor_id();
	int tm2 = 0;
	u32 l, h;

	if (!intel_thermal_supported(c))
		return;

	/*
	 * First check if it's enabled already, in which case there might
	 * be some SMM goo which handles it, so we can't even put a handler
	 * since it might be delivered via SMI already:
	 */
	rdmsr(MSR_IA32_MISC_ENABLE, l, h);

	h = lvtthmr_init;
	/*
	 * The initial value of thermal LVT entries on all APs always reads
	 * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
	 * sequence to them and LVT registers are reset to 0s except for
	 * the mask bits which are set to 1s when APs receive INIT IPI.
	 * If BIOS takes over the thermal interrupt and sets its interrupt
	 * delivery mode to SMI (not fixed), it restores the value that the
	 * BIOS has programmed on AP based on BSP's info we saved since BIOS
	 * is always setting the same value for all threads/cores.
	 */
	if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
		apic_write(APIC_LVTTHMR, lvtthmr_init);

	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
		printk(KERN_DEBUG
		       "CPU%d: Thermal monitoring handled by SMI\n", cpu);
		return;
	}

	/* Check whether a vector already exists */
	if (h & APIC_VECTOR_MASK) {
		printk(KERN_DEBUG
		       "CPU%d: Thermal LVT vector (%#x) already installed\n",
		       cpu, (h & APIC_VECTOR_MASK));
		return;
	}

	/* early Pentium M models use a different method for enabling TM2 */
	if (cpu_has(c, X86_FEATURE_TM2)) {
		if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) {
			rdmsr(MSR_THERM2_CTL, l, h);
			if (l & MSR_THERM2_CTL_TM_SELECT)
				tm2 = 1;
		} else if (l & MSR_IA32_MISC_ENABLE_TM2)
			tm2 = 1;
	}

	/* We'll mask the thermal vector in the lapic till we're ready: */
	h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
	apic_write(APIC_LVTTHMR, h);

	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
	if (cpu_has(c, X86_FEATURE_PLN))
		wrmsr(MSR_IA32_THERM_INTERRUPT,
		      l | (THERM_INT_LOW_ENABLE
			| THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h);
	else
		wrmsr(MSR_IA32_THERM_INTERRUPT,
		      l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);

	if (cpu_has(c, X86_FEATURE_PTS)) {
		rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
		if (cpu_has(c, X86_FEATURE_PLN))
			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
			      l | (PACKAGE_THERM_INT_LOW_ENABLE
				| PACKAGE_THERM_INT_HIGH_ENABLE
				| PACKAGE_THERM_INT_PLN_ENABLE), h);
		else
			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
			      l | (PACKAGE_THERM_INT_LOW_ENABLE
				| PACKAGE_THERM_INT_HIGH_ENABLE), h);
	}

	smp_thermal_vector = intel_thermal_interrupt;

	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);

	/* Unmask the thermal vector: */
	l = apic_read(APIC_LVTTHMR);
	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);

	printk_once(KERN_INFO "CPU0: Thermal monitoring enabled (%s)\n",
		    tm2 ? "TM2" : "TM1");

	/* enable thermal throttle processing */
	atomic_set(&therm_throt_en, 1);
}
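
/*
 * Userspace view (illustrative example, values made up): once enabled, the
 * per-CPU counters exported above can be read directly, e.g.
 *
 *	$ cat /sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count
 *	12
 *
 * The count itself is never rate limited; only the syslog/mcelog reporting is.
 */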