cpufreq_ondemand.c revision 57df5573a56322e6895451f759c19e875252817d
/*
 * drivers/cpufreq/cpufreq_ondemand.c
 *
 * Copyright (C) 2001 Russell King
 * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 * Jun Nakajima <jun.nakajima@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
#include <linux/cpu.h>
#include <linux/jiffies.h>
#include <linux/kernel_stat.h>
#include <linux/mutex.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/ktime.h>
#include <linux/sched.h>

/*
 * dbs is used in this file as a short form for demand-based switching.
 * It helps to keep variable names smaller and simpler.
 */

#define DEF_FREQUENCY_DOWN_DIFFERENTIAL         (10)
#define DEF_FREQUENCY_UP_THRESHOLD              (80)
#define DEF_SAMPLING_DOWN_FACTOR                (1)
#define MAX_SAMPLING_DOWN_FACTOR                (100000)
#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL       (3)
#define MICRO_FREQUENCY_UP_THRESHOLD            (95)
#define MICRO_FREQUENCY_MIN_SAMPLE_RATE         (10000)
#define MIN_FREQUENCY_UP_THRESHOLD              (11)
#define MAX_FREQUENCY_UP_THRESHOLD              (100)

/*
 * The polling frequency of this governor depends on the capability of
 * the processor. The default polling interval is 1000 times the transition
 * latency of the processor. The governor will work on any processor with
 * transition latency <= 10 ms, using an appropriate sampling rate.
 * For CPUs with transition latency > 10 ms (mostly drivers with
 * CPUFREQ_ETERNAL) this governor will not work.
 * All times here are in us.
 */
#define MIN_SAMPLING_RATE_RATIO                 (2)

static unsigned int min_sampling_rate;

#define LATENCY_MULTIPLIER                      (1000)
#define MIN_LATENCY_MULTIPLIER                  (100)
#define TRANSITION_LATENCY_LIMIT                (10 * 1000 * 1000)

static void do_dbs_timer(struct work_struct *work);
static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
                                unsigned int event);

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
static
#endif
struct cpufreq_governor cpufreq_gov_ondemand = {
        .name                   = "ondemand",
        .governor               = cpufreq_governor_dbs,
        .max_transition_latency = TRANSITION_LATENCY_LIMIT,
        .owner                  = THIS_MODULE,
};

/* Sampling types */
enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};

struct cpu_dbs_info_s {
        cputime64_t prev_cpu_idle;
        cputime64_t prev_cpu_iowait;
        cputime64_t prev_cpu_wall;
        cputime64_t prev_cpu_nice;
        struct cpufreq_policy *cur_policy;
        struct delayed_work work;
        struct cpufreq_frequency_table *freq_table;
        unsigned int freq_lo;
        unsigned int freq_lo_jiffies;
        unsigned int freq_hi_jiffies;
        unsigned int rate_mult;
        int cpu;
        unsigned int sample_type:1;
        /*
         * percpu mutex that serializes governor limit change with
         * do_dbs_timer invocation. We do not want do_dbs_timer to run
         * when user is changing the governor or limits.
         */
        struct mutex timer_mutex;
};
static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info);

static unsigned int dbs_enable; /* number of CPUs using this policy */

/*
 * dbs_mutex protects data in dbs_tuners_ins from concurrent changes on
 * different CPUs. It protects dbs_enable in governor start/stop.
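 * (dbs_enable is incremented on CPUFREQ_GOV_START and decremented on
 * CPUFREQ_GOV_STOP below; the global sysfs group is created on the first
 * start and removed after the last stop based on this count.)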
 */
static DEFINE_MUTEX(dbs_mutex);

static struct dbs_tuners {
        unsigned int sampling_rate;
        unsigned int up_threshold;
        unsigned int down_differential;
        unsigned int ignore_nice;
        unsigned int sampling_down_factor;
        unsigned int powersave_bias;
        unsigned int io_is_busy;
} dbs_tuners_ins = {
        .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
        .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR,
        .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL,
        .ignore_nice = 0,
        .powersave_bias = 0,
};

static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
                                                  cputime64_t *wall)
{
        cputime64_t idle_time;
        cputime64_t cur_wall_time;
        cputime64_t busy_time;

        cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
        busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user,
                        kstat_cpu(cpu).cpustat.system);

        busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq);
        busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq);
        busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal);
        busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice);

        idle_time = cputime64_sub(cur_wall_time, busy_time);
        if (wall)
                *wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);

        return (cputime64_t)jiffies_to_usecs(idle_time);
}

static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
{
        u64 idle_time = get_cpu_idle_time_us(cpu, wall);

        if (idle_time == -1ULL)
                return get_cpu_idle_time_jiffy(cpu, wall);

        return idle_time;
}

static inline cputime64_t get_cpu_iowait_time(unsigned int cpu,
                                              cputime64_t *wall)
{
        u64 iowait_time = get_cpu_iowait_time_us(cpu, wall);

        if (iowait_time == -1ULL)
                return 0;

        return iowait_time;
}

/*
 * Find the right frequency to be set now with powersave_bias on.
 * Returns the freq_hi to be used right now and will set freq_hi_jiffies,
 * freq_lo, and freq_lo_jiffies in the percpu area for averaging freqs.
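 *
 * Worked example (illustrative numbers, not taken from real hardware):
 * with powersave_bias = 100 (i.e. 10%), a 2000 MHz request is biased to a
 * 1800 MHz average target. If the frequency table only has 1600 MHz and
 * 2000 MHz steps, the governor alternates between them, spending
 * (1800 - 1600) / (2000 - 1600) = 1/2 of each sampling period at 2000 MHz,
 * so the time-averaged frequency is about 1800 MHz.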
 */
static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
                                          unsigned int freq_next,
                                          unsigned int relation)
{
        unsigned int freq_req, freq_reduc, freq_avg;
        unsigned int freq_hi, freq_lo;
        unsigned int index = 0;
        unsigned int jiffies_total, jiffies_hi, jiffies_lo;
        struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
                                                   policy->cpu);

        if (!dbs_info->freq_table) {
                dbs_info->freq_lo = 0;
                dbs_info->freq_lo_jiffies = 0;
                return freq_next;
        }

        cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next,
                        relation, &index);
        freq_req = dbs_info->freq_table[index].frequency;
        freq_reduc = freq_req * dbs_tuners_ins.powersave_bias / 1000;
        freq_avg = freq_req - freq_reduc;

        /* Find freq bounds for freq_avg in freq_table */
        index = 0;
        cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
                        CPUFREQ_RELATION_H, &index);
        freq_lo = dbs_info->freq_table[index].frequency;
        index = 0;
        cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
                        CPUFREQ_RELATION_L, &index);
        freq_hi = dbs_info->freq_table[index].frequency;

        /* Find out how long we have to be in hi and lo freqs */
        if (freq_hi == freq_lo) {
                dbs_info->freq_lo = 0;
                dbs_info->freq_lo_jiffies = 0;
                return freq_lo;
        }
        jiffies_total = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
        jiffies_hi = (freq_avg - freq_lo) * jiffies_total;
        jiffies_hi += ((freq_hi - freq_lo) / 2);
        jiffies_hi /= (freq_hi - freq_lo);
        jiffies_lo = jiffies_total - jiffies_hi;
        dbs_info->freq_lo = freq_lo;
        dbs_info->freq_lo_jiffies = jiffies_lo;
        dbs_info->freq_hi_jiffies = jiffies_hi;
        return freq_hi;
}

static void ondemand_powersave_bias_init_cpu(int cpu)
{
        struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
        dbs_info->freq_table = cpufreq_frequency_get_table(cpu);
        dbs_info->freq_lo = 0;
}

static void ondemand_powersave_bias_init(void)
{
        int i;
        for_each_online_cpu(i) {
                ondemand_powersave_bias_init_cpu(i);
        }
}

/************************** sysfs interface ************************/

static ssize_t show_sampling_rate_max(struct kobject *kobj,
                                      struct attribute *attr, char *buf)
{
        printk_once(KERN_INFO "CPUFREQ: ondemand sampling_rate_max "
                    "sysfs file is deprecated - used by: %s\n", current->comm);
        return sprintf(buf, "%u\n", -1U);
}

static ssize_t show_sampling_rate_min(struct kobject *kobj,
                                      struct attribute *attr, char *buf)
{
        return sprintf(buf, "%u\n", min_sampling_rate);
}

define_one_global_ro(sampling_rate_max);
define_one_global_ro(sampling_rate_min);

/* cpufreq_ondemand Governor Tunables */
#define show_one(file_name, object)                                    \
static ssize_t show_##file_name                                        \
(struct kobject *kobj, struct attribute *attr, char *buf)              \
{                                                                      \
        return sprintf(buf, "%u\n", dbs_tuners_ins.object);            \
}
show_one(sampling_rate, sampling_rate);
show_one(io_is_busy, io_is_busy);
show_one(up_threshold, up_threshold);
show_one(sampling_down_factor, sampling_down_factor);
show_one(ignore_nice_load, ignore_nice);
show_one(powersave_bias, powersave_bias);

/*** delete after deprecation time ***/

#define DEPRECATION_MSG(file_name)                                     \
        printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs "      \
                    "interface is deprecated - " #file_name "\n");
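/*
 * Legacy per-policy copies of the tunables follow. Each read or write
 * warns once via printk_once() and then forwards to the corresponding
 * global show/store handler above.
 */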
#define show_one_old(file_name)                                        \
static ssize_t show_##file_name##_old                                  \
(struct cpufreq_policy *unused, char *buf)                             \
{                                                                      \
        printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs "      \
                    "interface is deprecated - " #file_name "\n");     \
        return show_##file_name(NULL, NULL, buf);                      \
}
show_one_old(sampling_rate);
show_one_old(up_threshold);
show_one_old(ignore_nice_load);
show_one_old(powersave_bias);
show_one_old(sampling_rate_min);
show_one_old(sampling_rate_max);

cpufreq_freq_attr_ro_old(sampling_rate_min);
cpufreq_freq_attr_ro_old(sampling_rate_max);

/*** delete after deprecation time ***/

static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b,
                                   const char *buf, size_t count)
{
        unsigned int input;
        int ret;
        ret = sscanf(buf, "%u", &input);
        if (ret != 1)
                return -EINVAL;

        mutex_lock(&dbs_mutex);
        dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate);
        mutex_unlock(&dbs_mutex);

        return count;
}

static ssize_t store_io_is_busy(struct kobject *a, struct attribute *b,
                                const char *buf, size_t count)
{
        unsigned int input;
        int ret;

        ret = sscanf(buf, "%u", &input);
        if (ret != 1)
                return -EINVAL;

        mutex_lock(&dbs_mutex);
        dbs_tuners_ins.io_is_busy = !!input;
        mutex_unlock(&dbs_mutex);

        return count;
}

static ssize_t store_up_threshold(struct kobject *a, struct attribute *b,
                                  const char *buf, size_t count)
{
        unsigned int input;
        int ret;
        ret = sscanf(buf, "%u", &input);

        if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD ||
            input < MIN_FREQUENCY_UP_THRESHOLD) {
                return -EINVAL;
        }

        mutex_lock(&dbs_mutex);
        dbs_tuners_ins.up_threshold = input;
        mutex_unlock(&dbs_mutex);

        return count;
}

static ssize_t store_sampling_down_factor(struct kobject *a,
                        struct attribute *b, const char *buf, size_t count)
{
        unsigned int input, j;
        int ret;
        ret = sscanf(buf, "%u", &input);

        if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
                return -EINVAL;
        mutex_lock(&dbs_mutex);
        dbs_tuners_ins.sampling_down_factor = input;

        /* Reset down sampling multiplier in case it was active */
        for_each_online_cpu(j) {
                struct cpu_dbs_info_s *dbs_info;
                dbs_info = &per_cpu(od_cpu_dbs_info, j);
                dbs_info->rate_mult = 1;
        }
        mutex_unlock(&dbs_mutex);

        return count;
}

static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
                                      const char *buf, size_t count)
{
        unsigned int input;
        unsigned int j;
        int ret;

        ret = sscanf(buf, "%u", &input);
        if (ret != 1)
                return -EINVAL;

        if (input > 1)
                input = 1;

        mutex_lock(&dbs_mutex);
        if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */
                mutex_unlock(&dbs_mutex);
                return count;
        }
        dbs_tuners_ins.ignore_nice = input;

        /* we need to re-evaluate prev_cpu_idle */
        for_each_online_cpu(j) {
                struct cpu_dbs_info_s *dbs_info;
                dbs_info = &per_cpu(od_cpu_dbs_info, j);
                dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
                                                &dbs_info->prev_cpu_wall);
                if (dbs_tuners_ins.ignore_nice)
                        dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;

        }
        mutex_unlock(&dbs_mutex);

        return count;
}
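/*
 * Note: powersave_bias is in units of 0.1%. Writes are clamped to
 * [0, 1000] below, so e.g. a value of 100 biases every requested
 * frequency down by 10% before powersave_bias_target() averages over
 * the two neighbouring table frequencies.
 */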
static ssize_t store_powersave_bias(struct kobject *a, struct attribute *b,
                                    const char *buf, size_t count)
{
        unsigned int input;
        int ret;
        ret = sscanf(buf, "%u", &input);

        if (ret != 1)
                return -EINVAL;

        if (input > 1000)
                input = 1000;

        mutex_lock(&dbs_mutex);
        dbs_tuners_ins.powersave_bias = input;
        ondemand_powersave_bias_init();
        mutex_unlock(&dbs_mutex);

        return count;
}

define_one_global_rw(sampling_rate);
define_one_global_rw(io_is_busy);
define_one_global_rw(up_threshold);
define_one_global_rw(sampling_down_factor);
define_one_global_rw(ignore_nice_load);
define_one_global_rw(powersave_bias);

static struct attribute *dbs_attributes[] = {
        &sampling_rate_max.attr,
        &sampling_rate_min.attr,
        &sampling_rate.attr,
        &up_threshold.attr,
        &sampling_down_factor.attr,
        &ignore_nice_load.attr,
        &powersave_bias.attr,
        &io_is_busy.attr,
        NULL
};

static struct attribute_group dbs_attr_group = {
        .attrs = dbs_attributes,
        .name = "ondemand",
};

/*** delete after deprecation time ***/

#define write_one_old(file_name)                                       \
static ssize_t store_##file_name##_old                                 \
(struct cpufreq_policy *unused, const char *buf, size_t count)         \
{                                                                      \
        printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs "      \
                    "interface is deprecated - " #file_name "\n");     \
        return store_##file_name(NULL, NULL, buf, count);              \
}
write_one_old(sampling_rate);
write_one_old(up_threshold);
write_one_old(ignore_nice_load);
write_one_old(powersave_bias);

cpufreq_freq_attr_rw_old(sampling_rate);
cpufreq_freq_attr_rw_old(up_threshold);
cpufreq_freq_attr_rw_old(ignore_nice_load);
cpufreq_freq_attr_rw_old(powersave_bias);

static struct attribute *dbs_attributes_old[] = {
        &sampling_rate_max_old.attr,
        &sampling_rate_min_old.attr,
        &sampling_rate_old.attr,
        &up_threshold_old.attr,
        &ignore_nice_load_old.attr,
        &powersave_bias_old.attr,
        NULL
};

static struct attribute_group dbs_attr_group_old = {
        .attrs = dbs_attributes_old,
        .name = "ondemand",
};

/*** delete after deprecation time ***/

/************************** sysfs end ************************/

static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq)
{
        if (dbs_tuners_ins.powersave_bias)
                freq = powersave_bias_target(p, freq, CPUFREQ_RELATION_H);
        else if (p->cur == p->max)
                return;

        __cpufreq_driver_target(p, freq, dbs_tuners_ins.powersave_bias ?
                        CPUFREQ_RELATION_L : CPUFREQ_RELATION_H);
}

static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
{
        unsigned int max_load_freq;

        struct cpufreq_policy *policy;
        unsigned int j;

        this_dbs_info->freq_lo = 0;
        policy = this_dbs_info->cur_policy;

        /*
         * Every sampling_rate, we check if the current idle time is less
         * than 20% (default); if it is, we try to increase the frequency.
         * Every sampling_rate, we also look for the lowest frequency which
         * can sustain the load while keeping idle time over 30%. If such a
         * frequency exists, we try to decrease to it.
         *
         * Any frequency increase takes the CPU to the maximum frequency;
         * frequency reduction happens in minimum steps of 5% (default)
         * of the current frequency.
         */
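        /*
         * Note on scale: the load computed below is accumulated as
         * (load% * frequency), so the threshold comparisons that follow
         * implicitly scale with the current frequency of the policy.
         */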
        /* Get Absolute Load - in terms of freq */
        max_load_freq = 0;

        for_each_cpu(j, policy->cpus) {
                struct cpu_dbs_info_s *j_dbs_info;
                cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time;
                unsigned int idle_time, wall_time, iowait_time;
                unsigned int load, load_freq;
                int freq_avg;

                j_dbs_info = &per_cpu(od_cpu_dbs_info, j);

                cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);
                cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time);

                wall_time = (unsigned int) cputime64_sub(cur_wall_time,
                                j_dbs_info->prev_cpu_wall);
                j_dbs_info->prev_cpu_wall = cur_wall_time;

                idle_time = (unsigned int) cputime64_sub(cur_idle_time,
                                j_dbs_info->prev_cpu_idle);
                j_dbs_info->prev_cpu_idle = cur_idle_time;

                iowait_time = (unsigned int) cputime64_sub(cur_iowait_time,
                                j_dbs_info->prev_cpu_iowait);
                j_dbs_info->prev_cpu_iowait = cur_iowait_time;

                if (dbs_tuners_ins.ignore_nice) {
                        cputime64_t cur_nice;
                        unsigned long cur_nice_jiffies;

                        cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice,
                                        j_dbs_info->prev_cpu_nice);
                        /*
                         * Assumption: nice time between sampling periods will
                         * be less than 2^32 jiffies on a 32-bit system
                         */
                        cur_nice_jiffies = (unsigned long)
                                        cputime64_to_jiffies64(cur_nice);

                        j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
                        idle_time += jiffies_to_usecs(cur_nice_jiffies);
                }

                /*
                 * For the purpose of ondemand, waiting for disk IO is an
                 * indication that you're performance critical, and not that
                 * the system is actually idle. So subtract the iowait time
                 * from the cpu idle time.
                 */

                if (dbs_tuners_ins.io_is_busy && idle_time >= iowait_time)
                        idle_time -= iowait_time;

                if (unlikely(!wall_time || wall_time < idle_time))
                        continue;

                load = 100 * (wall_time - idle_time) / wall_time;

                freq_avg = __cpufreq_driver_getavg(policy, j);
                if (freq_avg <= 0)
                        freq_avg = policy->cur;

                load_freq = load * freq_avg;
                if (load_freq > max_load_freq)
                        max_load_freq = load_freq;
        }

        /* Check for frequency increase */
        if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) {
                /* If switching to max speed, apply sampling_down_factor */
                if (policy->cur < policy->max)
                        this_dbs_info->rate_mult =
                                dbs_tuners_ins.sampling_down_factor;
                dbs_freq_increase(policy, policy->max);
                return;
        }

        /* Check for frequency decrease */
        /* if we cannot reduce the frequency anymore, break out early */
        if (policy->cur == policy->min)
                return;

        /*
         * The optimal frequency is the lowest frequency that can support
         * the current CPU usage without triggering the up policy. To be
         * safe, we target 10 points under the threshold.
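         *
         * Concretely, freq_next = max_load_freq /
         * (up_threshold - down_differential) below is the lowest frequency
         * at which the measured load would still sit down_differential
         * points below the up trigger.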
         */
        if (max_load_freq <
            (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) *
             policy->cur) {
                unsigned int freq_next;
                freq_next = max_load_freq /
                                (dbs_tuners_ins.up_threshold -
                                 dbs_tuners_ins.down_differential);

                /* No longer fully busy, reset rate_mult */
                this_dbs_info->rate_mult = 1;

                if (freq_next < policy->min)
                        freq_next = policy->min;

                if (!dbs_tuners_ins.powersave_bias) {
                        __cpufreq_driver_target(policy, freq_next,
                                        CPUFREQ_RELATION_L);
                } else {
                        int freq = powersave_bias_target(policy, freq_next,
                                        CPUFREQ_RELATION_L);
                        __cpufreq_driver_target(policy, freq,
                                        CPUFREQ_RELATION_L);
                }
        }
}

static void do_dbs_timer(struct work_struct *work)
{
        struct cpu_dbs_info_s *dbs_info =
                container_of(work, struct cpu_dbs_info_s, work.work);
        unsigned int cpu = dbs_info->cpu;
        int sample_type = dbs_info->sample_type;

        /* We want all CPUs to do sampling nearly on same jiffy */
        int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate
                * dbs_info->rate_mult);

        if (num_online_cpus() > 1)
                delay -= jiffies % delay;

        mutex_lock(&dbs_info->timer_mutex);

        /* Common NORMAL_SAMPLE setup */
        dbs_info->sample_type = DBS_NORMAL_SAMPLE;
        if (!dbs_tuners_ins.powersave_bias ||
            sample_type == DBS_NORMAL_SAMPLE) {
                dbs_check_cpu(dbs_info);
                if (dbs_info->freq_lo) {
                        /* Setup timer for SUB_SAMPLE */
                        dbs_info->sample_type = DBS_SUB_SAMPLE;
                        delay = dbs_info->freq_hi_jiffies;
                }
        } else {
                __cpufreq_driver_target(dbs_info->cur_policy,
                        dbs_info->freq_lo, CPUFREQ_RELATION_H);
        }
        schedule_delayed_work_on(cpu, &dbs_info->work, delay);
        mutex_unlock(&dbs_info->timer_mutex);
}

static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
{
        /* We want all CPUs to do sampling nearly on same jiffy */
        int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);

        if (num_online_cpus() > 1)
                delay -= jiffies % delay;

        dbs_info->sample_type = DBS_NORMAL_SAMPLE;
        INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
        schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay);
}

static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
{
        cancel_delayed_work_sync(&dbs_info->work);
}

/*
 * Not all CPUs want IO time to be accounted as busy; this depends on how
 * efficient idling at a higher frequency/voltage is.
 * Pavel Machek says this is not so for various generations of AMD and old
 * Intel systems.
 * Mike Chan (android.com) claims this is also not true for ARM.
 * Because of this, whitelist specific known CPU series by default, and
 * leave all others up to the user.
 */
static int should_io_be_busy(void)
{
#if defined(CONFIG_X86)
        /*
         * For Intel, Core 2 (model 15) and later have an efficient idle.
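         * (The check below matches family 6, model >= 15, i.e. Core 2 and
         * newer; earlier models keep the conservative default of treating
         * iowait as idle time.)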
         */
        if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
            boot_cpu_data.x86 == 6 &&
            boot_cpu_data.x86_model >= 15)
                return 1;
#endif
        return 0;
}

static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
                                unsigned int event)
{
        unsigned int cpu = policy->cpu;
        struct cpu_dbs_info_s *this_dbs_info;
        unsigned int j;
        int rc;

        this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu);

        switch (event) {
        case CPUFREQ_GOV_START:
                if ((!cpu_online(cpu)) || (!policy->cur))
                        return -EINVAL;

                mutex_lock(&dbs_mutex);

                rc = sysfs_create_group(&policy->kobj, &dbs_attr_group_old);
                if (rc) {
                        mutex_unlock(&dbs_mutex);
                        return rc;
                }

                dbs_enable++;
                for_each_cpu(j, policy->cpus) {
                        struct cpu_dbs_info_s *j_dbs_info;
                        j_dbs_info = &per_cpu(od_cpu_dbs_info, j);
                        j_dbs_info->cur_policy = policy;

                        j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
                                                &j_dbs_info->prev_cpu_wall);
                        if (dbs_tuners_ins.ignore_nice) {
                                j_dbs_info->prev_cpu_nice =
                                                kstat_cpu(j).cpustat.nice;
                        }
                }
                this_dbs_info->cpu = cpu;
                this_dbs_info->rate_mult = 1;
                ondemand_powersave_bias_init_cpu(cpu);
                /*
                 * Start the timer schedule work when this governor is
                 * used for the first time
                 */
                if (dbs_enable == 1) {
                        unsigned int latency;

                        rc = sysfs_create_group(cpufreq_global_kobject,
                                                &dbs_attr_group);
                        if (rc) {
                                mutex_unlock(&dbs_mutex);
                                return rc;
                        }

                        /* policy latency is in ns. Convert it to us first */
                        latency = policy->cpuinfo.transition_latency / 1000;
                        if (latency == 0)
                                latency = 1;
                        /* Bring kernel and HW constraints together */
                        min_sampling_rate = max(min_sampling_rate,
                                        MIN_LATENCY_MULTIPLIER * latency);
                        dbs_tuners_ins.sampling_rate =
                                max(min_sampling_rate,
                                    latency * LATENCY_MULTIPLIER);
                        dbs_tuners_ins.io_is_busy = should_io_be_busy();
                }
                mutex_unlock(&dbs_mutex);

                mutex_init(&this_dbs_info->timer_mutex);
                dbs_timer_init(this_dbs_info);
                break;

        case CPUFREQ_GOV_STOP:
                dbs_timer_exit(this_dbs_info);

                mutex_lock(&dbs_mutex);
                sysfs_remove_group(&policy->kobj, &dbs_attr_group_old);
                mutex_destroy(&this_dbs_info->timer_mutex);
                dbs_enable--;
                mutex_unlock(&dbs_mutex);
                if (!dbs_enable)
                        sysfs_remove_group(cpufreq_global_kobject,
                                           &dbs_attr_group);

                break;

        case CPUFREQ_GOV_LIMITS:
                mutex_lock(&this_dbs_info->timer_mutex);
                if (policy->max < this_dbs_info->cur_policy->cur)
                        __cpufreq_driver_target(this_dbs_info->cur_policy,
                                policy->max, CPUFREQ_RELATION_H);
                else if (policy->min > this_dbs_info->cur_policy->cur)
                        __cpufreq_driver_target(this_dbs_info->cur_policy,
                                policy->min, CPUFREQ_RELATION_L);
                mutex_unlock(&this_dbs_info->timer_mutex);
                break;
        }
        return 0;
}

static int __init cpufreq_gov_dbs_init(void)
{
        cputime64_t wall;
        u64 idle_time;
        int cpu = get_cpu();

        idle_time = get_cpu_idle_time_us(cpu, &wall);
        put_cpu();
        if (idle_time != -1ULL) {
                /* Idle micro accounting is supported. Use finer thresholds */
                dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD;
                dbs_tuners_ins.down_differential =
                                        MICRO_FREQUENCY_DOWN_DIFFERENTIAL;
                /*
                 * In the nohz/micro-accounting case we set the minimum
                 * sampling rate not depending on HZ, but to a fixed (very
                 * low) value. The deferrable timer may skip some samples
                 * while the CPU is idle/sleeping, as intended.
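                 * (MICRO_FREQUENCY_MIN_SAMPLE_RATE is 10000 us, i.e. a
                 * fixed 10 ms floor independent of the kernel HZ setting.)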
                 */
                min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE;
        } else {
                /* For correct statistics, we need 10 ticks for each measure */
                min_sampling_rate =
                        MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10);
        }

        return cpufreq_register_governor(&cpufreq_gov_ondemand);
}

static void __exit cpufreq_gov_dbs_exit(void)
{
        cpufreq_unregister_governor(&cpufreq_gov_ondemand);
}

MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>");
MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for "
        "Low Latency Frequency Transition capable processors");
MODULE_LICENSE("GPL");

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
fs_initcall(cpufreq_gov_dbs_init);
#else
module_init(cpufreq_gov_dbs_init);
#endif
module_exit(cpufreq_gov_dbs_exit);
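/*
 * Usage sketch (illustrative, not part of the driver): with this governor
 * active, the global tunables defined above typically appear under
 * /sys/devices/system/cpu/cpufreq/ondemand/, e.g.:
 *
 *   echo ondemand > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
 *   echo 95 > /sys/devices/system/cpu/cpufreq/ondemand/up_threshold
 *   echo 1  > /sys/devices/system/cpu/cpufreq/ondemand/io_is_busy
 */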