cpufreq_ondemand.c revision 6b8fcd9029f217a9ecce822db645e19111c11080
/*
 * drivers/cpufreq/cpufreq_ondemand.c
 *
 * Copyright (C) 2001 Russell King
 * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 * Jun Nakajima <jun.nakajima@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
#include <linux/cpu.h>
#include <linux/jiffies.h>
#include <linux/kernel_stat.h>
#include <linux/mutex.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/ktime.h>
#include <linux/sched.h>

/*
 * dbs is used in this file as a shorthand for demand-based switching.
 * It helps to keep variable names smaller and simpler.
 */

#define DEF_FREQUENCY_DOWN_DIFFERENTIAL		(10)
#define DEF_FREQUENCY_UP_THRESHOLD		(80)
#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL	(3)
#define MICRO_FREQUENCY_UP_THRESHOLD		(95)
#define MICRO_FREQUENCY_MIN_SAMPLE_RATE		(10000)
#define MIN_FREQUENCY_UP_THRESHOLD		(11)
#define MAX_FREQUENCY_UP_THRESHOLD		(100)

/*
 * The polling frequency of this governor depends on the capability of
 * the processor. The default polling frequency is 1000 times the
 * transition latency of the processor. The governor will work on any
 * processor with transition latency <= 10 ms, using an appropriate
 * sampling rate.
 * For CPUs with transition latency > 10 ms (mostly drivers with
 * CPUFREQ_ETERNAL) this governor will not work.
 * All times here are in us.
 */
#define MIN_SAMPLING_RATE_RATIO			(2)

static unsigned int min_sampling_rate;

#define LATENCY_MULTIPLIER			(1000)
#define MIN_LATENCY_MULTIPLIER			(100)
#define TRANSITION_LATENCY_LIMIT		(10 * 1000 * 1000)
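/*
 * Worked example (editor's illustration, not part of the original
 * source): a driver reporting transition_latency = 10,000 ns yields
 * latency = 10 us after the ns-to-us conversion in GOV_START below,
 * so the default sampling_rate becomes
 * 10 us * LATENCY_MULTIPLIER = 10,000 us (a 10 ms sampling period),
 * and min_sampling_rate is raised to at least
 * 10 us * MIN_LATENCY_MULTIPLIER = 1,000 us.
 */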
static void do_dbs_timer(struct work_struct *work);
static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
				unsigned int event);

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
static
#endif
struct cpufreq_governor cpufreq_gov_ondemand = {
	.name			= "ondemand",
	.governor		= cpufreq_governor_dbs,
	.max_transition_latency	= TRANSITION_LATENCY_LIMIT,
	.owner			= THIS_MODULE,
};

/* Sampling types */
enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};

struct cpu_dbs_info_s {
	cputime64_t prev_cpu_idle;
	cputime64_t prev_cpu_iowait;
	cputime64_t prev_cpu_wall;
	cputime64_t prev_cpu_nice;
	struct cpufreq_policy *cur_policy;
	struct delayed_work work;
	struct cpufreq_frequency_table *freq_table;
	unsigned int freq_lo;
	unsigned int freq_lo_jiffies;
	unsigned int freq_hi_jiffies;
	int cpu;
	unsigned int sample_type:1;
	/*
	 * percpu mutex that serializes governor limit change with
	 * do_dbs_timer invocation. We do not want do_dbs_timer to run
	 * when the user is changing the governor or limits.
	 */
	struct mutex timer_mutex;
};
static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info);

static unsigned int dbs_enable;	/* number of CPUs using this policy */

/*
 * dbs_mutex protects data in dbs_tuners_ins from concurrent changes on
 * different CPUs. It protects dbs_enable in governor start/stop.
 */
static DEFINE_MUTEX(dbs_mutex);

static struct workqueue_struct *kondemand_wq;

static struct dbs_tuners {
	unsigned int sampling_rate;
	unsigned int up_threshold;
	unsigned int down_differential;
	unsigned int ignore_nice;
	unsigned int powersave_bias;
} dbs_tuners_ins = {
	.up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
	.down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL,
	.ignore_nice = 0,
	.powersave_bias = 0,
};

static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
						  cputime64_t *wall)
{
	cputime64_t idle_time;
	cputime64_t cur_wall_time;
	cputime64_t busy_time;

	cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
	busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user,
			kstat_cpu(cpu).cpustat.system);

	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq);
	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq);
	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal);
	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice);

	idle_time = cputime64_sub(cur_wall_time, busy_time);
	if (wall)
		*wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);

	return (cputime64_t)jiffies_to_usecs(idle_time);
}

static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
{
	u64 idle_time = get_cpu_idle_time_us(cpu, wall);

	if (idle_time == -1ULL)
		return get_cpu_idle_time_jiffy(cpu, wall);

	return idle_time;
}

static inline cputime64_t get_cpu_iowait_time(unsigned int cpu, cputime64_t *wall)
{
	u64 iowait_time = get_cpu_iowait_time_us(cpu, wall);

	if (iowait_time == -1ULL)
		return 0;

	return iowait_time;
}
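/*
 * Editor's note (illustrative, not in the original file): on kernels
 * without NO_HZ idle-time accounting, get_cpu_idle_time_us() returns
 * -1ULL, so get_cpu_idle_time() above falls back to the coarser
 * jiffy-based estimate, e.g.:
 *
 *	cputime64_t wall;
 *	cputime64_t idle = get_cpu_idle_time(cpu, &wall);
 *
 * Both paths report idle and wall time in microseconds, so callers
 * need not care which accounting method was used.
 */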
/*
 * Find the right frequency to set now, with powersave_bias on.
 * Returns the freq_hi to be used right now and will set freq_hi_jiffies,
 * freq_lo, and freq_lo_jiffies in the percpu area for averaging freqs.
 */
static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
					  unsigned int freq_next,
					  unsigned int relation)
{
	unsigned int freq_req, freq_reduc, freq_avg;
	unsigned int freq_hi, freq_lo;
	unsigned int index = 0;
	unsigned int jiffies_total, jiffies_hi, jiffies_lo;
	struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
						   policy->cpu);

	if (!dbs_info->freq_table) {
		dbs_info->freq_lo = 0;
		dbs_info->freq_lo_jiffies = 0;
		return freq_next;
	}

	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next,
			relation, &index);
	freq_req = dbs_info->freq_table[index].frequency;
	freq_reduc = freq_req * dbs_tuners_ins.powersave_bias / 1000;
	freq_avg = freq_req - freq_reduc;

	/* Find freq bounds for freq_avg in freq_table */
	index = 0;
	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
			CPUFREQ_RELATION_H, &index);
	freq_lo = dbs_info->freq_table[index].frequency;
	index = 0;
	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
			CPUFREQ_RELATION_L, &index);
	freq_hi = dbs_info->freq_table[index].frequency;

	/* Find out how long we have to be in hi and lo freqs */
	if (freq_hi == freq_lo) {
		dbs_info->freq_lo = 0;
		dbs_info->freq_lo_jiffies = 0;
		return freq_lo;
	}
	jiffies_total = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
	jiffies_hi = (freq_avg - freq_lo) * jiffies_total;
	jiffies_hi += ((freq_hi - freq_lo) / 2);
	jiffies_hi /= (freq_hi - freq_lo);
	jiffies_lo = jiffies_total - jiffies_hi;
	dbs_info->freq_lo = freq_lo;
	dbs_info->freq_lo_jiffies = jiffies_lo;
	dbs_info->freq_hi_jiffies = jiffies_hi;
	return freq_hi;
}

static void ondemand_powersave_bias_init_cpu(int cpu)
{
	struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
	dbs_info->freq_table = cpufreq_frequency_get_table(cpu);
	dbs_info->freq_lo = 0;
}

static void ondemand_powersave_bias_init(void)
{
	int i;
	for_each_online_cpu(i) {
		ondemand_powersave_bias_init_cpu(i);
	}
}
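/*
 * Worked example (editor's illustration, not part of the original
 * source): assume powersave_bias = 100 (10%), a request for 2000 MHz,
 * and a frequency table containing 1600 MHz and 2000 MHz. Then
 * freq_reduc = 2000 * 100 / 1000 = 200, so freq_avg = 1800 MHz, which
 * sits between freq_lo = 1600 and freq_hi = 2000. The time split is
 *
 *	jiffies_hi = (1800 - 1600) * jiffies_total / (2000 - 1600)
 *		   = jiffies_total / 2,
 *
 * so the CPU alternates between 2000 MHz and 1600 MHz for equal halves
 * of each sampling period, averaging roughly 1800 MHz.
 */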
/************************** sysfs interface ************************/

static ssize_t show_sampling_rate_max(struct kobject *kobj,
				      struct attribute *attr, char *buf)
{
	printk_once(KERN_INFO "CPUFREQ: ondemand sampling_rate_max "
		    "sysfs file is deprecated - used by: %s\n", current->comm);
	return sprintf(buf, "%u\n", -1U);
}

static ssize_t show_sampling_rate_min(struct kobject *kobj,
				      struct attribute *attr, char *buf)
{
	return sprintf(buf, "%u\n", min_sampling_rate);
}

#define define_one_ro(_name)		\
static struct global_attr _name =	\
__ATTR(_name, 0444, show_##_name, NULL)

define_one_ro(sampling_rate_max);
define_one_ro(sampling_rate_min);

/* cpufreq_ondemand Governor Tunables */
#define show_one(file_name, object)					\
static ssize_t show_##file_name						\
(struct kobject *kobj, struct attribute *attr, char *buf)		\
{									\
	return sprintf(buf, "%u\n", dbs_tuners_ins.object);		\
}
show_one(sampling_rate, sampling_rate);
show_one(up_threshold, up_threshold);
show_one(ignore_nice_load, ignore_nice);
show_one(powersave_bias, powersave_bias);

/*** delete after deprecation time ***/

#define DEPRECATION_MSG(file_name)					\
	printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs "	\
		    "interface is deprecated - " #file_name "\n");

#define show_one_old(file_name)						\
static ssize_t show_##file_name##_old					\
(struct cpufreq_policy *unused, char *buf)				\
{									\
	printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs "	\
		    "interface is deprecated - " #file_name "\n");	\
	return show_##file_name(NULL, NULL, buf);			\
}
show_one_old(sampling_rate);
show_one_old(up_threshold);
show_one_old(ignore_nice_load);
show_one_old(powersave_bias);
show_one_old(sampling_rate_min);
show_one_old(sampling_rate_max);

#define define_one_ro_old(object, _name)	\
static struct freq_attr object =		\
__ATTR(_name, 0444, show_##_name##_old, NULL)

define_one_ro_old(sampling_rate_min_old, sampling_rate_min);
define_one_ro_old(sampling_rate_max_old, sampling_rate_max);

/*** delete after deprecation time ***/

static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b,
				   const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	mutex_lock(&dbs_mutex);
	dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate);
	mutex_unlock(&dbs_mutex);

	return count;
}

static ssize_t store_up_threshold(struct kobject *a, struct attribute *b,
				  const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD ||
	    input < MIN_FREQUENCY_UP_THRESHOLD) {
		return -EINVAL;
	}

	mutex_lock(&dbs_mutex);
	dbs_tuners_ins.up_threshold = input;
	mutex_unlock(&dbs_mutex);

	return count;
}

static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
				      const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	unsigned int j;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	if (input > 1)
		input = 1;

	mutex_lock(&dbs_mutex);
	if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */
		mutex_unlock(&dbs_mutex);
		return count;
	}
	dbs_tuners_ins.ignore_nice = input;

	/* we need to re-evaluate prev_cpu_idle */
	for_each_online_cpu(j) {
		struct cpu_dbs_info_s *dbs_info;
		dbs_info = &per_cpu(od_cpu_dbs_info, j);
		dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
						&dbs_info->prev_cpu_wall);
		if (dbs_tuners_ins.ignore_nice)
			dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;

	}
	mutex_unlock(&dbs_mutex);

	return count;
}

static ssize_t store_powersave_bias(struct kobject *a, struct attribute *b,
				    const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	if (ret != 1)
		return -EINVAL;

	if (input > 1000)
		input = 1000;

	mutex_lock(&dbs_mutex);
	dbs_tuners_ins.powersave_bias = input;
	ondemand_powersave_bias_init();
	mutex_unlock(&dbs_mutex);

	return count;
}

#define define_one_rw(_name)		\
static struct global_attr _name =	\
__ATTR(_name, 0644, show_##_name, store_##_name)

define_one_rw(sampling_rate);
define_one_rw(up_threshold);
define_one_rw(ignore_nice_load);
define_one_rw(powersave_bias);

static struct attribute *dbs_attributes[] = {
	&sampling_rate_max.attr,
	&sampling_rate_min.attr,
	&sampling_rate.attr,
	&up_threshold.attr,
	&ignore_nice_load.attr,
	&powersave_bias.attr,
	NULL
};

static struct attribute_group dbs_attr_group = {
	.attrs = dbs_attributes,
	.name = "ondemand",
};
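/*
 * Editor's note (illustrative usage, not in the original file): with
 * the group above registered on cpufreq_global_kobject, the tunables
 * typically appear under /sys/devices/system/cpu/cpufreq/ondemand/,
 * e.g.:
 *
 *	# echo 95 > /sys/devices/system/cpu/cpufreq/ondemand/up_threshold
 *	# echo 100 > /sys/devices/system/cpu/cpufreq/ondemand/powersave_bias
 *
 * Writes are validated and clamped by the store_* handlers above.
 */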
/*** delete after deprecation time ***/

#define write_one_old(file_name)					\
static ssize_t store_##file_name##_old					\
(struct cpufreq_policy *unused, const char *buf, size_t count)		\
{									\
	printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs "	\
		    "interface is deprecated - " #file_name "\n");	\
	return store_##file_name(NULL, NULL, buf, count);		\
}
write_one_old(sampling_rate);
write_one_old(up_threshold);
write_one_old(ignore_nice_load);
write_one_old(powersave_bias);

#define define_one_rw_old(object, _name)	\
static struct freq_attr object =		\
__ATTR(_name, 0644, show_##_name##_old, store_##_name##_old)

define_one_rw_old(sampling_rate_old, sampling_rate);
define_one_rw_old(up_threshold_old, up_threshold);
define_one_rw_old(ignore_nice_load_old, ignore_nice_load);
define_one_rw_old(powersave_bias_old, powersave_bias);

static struct attribute *dbs_attributes_old[] = {
	&sampling_rate_max_old.attr,
	&sampling_rate_min_old.attr,
	&sampling_rate_old.attr,
	&up_threshold_old.attr,
	&ignore_nice_load_old.attr,
	&powersave_bias_old.attr,
	NULL
};

static struct attribute_group dbs_attr_group_old = {
	.attrs = dbs_attributes_old,
	.name = "ondemand",
};

/*** delete after deprecation time ***/

/************************** sysfs end ************************/
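/*
 * Editor's note (illustrative, not in the original file): the
 * deprecated per-core group above is registered on each policy's
 * kobject, so the same tunables were historically visible as, e.g.,
 * /sys/devices/system/cpu/cpu0/cpufreq/ondemand/up_threshold; each
 * access logs a one-time deprecation warning via printk_once().
 */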
static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
{
	unsigned int max_load_freq;

	struct cpufreq_policy *policy;
	unsigned int j;

	this_dbs_info->freq_lo = 0;
	policy = this_dbs_info->cur_policy;

	/*
	 * Every sampling_rate we check whether the current idle time is
	 * less than 20% (default); if it is, we try to increase the
	 * frequency. Every sampling_rate we also look for the lowest
	 * frequency which can sustain the load while keeping idle time
	 * over 30%. If such a frequency exists, we try to decrease to
	 * this frequency.
	 *
	 * Any frequency increase takes it to the maximum frequency.
	 * Frequency reduction happens at minimum steps of
	 * 5% (default) of the current frequency.
	 */

	/* Get Absolute Load - in terms of freq */
	max_load_freq = 0;

	for_each_cpu(j, policy->cpus) {
		struct cpu_dbs_info_s *j_dbs_info;
		cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time;
		unsigned int idle_time, wall_time, iowait_time;
		unsigned int load, load_freq;
		int freq_avg;

		j_dbs_info = &per_cpu(od_cpu_dbs_info, j);

		cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);
		cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time);

		wall_time = (unsigned int) cputime64_sub(cur_wall_time,
				j_dbs_info->prev_cpu_wall);
		j_dbs_info->prev_cpu_wall = cur_wall_time;

		idle_time = (unsigned int) cputime64_sub(cur_idle_time,
				j_dbs_info->prev_cpu_idle);
		j_dbs_info->prev_cpu_idle = cur_idle_time;

		iowait_time = (unsigned int) cputime64_sub(cur_iowait_time,
				j_dbs_info->prev_cpu_iowait);
		j_dbs_info->prev_cpu_iowait = cur_iowait_time;

		if (dbs_tuners_ins.ignore_nice) {
			cputime64_t cur_nice;
			unsigned long cur_nice_jiffies;

			cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice,
					 j_dbs_info->prev_cpu_nice);
			/*
			 * Assumption: nice time between sampling periods will
			 * be less than 2^32 jiffies on a 32-bit system.
			 */
			cur_nice_jiffies = (unsigned long)
					cputime64_to_jiffies64(cur_nice);

			j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
			idle_time += jiffies_to_usecs(cur_nice_jiffies);
		}

		/*
		 * For the purpose of ondemand, waiting for disk IO is an
		 * indication that you're performance critical, and not that
		 * the system is actually idle. So subtract the iowait time
		 * from the cpu idle time.
		 */

		if (idle_time >= iowait_time)
			idle_time -= iowait_time;

		if (unlikely(!wall_time || wall_time < idle_time))
			continue;

		load = 100 * (wall_time - idle_time) / wall_time;

		freq_avg = __cpufreq_driver_getavg(policy, j);
		if (freq_avg <= 0)
			freq_avg = policy->cur;

		load_freq = load * freq_avg;
		if (load_freq > max_load_freq)
			max_load_freq = load_freq;
	}

	/* Check for frequency increase */
	if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) {
		/* if we are already at full speed then break out early */
		if (!dbs_tuners_ins.powersave_bias) {
			if (policy->cur == policy->max)
				return;

			__cpufreq_driver_target(policy, policy->max,
				CPUFREQ_RELATION_H);
		} else {
			int freq = powersave_bias_target(policy, policy->max,
					CPUFREQ_RELATION_H);
			__cpufreq_driver_target(policy, freq,
				CPUFREQ_RELATION_L);
		}
		return;
	}

	/* Check for frequency decrease */
	/* if we cannot reduce the frequency anymore, break out early */
	if (policy->cur == policy->min)
		return;

	/*
	 * The optimal frequency is the lowest frequency that can support
	 * the current CPU usage without triggering the up policy. To be
	 * safe, we keep 10 points under the threshold.
	 */
	if (max_load_freq <
	    (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) *
	     policy->cur) {
		unsigned int freq_next;
		freq_next = max_load_freq /
				(dbs_tuners_ins.up_threshold -
				 dbs_tuners_ins.down_differential);

		if (freq_next < policy->min)
			freq_next = policy->min;

		if (!dbs_tuners_ins.powersave_bias) {
			__cpufreq_driver_target(policy, freq_next,
					CPUFREQ_RELATION_L);
		} else {
			int freq = powersave_bias_target(policy, freq_next,
					CPUFREQ_RELATION_L);
			__cpufreq_driver_target(policy, freq,
				CPUFREQ_RELATION_L);
		}
	}
}
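/*
 * Worked example (editor's illustration, not part of the original
 * source): with up_threshold = 80, down_differential = 10 and
 * policy->cur = 2000 MHz, a per-CPU load of 90% at freq_avg = 2000
 * gives load_freq = 90 * 2000 = 180000 > 80 * 2000 = 160000, so the
 * governor jumps to policy->max. If load later drops to 30%,
 * load_freq = 60000 < (80 - 10) * 2000 = 140000, and the scale-down
 * target is freq_next = 60000 / 70 ~= 857 MHz, rounded to an
 * available table frequency by CPUFREQ_RELATION_L.
 */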
static void do_dbs_timer(struct work_struct *work)
{
	struct cpu_dbs_info_s *dbs_info =
		container_of(work, struct cpu_dbs_info_s, work.work);
	unsigned int cpu = dbs_info->cpu;
	int sample_type = dbs_info->sample_type;

	/* We want all CPUs to do sampling nearly on the same jiffy */
	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);

	delay -= jiffies % delay;
	mutex_lock(&dbs_info->timer_mutex);

	/* Common NORMAL_SAMPLE setup */
	dbs_info->sample_type = DBS_NORMAL_SAMPLE;
	if (!dbs_tuners_ins.powersave_bias ||
	    sample_type == DBS_NORMAL_SAMPLE) {
		dbs_check_cpu(dbs_info);
		if (dbs_info->freq_lo) {
			/* Setup timer for SUB_SAMPLE */
			dbs_info->sample_type = DBS_SUB_SAMPLE;
			delay = dbs_info->freq_hi_jiffies;
		}
	} else {
		__cpufreq_driver_target(dbs_info->cur_policy,
			dbs_info->freq_lo, CPUFREQ_RELATION_H);
	}
	queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay);
	mutex_unlock(&dbs_info->timer_mutex);
}

static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
{
	/* We want all CPUs to do sampling nearly on the same jiffy */
	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
	delay -= jiffies % delay;

	dbs_info->sample_type = DBS_NORMAL_SAMPLE;
	INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
	queue_delayed_work_on(dbs_info->cpu, kondemand_wq, &dbs_info->work,
		delay);
}

static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
{
	cancel_delayed_work_sync(&dbs_info->work);
}
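/*
 * Worked example (editor's illustration, not part of the original
 * source): with HZ = 1000 and sampling_rate = 10000 us, delay starts
 * as 10 jiffies. If the current jiffies value is, say, 1003, then
 * delay -= 1003 % 10 leaves delay = 7, so the work fires at jiffy
 * 1010, a multiple of the period. Since every CPU applies the same
 * rounding, their samples cluster on the same jiffy boundaries.
 */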
static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
				unsigned int event)
{
	unsigned int cpu = policy->cpu;
	struct cpu_dbs_info_s *this_dbs_info;
	unsigned int j;
	int rc;

	this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu);

	switch (event) {
	case CPUFREQ_GOV_START:
		if ((!cpu_online(cpu)) || (!policy->cur))
			return -EINVAL;

		mutex_lock(&dbs_mutex);

		rc = sysfs_create_group(&policy->kobj, &dbs_attr_group_old);
		if (rc) {
			mutex_unlock(&dbs_mutex);
			return rc;
		}

		dbs_enable++;
		for_each_cpu(j, policy->cpus) {
			struct cpu_dbs_info_s *j_dbs_info;
			j_dbs_info = &per_cpu(od_cpu_dbs_info, j);
			j_dbs_info->cur_policy = policy;

			j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
						&j_dbs_info->prev_cpu_wall);
			if (dbs_tuners_ins.ignore_nice) {
				j_dbs_info->prev_cpu_nice =
						kstat_cpu(j).cpustat.nice;
			}
		}
		this_dbs_info->cpu = cpu;
		ondemand_powersave_bias_init_cpu(cpu);
		/*
		 * Start the timer schedule work when this governor is used
		 * for the first time.
		 */
		if (dbs_enable == 1) {
			unsigned int latency;

			rc = sysfs_create_group(cpufreq_global_kobject,
						&dbs_attr_group);
			if (rc) {
				mutex_unlock(&dbs_mutex);
				return rc;
			}

			/* policy latency is in ns. Convert it to us first */
			latency = policy->cpuinfo.transition_latency / 1000;
			if (latency == 0)
				latency = 1;
			/* Bring kernel and HW constraints together */
			min_sampling_rate = max(min_sampling_rate,
					MIN_LATENCY_MULTIPLIER * latency);
			dbs_tuners_ins.sampling_rate =
				max(min_sampling_rate,
				    latency * LATENCY_MULTIPLIER);
		}
		mutex_unlock(&dbs_mutex);

		mutex_init(&this_dbs_info->timer_mutex);
		dbs_timer_init(this_dbs_info);
		break;

	case CPUFREQ_GOV_STOP:
		dbs_timer_exit(this_dbs_info);

		mutex_lock(&dbs_mutex);
		sysfs_remove_group(&policy->kobj, &dbs_attr_group_old);
		mutex_destroy(&this_dbs_info->timer_mutex);
		dbs_enable--;
		mutex_unlock(&dbs_mutex);
		if (!dbs_enable)
			sysfs_remove_group(cpufreq_global_kobject,
					   &dbs_attr_group);

		break;

	case CPUFREQ_GOV_LIMITS:
		mutex_lock(&this_dbs_info->timer_mutex);
		if (policy->max < this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(this_dbs_info->cur_policy,
				policy->max, CPUFREQ_RELATION_H);
		else if (policy->min > this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(this_dbs_info->cur_policy,
				policy->min, CPUFREQ_RELATION_L);
		mutex_unlock(&this_dbs_info->timer_mutex);
		break;
	}
	return 0;
}

static int __init cpufreq_gov_dbs_init(void)
{
	int err;
	cputime64_t wall;
	u64 idle_time;
	int cpu = get_cpu();

	idle_time = get_cpu_idle_time_us(cpu, &wall);
	put_cpu();
	if (idle_time != -1ULL) {
		/* Idle micro accounting is supported. Use finer thresholds */
		dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD;
		dbs_tuners_ins.down_differential =
					MICRO_FREQUENCY_DOWN_DIFFERENTIAL;
		/*
		 * In the nohz/micro-accounting case we set the minimum
		 * sampling rate to a fixed (very low) value that does not
		 * depend on HZ. The deferrable timer may skip some samples
		 * while idle/sleeping, as intended.
		 */
		min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE;
	} else {
		/* For correct statistics, we need 10 ticks for each measure */
		min_sampling_rate =
			MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10);
	}

	kondemand_wq = create_workqueue("kondemand");
	if (!kondemand_wq) {
		printk(KERN_ERR "Creation of kondemand failed\n");
		return -EFAULT;
	}
	err = cpufreq_register_governor(&cpufreq_gov_ondemand);
	if (err)
		destroy_workqueue(kondemand_wq);

	return err;
}

static void __exit cpufreq_gov_dbs_exit(void)
{
	cpufreq_unregister_governor(&cpufreq_gov_ondemand);
	destroy_workqueue(kondemand_wq);
}


MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>");
MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for "
	"Low Latency Frequency Transition capable processors");
MODULE_LICENSE("GPL");

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
fs_initcall(cpufreq_gov_dbs_init);
#else
module_init(cpufreq_gov_dbs_init);
#endif
module_exit(cpufreq_gov_dbs_exit);
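/*
 * Editor's note (illustrative usage, not in the original file):
 * selecting this governor for a CPU from userspace, assuming it is
 * built as a module:
 *
 *	# modprobe cpufreq_ondemand
 *	# echo ondemand > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
 *
 * The write triggers CPUFREQ_GOV_START above, which registers the
 * sysfs tunables and arms the per-CPU sampling timer.
 */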