cpufreq_conservative.c revision 2c906b317b2d9c7e32b0d513e102bd68a2c49112
1/* 2 * drivers/cpufreq/cpufreq_conservative.c 3 * 4 * Copyright (C) 2001 Russell King 5 * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>. 6 * Jun Nakajima <jun.nakajima@intel.com> 7 * (C) 2004 Alexander Clouter <alex-kernel@digriz.org.uk> 8 * 9 * This program is free software; you can redistribute it and/or modify 10 * it under the terms of the GNU General Public License version 2 as 11 * published by the Free Software Foundation. 12 */ 13 14#include <linux/kernel.h> 15#include <linux/module.h> 16#include <linux/smp.h> 17#include <linux/init.h> 18#include <linux/interrupt.h> 19#include <linux/ctype.h> 20#include <linux/cpufreq.h> 21#include <linux/sysctl.h> 22#include <linux/types.h> 23#include <linux/fs.h> 24#include <linux/sysfs.h> 25#include <linux/sched.h> 26#include <linux/kmod.h> 27#include <linux/workqueue.h> 28#include <linux/jiffies.h> 29#include <linux/kernel_stat.h> 30#include <linux/percpu.h> 31#include <linux/mutex.h> 32/* 33 * dbs is used in this file as a shortform for demandbased switching 34 * It helps to keep variable names smaller, simpler 35 */ 36 37#define DEF_FREQUENCY_UP_THRESHOLD (80) 38#define DEF_FREQUENCY_DOWN_THRESHOLD (20) 39 40/* 41 * The polling frequency of this governor depends on the capability of 42 * the processor. Default polling frequency is 1000 times the transition 43 * latency of the processor. The governor will work on any processor with 44 * transition latency <= 10mS, using appropriate sampling 45 * rate. 46 * For CPUs with transition latency > 10mS (mostly drivers with CPUFREQ_ETERNAL) 47 * this governor will not work. 48 * All times here are in uS. 49 */ 50static unsigned int def_sampling_rate; 51#define MIN_SAMPLING_RATE_RATIO (2) 52/* for correct statistics, we need at least 10 ticks between each measure */ 53#define MIN_STAT_SAMPLING_RATE (MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10)) 54#define MIN_SAMPLING_RATE (def_sampling_rate / MIN_SAMPLING_RATE_RATIO) 55#define MAX_SAMPLING_RATE (500 * def_sampling_rate) 56#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER (1000) 57#define DEF_SAMPLING_DOWN_FACTOR (1) 58#define MAX_SAMPLING_DOWN_FACTOR (10) 59#define TRANSITION_LATENCY_LIMIT (10 * 1000) 60 61static void do_dbs_timer(void *data); 62 63struct cpu_dbs_info_s { 64 struct cpufreq_policy *cur_policy; 65 unsigned int prev_cpu_idle_up; 66 unsigned int prev_cpu_idle_down; 67 unsigned int enable; 68}; 69static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info); 70 71static unsigned int dbs_enable; /* number of CPUs using this policy */ 72 73static DEFINE_MUTEX (dbs_mutex); 74static DECLARE_WORK (dbs_work, do_dbs_timer, NULL); 75 76struct dbs_tuners { 77 unsigned int sampling_rate; 78 unsigned int sampling_down_factor; 79 unsigned int up_threshold; 80 unsigned int down_threshold; 81 unsigned int ignore_nice; 82 unsigned int freq_step; 83}; 84 85static struct dbs_tuners dbs_tuners_ins = { 86 .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, 87 .down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD, 88 .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, 89}; 90 91static inline unsigned int get_cpu_idle_time(unsigned int cpu) 92{ 93 return kstat_cpu(cpu).cpustat.idle + 94 kstat_cpu(cpu).cpustat.iowait + 95 ( dbs_tuners_ins.ignore_nice ? 96 kstat_cpu(cpu).cpustat.nice : 97 0); 98} 99 100/************************** sysfs interface ************************/ 101static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf) 102{ 103 return sprintf (buf, "%u\n", MAX_SAMPLING_RATE); 104} 105 106static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf) 107{ 108 return sprintf (buf, "%u\n", MIN_SAMPLING_RATE); 109} 110 111#define define_one_ro(_name) \ 112static struct freq_attr _name = \ 113__ATTR(_name, 0444, show_##_name, NULL) 114 115define_one_ro(sampling_rate_max); 116define_one_ro(sampling_rate_min); 117 118/* cpufreq_conservative Governor Tunables */ 119#define show_one(file_name, object) \ 120static ssize_t show_##file_name \ 121(struct cpufreq_policy *unused, char *buf) \ 122{ \ 123 return sprintf(buf, "%u\n", dbs_tuners_ins.object); \ 124} 125show_one(sampling_rate, sampling_rate); 126show_one(sampling_down_factor, sampling_down_factor); 127show_one(up_threshold, up_threshold); 128show_one(down_threshold, down_threshold); 129show_one(ignore_nice_load, ignore_nice); 130show_one(freq_step, freq_step); 131 132static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused, 133 const char *buf, size_t count) 134{ 135 unsigned int input; 136 int ret; 137 ret = sscanf (buf, "%u", &input); 138 if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) 139 return -EINVAL; 140 141 mutex_lock(&dbs_mutex); 142 dbs_tuners_ins.sampling_down_factor = input; 143 mutex_unlock(&dbs_mutex); 144 145 return count; 146} 147 148static ssize_t store_sampling_rate(struct cpufreq_policy *unused, 149 const char *buf, size_t count) 150{ 151 unsigned int input; 152 int ret; 153 ret = sscanf (buf, "%u", &input); 154 155 mutex_lock(&dbs_mutex); 156 if (ret != 1 || input > MAX_SAMPLING_RATE || input < MIN_SAMPLING_RATE) { 157 mutex_unlock(&dbs_mutex); 158 return -EINVAL; 159 } 160 161 dbs_tuners_ins.sampling_rate = input; 162 mutex_unlock(&dbs_mutex); 163 164 return count; 165} 166 167static ssize_t store_up_threshold(struct cpufreq_policy *unused, 168 const char *buf, size_t count) 169{ 170 unsigned int input; 171 int ret; 172 ret = sscanf (buf, "%u", &input); 173 174 mutex_lock(&dbs_mutex); 175 if (ret != 1 || input > 100 || input < 0 || 176 input <= dbs_tuners_ins.down_threshold) { 177 mutex_unlock(&dbs_mutex); 178 return -EINVAL; 179 } 180 181 dbs_tuners_ins.up_threshold = input; 182 mutex_unlock(&dbs_mutex); 183 184 return count; 185} 186 187static ssize_t store_down_threshold(struct cpufreq_policy *unused, 188 const char *buf, size_t count) 189{ 190 unsigned int input; 191 int ret; 192 ret = sscanf (buf, "%u", &input); 193 194 mutex_lock(&dbs_mutex); 195 if (ret != 1 || input > 100 || input < 0 || 196 input >= dbs_tuners_ins.up_threshold) { 197 mutex_unlock(&dbs_mutex); 198 return -EINVAL; 199 } 200 201 dbs_tuners_ins.down_threshold = input; 202 mutex_unlock(&dbs_mutex); 203 204 return count; 205} 206 207static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy, 208 const char *buf, size_t count) 209{ 210 unsigned int input; 211 int ret; 212 213 unsigned int j; 214 215 ret = sscanf (buf, "%u", &input); 216 if ( ret != 1 ) 217 return -EINVAL; 218 219 if ( input > 1 ) 220 input = 1; 221 222 mutex_lock(&dbs_mutex); 223 if ( input == dbs_tuners_ins.ignore_nice ) { /* nothing to do */ 224 mutex_unlock(&dbs_mutex); 225 return count; 226 } 227 dbs_tuners_ins.ignore_nice = input; 228 229 /* we need to re-evaluate prev_cpu_idle_up and prev_cpu_idle_down */ 230 for_each_online_cpu(j) { 231 struct cpu_dbs_info_s *j_dbs_info; 232 j_dbs_info = &per_cpu(cpu_dbs_info, j); 233 j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j); 234 j_dbs_info->prev_cpu_idle_down = j_dbs_info->prev_cpu_idle_up; 235 } 236 mutex_unlock(&dbs_mutex); 237 238 return count; 239} 240 241static ssize_t store_freq_step(struct cpufreq_policy *policy, 242 const char *buf, size_t count) 243{ 244 unsigned int input; 245 int ret; 246 247 ret = sscanf (buf, "%u", &input); 248 249 if ( ret != 1 ) 250 return -EINVAL; 251 252 if ( input > 100 ) 253 input = 100; 254 255 /* no need to test here if freq_step is zero as the user might actually 256 * want this, they would be crazy though :) */ 257 mutex_lock(&dbs_mutex); 258 dbs_tuners_ins.freq_step = input; 259 mutex_unlock(&dbs_mutex); 260 261 return count; 262} 263 264#define define_one_rw(_name) \ 265static struct freq_attr _name = \ 266__ATTR(_name, 0644, show_##_name, store_##_name) 267 268define_one_rw(sampling_rate); 269define_one_rw(sampling_down_factor); 270define_one_rw(up_threshold); 271define_one_rw(down_threshold); 272define_one_rw(ignore_nice_load); 273define_one_rw(freq_step); 274 275static struct attribute * dbs_attributes[] = { 276 &sampling_rate_max.attr, 277 &sampling_rate_min.attr, 278 &sampling_rate.attr, 279 &sampling_down_factor.attr, 280 &up_threshold.attr, 281 &down_threshold.attr, 282 &ignore_nice_load.attr, 283 &freq_step.attr, 284 NULL 285}; 286 287static struct attribute_group dbs_attr_group = { 288 .attrs = dbs_attributes, 289 .name = "conservative", 290}; 291 292/************************** sysfs end ************************/ 293 294static void dbs_check_cpu(int cpu) 295{ 296 unsigned int idle_ticks, up_idle_ticks, down_idle_ticks; 297 unsigned int freq_step; 298 unsigned int freq_down_sampling_rate; 299 static int down_skip[NR_CPUS]; 300 static int requested_freq[NR_CPUS]; 301 static unsigned short init_flag = 0; 302 struct cpu_dbs_info_s *this_dbs_info; 303 struct cpu_dbs_info_s *dbs_info; 304 305 struct cpufreq_policy *policy; 306 unsigned int j; 307 308 this_dbs_info = &per_cpu(cpu_dbs_info, cpu); 309 if (!this_dbs_info->enable) 310 return; 311 312 policy = this_dbs_info->cur_policy; 313 314 if ( init_flag == 0 ) { 315 for_each_online_cpu(j) { 316 dbs_info = &per_cpu(cpu_dbs_info, j); 317 requested_freq[j] = dbs_info->cur_policy->cur; 318 } 319 init_flag = 1; 320 } 321 322 /* 323 * The default safe range is 20% to 80% 324 * Every sampling_rate, we check 325 * - If current idle time is less than 20%, then we try to 326 * increase frequency 327 * Every sampling_rate*sampling_down_factor, we check 328 * - If current idle time is more than 80%, then we try to 329 * decrease frequency 330 * 331 * Any frequency increase takes it to the maximum frequency. 332 * Frequency reduction happens at minimum steps of 333 * 5% (default) of max_frequency 334 */ 335 336 /* Check for frequency increase */ 337 idle_ticks = UINT_MAX; 338 for_each_cpu_mask(j, policy->cpus) { 339 unsigned int tmp_idle_ticks, total_idle_ticks; 340 struct cpu_dbs_info_s *j_dbs_info; 341 342 j_dbs_info = &per_cpu(cpu_dbs_info, j); 343 /* Check for frequency increase */ 344 total_idle_ticks = get_cpu_idle_time(j); 345 tmp_idle_ticks = total_idle_ticks - 346 j_dbs_info->prev_cpu_idle_up; 347 j_dbs_info->prev_cpu_idle_up = total_idle_ticks; 348 349 if (tmp_idle_ticks < idle_ticks) 350 idle_ticks = tmp_idle_ticks; 351 } 352 353 /* Scale idle ticks by 100 and compare with up and down ticks */ 354 idle_ticks *= 100; 355 up_idle_ticks = (100 - dbs_tuners_ins.up_threshold) * 356 usecs_to_jiffies(dbs_tuners_ins.sampling_rate); 357 358 if (idle_ticks < up_idle_ticks) { 359 down_skip[cpu] = 0; 360 for_each_cpu_mask(j, policy->cpus) { 361 struct cpu_dbs_info_s *j_dbs_info; 362 363 j_dbs_info = &per_cpu(cpu_dbs_info, j); 364 j_dbs_info->prev_cpu_idle_down = 365 j_dbs_info->prev_cpu_idle_up; 366 } 367 /* if we are already at full speed then break out early */ 368 if (requested_freq[cpu] == policy->max) 369 return; 370 371 freq_step = (dbs_tuners_ins.freq_step * policy->max) / 100; 372 373 /* max freq cannot be less than 100. But who knows.... */ 374 if (unlikely(freq_step == 0)) 375 freq_step = 5; 376 377 requested_freq[cpu] += freq_step; 378 if (requested_freq[cpu] > policy->max) 379 requested_freq[cpu] = policy->max; 380 381 __cpufreq_driver_target(policy, requested_freq[cpu], 382 CPUFREQ_RELATION_H); 383 return; 384 } 385 386 /* Check for frequency decrease */ 387 down_skip[cpu]++; 388 if (down_skip[cpu] < dbs_tuners_ins.sampling_down_factor) 389 return; 390 391 idle_ticks = UINT_MAX; 392 for_each_cpu_mask(j, policy->cpus) { 393 unsigned int tmp_idle_ticks, total_idle_ticks; 394 struct cpu_dbs_info_s *j_dbs_info; 395 396 j_dbs_info = &per_cpu(cpu_dbs_info, j); 397 /* Check for frequency decrease */ 398 total_idle_ticks = j_dbs_info->prev_cpu_idle_up; 399 tmp_idle_ticks = total_idle_ticks - 400 j_dbs_info->prev_cpu_idle_down; 401 j_dbs_info->prev_cpu_idle_down = total_idle_ticks; 402 403 if (tmp_idle_ticks < idle_ticks) 404 idle_ticks = tmp_idle_ticks; 405 } 406 407 /* Scale idle ticks by 100 and compare with up and down ticks */ 408 idle_ticks *= 100; 409 down_skip[cpu] = 0; 410 411 freq_down_sampling_rate = dbs_tuners_ins.sampling_rate * 412 dbs_tuners_ins.sampling_down_factor; 413 down_idle_ticks = (100 - dbs_tuners_ins.down_threshold) * 414 usecs_to_jiffies(freq_down_sampling_rate); 415 416 if (idle_ticks > down_idle_ticks) { 417 /* 418 * if we are already at the lowest speed then break out early 419 * or if we 'cannot' reduce the speed as the user might want 420 * freq_step to be zero 421 */ 422 if (requested_freq[cpu] == policy->min 423 || dbs_tuners_ins.freq_step == 0) 424 return; 425 426 freq_step = (dbs_tuners_ins.freq_step * policy->max) / 100; 427 428 /* max freq cannot be less than 100. But who knows.... */ 429 if (unlikely(freq_step == 0)) 430 freq_step = 5; 431 432 requested_freq[cpu] -= freq_step; 433 if (requested_freq[cpu] < policy->min) 434 requested_freq[cpu] = policy->min; 435 436 __cpufreq_driver_target(policy, requested_freq[cpu], 437 CPUFREQ_RELATION_H); 438 return; 439 } 440} 441 442static void do_dbs_timer(void *data) 443{ 444 int i; 445 mutex_lock(&dbs_mutex); 446 for_each_online_cpu(i) 447 dbs_check_cpu(i); 448 schedule_delayed_work(&dbs_work, 449 usecs_to_jiffies(dbs_tuners_ins.sampling_rate)); 450 mutex_unlock(&dbs_mutex); 451} 452 453static inline void dbs_timer_init(void) 454{ 455 INIT_WORK(&dbs_work, do_dbs_timer, NULL); 456 schedule_delayed_work(&dbs_work, 457 usecs_to_jiffies(dbs_tuners_ins.sampling_rate)); 458 return; 459} 460 461static inline void dbs_timer_exit(void) 462{ 463 cancel_delayed_work(&dbs_work); 464 return; 465} 466 467static int cpufreq_governor_dbs(struct cpufreq_policy *policy, 468 unsigned int event) 469{ 470 unsigned int cpu = policy->cpu; 471 struct cpu_dbs_info_s *this_dbs_info; 472 unsigned int j; 473 474 this_dbs_info = &per_cpu(cpu_dbs_info, cpu); 475 476 switch (event) { 477 case CPUFREQ_GOV_START: 478 if ((!cpu_online(cpu)) || 479 (!policy->cur)) 480 return -EINVAL; 481 482 if (policy->cpuinfo.transition_latency > 483 (TRANSITION_LATENCY_LIMIT * 1000)) 484 return -EINVAL; 485 if (this_dbs_info->enable) /* Already enabled */ 486 break; 487 488 mutex_lock(&dbs_mutex); 489 for_each_cpu_mask(j, policy->cpus) { 490 struct cpu_dbs_info_s *j_dbs_info; 491 j_dbs_info = &per_cpu(cpu_dbs_info, j); 492 j_dbs_info->cur_policy = policy; 493 494 j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j); 495 j_dbs_info->prev_cpu_idle_down 496 = j_dbs_info->prev_cpu_idle_up; 497 } 498 this_dbs_info->enable = 1; 499 sysfs_create_group(&policy->kobj, &dbs_attr_group); 500 dbs_enable++; 501 /* 502 * Start the timerschedule work, when this governor 503 * is used for first time 504 */ 505 if (dbs_enable == 1) { 506 unsigned int latency; 507 /* policy latency is in nS. Convert it to uS first */ 508 latency = policy->cpuinfo.transition_latency / 1000; 509 if (latency == 0) 510 latency = 1; 511 512 def_sampling_rate = latency * 513 DEF_SAMPLING_RATE_LATENCY_MULTIPLIER; 514 515 if (def_sampling_rate < MIN_STAT_SAMPLING_RATE) 516 def_sampling_rate = MIN_STAT_SAMPLING_RATE; 517 518 dbs_tuners_ins.sampling_rate = def_sampling_rate; 519 dbs_tuners_ins.ignore_nice = 0; 520 dbs_tuners_ins.freq_step = 5; 521 522 dbs_timer_init(); 523 } 524 525 mutex_unlock(&dbs_mutex); 526 break; 527 528 case CPUFREQ_GOV_STOP: 529 mutex_lock(&dbs_mutex); 530 this_dbs_info->enable = 0; 531 sysfs_remove_group(&policy->kobj, &dbs_attr_group); 532 dbs_enable--; 533 /* 534 * Stop the timerschedule work, when this governor 535 * is used for first time 536 */ 537 if (dbs_enable == 0) 538 dbs_timer_exit(); 539 540 mutex_unlock(&dbs_mutex); 541 542 break; 543 544 case CPUFREQ_GOV_LIMITS: 545 mutex_lock(&dbs_mutex); 546 if (policy->max < this_dbs_info->cur_policy->cur) 547 __cpufreq_driver_target( 548 this_dbs_info->cur_policy, 549 policy->max, CPUFREQ_RELATION_H); 550 else if (policy->min > this_dbs_info->cur_policy->cur) 551 __cpufreq_driver_target( 552 this_dbs_info->cur_policy, 553 policy->min, CPUFREQ_RELATION_L); 554 mutex_unlock(&dbs_mutex); 555 break; 556 } 557 return 0; 558} 559 560static struct cpufreq_governor cpufreq_gov_dbs = { 561 .name = "conservative", 562 .governor = cpufreq_governor_dbs, 563 .owner = THIS_MODULE, 564}; 565 566static int __init cpufreq_gov_dbs_init(void) 567{ 568 return cpufreq_register_governor(&cpufreq_gov_dbs); 569} 570 571static void __exit cpufreq_gov_dbs_exit(void) 572{ 573 /* Make sure that the scheduled work is indeed not running */ 574 flush_scheduled_work(); 575 576 cpufreq_unregister_governor(&cpufreq_gov_dbs); 577} 578 579 580MODULE_AUTHOR ("Alexander Clouter <alex-kernel@digriz.org.uk>"); 581MODULE_DESCRIPTION ("'cpufreq_conservative' - A dynamic cpufreq governor for " 582 "Low Latency Frequency Transition capable processors " 583 "optimised for use in a battery environment"); 584MODULE_LICENSE ("GPL"); 585 586module_init(cpufreq_gov_dbs_init); 587module_exit(cpufreq_gov_dbs_exit); 588