/*
 *  linux/drivers/cpufreq/cpufreq.c
 *
 *  Copyright (C) 2001 Russell King
 *            (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
 *
 *  Oct 2005 - Ashok Raj <ashok.raj@intel.com>
 *	Added handling for CPU hotplug
 *  Feb 2006 - Jacob Shin <jacob.shin@amd.com>
 *	Fix handling for CPU hotplug -- affected CPUs
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/completion.h>
#include <linux/mutex.h>
#include <linux/syscore_ops.h>

#include <trace/events/power.h>

/**
 * The "cpufreq driver" - the arch- or hardware-dependent low
 * level driver of CPUFreq support, and its spinlock. This lock
 * also protects the cpufreq_cpu_data array.
 */
static struct cpufreq_driver *cpufreq_driver;
static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
#ifdef CONFIG_HOTPLUG_CPU
/* This one keeps track of the previously set governor of a removed CPU */
static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor);
#endif
static DEFINE_SPINLOCK(cpufreq_driver_lock);

/*
 * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
 * all cpufreq/hotplug/workqueue/etc related lock issues.
 *
 * The rules for this semaphore:
 * - Any routine that wants to read from the policy structure will
 *   do a down_read on this semaphore.
 * - Any routine that will write to the policy structure and/or may take away
 *   the policy altogether (eg. CPU hotplug), will hold this lock in write
 *   mode before doing so.
 *
 * Additional rules:
 * - All holders of the lock should check to make sure that the CPU they
 *   are concerned with is online after they get the lock.
 * - Governor routines that can be called in cpufreq hotplug path should not
 *   take this sem as top level hotplug notifier handler takes this.
 * - Lock should not be held across
 *     __cpufreq_governor(data, CPUFREQ_GOV_STOP);
 */
static DEFINE_PER_CPU(int, cpufreq_policy_cpu);
static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);

#define lock_policy_rwsem(mode, cpu)					\
static int lock_policy_rwsem_##mode					\
(int cpu)								\
{									\
	int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);		\
	BUG_ON(policy_cpu == -1);					\
	down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));		\
	if (unlikely(!cpu_online(cpu))) {				\
		up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));	\
		return -1;						\
	}								\
									\
	return 0;							\
}

lock_policy_rwsem(read, cpu);

lock_policy_rwsem(write, cpu);

static void unlock_policy_rwsem_read(int cpu)
{
	int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
	BUG_ON(policy_cpu == -1);
	up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
}

static void unlock_policy_rwsem_write(int cpu)
{
	int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
	BUG_ON(policy_cpu == -1);
	up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
}
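
/*
 * Illustrative sketch (not compiled): the read-side discipline that the
 * rules above describe.  "example_show_cur" is a hypothetical caller, not
 * part of this file.
 *
 *	static unsigned int example_show_cur(unsigned int cpu)
 *	{
 *		unsigned int cur;
 *
 *		if (lock_policy_rwsem_read(cpu) < 0)
 *			return 0;	(CPU went offline meanwhile)
 *		cur = per_cpu(cpufreq_cpu_data, cpu)->cur;
 *		unlock_policy_rwsem_read(cpu);
 *		return cur;
 *	}
 */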


/* internal prototypes */
static int __cpufreq_governor(struct cpufreq_policy *policy,
		unsigned int event);
static unsigned int __cpufreq_get(unsigned int cpu);
static void handle_update(struct work_struct *work);

/**
 * Two notifier lists: the "policy" list is involved in the
 * validation process for a new CPU frequency policy; the
 * "transition" list is for kernel code that needs to handle
 * changes to devices when the CPU clock speed changes.
 * Each notifier head carries its own internal locking.
 */
static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list);
static struct srcu_notifier_head cpufreq_transition_notifier_list;

static bool init_cpufreq_transition_notifier_list_called;
static int __init init_cpufreq_transition_notifier_list(void)
{
	srcu_init_notifier_head(&cpufreq_transition_notifier_list);
	init_cpufreq_transition_notifier_list_called = true;
	return 0;
}
pure_initcall(init_cpufreq_transition_notifier_list);

static LIST_HEAD(cpufreq_governor_list);
static DEFINE_MUTEX(cpufreq_governor_mutex);

struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
{
	struct cpufreq_policy *data;
	unsigned long flags;

	if (cpu >= nr_cpu_ids)
		goto err_out;

	/* get the cpufreq driver */
	spin_lock_irqsave(&cpufreq_driver_lock, flags);

	if (!cpufreq_driver)
		goto err_out_unlock;

	if (!try_module_get(cpufreq_driver->owner))
		goto err_out_unlock;


	/* get the CPU */
	data = per_cpu(cpufreq_cpu_data, cpu);

	if (!data)
		goto err_out_put_module;

	if (!kobject_get(&data->kobj))
		goto err_out_put_module;

	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
	return data;

err_out_put_module:
	module_put(cpufreq_driver->owner);
err_out_unlock:
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
err_out:
	return NULL;
}
EXPORT_SYMBOL_GPL(cpufreq_cpu_get);


void cpufreq_cpu_put(struct cpufreq_policy *data)
{
	kobject_put(&data->kobj);
	module_put(cpufreq_driver->owner);
}
EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
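
/*
 * Illustrative sketch (not compiled): the reference-counting pattern the
 * two helpers above implement.  Every successful cpufreq_cpu_get() must be
 * balanced by cpufreq_cpu_put(); "example_query" is a hypothetical caller.
 *
 *	static unsigned int example_query(unsigned int cpu)
 *	{
 *		struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
 *		unsigned int max = 0;
 *
 *		if (policy) {
 *			max = policy->max;
 *			cpufreq_cpu_put(policy);
 *		}
 *		return max;
 *	}
 */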


/*********************************************************************
 *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
 *********************************************************************/

/**
 * adjust_jiffies - adjust the system "loops_per_jiffy"
 *
 * This function alters the system "loops_per_jiffy" for the clock
 * speed change. Note that loops_per_jiffy cannot be updated on SMP
 * systems as each CPU might be scaled differently. So, use the arch
 * per-CPU loops_per_jiffy value wherever possible.
 */
#ifndef CONFIG_SMP
static unsigned long l_p_j_ref;
static unsigned int  l_p_j_ref_freq;

static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
{
	if (ci->flags & CPUFREQ_CONST_LOOPS)
		return;

	if (!l_p_j_ref_freq) {
		l_p_j_ref = loops_per_jiffy;
		l_p_j_ref_freq = ci->old;
		pr_debug("saving %lu as reference value for loops_per_jiffy; "
			"freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
	}
	if ((val == CPUFREQ_POSTCHANGE && ci->old != ci->new) ||
	    (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
		loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
								ci->new);
		pr_debug("scaling loops_per_jiffy to %lu "
			"for frequency %u kHz\n", loops_per_jiffy, ci->new);
	}
}
#else
static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
{
	return;
}
#endif
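
/*
 * Worked example (illustrative): cpufreq_scale() above does a simple
 * linear rescale, new_lpj = l_p_j_ref * ci->new / l_p_j_ref_freq.  With a
 * reference of 4,000,000 loops_per_jiffy captured at 2,000,000 kHz, a
 * transition to 1,000,000 kHz yields 4000000 * 1000000 / 2000000 =
 * 2,000,000 loops_per_jiffy.
 */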


/**
 * cpufreq_notify_transition - call notifier chain and adjust_jiffies
 * on frequency transition.
 * @freqs: transition details (CPU number, old and new frequency)
 * @state: CPUFREQ_PRECHANGE or CPUFREQ_POSTCHANGE
 *
 * This function calls the transition notifiers and the "adjust_jiffies"
 * function. It is called twice on all CPU frequency changes that have
 * external effects.
 */
void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
{
	struct cpufreq_policy *policy;

	BUG_ON(irqs_disabled());

	freqs->flags = cpufreq_driver->flags;
	pr_debug("notification %u of frequency transition to %u kHz\n",
		state, freqs->new);

	policy = per_cpu(cpufreq_cpu_data, freqs->cpu);
	switch (state) {

	case CPUFREQ_PRECHANGE:
		/* detect if the driver reported a value as "old frequency"
		 * which is not equal to what the cpufreq core thinks is
		 * "old frequency".
		 */
		if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
			if ((policy) && (policy->cpu == freqs->cpu) &&
			    (policy->cur) && (policy->cur != freqs->old)) {
				pr_debug("Warning: CPU frequency is "
					"%u kHz, cpufreq assumed %u kHz\n",
					freqs->old, policy->cur);
				freqs->old = policy->cur;
			}
		}
		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
				CPUFREQ_PRECHANGE, freqs);
		adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
		break;

	case CPUFREQ_POSTCHANGE:
		adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
		pr_debug("FREQ: %lu - CPU: %lu\n", (unsigned long)freqs->new,
			(unsigned long)freqs->cpu);
		trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu);
		trace_cpu_frequency(freqs->new, freqs->cpu);
		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
				CPUFREQ_POSTCHANGE, freqs);
		if (likely(policy) && likely(policy->cpu == freqs->cpu))
			policy->cur = freqs->new;
		break;
	}
}
EXPORT_SYMBOL_GPL(cpufreq_notify_transition);



/*********************************************************************
 *                          SYSFS INTERFACE                          *
 *********************************************************************/

static struct cpufreq_governor *__find_governor(const char *str_governor)
{
	struct cpufreq_governor *t;

	list_for_each_entry(t, &cpufreq_governor_list, governor_list)
		if (!strnicmp(str_governor, t->name, CPUFREQ_NAME_LEN))
			return t;

	return NULL;
}

/**
 * cpufreq_parse_governor - parse a governor string
 */
static int cpufreq_parse_governor(char *str_governor, unsigned int *policy,
				struct cpufreq_governor **governor)
{
	int err = -EINVAL;

	if (!cpufreq_driver)
		goto out;

	if (cpufreq_driver->setpolicy) {
		if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
			*policy = CPUFREQ_POLICY_PERFORMANCE;
			err = 0;
		} else if (!strnicmp(str_governor, "powersave",
						CPUFREQ_NAME_LEN)) {
			*policy = CPUFREQ_POLICY_POWERSAVE;
			err = 0;
		}
	} else if (cpufreq_driver->target) {
		struct cpufreq_governor *t;

		mutex_lock(&cpufreq_governor_mutex);

		t = __find_governor(str_governor);

		if (t == NULL) {
			int ret;

			mutex_unlock(&cpufreq_governor_mutex);
			ret = request_module("cpufreq_%s", str_governor);
			mutex_lock(&cpufreq_governor_mutex);

			if (ret == 0)
				t = __find_governor(str_governor);
		}

		if (t != NULL) {
			*governor = t;
			err = 0;
		}

		mutex_unlock(&cpufreq_governor_mutex);
	}
out:
	return err;
}


/**
 * cpufreq_per_cpu_attr_read() / show_##file_name() -
 * print out cpufreq information
 *
 * Write out information from the cpufreq policy of the given CPU; the
 * object printed must be "unsigned int".
 */

#define show_one(file_name, object)			\
static ssize_t show_##file_name				\
(struct cpufreq_policy *policy, char *buf)		\
{							\
	return sprintf(buf, "%u\n", policy->object);	\
}

show_one(cpuinfo_min_freq, cpuinfo.min_freq);
show_one(cpuinfo_max_freq, cpuinfo.max_freq);
show_one(cpuinfo_transition_latency, cpuinfo.transition_latency);
show_one(scaling_min_freq, min);
show_one(scaling_max_freq, max);
show_one(scaling_cur_freq, cur);
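
/*
 * Illustrative expansion (not compiled): show_one(scaling_cur_freq, cur)
 * above generates the following accessor, which the sysfs show() wrapper
 * later dispatches to through struct freq_attr:
 *
 *	static ssize_t show_scaling_cur_freq
 *	(struct cpufreq_policy *policy, char *buf)
 *	{
 *		return sprintf(buf, "%u\n", policy->cur);
 *	}
 */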

static int __cpufreq_set_policy(struct cpufreq_policy *data,
				struct cpufreq_policy *policy);

/**
 * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access
 */
#define store_one(file_name, object)			\
static ssize_t store_##file_name					\
(struct cpufreq_policy *policy, const char *buf, size_t count)		\
{									\
	int ret;							\
	struct cpufreq_policy new_policy;				\
									\
	ret = cpufreq_get_policy(&new_policy, policy->cpu);		\
	if (ret)							\
		return -EINVAL;						\
									\
	ret = sscanf(buf, "%u", &new_policy.object);			\
	if (ret != 1)							\
		return -EINVAL;						\
									\
	ret = __cpufreq_set_policy(policy, &new_policy);		\
	policy->user_policy.object = policy->object;			\
									\
	return ret ? ret : count;					\
}

store_one(scaling_min_freq, min);
store_one(scaling_max_freq, max);

/**
 * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware
 */
static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy,
					char *buf)
{
	unsigned int cur_freq = __cpufreq_get(policy->cpu);
	if (!cur_freq)
		return sprintf(buf, "<unknown>");
	return sprintf(buf, "%u\n", cur_freq);
}


/**
 * show_scaling_governor - show the current policy for the specified CPU
 */
static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf)
{
	if (policy->policy == CPUFREQ_POLICY_POWERSAVE)
		return sprintf(buf, "powersave\n");
	else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
		return sprintf(buf, "performance\n");
	else if (policy->governor)
		return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n",
				policy->governor->name);
	return -EINVAL;
}


/**
 * store_scaling_governor - store policy for the specified CPU
 */
static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
					const char *buf, size_t count)
{
	int ret;
	char	str_governor[16];
	struct cpufreq_policy new_policy;

	ret = cpufreq_get_policy(&new_policy, policy->cpu);
	if (ret)
		return ret;

	ret = sscanf(buf, "%15s", str_governor);
	if (ret != 1)
		return -EINVAL;

	if (cpufreq_parse_governor(str_governor, &new_policy.policy,
						&new_policy.governor))
		return -EINVAL;

	/* Do not use cpufreq_set_policy here or the user_policy.max
	   will be wrongly overridden */
	ret = __cpufreq_set_policy(policy, &new_policy);

	policy->user_policy.policy = policy->policy;
	policy->user_policy.governor = policy->governor;

	if (ret)
		return ret;
	else
		return count;
}

/**
 * show_scaling_driver - show the cpufreq driver currently loaded
 */
static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf)
{
	return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name);
}

/**
 * show_scaling_available_governors - show the available CPUfreq governors
 */
static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy,
						char *buf)
{
	ssize_t i = 0;
	struct cpufreq_governor *t;

	if (!cpufreq_driver->target) {
		i += sprintf(buf, "performance powersave");
		goto out;
	}

	list_for_each_entry(t, &cpufreq_governor_list, governor_list) {
		if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char))
		    - (CPUFREQ_NAME_LEN + 2)))
			goto out;
		i += scnprintf(&buf[i], CPUFREQ_NAME_LEN, "%s ", t->name);
	}
out:
	i += sprintf(&buf[i], "\n");
	return i;
}

static ssize_t show_cpus(const struct cpumask *mask, char *buf)
{
	ssize_t i = 0;
	unsigned int cpu;

	for_each_cpu(cpu, mask) {
		if (i)
			i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " ");
		i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu);
		if (i >= (PAGE_SIZE - 5))
			break;
	}
	i += sprintf(&buf[i], "\n");
	return i;
}

/**
 * show_related_cpus - show the CPUs affected by each transition even if
 * hw coordination is in use
 */
static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf)
{
	if (cpumask_empty(policy->related_cpus))
		return show_cpus(policy->cpus, buf);
	return show_cpus(policy->related_cpus, buf);
}

/**
 * show_affected_cpus - show the CPUs affected by each transition
 */
static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf)
{
	return show_cpus(policy->cpus, buf);
}

static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy,
					const char *buf, size_t count)
{
	unsigned int freq = 0;
	unsigned int ret;

	if (!policy->governor || !policy->governor->store_setspeed)
		return -EINVAL;

	ret = sscanf(buf, "%u", &freq);
	if (ret != 1)
		return -EINVAL;

	policy->governor->store_setspeed(policy, freq);

	return count;
}

static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf)
{
	if (!policy->governor || !policy->governor->show_setspeed)
		return sprintf(buf, "<unsupported>\n");

	return policy->governor->show_setspeed(policy, buf);
}

/**
 * show_bios_limit - show the current cpufreq HW/BIOS limitation
 */
static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf)
{
	unsigned int limit;
	int ret;
	if (cpufreq_driver->bios_limit) {
		ret = cpufreq_driver->bios_limit(policy->cpu, &limit);
		if (!ret)
			return sprintf(buf, "%u\n", limit);
	}
	return sprintf(buf, "%u\n", policy->cpuinfo.max_freq);
}

cpufreq_freq_attr_ro_perm(cpuinfo_cur_freq, 0400);
cpufreq_freq_attr_ro(cpuinfo_min_freq);
cpufreq_freq_attr_ro(cpuinfo_max_freq);
cpufreq_freq_attr_ro(cpuinfo_transition_latency);
cpufreq_freq_attr_ro(scaling_available_governors);
cpufreq_freq_attr_ro(scaling_driver);
cpufreq_freq_attr_ro(scaling_cur_freq);
cpufreq_freq_attr_ro(bios_limit);
cpufreq_freq_attr_ro(related_cpus);
cpufreq_freq_attr_ro(affected_cpus);
cpufreq_freq_attr_rw(scaling_min_freq);
cpufreq_freq_attr_rw(scaling_max_freq);
cpufreq_freq_attr_rw(scaling_governor);
cpufreq_freq_attr_rw(scaling_setspeed);

static struct attribute *default_attrs[] = {
	&cpuinfo_min_freq.attr,
	&cpuinfo_max_freq.attr,
	&cpuinfo_transition_latency.attr,
	&scaling_min_freq.attr,
	&scaling_max_freq.attr,
	&affected_cpus.attr,
	&related_cpus.attr,
	&scaling_governor.attr,
	&scaling_driver.attr,
	&scaling_available_governors.attr,
	&scaling_setspeed.attr,
	NULL
};

struct kobject *cpufreq_global_kobject;
EXPORT_SYMBOL(cpufreq_global_kobject);

#define to_policy(k) container_of(k, struct cpufreq_policy, kobj)
#define to_attr(a) container_of(a, struct freq_attr, attr)

static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct cpufreq_policy *policy = to_policy(kobj);
	struct freq_attr *fattr = to_attr(attr);
	ssize_t ret = -EINVAL;
	policy = cpufreq_cpu_get(policy->cpu);
	if (!policy)
		goto no_policy;

	if (lock_policy_rwsem_read(policy->cpu) < 0)
		goto fail;

	if (fattr->show)
		ret = fattr->show(policy, buf);
	else
		ret = -EIO;

	unlock_policy_rwsem_read(policy->cpu);
fail:
	cpufreq_cpu_put(policy);
no_policy:
	return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
		     const char *buf, size_t count)
{
	struct cpufreq_policy *policy = to_policy(kobj);
	struct freq_attr *fattr = to_attr(attr);
	ssize_t ret = -EINVAL;
	policy = cpufreq_cpu_get(policy->cpu);
	if (!policy)
		goto no_policy;

	if (lock_policy_rwsem_write(policy->cpu) < 0)
		goto fail;

	if (fattr->store)
		ret = fattr->store(policy, buf, count);
	else
		ret = -EIO;

	unlock_policy_rwsem_write(policy->cpu);
fail:
	cpufreq_cpu_put(policy);
no_policy:
	return ret;
}

static void cpufreq_sysfs_release(struct kobject *kobj)
{
	struct cpufreq_policy *policy = to_policy(kobj);
	pr_debug("last reference is dropped\n");
	complete(&policy->kobj_unregister);
}

static const struct sysfs_ops sysfs_ops = {
	.show	= show,
	.store	= store,
};

static struct kobj_type ktype_cpufreq = {
	.sysfs_ops	= &sysfs_ops,
	.default_attrs	= default_attrs,
	.release	= cpufreq_sysfs_release,
};
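
/*
 * Illustrative sketch (not compiled): how the pieces above fit together.
 * Each policy kobject is registered with ktype_cpufreq, so a read of e.g.
 * /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor goes through
 * sysfs_ops.show == show(), which resolves the attribute back to its
 * struct freq_attr and calls show_scaling_governor() under the policy
 * rwsem.  A hypothetical driver-private attribute follows the same shape:
 *
 *	static ssize_t show_example(struct cpufreq_policy *policy, char *buf)
 *	{
 *		return sprintf(buf, "%u\n", policy->cpuinfo.max_freq);
 *	}
 *	cpufreq_freq_attr_ro(example);
 */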

/*
 * Returns:
 *   Negative: Failure
 *   0:        Success
 *   Positive: When we have a managed CPU and the sysfs got symlinked
 */
static int cpufreq_add_dev_policy(unsigned int cpu,
				  struct cpufreq_policy *policy,
				  struct device *dev)
{
	int ret = 0;
#ifdef CONFIG_SMP
	unsigned long flags;
	unsigned int j;
#ifdef CONFIG_HOTPLUG_CPU
	struct cpufreq_governor *gov;

	gov = __find_governor(per_cpu(cpufreq_cpu_governor, cpu));
	if (gov) {
		policy->governor = gov;
		pr_debug("Restoring governor %s for cpu %d\n",
		       policy->governor->name, cpu);
	}
#endif

	for_each_cpu(j, policy->cpus) {
		struct cpufreq_policy *managed_policy;

		if (cpu == j)
			continue;

		/* Check for existing affected CPUs.
		 * They may not be aware of it due to CPU Hotplug.
		 * cpufreq_cpu_put is called when the device is removed
		 * in __cpufreq_remove_dev()
		 */
		managed_policy = cpufreq_cpu_get(j);
		if (unlikely(managed_policy)) {

			/* Set proper policy_cpu */
			unlock_policy_rwsem_write(cpu);
			per_cpu(cpufreq_policy_cpu, cpu) = managed_policy->cpu;

			if (lock_policy_rwsem_write(cpu) < 0) {
				/* Should not go through policy unlock path */
				if (cpufreq_driver->exit)
					cpufreq_driver->exit(policy);
				cpufreq_cpu_put(managed_policy);
				return -EBUSY;
			}

			spin_lock_irqsave(&cpufreq_driver_lock, flags);
			cpumask_copy(managed_policy->cpus, policy->cpus);
			per_cpu(cpufreq_cpu_data, cpu) = managed_policy;
			spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

			pr_debug("CPU already managed, adding link\n");
			ret = sysfs_create_link(&dev->kobj,
						&managed_policy->kobj,
						"cpufreq");
			if (ret)
				cpufreq_cpu_put(managed_policy);
			/*
			 * Success. We only needed to be added to the mask.
			 * Call driver->exit() because only the cpu parent of
			 * the kobj needed to call init().
			 */
			if (cpufreq_driver->exit)
				cpufreq_driver->exit(policy);

			if (!ret)
				return 1;
			else
				return ret;
		}
	}
#endif
	return ret;
}


/* symlink affected CPUs */
static int cpufreq_add_dev_symlink(unsigned int cpu,
				   struct cpufreq_policy *policy)
{
	unsigned int j;
	int ret = 0;

	for_each_cpu(j, policy->cpus) {
		struct cpufreq_policy *managed_policy;
		struct device *cpu_dev;

		if (j == cpu)
			continue;
		if (!cpu_online(j))
			continue;

		pr_debug("CPU %u already managed, adding link\n", j);
		managed_policy = cpufreq_cpu_get(cpu);
		cpu_dev = get_cpu_device(j);
		ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
					"cpufreq");
		if (ret) {
			cpufreq_cpu_put(managed_policy);
			return ret;
		}
	}
	return ret;
}

static int cpufreq_add_dev_interface(unsigned int cpu,
				     struct cpufreq_policy *policy,
				     struct device *dev)
{
	struct cpufreq_policy new_policy;
	struct freq_attr **drv_attr;
	unsigned long flags;
	int ret = 0;
	unsigned int j;

	/* prepare interface data */
	ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
				   &dev->kobj, "cpufreq");
	if (ret)
		return ret;

	/* set up files for this cpu device */
	drv_attr = cpufreq_driver->attr;
	while ((drv_attr) && (*drv_attr)) {
		ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
		if (ret)
			goto err_out_kobj_put;
		drv_attr++;
	}
	if (cpufreq_driver->get) {
		ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
		if (ret)
			goto err_out_kobj_put;
	}
	if (cpufreq_driver->target) {
		ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
		if (ret)
			goto err_out_kobj_put;
	}
	if (cpufreq_driver->bios_limit) {
		ret = sysfs_create_file(&policy->kobj, &bios_limit.attr);
		if (ret)
			goto err_out_kobj_put;
	}

	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	for_each_cpu(j, policy->cpus) {
		if (!cpu_online(j))
			continue;
		per_cpu(cpufreq_cpu_data, j) = policy;
		per_cpu(cpufreq_policy_cpu, j) = policy->cpu;
	}
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

	ret = cpufreq_add_dev_symlink(cpu, policy);
	if (ret)
		goto err_out_kobj_put;

	memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
	/* assure that the starting sequence is run in __cpufreq_set_policy */
	policy->governor = NULL;

	/* set default policy */
	ret = __cpufreq_set_policy(policy, &new_policy);
	policy->user_policy.policy = policy->policy;
	policy->user_policy.governor = policy->governor;

	if (ret) {
		pr_debug("setting policy failed\n");
		if (cpufreq_driver->exit)
			cpufreq_driver->exit(policy);
	}
	return ret;

err_out_kobj_put:
	kobject_put(&policy->kobj);
	wait_for_completion(&policy->kobj_unregister);
	return ret;
}


/**
 * cpufreq_add_dev - add a CPU device
 *
 * Adds the cpufreq interface for a CPU device.
 *
 * The Oracle says: try running cpufreq registration/unregistration
 * concurrently with CPU hotplugging and all hell will break loose. Tried
 * to clean this mess up, but more thorough testing is needed. - Mathieu
 */
static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
{
	unsigned int cpu = dev->id;
	int ret = 0, found = 0;
	struct cpufreq_policy *policy;
	unsigned long flags;
	unsigned int j;
#ifdef CONFIG_HOTPLUG_CPU
	int sibling;
#endif

	if (cpu_is_offline(cpu))
		return 0;

	pr_debug("adding CPU %u\n", cpu);

#ifdef CONFIG_SMP
	/* check whether a different CPU already registered this
	 * CPU because it is in the same boat. */
	policy = cpufreq_cpu_get(cpu);
	if (unlikely(policy)) {
		cpufreq_cpu_put(policy);
		return 0;
	}
#endif

	if (!try_module_get(cpufreq_driver->owner)) {
		ret = -EINVAL;
		goto module_out;
	}

	ret = -ENOMEM;
	policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL);
	if (!policy)
		goto nomem_out;

	if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL))
		goto err_free_policy;

	if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL))
		goto err_free_cpumask;

	policy->cpu = cpu;
	cpumask_copy(policy->cpus, cpumask_of(cpu));

	/* Initially set CPU itself as the policy_cpu */
	per_cpu(cpufreq_policy_cpu, cpu) = cpu;
	ret = (lock_policy_rwsem_write(cpu) < 0);
	WARN_ON(ret);

	init_completion(&policy->kobj_unregister);
	INIT_WORK(&policy->update, handle_update);

	/* Set governor before ->init, so that driver could check it */
#ifdef CONFIG_HOTPLUG_CPU
	for_each_online_cpu(sibling) {
		struct cpufreq_policy *cp = per_cpu(cpufreq_cpu_data, sibling);
		if (cp && cp->governor &&
		    (cpumask_test_cpu(cpu, cp->related_cpus))) {
			policy->governor = cp->governor;
			found = 1;
			break;
		}
	}
#endif
	if (!found)
		policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
	/* call driver. From then on the cpufreq driver must be able
	 * to accept all calls to ->verify and ->setpolicy for this CPU
	 */
	ret = cpufreq_driver->init(policy);
	if (ret) {
		pr_debug("initialization failed\n");
		goto err_unlock_policy;
	}
	policy->user_policy.min = policy->min;
	policy->user_policy.max = policy->max;

	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
				     CPUFREQ_START, policy);

	ret = cpufreq_add_dev_policy(cpu, policy, dev);
	if (ret) {
		if (ret > 0)
			/* This is a managed cpu, symlink created,
			   exit with 0 */
			ret = 0;
		goto err_unlock_policy;
	}

	ret = cpufreq_add_dev_interface(cpu, policy, dev);
	if (ret)
		goto err_out_unregister;

	unlock_policy_rwsem_write(cpu);

	kobject_uevent(&policy->kobj, KOBJ_ADD);
	module_put(cpufreq_driver->owner);
	pr_debug("initialization complete\n");

	return 0;


err_out_unregister:
	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	for_each_cpu(j, policy->cpus)
		per_cpu(cpufreq_cpu_data, j) = NULL;
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

	kobject_put(&policy->kobj);
	wait_for_completion(&policy->kobj_unregister);

err_unlock_policy:
	unlock_policy_rwsem_write(cpu);
	free_cpumask_var(policy->related_cpus);
err_free_cpumask:
	free_cpumask_var(policy->cpus);
err_free_policy:
	kfree(policy);
nomem_out:
	module_put(cpufreq_driver->owner);
module_out:
	return ret;
}


/**
 * __cpufreq_remove_dev - remove a CPU device
 *
 * Removes the cpufreq interface for a CPU device.
 * Caller should already have policy_rwsem in write mode for this CPU.
 * This routine frees the rwsem before returning.
 */
static int __cpufreq_remove_dev(struct device *dev,
				struct subsys_interface *sif)
{
	unsigned int cpu = dev->id;
	unsigned long flags;
	struct cpufreq_policy *data;
	struct kobject *kobj;
	struct completion *cmp;
#ifdef CONFIG_SMP
	struct device *cpu_dev;
	unsigned int j;
#endif

	pr_debug("unregistering CPU %u\n", cpu);

	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	data = per_cpu(cpufreq_cpu_data, cpu);

	if (!data) {
		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
		unlock_policy_rwsem_write(cpu);
		return -EINVAL;
	}
	per_cpu(cpufreq_cpu_data, cpu) = NULL;


#ifdef CONFIG_SMP
	/* if this isn't the CPU which is the parent of the kobj, we
	 * only need to unlink, put and exit
	 */
	if (unlikely(cpu != data->cpu)) {
		pr_debug("removing link\n");
		cpumask_clear_cpu(cpu, data->cpus);
		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
		kobj = &dev->kobj;
		cpufreq_cpu_put(data);
		unlock_policy_rwsem_write(cpu);
		sysfs_remove_link(kobj, "cpufreq");
		return 0;
	}
#endif

#ifdef CONFIG_SMP

#ifdef CONFIG_HOTPLUG_CPU
	strncpy(per_cpu(cpufreq_cpu_governor, cpu), data->governor->name,
			CPUFREQ_NAME_LEN);
#endif

	/* if we have other CPUs still registered, we need to unlink them,
	 * or else wait_for_completion below will lock up. Clean the
	 * per_cpu(cpufreq_cpu_data) while holding the lock, and remove
	 * the sysfs links afterwards.
	 */
	if (unlikely(cpumask_weight(data->cpus) > 1)) {
		for_each_cpu(j, data->cpus) {
			if (j == cpu)
				continue;
			per_cpu(cpufreq_cpu_data, j) = NULL;
		}
	}

	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

	if (unlikely(cpumask_weight(data->cpus) > 1)) {
		for_each_cpu(j, data->cpus) {
			if (j == cpu)
				continue;
			pr_debug("removing link for cpu %u\n", j);
#ifdef CONFIG_HOTPLUG_CPU
			strncpy(per_cpu(cpufreq_cpu_governor, j),
				data->governor->name, CPUFREQ_NAME_LEN);
#endif
			cpu_dev = get_cpu_device(j);
			kobj = &cpu_dev->kobj;
			unlock_policy_rwsem_write(cpu);
			sysfs_remove_link(kobj, "cpufreq");
			lock_policy_rwsem_write(cpu);
			cpufreq_cpu_put(data);
		}
	}
#else
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
#endif

	if (cpufreq_driver->target)
		__cpufreq_governor(data, CPUFREQ_GOV_STOP);

	kobj = &data->kobj;
	cmp = &data->kobj_unregister;
	unlock_policy_rwsem_write(cpu);
	kobject_put(kobj);

	/* we need to make sure that the underlying kobj is actually
	 * not referenced anymore by anybody before we proceed with
	 * unloading.
	 */
	pr_debug("waiting for dropping of refcount\n");
	wait_for_completion(cmp);
	pr_debug("wait complete\n");

	lock_policy_rwsem_write(cpu);
	if (cpufreq_driver->exit)
		cpufreq_driver->exit(data);
	unlock_policy_rwsem_write(cpu);

#ifdef CONFIG_HOTPLUG_CPU
	/* when the CPU which is the parent of the kobj is hotplugged
	 * offline, check for siblings, and create cpufreq sysfs interface
	 * and symlinks
	 */
	if (unlikely(cpumask_weight(data->cpus) > 1)) {
		/* first sibling now owns the new sysfs dir */
		cpumask_clear_cpu(cpu, data->cpus);
		cpufreq_add_dev(get_cpu_device(cpumask_first(data->cpus)),
				NULL);

		/* finally remove our own symlink */
		lock_policy_rwsem_write(cpu);
		__cpufreq_remove_dev(dev, sif);
	}
#endif

	free_cpumask_var(data->related_cpus);
	free_cpumask_var(data->cpus);
	kfree(data);

	return 0;
}


static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
{
	unsigned int cpu = dev->id;
	int retval;

	if (cpu_is_offline(cpu))
		return 0;

	if (unlikely(lock_policy_rwsem_write(cpu)))
		BUG();

	retval = __cpufreq_remove_dev(dev, sif);
	return retval;
}


static void handle_update(struct work_struct *work)
{
	struct cpufreq_policy *policy =
		container_of(work, struct cpufreq_policy, update);
	unsigned int cpu = policy->cpu;
	pr_debug("handle_update for cpu %u called\n", cpu);
	cpufreq_update_policy(cpu);
}

/**
 *	cpufreq_out_of_sync - If actual and saved CPU frequency differ, we're
 *	in deep trouble.
 *	@cpu: cpu number
 *	@old_freq: CPU frequency the kernel thinks the CPU runs at
 *	@new_freq: CPU frequency the CPU actually runs at
 *
 *	We adjust to the current frequency first, and need to clean up later.
 *	So either call cpufreq_update_policy() or schedule handle_update().
 */
static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
				unsigned int new_freq)
{
	struct cpufreq_freqs freqs;

	pr_debug("Warning: CPU frequency out of sync: cpufreq and timing "
	       "core think it is %u kHz, but it is %u kHz\n",
	       old_freq, new_freq);

	freqs.cpu = cpu;
	freqs.old = old_freq;
	freqs.new = new_freq;
	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
}


/**
 * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur
 * @cpu: CPU number
 *
 * This is the last known freq, without actually getting it from the driver.
 * Return value will be same as what is shown in scaling_cur_freq in sysfs.
 */
unsigned int cpufreq_quick_get(unsigned int cpu)
{
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	unsigned int ret_freq = 0;

	if (policy) {
		ret_freq = policy->cur;
		cpufreq_cpu_put(policy);
	}

	return ret_freq;
}
EXPORT_SYMBOL(cpufreq_quick_get);

/**
 * cpufreq_quick_get_max - get the max reported CPU frequency for this CPU
 * @cpu: CPU number
 *
 * Just return the max possible frequency for a given CPU.
 */
unsigned int cpufreq_quick_get_max(unsigned int cpu)
{
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	unsigned int ret_freq = 0;

	if (policy) {
		ret_freq = policy->max;
		cpufreq_cpu_put(policy);
	}

	return ret_freq;
}
EXPORT_SYMBOL(cpufreq_quick_get_max);


static unsigned int __cpufreq_get(unsigned int cpu)
{
	struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
	unsigned int ret_freq = 0;

	if (!cpufreq_driver->get)
		return ret_freq;

	ret_freq = cpufreq_driver->get(cpu);

	if (ret_freq && policy->cur &&
		!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
		/* verify no discrepancy between actual and
		 * saved value exists
		 */
		if (unlikely(ret_freq != policy->cur)) {
			cpufreq_out_of_sync(cpu, policy->cur, ret_freq);
			schedule_work(&policy->update);
		}
	}

	return ret_freq;
}

/**
 * cpufreq_get - get the current CPU frequency (in kHz)
 * @cpu: CPU number
 *
 * Get the current frequency of the given CPU as reported by the driver.
 */
unsigned int cpufreq_get(unsigned int cpu)
{
	unsigned int ret_freq = 0;
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);

	if (!policy)
		goto out;

	if (unlikely(lock_policy_rwsem_read(cpu)))
		goto out_policy;

	ret_freq = __cpufreq_get(cpu);

	unlock_policy_rwsem_read(cpu);

out_policy:
	cpufreq_cpu_put(policy);
out:
	return ret_freq;
}
EXPORT_SYMBOL(cpufreq_get);

static struct subsys_interface cpufreq_interface = {
	.name		= "cpufreq",
	.subsys		= &cpu_subsys,
	.add_dev	= cpufreq_add_dev,
	.remove_dev	= cpufreq_remove_dev,
};


/**
 * cpufreq_bp_suspend - Prepare the boot CPU for system suspend.
 *
 * This function is only executed for the boot processor.  The other CPUs
 * have been put offline by means of CPU hotplug.
 */
static int cpufreq_bp_suspend(void)
{
	int ret = 0;

	int cpu = smp_processor_id();
	struct cpufreq_policy *cpu_policy;

	pr_debug("suspending cpu %u\n", cpu);

	/* If there's no policy for the boot CPU, we have nothing to do. */
	cpu_policy = cpufreq_cpu_get(cpu);
	if (!cpu_policy)
		return 0;

	if (cpufreq_driver->suspend) {
		ret = cpufreq_driver->suspend(cpu_policy);
		if (ret)
			printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
					"step on CPU %u\n", cpu_policy->cpu);
	}

	cpufreq_cpu_put(cpu_policy);
	return ret;
}

/**
 * cpufreq_bp_resume - Restore proper frequency handling of the boot CPU.
 *
 *	1.) resume CPUfreq hardware support (cpufreq_driver->resume())
 *	2.) schedule call cpufreq_update_policy() ASAP as interrupts are
 *	    restored. It will verify that the current freq is in sync with
 *	    what we believe it to be. This is a bit later than when it
 *	    should be, but nonetheless it's better than calling
 *	    cpufreq_driver->get() here which might re-enable interrupts...
 *
 * This function is only executed for the boot CPU.  The other CPUs have not
 * been turned on yet.
 */
static void cpufreq_bp_resume(void)
{
	int ret = 0;

	int cpu = smp_processor_id();
	struct cpufreq_policy *cpu_policy;

	pr_debug("resuming cpu %u\n", cpu);

	/* If there's no policy for the boot CPU, we have nothing to do. */
	cpu_policy = cpufreq_cpu_get(cpu);
	if (!cpu_policy)
		return;

	if (cpufreq_driver->resume) {
		ret = cpufreq_driver->resume(cpu_policy);
		if (ret) {
			printk(KERN_ERR "cpufreq: resume failed in ->resume "
					"step on CPU %u\n", cpu_policy->cpu);
			goto fail;
		}
	}

	schedule_work(&cpu_policy->update);

fail:
	cpufreq_cpu_put(cpu_policy);
}

static struct syscore_ops cpufreq_syscore_ops = {
	.suspend	= cpufreq_bp_suspend,
	.resume		= cpufreq_bp_resume,
};


/*********************************************************************
 *                     NOTIFIER LISTS INTERFACE                      *
 *********************************************************************/

/**
 *	cpufreq_register_notifier - register a driver with cpufreq
 *	@nb: notifier function to register
 *	@list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
 *
 *	Add a driver to one of two lists: either a list of drivers that
 *	are notified about clock rate changes (once before and once after
 *	the transition), or a list of drivers that are notified about
 *	changes in cpufreq policy.
 *
 *	This function may sleep, and has the same return conditions as
 *	blocking_notifier_chain_register.
 */
int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
{
	int ret;

	WARN_ON(!init_cpufreq_transition_notifier_list_called);

	switch (list) {
	case CPUFREQ_TRANSITION_NOTIFIER:
		ret = srcu_notifier_chain_register(
				&cpufreq_transition_notifier_list, nb);
		break;
	case CPUFREQ_POLICY_NOTIFIER:
		ret = blocking_notifier_chain_register(
				&cpufreq_policy_notifier_list, nb);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL(cpufreq_register_notifier);
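
/*
 * Illustrative sketch (not compiled): a minimal transition notifier as a
 * client would register it with the function above.  "example_transition"
 * and "example_nb" are hypothetical names, not part of this file.
 *
 *	static int example_transition(struct notifier_block *nb,
 *				      unsigned long state, void *data)
 *	{
 *		struct cpufreq_freqs *freqs = data;
 *
 *		if (state == CPUFREQ_POSTCHANGE)
 *			pr_info("cpu%u: %u -> %u kHz\n", freqs->cpu,
 *				freqs->old, freqs->new);
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block example_nb = {
 *		.notifier_call = example_transition,
 *	};
 *
 *	cpufreq_register_notifier(&example_nb, CPUFREQ_TRANSITION_NOTIFIER);
 */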


/**
 *	cpufreq_unregister_notifier - unregister a driver with cpufreq
 *	@nb: notifier block to be unregistered
 *	@list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
 *
 *	Remove a driver from the CPU frequency notifier list.
 *
 *	This function may sleep, and has the same return conditions as
 *	blocking_notifier_chain_unregister.
 */
int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
{
	int ret;

	switch (list) {
	case CPUFREQ_TRANSITION_NOTIFIER:
		ret = srcu_notifier_chain_unregister(
				&cpufreq_transition_notifier_list, nb);
		break;
	case CPUFREQ_POLICY_NOTIFIER:
		ret = blocking_notifier_chain_unregister(
				&cpufreq_policy_notifier_list, nb);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL(cpufreq_unregister_notifier);


/*********************************************************************
 *                              GOVERNORS                            *
 *********************************************************************/


int __cpufreq_driver_target(struct cpufreq_policy *policy,
			    unsigned int target_freq,
			    unsigned int relation)
{
	int retval = -EINVAL;

	pr_debug("target for CPU %u: %u kHz, relation %u\n", policy->cpu,
		target_freq, relation);
	if (cpu_online(policy->cpu) && cpufreq_driver->target)
		retval = cpufreq_driver->target(policy, target_freq, relation);

	return retval;
}
EXPORT_SYMBOL_GPL(__cpufreq_driver_target);

int cpufreq_driver_target(struct cpufreq_policy *policy,
			  unsigned int target_freq,
			  unsigned int relation)
{
	int ret = -EINVAL;

	policy = cpufreq_cpu_get(policy->cpu);
	if (!policy)
		goto no_policy;

	if (unlikely(lock_policy_rwsem_write(policy->cpu)))
		goto fail;

	ret = __cpufreq_driver_target(policy, target_freq, relation);

	unlock_policy_rwsem_write(policy->cpu);

fail:
	cpufreq_cpu_put(policy);
no_policy:
	return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_driver_target);

int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu)
{
	int ret = 0;

	policy = cpufreq_cpu_get(policy->cpu);
	if (!policy)
		return -EINVAL;

	if (cpu_online(cpu) && cpufreq_driver->getavg)
		ret = cpufreq_driver->getavg(policy, cpu);

	cpufreq_cpu_put(policy);
	return ret;
}
EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);

/*
 * __cpufreq_governor - forward a governor event (e.g. CPUFREQ_GOV_START,
 * CPUFREQ_GOV_STOP or CPUFREQ_GOV_LIMITS) to the policy's governor.
 */

static int __cpufreq_governor(struct cpufreq_policy *policy,
					unsigned int event)
{
	int ret;

	/* The fallback governor must only be defined when the default
	 * governor is known to have latency restrictions, e.g. conservative
	 * or ondemand; Kconfig already ensures that this is the case.
	 */
#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
	struct cpufreq_governor *gov = &cpufreq_gov_performance;
#else
	struct cpufreq_governor *gov = NULL;
#endif

	if (policy->governor->max_transition_latency &&
	    policy->cpuinfo.transition_latency >
	    policy->governor->max_transition_latency) {
		if (!gov)
			return -EINVAL;
		else {
			printk(KERN_WARNING "%s governor failed: HW "
			       "transition latency too long, falling back "
			       "to the %s governor\n",
			       policy->governor->name,
			       gov->name);
			policy->governor = gov;
		}
	}

	if (!try_module_get(policy->governor->owner))
		return -EINVAL;

	pr_debug("__cpufreq_governor for CPU %u, event %u\n",
						policy->cpu, event);
	ret = policy->governor->governor(policy, event);

	/* we keep one module reference alive for
	 * each CPU governed by this CPU */
	if ((event != CPUFREQ_GOV_START) || ret)
		module_put(policy->governor->owner);
	if ((event == CPUFREQ_GOV_STOP) && !ret)
		module_put(policy->governor->owner);

	return ret;
}


int cpufreq_register_governor(struct cpufreq_governor *governor)
{
	int err;

	if (!governor)
		return -EINVAL;

	mutex_lock(&cpufreq_governor_mutex);

	err = -EBUSY;
	if (__find_governor(governor->name) == NULL) {
		err = 0;
		list_add(&governor->governor_list, &cpufreq_governor_list);
	}

	mutex_unlock(&cpufreq_governor_mutex);
	return err;
}
EXPORT_SYMBOL_GPL(cpufreq_register_governor);
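
/*
 * Illustrative sketch (not compiled): the skeleton of a governor module
 * as it would use the registration API above.  All "example_*" names are
 * hypothetical; real governors also do their own sampling and locking.
 *
 *	static int example_governor(struct cpufreq_policy *policy,
 *				    unsigned int event)
 *	{
 *		switch (event) {
 *		case CPUFREQ_GOV_START:
 *		case CPUFREQ_GOV_LIMITS:
 *			return __cpufreq_driver_target(policy, policy->max,
 *						       CPUFREQ_RELATION_H);
 *		case CPUFREQ_GOV_STOP:
 *		default:
 *			return 0;
 *		}
 *	}
 *
 *	static struct cpufreq_governor example_gov = {
 *		.name		= "example",
 *		.governor	= example_governor,
 *		.owner		= THIS_MODULE,
 *	};
 *
 * Call cpufreq_register_governor(&example_gov) from module_init and
 * cpufreq_unregister_governor(&example_gov) from module_exit.
 */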


void cpufreq_unregister_governor(struct cpufreq_governor *governor)
{
#ifdef CONFIG_HOTPLUG_CPU
	int cpu;
#endif

	if (!governor)
		return;

#ifdef CONFIG_HOTPLUG_CPU
	for_each_present_cpu(cpu) {
		if (cpu_online(cpu))
			continue;
		if (!strcmp(per_cpu(cpufreq_cpu_governor, cpu), governor->name))
			strcpy(per_cpu(cpufreq_cpu_governor, cpu), "\0");
	}
#endif

	mutex_lock(&cpufreq_governor_mutex);
	list_del(&governor->governor_list);
	mutex_unlock(&cpufreq_governor_mutex);
	return;
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);



/*********************************************************************
 *                          POLICY INTERFACE                         *
 *********************************************************************/

/**
 * cpufreq_get_policy - get the current cpufreq_policy
 * @policy: struct cpufreq_policy into which the current cpufreq_policy
 *	is written
 * @cpu: CPU to query
 *
 * Reads the current cpufreq policy.
 */
int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
{
	struct cpufreq_policy *cpu_policy;
	if (!policy)
		return -EINVAL;

	cpu_policy = cpufreq_cpu_get(cpu);
	if (!cpu_policy)
		return -EINVAL;

	memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));

	cpufreq_cpu_put(cpu_policy);
	return 0;
}
EXPORT_SYMBOL(cpufreq_get_policy);


/*
 * data   : current policy.
 * policy : policy to be set.
 */
static int __cpufreq_set_policy(struct cpufreq_policy *data,
				struct cpufreq_policy *policy)
{
	int ret = 0;

	pr_debug("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu,
		policy->min, policy->max);

	memcpy(&policy->cpuinfo, &data->cpuinfo,
				sizeof(struct cpufreq_cpuinfo));

	if (policy->min > data->max || policy->max < data->min) {
		ret = -EINVAL;
		goto error_out;
	}

	/* verify the cpu speed can be set within this limit */
	ret = cpufreq_driver->verify(policy);
	if (ret)
		goto error_out;

	/* adjust if necessary - all reasons */
	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
			CPUFREQ_ADJUST, policy);

	/* adjust if necessary - hardware incompatibility */
	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
			CPUFREQ_INCOMPATIBLE, policy);

	/* verify the cpu speed can be set within this limit,
	   which might be different to the first one */
	ret = cpufreq_driver->verify(policy);
	if (ret)
		goto error_out;

	/* notification of the new policy */
	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
			CPUFREQ_NOTIFY, policy);

	data->min = policy->min;
	data->max = policy->max;

	pr_debug("new min and max freqs are %u - %u kHz\n",
					data->min, data->max);

	if (cpufreq_driver->setpolicy) {
		data->policy = policy->policy;
		pr_debug("setting range\n");
		ret = cpufreq_driver->setpolicy(policy);
	} else {
		if (policy->governor != data->governor) {
			/* save old, working values */
			struct cpufreq_governor *old_gov = data->governor;

			pr_debug("governor switch\n");

			/* end old governor */
			if (data->governor)
				__cpufreq_governor(data, CPUFREQ_GOV_STOP);

			/* start new governor */
			data->governor = policy->governor;
			if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
				/* new governor failed, so re-start old one */
				pr_debug("starting governor %s failed\n",
							data->governor->name);
				if (old_gov) {
					data->governor = old_gov;
					__cpufreq_governor(data,
							   CPUFREQ_GOV_START);
				}
				ret = -EINVAL;
				goto error_out;
			}
			/* might be a policy change, too, so fall through */
		}
		pr_debug("governor: change or update limits\n");
		__cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
	}

error_out:
	return ret;
}

/**
 *	cpufreq_update_policy - re-evaluate an existing cpufreq policy
 *	@cpu: CPU which shall be re-evaluated
 *
 *	Useful for policy notifiers which have different necessities
 *	at different times.
 */
int cpufreq_update_policy(unsigned int cpu)
{
	struct cpufreq_policy *data = cpufreq_cpu_get(cpu);
	struct cpufreq_policy policy;
	int ret;

	if (!data) {
		ret = -ENODEV;
		goto no_policy;
	}

	if (unlikely(lock_policy_rwsem_write(cpu))) {
		ret = -EINVAL;
		goto fail;
	}

	pr_debug("updating policy for CPU %u\n", cpu);
	memcpy(&policy, data, sizeof(struct cpufreq_policy));
	policy.min = data->user_policy.min;
	policy.max = data->user_policy.max;
	policy.policy = data->user_policy.policy;
	policy.governor = data->user_policy.governor;

	/* BIOS might change freq behind our back
	 * -> ask driver for current freq and notify governors about a change
	 */
	if (cpufreq_driver->get) {
		policy.cur = cpufreq_driver->get(cpu);
		if (!data->cur) {
			pr_debug("Driver did not initialize current freq\n");
			data->cur = policy.cur;
		} else {
			if (data->cur != policy.cur)
				cpufreq_out_of_sync(cpu, data->cur,
								policy.cur);
		}
	}

	ret = __cpufreq_set_policy(data, &policy);

	unlock_policy_rwsem_write(cpu);

fail:
	cpufreq_cpu_put(data);
no_policy:
	return ret;
}
EXPORT_SYMBOL(cpufreq_update_policy);

static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
					unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct device *dev;

	dev = get_cpu_device(cpu);
	if (dev) {
		switch (action) {
		case CPU_ONLINE:
		case CPU_ONLINE_FROZEN:
			cpufreq_add_dev(dev, NULL);
			break;
		case CPU_DOWN_PREPARE:
		case CPU_DOWN_PREPARE_FROZEN:
			if (unlikely(lock_policy_rwsem_write(cpu)))
				BUG();

			__cpufreq_remove_dev(dev, NULL);
			break;
		case CPU_DOWN_FAILED:
		case CPU_DOWN_FAILED_FROZEN:
			cpufreq_add_dev(dev, NULL);
			break;
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block __refdata cpufreq_cpu_notifier = {
	.notifier_call = cpufreq_cpu_callback,
};

/*********************************************************************
 *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
 *********************************************************************/

/**
 * cpufreq_register_driver - register a CPU Frequency driver
 * @driver_data: A struct cpufreq_driver containing the values
 * submitted by the CPU Frequency driver.
 *
 *   Registers a CPU Frequency driver to this core code. This code
 * returns zero on success, -EBUSY when another driver got here first
 * (and isn't unregistered in the meantime).
 *
 */
int cpufreq_register_driver(struct cpufreq_driver *driver_data)
{
	unsigned long flags;
	int ret;

	if (!driver_data || !driver_data->verify || !driver_data->init ||
	    ((!driver_data->setpolicy) && (!driver_data->target)))
		return -EINVAL;

	pr_debug("trying to register driver %s\n", driver_data->name);

	if (driver_data->setpolicy)
		driver_data->flags |= CPUFREQ_CONST_LOOPS;

	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	if (cpufreq_driver) {
		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
		return -EBUSY;
	}
	cpufreq_driver = driver_data;
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

	ret = subsys_interface_register(&cpufreq_interface);
	if (ret)
		goto err_null_driver;

	if (!(cpufreq_driver->flags & CPUFREQ_STICKY)) {
		int i;
		ret = -ENODEV;

		/* check for at least one working CPU */
		for (i = 0; i < nr_cpu_ids; i++)
			if (cpu_possible(i) && per_cpu(cpufreq_cpu_data, i)) {
				ret = 0;
				break;
			}

		/* if all ->init() calls failed, unregister */
		if (ret) {
			pr_debug("no CPU initialized for driver %s\n",
							driver_data->name);
			goto err_if_unreg;
		}
	}

	register_hotcpu_notifier(&cpufreq_cpu_notifier);
	pr_debug("driver %s up and running\n", driver_data->name);

	return 0;
err_if_unreg:
	subsys_interface_unregister(&cpufreq_interface);
err_null_driver:
	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	cpufreq_driver = NULL;
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
	return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_register_driver);
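
/*
 * Illustrative sketch (not compiled): the minimum a scaling driver must
 * provide before calling cpufreq_register_driver() - ->init and ->verify,
 * plus either ->target or ->setpolicy.  All "example_*" names and the
 * frequency values are hypothetical.
 *
 *	static int example_init(struct cpufreq_policy *policy)
 *	{
 *		policy->cpuinfo.min_freq = policy->min = 500000;
 *		policy->cpuinfo.max_freq = policy->max = 1000000;
 *		policy->cpuinfo.transition_latency = 100000;
 *		policy->cur = 1000000;
 *		return 0;
 *	}
 *
 *	static int example_verify(struct cpufreq_policy *policy)
 *	{
 *		cpufreq_verify_within_limits(policy,
 *				policy->cpuinfo.min_freq,
 *				policy->cpuinfo.max_freq);
 *		return 0;
 *	}
 *
 *	static struct cpufreq_driver example_driver = {
 *		.name	= "example",
 *		.owner	= THIS_MODULE,
 *		.init	= example_init,
 *		.verify	= example_verify,
 *		.target	= example_target,	(not shown)
 *	};
 *
 * Call cpufreq_register_driver(&example_driver) from module_init.
 */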


/**
 * cpufreq_unregister_driver - unregister the current CPUFreq driver
 *
 *    Unregister the current CPUFreq driver. Only call this if you have
 * the right to do so, i.e. if you have succeeded in initialising before!
 * Returns zero if successful, and -EINVAL if the cpufreq_driver is
 * currently not initialised.
 */
int cpufreq_unregister_driver(struct cpufreq_driver *driver)
{
	unsigned long flags;

	if (!cpufreq_driver || (driver != cpufreq_driver))
		return -EINVAL;

	pr_debug("unregistering driver %s\n", driver->name);

	subsys_interface_unregister(&cpufreq_interface);
	unregister_hotcpu_notifier(&cpufreq_cpu_notifier);

	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	cpufreq_driver = NULL;
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

	return 0;
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);

static int __init cpufreq_core_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		per_cpu(cpufreq_policy_cpu, cpu) = -1;
		init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
	}

	cpufreq_global_kobject = kobject_create_and_add("cpufreq",
						&cpu_subsys.dev_root->kobj);
	BUG_ON(!cpufreq_global_kobject);
	register_syscore_ops(&cpufreq_syscore_ops);

	return 0;
}
core_initcall(cpufreq_core_init);