intel_pstate.c revision b563b4e3f2dd601e19b46ada31bd176fc0a16efc
/*
 * intel_pstate.c: Native P state management for Intel processors
 *
 * (C) Copyright 2012 Intel Corporation
 * Author: Dirk Brandewie <dirk.j.brandewie@intel.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */

#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/module.h>
#include <linux/ktime.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/sysfs.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <trace/events/power.h>

#include <asm/div64.h>
#include <asm/msr.h>
#include <asm/cpu_device_id.h>

#define SAMPLE_COUNT		3

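/*
 * Fixed point arithmetic with FRAC_BITS (8) fractional bits:
 * int_tofp(1) == 256.  For example, div_fp(int_tofp(25), int_tofp(100))
 * yields 64, the fixed-point representation of 0.25; mul_fp() widens to
 * 64 bits internally and shifts the product back down by FRAC_BITS.
 */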
#define FRAC_BITS 8
#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
#define fp_toint(X) ((X) >> FRAC_BITS)

static inline int32_t mul_fp(int32_t x, int32_t y)
{
	return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
}

static inline int32_t div_fp(int32_t x, int32_t y)
{
	return div_s64((int64_t)x << FRAC_BITS, (int64_t)y);
}

struct sample {
	ktime_t start_time;
	ktime_t end_time;
	int core_pct_busy;
	int pstate_pct_busy;
	u64 duration_us;
	u64 idletime_us;
	u64 aperf;		/* APERF delta over the sample window */
	u64 mperf;		/* MPERF delta over the sample window */
	int freq;		/* estimated average frequency in kHz */
};

struct pstate_data {
	int	current_pstate;
	int	min_pstate;
	int	max_pstate;
	int	turbo_pstate;
};

struct _pid {
	int setpoint;
	int32_t integral;
	int32_t p_gain;
	int32_t i_gain;
	int32_t d_gain;
	int deadband;
	int last_err;
};

struct cpudata {
	int cpu;

	char name[64];

	struct timer_list timer;

	struct pstate_adjust_policy *pstate_policy;
	struct pstate_data pstate;
	struct _pid pid;
	struct _pid idle_pid;

	int min_pstate_count;
	int idle_mode;

	ktime_t prev_sample;
	u64	prev_idle_time_us;
	u64	prev_aperf;
	u64	prev_mperf;
	int	sample_ptr;
	struct sample samples[SAMPLE_COUNT];
};

static struct cpudata **all_cpu_data;	/* indexed by CPU number */

struct pstate_adjust_policy {
	int sample_rate_ms;
	int deadband;
	int setpoint;
	int p_gain_pct;
	int d_gain_pct;
	int i_gain_pct;
};

static struct pstate_adjust_policy default_policy = {
	.sample_rate_ms = 10,
	.deadband = 0,
	.setpoint = 109,
	.p_gain_pct = 17,
	.d_gain_pct = 0,
	.i_gain_pct = 4,
};

struct perf_limits {
	int no_turbo;
	int max_perf_pct;
	int min_perf_pct;
	int32_t max_perf;
	int32_t min_perf;
};

static struct perf_limits limits = {
	.no_turbo = 0,
	.max_perf_pct = 100,
	.max_perf = int_tofp(1),
	.min_perf_pct = 0,
	.min_perf = 0,
};

static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
			int deadband, int integral)
{
	pid->setpoint = setpoint;
	pid->deadband  = deadband;
	pid->integral  = int_tofp(integral);
	pid->last_err  = setpoint - busy;
}

static inline void pid_p_gain_set(struct _pid *pid, int percent)
{
	pid->p_gain = div_fp(int_tofp(percent), int_tofp(100));
}

static inline void pid_i_gain_set(struct _pid *pid, int percent)
{
	pid->i_gain = div_fp(int_tofp(percent), int_tofp(100));
}

static inline void pid_d_gain_set(struct _pid *pid, int percent)
{
	pid->d_gain = div_fp(int_tofp(percent), int_tofp(100));
}

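/*
 * One step of a discrete PID controller: with error e = setpoint - busy,
 * the output is p_gain * e + i_gain * sum(e) + d_gain * (e - last_e),
 * all in FRAC_BITS fixed point.  Errors within the deadband are ignored
 * and the integral term is clamped to +/-30 to limit windup.  A negative
 * result means busy is above the setpoint; the busy-path caller maps
 * that to a P state increase.
 */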
static signed int pid_calc(struct _pid *pid, int busy)
{
	signed int err, result;
	int32_t pterm, dterm, fp_error;
	int32_t integral_limit;

	err = pid->setpoint - busy;
	fp_error = int_tofp(err);

	if (abs(err) <= pid->deadband)
		return 0;

	pterm = mul_fp(pid->p_gain, fp_error);

	pid->integral += fp_error;

	/* limit the integral term */
	integral_limit = int_tofp(30);
	if (pid->integral > integral_limit)
		pid->integral = integral_limit;
	if (pid->integral < -integral_limit)
		pid->integral = -integral_limit;

	dterm = mul_fp(pid->d_gain, (err - pid->last_err));
	pid->last_err = err;

	result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm;

	return (signed int)fp_toint(result);
}

static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu)
{
	pid_p_gain_set(&cpu->pid, cpu->pstate_policy->p_gain_pct);
	pid_d_gain_set(&cpu->pid, cpu->pstate_policy->d_gain_pct);
	pid_i_gain_set(&cpu->pid, cpu->pstate_policy->i_gain_pct);

	pid_reset(&cpu->pid,
		cpu->pstate_policy->setpoint,
		100,
		cpu->pstate_policy->deadband,
		0);
}

static inline void intel_pstate_idle_pid_reset(struct cpudata *cpu)
{
	pid_p_gain_set(&cpu->idle_pid, cpu->pstate_policy->p_gain_pct);
	pid_d_gain_set(&cpu->idle_pid, cpu->pstate_policy->d_gain_pct);
	pid_i_gain_set(&cpu->idle_pid, cpu->pstate_policy->i_gain_pct);

	pid_reset(&cpu->idle_pid,
		75,
		50,
		cpu->pstate_policy->deadband,
		0);
}

static inline void intel_pstate_reset_all_pid(void)
{
	unsigned int cpu;
	for_each_online_cpu(cpu) {
		if (all_cpu_data[cpu])
			intel_pstate_busy_pid_reset(all_cpu_data[cpu]);
	}
}

/************************** debugfs begin ************************/
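/*
 * The PID tuning parameters below are exposed as files under
 * <debugfs>/pstate_snb/ (typically /sys/kernel/debug/pstate_snb/).
 * Writing any of them resets the busy PID on every online CPU so new
 * gains take effect immediately.
 */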
static int pid_param_set(void *data, u64 val)
{
	*(u32 *)data = val;
	intel_pstate_reset_all_pid();
	return 0;
}

static int pid_param_get(void *data, u64 *val)
{
	*val = *(u32 *)data;
	return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get,
			pid_param_set, "%llu\n");

struct pid_param {
	char *name;
	void *value;
};

static struct pid_param pid_files[] = {
	{"sample_rate_ms", &default_policy.sample_rate_ms},
	{"d_gain_pct", &default_policy.d_gain_pct},
	{"i_gain_pct", &default_policy.i_gain_pct},
	{"deadband", &default_policy.deadband},
	{"setpoint", &default_policy.setpoint},
	{"p_gain_pct", &default_policy.p_gain_pct},
	{NULL, NULL}
};

static struct dentry *debugfs_parent;

static void intel_pstate_debug_expose_params(void)
{
	int i = 0;

	debugfs_parent = debugfs_create_dir("pstate_snb", NULL);
	if (IS_ERR_OR_NULL(debugfs_parent))
		return;
	while (pid_files[i].name) {
		debugfs_create_file(pid_files[i].name, 0660,
				debugfs_parent, pid_files[i].value,
				&fops_pid_param);
		i++;
	}
}

/************************** debugfs end ************************/

/************************** sysfs begin ************************/
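/*
 * Global limits live under /sys/devices/system/cpu/intel_pstate/ as
 * no_turbo, max_perf_pct and min_perf_pct.  The percentages are also
 * kept as fixed-point fractions (limits.max_perf/min_perf) that
 * intel_pstate_get_min_max() applies to the available P state range.
 */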
#define show_one(file_name, object)					\
	static ssize_t show_##file_name					\
	(struct kobject *kobj, struct attribute *attr, char *buf)	\
	{								\
		return sprintf(buf, "%u\n", limits.object);		\
	}

static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
				const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;
	limits.no_turbo = clamp_t(int, input, 0, 1);

	return count;
}

static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
				const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	limits.max_perf_pct = clamp_t(int, input, 0, 100);
	limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));
	return count;
}

static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
				const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;
	limits.min_perf_pct = clamp_t(int, input, 0, 100);
	limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));

	return count;
}

show_one(no_turbo, no_turbo);
show_one(max_perf_pct, max_perf_pct);
show_one(min_perf_pct, min_perf_pct);

define_one_global_rw(no_turbo);
define_one_global_rw(max_perf_pct);
define_one_global_rw(min_perf_pct);

static struct attribute *intel_pstate_attributes[] = {
	&no_turbo.attr,
	&max_perf_pct.attr,
	&min_perf_pct.attr,
	NULL
};

static struct attribute_group intel_pstate_attr_group = {
	.attrs = intel_pstate_attributes,
};

static struct kobject *intel_pstate_kobject;

static void intel_pstate_sysfs_expose_params(void)
{
	int rc;

	intel_pstate_kobject = kobject_create_and_add("intel_pstate",
						&cpu_subsys.dev_root->kobj);
	BUG_ON(!intel_pstate_kobject);
	rc = sysfs_create_group(intel_pstate_kobject,
				&intel_pstate_attr_group);
	BUG_ON(rc);
}

/************************** sysfs end ************************/

static int intel_pstate_min_pstate(void)
{
	u64 value;
	rdmsrl(0xCE, value);	/* MSR_PLATFORM_INFO */
	return (value >> 40) & 0xFF;	/* bits 47:40: max efficiency ratio */
}

static int intel_pstate_max_pstate(void)
{
	u64 value;
	rdmsrl(0xCE, value);	/* MSR_PLATFORM_INFO */
	return (value >> 8) & 0xFF;	/* bits 15:8: max non-turbo ratio */
}

static int intel_pstate_turbo_pstate(void)
{
	u64 value;
	int nont, ret;
	rdmsrl(0x1AD, value);	/* MSR_TURBO_RATIO_LIMIT */
	nont = intel_pstate_max_pstate();
	ret = value & 0xFF;	/* bits 7:0: max 1-core turbo ratio */
	if (ret <= nont)
		ret = nont;
	return ret;
}

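/*
 * Translate the user limits into an allowed [min, max] P state range:
 * max is the turbo P state (or the guaranteed maximum when no_turbo is
 * set) scaled by limits.max_perf, and min is a limits.min_perf fraction
 * of that scaled maximum.
 */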
static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
{
	int max_perf = cpu->pstate.turbo_pstate;
	int min_perf;
	if (limits.no_turbo)
		max_perf = cpu->pstate.max_pstate;

	max_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf));
	*max = clamp_t(int, max_perf,
			cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);

	min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.min_perf));
	*min = clamp_t(int, min_perf,
			cpu->pstate.min_pstate, max_perf);
}

static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
{
	int max_perf, min_perf;

	intel_pstate_get_min_max(cpu, &min_perf, &max_perf);

	pstate = clamp_t(int, pstate, min_perf, max_perf);

	if (pstate == cpu->pstate.current_pstate)
		return;

#ifndef MODULE
	trace_cpu_frequency(pstate * 100000, cpu->cpu);
#endif
	cpu->pstate.current_pstate = pstate;
	/* the target ratio goes in bits 15:8 of IA32_PERF_CTL */
	wrmsrl(MSR_IA32_PERF_CTL, pstate << 8);
}

static inline void intel_pstate_pstate_increase(struct cpudata *cpu, int steps)
{
	int target;
	target = cpu->pstate.current_pstate + steps;

	intel_pstate_set_pstate(cpu, target);
}

static inline void intel_pstate_pstate_decrease(struct cpudata *cpu, int steps)
{
	int target;
	target = cpu->pstate.current_pstate - steps;
	intel_pstate_set_pstate(cpu, target);
}

static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
{
	sprintf(cpu->name, "Intel 2nd generation core");

	cpu->pstate.min_pstate = intel_pstate_min_pstate();
	cpu->pstate.max_pstate = intel_pstate_max_pstate();
	cpu->pstate.turbo_pstate = intel_pstate_turbo_pstate();

	/*
	 * Go to the max P state so we don't slow down boot when built in;
	 * when built as a module this is handled during normal operation.
	 */
	intel_pstate_set_pstate(cpu, cpu->pstate.max_pstate);
}

static inline void intel_pstate_calc_busy(struct cpudata *cpu,
					struct sample *sample)
{
	u64 core_pct;
	sample->pstate_pct_busy = 100 - div64_u64(
					sample->idletime_us * 100,
					sample->duration_us);
	core_pct = div64_u64(sample->aperf * 100, sample->mperf);
	/* P states step in 100 MHz increments, so pstate * 100000 is kHz */
	sample->freq = cpu->pstate.turbo_pstate * core_pct * 1000;

	sample->core_pct_busy = div_s64((sample->pstate_pct_busy * core_pct),
					100);
}

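/*
 * APERF counts at the current effective core frequency while MPERF
 * counts at the guaranteed (TSC) frequency, so the ratio of their
 * deltas gives the average speed the core actually ran at over the
 * sample window.  Each timer tick stores one sample in a small ring
 * buffer of SAMPLE_COUNT entries.
 */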
static inline void intel_pstate_sample(struct cpudata *cpu)
{
	ktime_t now;
	u64 idle_time_us;
	u64 aperf, mperf;

	now = ktime_get();
	idle_time_us = get_cpu_idle_time_us(cpu->cpu, NULL);

	rdmsrl(MSR_IA32_APERF, aperf);
	rdmsrl(MSR_IA32_MPERF, mperf);
	/*
	 * For the first sample, don't actually record a sample, just
	 * set the baseline.
	 */
	if (cpu->prev_idle_time_us > 0) {
		cpu->sample_ptr = (cpu->sample_ptr + 1) % SAMPLE_COUNT;
		cpu->samples[cpu->sample_ptr].start_time = cpu->prev_sample;
		cpu->samples[cpu->sample_ptr].end_time = now;
		cpu->samples[cpu->sample_ptr].duration_us =
			ktime_us_delta(now, cpu->prev_sample);
		cpu->samples[cpu->sample_ptr].idletime_us =
			idle_time_us - cpu->prev_idle_time_us;

		cpu->samples[cpu->sample_ptr].aperf = aperf;
		cpu->samples[cpu->sample_ptr].mperf = mperf;
		cpu->samples[cpu->sample_ptr].aperf -= cpu->prev_aperf;
		cpu->samples[cpu->sample_ptr].mperf -= cpu->prev_mperf;

		intel_pstate_calc_busy(cpu, &cpu->samples[cpu->sample_ptr]);
	}

	cpu->prev_sample = now;
	cpu->prev_idle_time_us = idle_time_us;
	cpu->prev_aperf = aperf;
	cpu->prev_mperf = mperf;
}

static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
{
	int sample_time, delay;

	sample_time = cpu->pstate_policy->sample_rate_ms;
	delay = msecs_to_jiffies(sample_time);
	/* align expiry to the next multiple of the sample period */
	delay -= jiffies % delay;
	mod_timer_pinned(&cpu->timer, jiffies + delay);
}

static inline void intel_pstate_idle_mode(struct cpudata *cpu)
{
	cpu->idle_mode = 1;
}

static inline void intel_pstate_normal_mode(struct cpudata *cpu)
{
	cpu->idle_mode = 0;
}

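/*
 * Scale the measured busy percentage by turbo_pstate/current_pstate so
 * load is expressed in units comparable across P states; the busy PID
 * setpoint (default 109) is defined in these scaled units.
 */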
static inline int intel_pstate_get_scaled_busy(struct cpudata *cpu)
{
	int32_t busy_scaled;
	int32_t core_busy, turbo_pstate, current_pstate;

	core_busy = int_tofp(cpu->samples[cpu->sample_ptr].core_pct_busy);
	turbo_pstate = int_tofp(cpu->pstate.turbo_pstate);
	current_pstate = int_tofp(cpu->pstate.current_pstate);
	busy_scaled = mul_fp(core_busy, div_fp(turbo_pstate, current_pstate));

	return fp_toint(busy_scaled);
}

static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
{
	int busy_scaled;
	struct _pid *pid;
	signed int ctl = 0;
	int steps;

	pid = &cpu->pid;
	busy_scaled = intel_pstate_get_scaled_busy(cpu);

	ctl = pid_calc(pid, busy_scaled);

	steps = abs(ctl);
	/* negative control output means busy is above the setpoint */
	if (ctl < 0)
		intel_pstate_pstate_increase(cpu, steps);
	else
		intel_pstate_pstate_decrease(cpu, steps);
}

static inline void intel_pstate_adjust_idle_pstate(struct cpudata *cpu)
{
	int busy_scaled;
	struct _pid *pid;
	int ctl = 0;
	int steps;

	pid = &cpu->idle_pid;

	busy_scaled = intel_pstate_get_scaled_busy(cpu);

	/* the idle PID regulates idleness, so feed it the inverted load */
	ctl = pid_calc(pid, 100 - busy_scaled);

	steps = abs(ctl);
	if (ctl < 0)
		intel_pstate_pstate_decrease(cpu, steps);
	else
		intel_pstate_pstate_increase(cpu, steps);

	if (cpu->pstate.current_pstate == cpu->pstate.min_pstate)
		intel_pstate_normal_mode(cpu);
}

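/*
 * Per-CPU deferrable timer callback: take a sample, run the busy (or
 * idle) PID to move the P state, then re-arm the timer for the next
 * sample period.
 */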
static void intel_pstate_timer_func(unsigned long __data)
{
	struct cpudata *cpu = (struct cpudata *) __data;

	intel_pstate_sample(cpu);

	if (!cpu->idle_mode)
		intel_pstate_adjust_busy_pstate(cpu);
	else
		intel_pstate_adjust_idle_pstate(cpu);

#if defined(XPERF_FIX)
	/*
	 * XPERF_FIX is not defined in this file, so this idle-mode kick
	 * is normally compiled out.
	 */
	if (cpu->pstate.current_pstate == cpu->pstate.min_pstate) {
		cpu->min_pstate_count++;
		if (!(cpu->min_pstate_count % 5)) {
			intel_pstate_set_pstate(cpu, cpu->pstate.max_pstate);
			intel_pstate_idle_mode(cpu);
		}
	} else
		cpu->min_pstate_count = 0;
#endif
	intel_pstate_set_sample_time(cpu);
}

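/*
 * Supported CPUs: model 0x2a is Sandy Bridge (client) and 0x2d is
 * Sandy Bridge-E/EP, both family 6.
 */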
#define ICPU(model, policy) \
	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&policy }

static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
	ICPU(0x2a, default_policy),
	ICPU(0x2d, default_policy),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);

static int intel_pstate_init_cpu(unsigned int cpunum)
{
	const struct x86_cpu_id *id;
	struct cpudata *cpu;

	id = x86_match_cpu(intel_pstate_cpu_ids);
	if (!id)
		return -ENODEV;

	all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata), GFP_KERNEL);
	if (!all_cpu_data[cpunum])
		return -ENOMEM;

	cpu = all_cpu_data[cpunum];

	/*
	 * Set the CPU number before the first P state write so the
	 * cpu_frequency tracepoint reports the right CPU.
	 */
	cpu->cpu = cpunum;
	intel_pstate_get_cpu_pstates(cpu);

	cpu->pstate_policy =
		(struct pstate_adjust_policy *)id->driver_data;
	init_timer_deferrable(&cpu->timer);
	cpu->timer.function = intel_pstate_timer_func;
	cpu->timer.data = (unsigned long)cpu;
	cpu->timer.expires = jiffies + HZ/100;
	intel_pstate_busy_pid_reset(cpu);
	intel_pstate_idle_pid_reset(cpu);
	intel_pstate_sample(cpu);
	intel_pstate_set_pstate(cpu, cpu->pstate.max_pstate);

	add_timer_on(&cpu->timer, cpunum);

	pr_info("Intel pstate controlling: cpu %d\n", cpunum);

	return 0;
}

static unsigned int intel_pstate_get(unsigned int cpu_num)
{
	struct sample *sample;
	struct cpudata *cpu;

	cpu = all_cpu_data[cpu_num];
	if (!cpu)
		return 0;
	sample = &cpu->samples[cpu->sample_ptr];
	return sample->freq;
}

static int intel_pstate_set_policy(struct cpufreq_policy *policy)
{
	struct cpudata *cpu;
	int min, max;

	cpu = all_cpu_data[policy->cpu];

	if (!policy->cpuinfo.max_freq)
		return -ENODEV;

	intel_pstate_get_min_max(cpu, &min, &max);

	limits.min_perf_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
	limits.min_perf_pct = clamp_t(int, limits.min_perf_pct, 0, 100);
	limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));

	limits.max_perf_pct = policy->max * 100 / policy->cpuinfo.max_freq;
	limits.max_perf_pct = clamp_t(int, limits.max_perf_pct, 0, 100);
	limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));

	if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
		limits.min_perf_pct = 100;
		limits.min_perf = int_tofp(1);
		limits.max_perf_pct = 100;
		limits.max_perf = int_tofp(1);
		limits.no_turbo = 0;
	}

	return 0;
}

static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
{
	cpufreq_verify_within_limits(policy,
				policy->cpuinfo.min_freq,
				policy->cpuinfo.max_freq);

	if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) &&
		(policy->policy != CPUFREQ_POLICY_PERFORMANCE))
		return -EINVAL;

	return 0;
}

static int __cpuinit intel_pstate_cpu_exit(struct cpufreq_policy *policy)
{
	int cpu = policy->cpu;

	/* make sure the timer callback is not still running before freeing */
	del_timer_sync(&all_cpu_data[cpu]->timer);
	kfree(all_cpu_data[cpu]);
	all_cpu_data[cpu] = NULL;
	return 0;
}

static int __cpuinit intel_pstate_cpu_init(struct cpufreq_policy *policy)
{
	int rc, min_pstate, max_pstate;
	struct cpudata *cpu;

	rc = intel_pstate_init_cpu(policy->cpu);
	if (rc)
		return rc;

	cpu = all_cpu_data[policy->cpu];

	if (!limits.no_turbo &&
		limits.min_perf_pct == 100 && limits.max_perf_pct == 100)
		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
	else
		policy->policy = CPUFREQ_POLICY_POWERSAVE;

	intel_pstate_get_min_max(cpu, &min_pstate, &max_pstate);
	policy->min = min_pstate * 100000;
	policy->max = max_pstate * 100000;

	/* cpuinfo and default policy values */
	policy->cpuinfo.min_freq = cpu->pstate.min_pstate * 100000;
	policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate * 100000;
	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
	cpumask_set_cpu(policy->cpu, policy->cpus);

	return 0;
}

static struct cpufreq_driver intel_pstate_driver = {
	.flags		= CPUFREQ_CONST_LOOPS,
	.verify		= intel_pstate_verify_policy,
	.setpolicy	= intel_pstate_set_policy,
	.get		= intel_pstate_get,
	.init		= intel_pstate_cpu_init,
	.exit		= intel_pstate_cpu_exit,
	.name		= "intel_pstate",
	.owner		= THIS_MODULE,
};

static int __initdata no_load;

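/*
 * Sanity-check the MSRs this driver depends on: the P state fields in
 * MSR_PLATFORM_INFO must be non-zero, and the APERF/MPERF counters must
 * actually advance between two reads.
 */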
static int intel_pstate_msrs_not_valid(void)
{
	/* Check that all the MSRs we are using are valid. */
	u64 aperf, mperf, tmp;

	rdmsrl(MSR_IA32_APERF, aperf);
	rdmsrl(MSR_IA32_MPERF, mperf);

	if (!intel_pstate_min_pstate() ||
		!intel_pstate_max_pstate() ||
		!intel_pstate_turbo_pstate())
		return -ENODEV;

	rdmsrl(MSR_IA32_APERF, tmp);
	if (!(tmp - aperf))
		return -ENODEV;

	rdmsrl(MSR_IA32_MPERF, tmp);
	if (!(tmp - mperf))
		return -ENODEV;

	return 0;
}

static int __init intel_pstate_init(void)
{
	int cpu, rc = 0;
	const struct x86_cpu_id *id;

	if (no_load)
		return -ENODEV;

	id = x86_match_cpu(intel_pstate_cpu_ids);
	if (!id)
		return -ENODEV;

	if (intel_pstate_msrs_not_valid())
		return -ENODEV;

	pr_info("Intel P-state driver initializing.\n");

	all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
	if (!all_cpu_data)
		return -ENOMEM;

	rc = cpufreq_register_driver(&intel_pstate_driver);
	if (rc)
		goto out;

	intel_pstate_debug_expose_params();
	intel_pstate_sysfs_expose_params();
	return rc;
out:
	get_online_cpus();
	for_each_online_cpu(cpu) {
		if (all_cpu_data[cpu]) {
			del_timer_sync(&all_cpu_data[cpu]->timer);
			kfree(all_cpu_data[cpu]);
		}
	}

	put_online_cpus();
	vfree(all_cpu_data);
	return -ENODEV;
}
device_initcall(intel_pstate_init);

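/*
 * "intel_pstate=disable" on the kernel command line keeps the driver
 * from loading, leaving frequency control to another cpufreq driver.
 */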
static int __init intel_pstate_setup(char *str)
{
	if (!str)
		return -EINVAL;

	if (!strcmp(str, "disable"))
		no_load = 1;
	return 0;
}
early_param("intel_pstate", intel_pstate_setup);

MODULE_AUTHOR("Dirk Brandewie <dirk.j.brandewie@intel.com>");
MODULE_DESCRIPTION("'intel_pstate' - P state driver for Intel Core processors");
MODULE_LICENSE("GPL");