powernow-k8.c revision a9d3d2068064b7a6395871a49616d3784f802d50
1/*
2 *   (c) 2003-2010 Advanced Micro Devices, Inc.
3 *  Your use of this code is subject to the terms and conditions of the
4 *  GNU general public license version 2. See "COPYING" or
5 *  http://www.gnu.org/licenses/gpl.html
6 *
7 *  Support : mark.langsdorf@amd.com
8 *
9 *  Based on the powernow-k7.c module written by Dave Jones.
10 *  (C) 2003 Dave Jones on behalf of SuSE Labs
11 *  (C) 2004 Dominik Brodowski <linux@brodo.de>
12 *  (C) 2004 Pavel Machek <pavel@ucw.cz>
13 *  Licensed under the terms of the GNU GPL License version 2.
14 *  Based upon datasheets & sample CPUs kindly provided by AMD.
15 *
16 *  Valuable input gratefully received from Dave Jones, Pavel Machek,
17 *  Dominik Brodowski, Jacob Shin, and others.
18 *  Originally developed by Paul Devriendt.
19 *  Processor information obtained from Chapter 9 (Power and Thermal Management)
20 *  of the "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD
21 *  Opteron Processors" available for download from www.amd.com
22 *
23 *  Tables for specific CPUs can be inferred from
24 *     http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf
25 */
26
27#include <linux/kernel.h>
28#include <linux/smp.h>
29#include <linux/module.h>
30#include <linux/init.h>
31#include <linux/cpufreq.h>
32#include <linux/slab.h>
33#include <linux/string.h>
34#include <linux/cpumask.h>
35#include <linux/sched.h>	/* for current / set_cpus_allowed() */
36#include <linux/io.h>
37#include <linux/delay.h>
38
39#include <asm/msr.h>
40
41#include <linux/acpi.h>
42#include <linux/mutex.h>
43#include <acpi/processor.h>
44
45#define PFX "powernow-k8: "
46#define VERSION "version 2.20.00"
47#include "powernow-k8.h"
48#include "mperf.h"
49
50/* serialize freq changes  */
51static DEFINE_MUTEX(fidvid_mutex);
52
53static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data);
54
55static int cpu_family = CPU_OPTERON;
56
57/* core performance boost */
58static bool cpb_capable, cpb_enabled;
59static struct msr __percpu *msrs;
60
61static struct cpufreq_driver cpufreq_amd64_driver;
62
#ifndef CONFIG_SMP
/*
 * Stand-in used when CONFIG_SMP is not set: whatever @cpu is passed, the
 * returned core mask is the mask containing only CPU 0.
 */
static inline const struct cpumask *cpu_core_mask(int cpu)
{
	const struct cpumask *only_cpu0 = cpumask_of(0);

	return only_cpu0;
}
#endif
69
70/* Return a frequency in MHz, given an input fid */
71static u32 find_freq_from_fid(u32 fid)
72{
73	return 800 + (fid * 100);
74}
75
76/* Return a frequency in KHz, given an input fid */
77static u32 find_khz_freq_from_fid(u32 fid)
78{
79	return 1000 * find_freq_from_fid(fid);
80}
81
82static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data,
83		u32 pstate)
84{
85	return data[pstate].frequency;
86}
87
88/* Return the vco fid for an input fid
89 *
90 * Each "low" fid has corresponding "high" fid, and you can get to "low" fids
91 * only from corresponding high fids. This returns "high" fid corresponding to
92 * "low" one.
93 */
94static u32 convert_fid_to_vco_fid(u32 fid)
95{
96	if (fid < HI_FID_TABLE_BOTTOM)
97		return 8 + (2 * fid);
98	else
99		return fid;
100}
101
102/*
103 * Return 1 if the pending bit is set. Unless we just instructed the processor
104 * to transition to a new state, seeing this bit set is really bad news.
105 */
106static int pending_bit_stuck(void)
107{
108	u32 lo, hi;
109
110	if (cpu_family == CPU_HW_PSTATE)
111		return 0;
112
113	rdmsr(MSR_FIDVID_STATUS, lo, hi);
114	return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0;
115}
116
117/*
118 * Update the global current fid / vid values from the status msr.
119 * Returns 1 on error.
120 */
121static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
122{
123	u32 lo, hi;
124	u32 i = 0;
125
126	if (cpu_family == CPU_HW_PSTATE) {
127		rdmsr(MSR_PSTATE_STATUS, lo, hi);
128		i = lo & HW_PSTATE_MASK;
129		data->currpstate = i;
130
131		/*
132		 * a workaround for family 11h erratum 311 might cause
133		 * an "out-of-range Pstate if the core is in Pstate-0
134		 */
135		if ((boot_cpu_data.x86 == 0x11) && (i >= data->numps))
136			data->currpstate = HW_PSTATE_0;
137
138		return 0;
139	}
140	do {
141		if (i++ > 10000) {
142			pr_debug("detected change pending stuck\n");
143			return 1;
144		}
145		rdmsr(MSR_FIDVID_STATUS, lo, hi);
146	} while (lo & MSR_S_LO_CHANGE_PENDING);
147
148	data->currvid = hi & MSR_S_HI_CURRENT_VID;
149	data->currfid = lo & MSR_S_LO_CURRENT_FID;
150
151	return 0;
152}
153
154/* the isochronous relief time */
155static void count_off_irt(struct powernow_k8_data *data)
156{
157	udelay((1 << data->irt) * 10);
158	return;
159}
160
161/* the voltage stabilization time */
162static void count_off_vst(struct powernow_k8_data *data)
163{
164	udelay(data->vstable * VST_UNITS_20US);
165	return;
166}
167
168/* need to init the control msr to a safe value (for each cpu) */
169static void fidvid_msr_init(void)
170{
171	u32 lo, hi;
172	u8 fid, vid;
173
174	rdmsr(MSR_FIDVID_STATUS, lo, hi);
175	vid = hi & MSR_S_HI_CURRENT_VID;
176	fid = lo & MSR_S_LO_CURRENT_FID;
177	lo = fid | (vid << MSR_C_LO_VID_SHIFT);
178	hi = MSR_C_HI_STP_GNT_BENIGN;
179	pr_debug("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi);
180	wrmsr(MSR_FIDVID_CTL, lo, hi);
181}
182
183/* write the new fid value along with the other control fields to the msr */
184static int write_new_fid(struct powernow_k8_data *data, u32 fid)
185{
186	u32 lo;
187	u32 savevid = data->currvid;
188	u32 i = 0;
189
190	if ((fid & INVALID_FID_MASK) || (data->currvid & INVALID_VID_MASK)) {
191		printk(KERN_ERR PFX "internal error - overflow on fid write\n");
192		return 1;
193	}
194
195	lo = fid;
196	lo |= (data->currvid << MSR_C_LO_VID_SHIFT);
197	lo |= MSR_C_LO_INIT_FID_VID;
198
199	pr_debug("writing fid 0x%x, lo 0x%x, hi 0x%x\n",
200		fid, lo, data->plllock * PLL_LOCK_CONVERSION);
201
202	do {
203		wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION);
204		if (i++ > 100) {
205			printk(KERN_ERR PFX
206				"Hardware error - pending bit very stuck - "
207				"no further pstate changes possible\n");
208			return 1;
209		}
210	} while (query_current_values_with_pending_wait(data));
211
212	count_off_irt(data);
213
214	if (savevid != data->currvid) {
215		printk(KERN_ERR PFX
216			"vid change on fid trans, old 0x%x, new 0x%x\n",
217			savevid, data->currvid);
218		return 1;
219	}
220
221	if (fid != data->currfid) {
222		printk(KERN_ERR PFX
223			"fid trans failed, fid 0x%x, curr 0x%x\n", fid,
224			data->currfid);
225		return 1;
226	}
227
228	return 0;
229}
230
231/* Write a new vid to the hardware */
232static int write_new_vid(struct powernow_k8_data *data, u32 vid)
233{
234	u32 lo;
235	u32 savefid = data->currfid;
236	int i = 0;
237
238	if ((data->currfid & INVALID_FID_MASK) || (vid & INVALID_VID_MASK)) {
239		printk(KERN_ERR PFX "internal error - overflow on vid write\n");
240		return 1;
241	}
242
243	lo = data->currfid;
244	lo |= (vid << MSR_C_LO_VID_SHIFT);
245	lo |= MSR_C_LO_INIT_FID_VID;
246
247	pr_debug("writing vid 0x%x, lo 0x%x, hi 0x%x\n",
248		vid, lo, STOP_GRANT_5NS);
249
250	do {
251		wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS);
252		if (i++ > 100) {
253			printk(KERN_ERR PFX "internal error - pending bit "
254					"very stuck - no further pstate "
255					"changes possible\n");
256			return 1;
257		}
258	} while (query_current_values_with_pending_wait(data));
259
260	if (savefid != data->currfid) {
261		printk(KERN_ERR PFX "fid changed on vid trans, old "
262			"0x%x new 0x%x\n",
263		       savefid, data->currfid);
264		return 1;
265	}
266
267	if (vid != data->currvid) {
268		printk(KERN_ERR PFX "vid trans failed, vid 0x%x, "
269				"curr 0x%x\n",
270				vid, data->currvid);
271		return 1;
272	}
273
274	return 0;
275}
276
277/*
278 * Reduce the vid by the max of step or reqvid.
279 * Decreasing vid codes represent increasing voltages:
280 * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off.
281 */
282static int decrease_vid_code_by_step(struct powernow_k8_data *data,
283		u32 reqvid, u32 step)
284{
285	if ((data->currvid - reqvid) > step)
286		reqvid = data->currvid - step;
287
288	if (write_new_vid(data, reqvid))
289		return 1;
290
291	count_off_vst(data);
292
293	return 0;
294}
295
296/* Change hardware pstate by single MSR write */
297static int transition_pstate(struct powernow_k8_data *data, u32 pstate)
298{
299	wrmsr(MSR_PSTATE_CTRL, pstate, 0);
300	data->currpstate = pstate;
301	return 0;
302}
303
304/* Change Opteron/Athlon64 fid and vid, by the 3 phases. */
305static int transition_fid_vid(struct powernow_k8_data *data,
306		u32 reqfid, u32 reqvid)
307{
308	if (core_voltage_pre_transition(data, reqvid, reqfid))
309		return 1;
310
311	if (core_frequency_transition(data, reqfid))
312		return 1;
313
314	if (core_voltage_post_transition(data, reqvid))
315		return 1;
316
317	if (query_current_values_with_pending_wait(data))
318		return 1;
319
320	if ((reqfid != data->currfid) || (reqvid != data->currvid)) {
321		printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, "
322				"curr 0x%x 0x%x\n",
323				smp_processor_id(),
324				reqfid, reqvid, data->currfid, data->currvid);
325		return 1;
326	}
327
328	pr_debug("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n",
329		smp_processor_id(), data->currfid, data->currvid);
330
331	return 0;
332}
333
334/* Phase 1 - core voltage transition ... setup voltage */
335static int core_voltage_pre_transition(struct powernow_k8_data *data,
336		u32 reqvid, u32 reqfid)
337{
338	u32 rvosteps = data->rvo;
339	u32 savefid = data->currfid;
340	u32 maxvid, lo, rvomult = 1;
341
342	pr_debug("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, "
343		"reqvid 0x%x, rvo 0x%x\n",
344		smp_processor_id(),
345		data->currfid, data->currvid, reqvid, data->rvo);
346
347	if ((savefid < LO_FID_TABLE_TOP) && (reqfid < LO_FID_TABLE_TOP))
348		rvomult = 2;
349	rvosteps *= rvomult;
350	rdmsr(MSR_FIDVID_STATUS, lo, maxvid);
351	maxvid = 0x1f & (maxvid >> 16);
352	pr_debug("ph1 maxvid=0x%x\n", maxvid);
353	if (reqvid < maxvid) /* lower numbers are higher voltages */
354		reqvid = maxvid;
355
356	while (data->currvid > reqvid) {
357		pr_debug("ph1: curr 0x%x, req vid 0x%x\n",
358			data->currvid, reqvid);
359		if (decrease_vid_code_by_step(data, reqvid, data->vidmvs))
360			return 1;
361	}
362
363	while ((rvosteps > 0) &&
364			((rvomult * data->rvo + data->currvid) > reqvid)) {
365		if (data->currvid == maxvid) {
366			rvosteps = 0;
367		} else {
368			pr_debug("ph1: changing vid for rvo, req 0x%x\n",
369				data->currvid - 1);
370			if (decrease_vid_code_by_step(data, data->currvid-1, 1))
371				return 1;
372			rvosteps--;
373		}
374	}
375
376	if (query_current_values_with_pending_wait(data))
377		return 1;
378
379	if (savefid != data->currfid) {
380		printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n",
381				data->currfid);
382		return 1;
383	}
384
385	pr_debug("ph1 complete, currfid 0x%x, currvid 0x%x\n",
386		data->currfid, data->currvid);
387
388	return 0;
389}
390
391/* Phase 2 - core frequency transition */
392static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
393{
394	u32 vcoreqfid, vcocurrfid, vcofiddiff;
395	u32 fid_interval, savevid = data->currvid;
396
397	if (data->currfid == reqfid) {
398		printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n",
399				data->currfid);
400		return 0;
401	}
402
403	pr_debug("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, "
404		"reqfid 0x%x\n",
405		smp_processor_id(),
406		data->currfid, data->currvid, reqfid);
407
408	vcoreqfid = convert_fid_to_vco_fid(reqfid);
409	vcocurrfid = convert_fid_to_vco_fid(data->currfid);
410	vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid
411	    : vcoreqfid - vcocurrfid;
412
413	if ((reqfid <= LO_FID_TABLE_TOP) && (data->currfid <= LO_FID_TABLE_TOP))
414		vcofiddiff = 0;
415
416	while (vcofiddiff > 2) {
417		(data->currfid & 1) ? (fid_interval = 1) : (fid_interval = 2);
418
419		if (reqfid > data->currfid) {
420			if (data->currfid > LO_FID_TABLE_TOP) {
421				if (write_new_fid(data,
422						data->currfid + fid_interval))
423					return 1;
424			} else {
425				if (write_new_fid
426				    (data,
427				     2 + convert_fid_to_vco_fid(data->currfid)))
428					return 1;
429			}
430		} else {
431			if (write_new_fid(data, data->currfid - fid_interval))
432				return 1;
433		}
434
435		vcocurrfid = convert_fid_to_vco_fid(data->currfid);
436		vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid
437		    : vcoreqfid - vcocurrfid;
438	}
439
440	if (write_new_fid(data, reqfid))
441		return 1;
442
443	if (query_current_values_with_pending_wait(data))
444		return 1;
445
446	if (data->currfid != reqfid) {
447		printk(KERN_ERR PFX
448			"ph2: mismatch, failed fid transition, "
449			"curr 0x%x, req 0x%x\n",
450			data->currfid, reqfid);
451		return 1;
452	}
453
454	if (savevid != data->currvid) {
455		printk(KERN_ERR PFX "ph2: vid changed, save 0x%x, curr 0x%x\n",
456			savevid, data->currvid);
457		return 1;
458	}
459
460	pr_debug("ph2 complete, currfid 0x%x, currvid 0x%x\n",
461		data->currfid, data->currvid);
462
463	return 0;
464}
465
466/* Phase 3 - core voltage transition flow ... jump to the final vid. */
467static int core_voltage_post_transition(struct powernow_k8_data *data,
468		u32 reqvid)
469{
470	u32 savefid = data->currfid;
471	u32 savereqvid = reqvid;
472
473	pr_debug("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n",
474		smp_processor_id(),
475		data->currfid, data->currvid);
476
477	if (reqvid != data->currvid) {
478		if (write_new_vid(data, reqvid))
479			return 1;
480
481		if (savefid != data->currfid) {
482			printk(KERN_ERR PFX
483			       "ph3: bad fid change, save 0x%x, curr 0x%x\n",
484			       savefid, data->currfid);
485			return 1;
486		}
487
488		if (data->currvid != reqvid) {
489			printk(KERN_ERR PFX
490			       "ph3: failed vid transition\n, "
491			       "req 0x%x, curr 0x%x",
492			       reqvid, data->currvid);
493			return 1;
494		}
495	}
496
497	if (query_current_values_with_pending_wait(data))
498		return 1;
499
500	if (savereqvid != data->currvid) {
501		pr_debug("ph3 failed, currvid 0x%x\n", data->currvid);
502		return 1;
503	}
504
505	if (savefid != data->currfid) {
506		pr_debug("ph3 failed, currfid changed 0x%x\n",
507			data->currfid);
508		return 1;
509	}
510
511	pr_debug("ph3 complete, currfid 0x%x, currvid 0x%x\n",
512		data->currfid, data->currvid);
513
514	return 0;
515}
516
517static void check_supported_cpu(void *_rc)
518{
519	u32 eax, ebx, ecx, edx;
520	int *rc = _rc;
521
522	*rc = -ENODEV;
523
524	if (__this_cpu_read(cpu_info.x86_vendor) != X86_VENDOR_AMD)
525		return;
526
527	eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
528	if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) &&
529	    ((eax & CPUID_XFAM) < CPUID_XFAM_10H))
530		return;
531
532	if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) {
533		if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
534		    ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) {
535			printk(KERN_INFO PFX
536				"Processor cpuid %x not supported\n", eax);
537			return;
538		}
539
540		eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES);
541		if (eax < CPUID_FREQ_VOLT_CAPABILITIES) {
542			printk(KERN_INFO PFX
543			       "No frequency change capabilities detected\n");
544			return;
545		}
546
547		cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
548		if ((edx & P_STATE_TRANSITION_CAPABLE)
549			!= P_STATE_TRANSITION_CAPABLE) {
550			printk(KERN_INFO PFX
551				"Power state transitions not supported\n");
552			return;
553		}
554	} else { /* must be a HW Pstate capable processor */
555		cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
556		if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE)
557			cpu_family = CPU_HW_PSTATE;
558		else
559			return;
560	}
561
562	*rc = 0;
563}
564
565static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst,
566		u8 maxvid)
567{
568	unsigned int j;
569	u8 lastfid = 0xff;
570
571	for (j = 0; j < data->numps; j++) {
572		if (pst[j].vid > LEAST_VID) {
573			printk(KERN_ERR FW_BUG PFX "vid %d invalid : 0x%x\n",
574			       j, pst[j].vid);
575			return -EINVAL;
576		}
577		if (pst[j].vid < data->rvo) {
578			/* vid + rvo >= 0 */
579			printk(KERN_ERR FW_BUG PFX "0 vid exceeded with pstate"
580			       " %d\n", j);
581			return -ENODEV;
582		}
583		if (pst[j].vid < maxvid + data->rvo) {
584			/* vid + rvo >= maxvid */
585			printk(KERN_ERR FW_BUG PFX "maxvid exceeded with pstate"
586			       " %d\n", j);
587			return -ENODEV;
588		}
589		if (pst[j].fid > MAX_FID) {
590			printk(KERN_ERR FW_BUG PFX "maxfid exceeded with pstate"
591			       " %d\n", j);
592			return -ENODEV;
593		}
594		if (j && (pst[j].fid < HI_FID_TABLE_BOTTOM)) {
595			/* Only first fid is allowed to be in "low" range */
596			printk(KERN_ERR FW_BUG PFX "two low fids - %d : "
597			       "0x%x\n", j, pst[j].fid);
598			return -EINVAL;
599		}
600		if (pst[j].fid < lastfid)
601			lastfid = pst[j].fid;
602	}
603	if (lastfid & 1) {
604		printk(KERN_ERR FW_BUG PFX "lastfid invalid\n");
605		return -EINVAL;
606	}
607	if (lastfid > LO_FID_TABLE_TOP)
608		printk(KERN_INFO FW_BUG PFX
609			"first fid not from lo freq table\n");
610
611	return 0;
612}
613
614static void invalidate_entry(struct cpufreq_frequency_table *powernow_table,
615		unsigned int entry)
616{
617	powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID;
618}
619
620static void print_basics(struct powernow_k8_data *data)
621{
622	int j;
623	for (j = 0; j < data->numps; j++) {
624		if (data->powernow_table[j].frequency !=
625				CPUFREQ_ENTRY_INVALID) {
626			if (cpu_family == CPU_HW_PSTATE) {
627				printk(KERN_INFO PFX
628					"   %d : pstate %d (%d MHz)\n", j,
629					data->powernow_table[j].index,
630					data->powernow_table[j].frequency/1000);
631			} else {
632				printk(KERN_INFO PFX
633					"fid 0x%x (%d MHz), vid 0x%x\n",
634					data->powernow_table[j].index & 0xff,
635					data->powernow_table[j].frequency/1000,
636					data->powernow_table[j].index >> 8);
637			}
638		}
639	}
640	if (data->batps)
641		printk(KERN_INFO PFX "Only %d pstates on battery\n",
642				data->batps);
643}
644
645static u32 freq_from_fid_did(u32 fid, u32 did)
646{
647	u32 mhz = 0;
648
649	if (boot_cpu_data.x86 == 0x10)
650		mhz = (100 * (fid + 0x10)) >> did;
651	else if (boot_cpu_data.x86 == 0x11)
652		mhz = (100 * (fid + 8)) >> did;
653	else
654		BUG();
655
656	return mhz * 1000;
657}
658
659static int fill_powernow_table(struct powernow_k8_data *data,
660		struct pst_s *pst, u8 maxvid)
661{
662	struct cpufreq_frequency_table *powernow_table;
663	unsigned int j;
664
665	if (data->batps) {
666		/* use ACPI support to get full speed on mains power */
667		printk(KERN_WARNING PFX
668			"Only %d pstates usable (use ACPI driver for full "
669			"range\n", data->batps);
670		data->numps = data->batps;
671	}
672
673	for (j = 1; j < data->numps; j++) {
674		if (pst[j-1].fid >= pst[j].fid) {
675			printk(KERN_ERR PFX "PST out of sequence\n");
676			return -EINVAL;
677		}
678	}
679
680	if (data->numps < 2) {
681		printk(KERN_ERR PFX "no p states to transition\n");
682		return -ENODEV;
683	}
684
685	if (check_pst_table(data, pst, maxvid))
686		return -EINVAL;
687
688	powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
689		* (data->numps + 1)), GFP_KERNEL);
690	if (!powernow_table) {
691		printk(KERN_ERR PFX "powernow_table memory alloc failure\n");
692		return -ENOMEM;
693	}
694
695	for (j = 0; j < data->numps; j++) {
696		int freq;
697		powernow_table[j].index = pst[j].fid; /* lower 8 bits */
698		powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */
699		freq = find_khz_freq_from_fid(pst[j].fid);
700		powernow_table[j].frequency = freq;
701	}
702	powernow_table[data->numps].frequency = CPUFREQ_TABLE_END;
703	powernow_table[data->numps].index = 0;
704
705	if (query_current_values_with_pending_wait(data)) {
706		kfree(powernow_table);
707		return -EIO;
708	}
709
710	pr_debug("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid);
711	data->powernow_table = powernow_table;
712	if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu)
713		print_basics(data);
714
715	for (j = 0; j < data->numps; j++)
716		if ((pst[j].fid == data->currfid) &&
717		    (pst[j].vid == data->currvid))
718			return 0;
719
720	pr_debug("currfid/vid do not match PST, ignoring\n");
721	return 0;
722}
723
724/* Find and validate the PSB/PST table in BIOS. */
725static int find_psb_table(struct powernow_k8_data *data)
726{
727	struct psb_s *psb;
728	unsigned int i;
729	u32 mvs;
730	u8 maxvid;
731	u32 cpst = 0;
732	u32 thiscpuid;
733
734	for (i = 0xc0000; i < 0xffff0; i += 0x10) {
735		/* Scan BIOS looking for the signature. */
736		/* It can not be at ffff0 - it is too big. */
737
738		psb = phys_to_virt(i);
739		if (memcmp(psb, PSB_ID_STRING, PSB_ID_STRING_LEN) != 0)
740			continue;
741
742		pr_debug("found PSB header at 0x%p\n", psb);
743
744		pr_debug("table vers: 0x%x\n", psb->tableversion);
745		if (psb->tableversion != PSB_VERSION_1_4) {
746			printk(KERN_ERR FW_BUG PFX "PSB table is not v1.4\n");
747			return -ENODEV;
748		}
749
750		pr_debug("flags: 0x%x\n", psb->flags1);
751		if (psb->flags1) {
752			printk(KERN_ERR FW_BUG PFX "unknown flags\n");
753			return -ENODEV;
754		}
755
756		data->vstable = psb->vstable;
757		pr_debug("voltage stabilization time: %d(*20us)\n",
758				data->vstable);
759
760		pr_debug("flags2: 0x%x\n", psb->flags2);
761		data->rvo = psb->flags2 & 3;
762		data->irt = ((psb->flags2) >> 2) & 3;
763		mvs = ((psb->flags2) >> 4) & 3;
764		data->vidmvs = 1 << mvs;
765		data->batps = ((psb->flags2) >> 6) & 3;
766
767		pr_debug("ramp voltage offset: %d\n", data->rvo);
768		pr_debug("isochronous relief time: %d\n", data->irt);
769		pr_debug("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs);
770
771		pr_debug("numpst: 0x%x\n", psb->num_tables);
772		cpst = psb->num_tables;
773		if ((psb->cpuid == 0x00000fc0) ||
774		    (psb->cpuid == 0x00000fe0)) {
775			thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
776			if ((thiscpuid == 0x00000fc0) ||
777			    (thiscpuid == 0x00000fe0))
778				cpst = 1;
779		}
780		if (cpst != 1) {
781			printk(KERN_ERR FW_BUG PFX "numpst must be 1\n");
782			return -ENODEV;
783		}
784
785		data->plllock = psb->plllocktime;
786		pr_debug("plllocktime: 0x%x (units 1us)\n", psb->plllocktime);
787		pr_debug("maxfid: 0x%x\n", psb->maxfid);
788		pr_debug("maxvid: 0x%x\n", psb->maxvid);
789		maxvid = psb->maxvid;
790
791		data->numps = psb->numps;
792		pr_debug("numpstates: 0x%x\n", data->numps);
793		return fill_powernow_table(data,
794				(struct pst_s *)(psb+1), maxvid);
795	}
796	/*
797	 * If you see this message, complain to BIOS manufacturer. If
798	 * he tells you "we do not support Linux" or some similar
799	 * nonsense, remember that Windows 2000 uses the same legacy
800	 * mechanism that the old Linux PSB driver uses. Tell them it
801	 * is broken with Windows 2000.
802	 *
803	 * The reference to the AMD documentation is chapter 9 in the
804	 * BIOS and Kernel Developer's Guide, which is available on
805	 * www.amd.com
806	 */
807	printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n");
808	printk(KERN_ERR PFX "Make sure that your BIOS is up to date"
809		" and Cool'N'Quiet support is enabled in BIOS setup\n");
810	return -ENODEV;
811}
812
813static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data,
814		unsigned int index)
815{
816	u64 control;
817
818	if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
819		return;
820
821	control = data->acpi_data.states[index].control;
822	data->irt = (control >> IRT_SHIFT) & IRT_MASK;
823	data->rvo = (control >> RVO_SHIFT) & RVO_MASK;
824	data->exttype = (control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
825	data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK;
826	data->vidmvs = 1 << ((control >> MVS_SHIFT) & MVS_MASK);
827	data->vstable = (control >> VST_SHIFT) & VST_MASK;
828}
829
830static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
831{
832	struct cpufreq_frequency_table *powernow_table;
833	int ret_val = -ENODEV;
834	u64 control, status;
835
836	if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
837		pr_debug("register performance failed: bad ACPI data\n");
838		return -EIO;
839	}
840
841	/* verify the data contained in the ACPI structures */
842	if (data->acpi_data.state_count <= 1) {
843		pr_debug("No ACPI P-States\n");
844		goto err_out;
845	}
846
847	control = data->acpi_data.control_register.space_id;
848	status = data->acpi_data.status_register.space_id;
849
850	if ((control != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
851	    (status != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
852		pr_debug("Invalid control/status registers (%llx - %llx)\n",
853			control, status);
854		goto err_out;
855	}
856
857	/* fill in data->powernow_table */
858	powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
859		* (data->acpi_data.state_count + 1)), GFP_KERNEL);
860	if (!powernow_table) {
861		pr_debug("powernow_table memory alloc failure\n");
862		goto err_out;
863	}
864
865	/* fill in data */
866	data->numps = data->acpi_data.state_count;
867	powernow_k8_acpi_pst_values(data, 0);
868
869	if (cpu_family == CPU_HW_PSTATE)
870		ret_val = fill_powernow_table_pstate(data, powernow_table);
871	else
872		ret_val = fill_powernow_table_fidvid(data, powernow_table);
873	if (ret_val)
874		goto err_out_mem;
875
876	powernow_table[data->acpi_data.state_count].frequency =
877		CPUFREQ_TABLE_END;
878	powernow_table[data->acpi_data.state_count].index = 0;
879	data->powernow_table = powernow_table;
880
881	if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu)
882		print_basics(data);
883
884	/* notify BIOS that we exist */
885	acpi_processor_notify_smm(THIS_MODULE);
886
887	if (!zalloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) {
888		printk(KERN_ERR PFX
889				"unable to alloc powernow_k8_data cpumask\n");
890		ret_val = -ENOMEM;
891		goto err_out_mem;
892	}
893
894	return 0;
895
896err_out_mem:
897	kfree(powernow_table);
898
899err_out:
900	acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
901
902	/* data->acpi_data.state_count informs us at ->exit()
903	 * whether ACPI was used */
904	data->acpi_data.state_count = 0;
905
906	return ret_val;
907}
908
909static int fill_powernow_table_pstate(struct powernow_k8_data *data,
910		struct cpufreq_frequency_table *powernow_table)
911{
912	int i;
913	u32 hi = 0, lo = 0;
914	rdmsr(MSR_PSTATE_CUR_LIMIT, lo, hi);
915	data->max_hw_pstate = (lo & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT;
916
917	for (i = 0; i < data->acpi_data.state_count; i++) {
918		u32 index;
919
920		index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
921		if (index > data->max_hw_pstate) {
922			printk(KERN_ERR PFX "invalid pstate %d - "
923					"bad value %d.\n", i, index);
924			printk(KERN_ERR PFX "Please report to BIOS "
925					"manufacturer\n");
926			invalidate_entry(powernow_table, i);
927			continue;
928		}
929		rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi);
930		if (!(hi & HW_PSTATE_VALID_MASK)) {
931			pr_debug("invalid pstate %d, ignoring\n", index);
932			invalidate_entry(powernow_table, i);
933			continue;
934		}
935
936		powernow_table[i].index = index;
937
938		/* Frequency may be rounded for these */
939		if ((boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model < 10)
940				 || boot_cpu_data.x86 == 0x11) {
941			powernow_table[i].frequency =
942				freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7);
943		} else
944			powernow_table[i].frequency =
945				data->acpi_data.states[i].core_frequency * 1000;
946	}
947	return 0;
948}
949
950static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
951		struct cpufreq_frequency_table *powernow_table)
952{
953	int i;
954
955	for (i = 0; i < data->acpi_data.state_count; i++) {
956		u32 fid;
957		u32 vid;
958		u32 freq, index;
959		u64 status, control;
960
961		if (data->exttype) {
962			status =  data->acpi_data.states[i].status;
963			fid = status & EXT_FID_MASK;
964			vid = (status >> VID_SHIFT) & EXT_VID_MASK;
965		} else {
966			control =  data->acpi_data.states[i].control;
967			fid = control & FID_MASK;
968			vid = (control >> VID_SHIFT) & VID_MASK;
969		}
970
971		pr_debug("   %d : fid 0x%x, vid 0x%x\n", i, fid, vid);
972
973		index = fid | (vid<<8);
974		powernow_table[i].index = index;
975
976		freq = find_khz_freq_from_fid(fid);
977		powernow_table[i].frequency = freq;
978
979		/* verify frequency is OK */
980		if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) {
981			pr_debug("invalid freq %u kHz, ignoring\n", freq);
982			invalidate_entry(powernow_table, i);
983			continue;
984		}
985
986		/* verify voltage is OK -
987		 * BIOSs are using "off" to indicate invalid */
988		if (vid == VID_OFF) {
989			pr_debug("invalid vid %u, ignoring\n", vid);
990			invalidate_entry(powernow_table, i);
991			continue;
992		}
993
994		if (freq != (data->acpi_data.states[i].core_frequency * 1000)) {
995			printk(KERN_INFO PFX "invalid freq entries "
996				"%u kHz vs. %u kHz\n", freq,
997				(unsigned int)
998				(data->acpi_data.states[i].core_frequency
999				 * 1000));
1000			invalidate_entry(powernow_table, i);
1001			continue;
1002		}
1003	}
1004	return 0;
1005}
1006
1007static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
1008{
1009	if (data->acpi_data.state_count)
1010		acpi_processor_unregister_performance(&data->acpi_data,
1011				data->cpu);
1012	free_cpumask_var(data->acpi_data.shared_cpu_map);
1013}
1014
1015static int get_transition_latency(struct powernow_k8_data *data)
1016{
1017	int max_latency = 0;
1018	int i;
1019	for (i = 0; i < data->acpi_data.state_count; i++) {
1020		int cur_latency = data->acpi_data.states[i].transition_latency
1021			+ data->acpi_data.states[i].bus_master_latency;
1022		if (cur_latency > max_latency)
1023			max_latency = cur_latency;
1024	}
1025	if (max_latency == 0) {
1026		/*
1027		 * Fam 11h and later may return 0 as transition latency. This
1028		 * is intended and means "very fast". While cpufreq core and
1029		 * governors currently can handle that gracefully, better set it
1030		 * to 1 to avoid problems in the future.
1031		 */
1032		if (boot_cpu_data.x86 < 0x11)
1033			printk(KERN_ERR FW_WARN PFX "Invalid zero transition "
1034				"latency\n");
1035		max_latency = 1;
1036	}
1037	/* value in usecs, needs to be in nanoseconds */
1038	return 1000 * max_latency;
1039}
1040
1041/* Take a frequency, and issue the fid/vid transition command */
1042static int transition_frequency_fidvid(struct powernow_k8_data *data,
1043		unsigned int index)
1044{
1045	u32 fid = 0;
1046	u32 vid = 0;
1047	int res, i;
1048	struct cpufreq_freqs freqs;
1049
1050	pr_debug("cpu %d transition to index %u\n", smp_processor_id(), index);
1051
1052	/* fid/vid correctness check for k8 */
1053	/* fid are the lower 8 bits of the index we stored into
1054	 * the cpufreq frequency table in find_psb_table, vid
1055	 * are the upper 8 bits.
1056	 */
1057	fid = data->powernow_table[index].index & 0xFF;
1058	vid = (data->powernow_table[index].index & 0xFF00) >> 8;
1059
1060	pr_debug("table matched fid 0x%x, giving vid 0x%x\n", fid, vid);
1061
1062	if (query_current_values_with_pending_wait(data))
1063		return 1;
1064
1065	if ((data->currvid == vid) && (data->currfid == fid)) {
1066		pr_debug("target matches current values (fid 0x%x, vid 0x%x)\n",
1067			fid, vid);
1068		return 0;
1069	}
1070
1071	pr_debug("cpu %d, changing to fid 0x%x, vid 0x%x\n",
1072		smp_processor_id(), fid, vid);
1073	freqs.old = find_khz_freq_from_fid(data->currfid);
1074	freqs.new = find_khz_freq_from_fid(fid);
1075
1076	for_each_cpu(i, data->available_cores) {
1077		freqs.cpu = i;
1078		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
1079	}
1080
1081	res = transition_fid_vid(data, fid, vid);
1082	if (res)
1083		return res;
1084
1085	freqs.new = find_khz_freq_from_fid(data->currfid);
1086
1087	for_each_cpu(i, data->available_cores) {
1088		freqs.cpu = i;
1089		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
1090	}
1091	return res;
1092}
1093
1094/* Take a frequency, and issue the hardware pstate transition command */
1095static int transition_frequency_pstate(struct powernow_k8_data *data,
1096		unsigned int index)
1097{
1098	u32 pstate = 0;
1099	int res, i;
1100	struct cpufreq_freqs freqs;
1101
1102	pr_debug("cpu %d transition to index %u\n", smp_processor_id(), index);
1103
1104	/* get MSR index for hardware pstate transition */
1105	pstate = index & HW_PSTATE_MASK;
1106	if (pstate > data->max_hw_pstate)
1107		return 0;
1108	freqs.old = find_khz_freq_from_pstate(data->powernow_table,
1109			data->currpstate);
1110	freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
1111
1112	for_each_cpu(i, data->available_cores) {
1113		freqs.cpu = i;
1114		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
1115	}
1116
1117	res = transition_pstate(data, pstate);
1118	freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
1119
1120	for_each_cpu(i, data->available_cores) {
1121		freqs.cpu = i;
1122		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
1123	}
1124	return res;
1125}
1126
1127/* Driver entry point to switch to the target frequency */
1128static int powernowk8_target(struct cpufreq_policy *pol,
1129		unsigned targfreq, unsigned relation)
1130{
1131	cpumask_var_t oldmask;
1132	struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
1133	u32 checkfid;
1134	u32 checkvid;
1135	unsigned int newstate;
1136	int ret = -EIO;
1137
1138	if (!data)
1139		return -EINVAL;
1140
1141	checkfid = data->currfid;
1142	checkvid = data->currvid;
1143
1144	/* only run on specific CPU from here on. */
1145	/* This is poor form: use a workqueue or smp_call_function_single */
1146	if (!alloc_cpumask_var(&oldmask, GFP_KERNEL))
1147		return -ENOMEM;
1148
1149	cpumask_copy(oldmask, tsk_cpus_allowed(current));
1150	set_cpus_allowed_ptr(current, cpumask_of(pol->cpu));
1151
1152	if (smp_processor_id() != pol->cpu) {
1153		printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
1154		goto err_out;
1155	}
1156
1157	if (pending_bit_stuck()) {
1158		printk(KERN_ERR PFX "failing targ, change pending bit set\n");
1159		goto err_out;
1160	}
1161
1162	pr_debug("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n",
1163		pol->cpu, targfreq, pol->min, pol->max, relation);
1164
1165	if (query_current_values_with_pending_wait(data))
1166		goto err_out;
1167
1168	if (cpu_family != CPU_HW_PSTATE) {
1169		pr_debug("targ: curr fid 0x%x, vid 0x%x\n",
1170		data->currfid, data->currvid);
1171
1172		if ((checkvid != data->currvid) ||
1173		    (checkfid != data->currfid)) {
1174			printk(KERN_INFO PFX
1175				"error - out of sync, fix 0x%x 0x%x, "
1176				"vid 0x%x 0x%x\n",
1177				checkfid, data->currfid,
1178				checkvid, data->currvid);
1179		}
1180	}
1181
1182	if (cpufreq_frequency_table_target(pol, data->powernow_table,
1183				targfreq, relation, &newstate))
1184		goto err_out;
1185
1186	mutex_lock(&fidvid_mutex);
1187
1188	powernow_k8_acpi_pst_values(data, newstate);
1189
1190	if (cpu_family == CPU_HW_PSTATE)
1191		ret = transition_frequency_pstate(data, newstate);
1192	else
1193		ret = transition_frequency_fidvid(data, newstate);
1194	if (ret) {
1195		printk(KERN_ERR PFX "transition frequency failed\n");
1196		ret = 1;
1197		mutex_unlock(&fidvid_mutex);
1198		goto err_out;
1199	}
1200	mutex_unlock(&fidvid_mutex);
1201
1202	if (cpu_family == CPU_HW_PSTATE)
1203		pol->cur = find_khz_freq_from_pstate(data->powernow_table,
1204				newstate);
1205	else
1206		pol->cur = find_khz_freq_from_fid(data->currfid);
1207	ret = 0;
1208
1209err_out:
1210	set_cpus_allowed_ptr(current, oldmask);
1211	free_cpumask_var(oldmask);
1212	return ret;
1213}
1214
1215/* Driver entry point to verify the policy and range of frequencies */
1216static int powernowk8_verify(struct cpufreq_policy *pol)
1217{
1218	struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
1219
1220	if (!data)
1221		return -EINVAL;
1222
1223	return cpufreq_frequency_table_verify(pol, data->powernow_table);
1224}
1225
/* Argument block handed to powernowk8_cpu_init_on_cpu() via cross-call */
struct init_on_cpu {
	struct powernow_k8_data *data;	/* in: driver state to initialise */
	int rc;				/* out: 0 or -ENODEV */
};
1230
1231static void __cpuinit powernowk8_cpu_init_on_cpu(void *_init_on_cpu)
1232{
1233	struct init_on_cpu *init_on_cpu = _init_on_cpu;
1234
1235	if (pending_bit_stuck()) {
1236		printk(KERN_ERR PFX "failing init, change pending bit set\n");
1237		init_on_cpu->rc = -ENODEV;
1238		return;
1239	}
1240
1241	if (query_current_values_with_pending_wait(init_on_cpu->data)) {
1242		init_on_cpu->rc = -ENODEV;
1243		return;
1244	}
1245
1246	if (cpu_family == CPU_OPTERON)
1247		fidvid_msr_init();
1248
1249	init_on_cpu->rc = 0;
1250}
1251
1252/* per CPU init entry point to the driver */
1253static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1254{
1255	static const char ACPI_PSS_BIOS_BUG_MSG[] =
1256		KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n"
1257		FW_BUG PFX "Try again with latest BIOS.\n";
1258	struct powernow_k8_data *data;
1259	struct init_on_cpu init_on_cpu;
1260	int rc;
1261	struct cpuinfo_x86 *c = &cpu_data(pol->cpu);
1262
1263	if (!cpu_online(pol->cpu))
1264		return -ENODEV;
1265
1266	smp_call_function_single(pol->cpu, check_supported_cpu, &rc, 1);
1267	if (rc)
1268		return -ENODEV;
1269
1270	data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL);
1271	if (!data) {
1272		printk(KERN_ERR PFX "unable to alloc powernow_k8_data");
1273		return -ENOMEM;
1274	}
1275
1276	data->cpu = pol->cpu;
1277	data->currpstate = HW_PSTATE_INVALID;
1278
1279	if (powernow_k8_cpu_init_acpi(data)) {
1280		/*
1281		 * Use the PSB BIOS structure. This is only available on
1282		 * an UP version, and is deprecated by AMD.
1283		 */
1284		if (num_online_cpus() != 1) {
1285			printk_once(ACPI_PSS_BIOS_BUG_MSG);
1286			goto err_out;
1287		}
1288		if (pol->cpu != 0) {
1289			printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for "
1290			       "CPU other than CPU0. Complain to your BIOS "
1291			       "vendor.\n");
1292			goto err_out;
1293		}
1294		rc = find_psb_table(data);
1295		if (rc)
1296			goto err_out;
1297
1298		/* Take a crude guess here.
1299		 * That guess was in microseconds, so multiply with 1000 */
1300		pol->cpuinfo.transition_latency = (
1301			 ((data->rvo + 8) * data->vstable * VST_UNITS_20US) +
1302			 ((1 << data->irt) * 30)) * 1000;
1303	} else /* ACPI _PSS objects available */
1304		pol->cpuinfo.transition_latency = get_transition_latency(data);
1305
1306	/* only run on specific CPU from here on */
1307	init_on_cpu.data = data;
1308	smp_call_function_single(data->cpu, powernowk8_cpu_init_on_cpu,
1309				 &init_on_cpu, 1);
1310	rc = init_on_cpu.rc;
1311	if (rc != 0)
1312		goto err_out_exit_acpi;
1313
1314	if (cpu_family == CPU_HW_PSTATE)
1315		cpumask_copy(pol->cpus, cpumask_of(pol->cpu));
1316	else
1317		cpumask_copy(pol->cpus, cpu_core_mask(pol->cpu));
1318	data->available_cores = pol->cpus;
1319
1320	if (cpu_family == CPU_HW_PSTATE)
1321		pol->cur = find_khz_freq_from_pstate(data->powernow_table,
1322				data->currpstate);
1323	else
1324		pol->cur = find_khz_freq_from_fid(data->currfid);
1325	pr_debug("policy current frequency %d kHz\n", pol->cur);
1326
1327	/* min/max the cpu is capable of */
1328	if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) {
1329		printk(KERN_ERR FW_BUG PFX "invalid powernow_table\n");
1330		powernow_k8_cpu_exit_acpi(data);
1331		kfree(data->powernow_table);
1332		kfree(data);
1333		return -EINVAL;
1334	}
1335
1336	/* Check for APERF/MPERF support in hardware */
1337	if (cpu_has(c, X86_FEATURE_APERFMPERF))
1338		cpufreq_amd64_driver.getavg = cpufreq_get_measured_perf;
1339
1340	cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu);
1341
1342	if (cpu_family == CPU_HW_PSTATE)
1343		pr_debug("cpu_init done, current pstate 0x%x\n",
1344				data->currpstate);
1345	else
1346		pr_debug("cpu_init done, current fid 0x%x, vid 0x%x\n",
1347			data->currfid, data->currvid);
1348
1349	per_cpu(powernow_data, pol->cpu) = data;
1350
1351	return 0;
1352
1353err_out_exit_acpi:
1354	powernow_k8_cpu_exit_acpi(data);
1355
1356err_out:
1357	kfree(data);
1358	return -ENODEV;
1359}
1360
1361static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol)
1362{
1363	struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
1364
1365	if (!data)
1366		return -EINVAL;
1367
1368	powernow_k8_cpu_exit_acpi(data);
1369
1370	cpufreq_frequency_table_put_attr(pol->cpu);
1371
1372	kfree(data->powernow_table);
1373	kfree(data);
1374	per_cpu(powernow_data, pol->cpu) = NULL;
1375
1376	return 0;
1377}
1378
1379static void query_values_on_cpu(void *_err)
1380{
1381	int *err = _err;
1382	struct powernow_k8_data *data = __this_cpu_read(powernow_data);
1383
1384	*err = query_current_values_with_pending_wait(data);
1385}
1386
1387static unsigned int powernowk8_get(unsigned int cpu)
1388{
1389	struct powernow_k8_data *data = per_cpu(powernow_data, cpu);
1390	unsigned int khz = 0;
1391	int err;
1392
1393	if (!data)
1394		return 0;
1395
1396	smp_call_function_single(cpu, query_values_on_cpu, &err, true);
1397	if (err)
1398		goto out;
1399
1400	if (cpu_family == CPU_HW_PSTATE)
1401		khz = find_khz_freq_from_pstate(data->powernow_table,
1402						data->currpstate);
1403	else
1404		khz = find_khz_freq_from_fid(data->currfid);
1405
1406
1407out:
1408	return khz;
1409}
1410
1411static void _cpb_toggle_msrs(bool t)
1412{
1413	int cpu;
1414
1415	get_online_cpus();
1416
1417	rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
1418
1419	for_each_cpu(cpu, cpu_online_mask) {
1420		struct msr *reg = per_cpu_ptr(msrs, cpu);
1421		if (t)
1422			reg->l &= ~BIT(25);
1423		else
1424			reg->l |= BIT(25);
1425	}
1426	wrmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
1427
1428	put_online_cpus();
1429}
1430
1431/*
1432 * Switch on/off core performance boosting.
1433 *
1434 * 0=disable
1435 * 1=enable.
1436 */
1437static void cpb_toggle(bool t)
1438{
1439	if (!cpb_capable)
1440		return;
1441
1442	if (t && !cpb_enabled) {
1443		cpb_enabled = true;
1444		_cpb_toggle_msrs(t);
1445		printk(KERN_INFO PFX "Core Boosting enabled.\n");
1446	} else if (!t && cpb_enabled) {
1447		cpb_enabled = false;
1448		_cpb_toggle_msrs(t);
1449		printk(KERN_INFO PFX "Core Boosting disabled.\n");
1450	}
1451}
1452
1453static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
1454				 size_t count)
1455{
1456	int ret = -EINVAL;
1457	unsigned long val = 0;
1458
1459	ret = strict_strtoul(buf, 10, &val);
1460	if (!ret && (val == 0 || val == 1) && cpb_capable)
1461		cpb_toggle(val);
1462	else
1463		return -EINVAL;
1464
1465	return count;
1466}
1467
1468static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf)
1469{
1470	return sprintf(buf, "%u\n", cpb_enabled);
1471}
1472
1473#define define_one_rw(_name) \
1474static struct freq_attr _name = \
1475__ATTR(_name, 0644, show_##_name, store_##_name)
1476
1477define_one_rw(cpb);
1478
1479static struct freq_attr *powernow_k8_attr[] = {
1480	&cpufreq_freq_attr_scaling_available_freqs,
1481	&cpb,
1482	NULL,
1483};
1484
1485static struct cpufreq_driver cpufreq_amd64_driver = {
1486	.verify		= powernowk8_verify,
1487	.target		= powernowk8_target,
1488	.bios_limit	= acpi_processor_get_bios_limit,
1489	.init		= powernowk8_cpu_init,
1490	.exit		= __devexit_p(powernowk8_cpu_exit),
1491	.get		= powernowk8_get,
1492	.name		= "powernow-k8",
1493	.owner		= THIS_MODULE,
1494	.attr		= powernow_k8_attr,
1495};
1496
1497/*
1498 * Clear the boost-disable flag on the CPU_DOWN path so that this cpu
1499 * cannot block the remaining ones from boosting. On the CPU_UP path we
1500 * simply keep the boost-disable flag in sync with the current global
1501 * state.
1502 */
1503static int cpb_notify(struct notifier_block *nb, unsigned long action,
1504		      void *hcpu)
1505{
1506	unsigned cpu = (long)hcpu;
1507	u32 lo, hi;
1508
1509	switch (action) {
1510	case CPU_UP_PREPARE:
1511	case CPU_UP_PREPARE_FROZEN:
1512
1513		if (!cpb_enabled) {
1514			rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
1515			lo |= BIT(25);
1516			wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi);
1517		}
1518		break;
1519
1520	case CPU_DOWN_PREPARE:
1521	case CPU_DOWN_PREPARE_FROZEN:
1522		rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
1523		lo &= ~BIT(25);
1524		wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi);
1525		break;
1526
1527	default:
1528		break;
1529	}
1530
1531	return NOTIFY_OK;
1532}
1533
1534static struct notifier_block cpb_nb = {
1535	.notifier_call		= cpb_notify,
1536};
1537
1538/* driver entry point for init */
1539static int __cpuinit powernowk8_init(void)
1540{
1541	unsigned int i, supported_cpus = 0, cpu;
1542	int rv;
1543
1544	for_each_online_cpu(i) {
1545		int rc;
1546		smp_call_function_single(i, check_supported_cpu, &rc, 1);
1547		if (rc == 0)
1548			supported_cpus++;
1549	}
1550
1551	if (supported_cpus != num_online_cpus())
1552		return -ENODEV;
1553
1554	printk(KERN_INFO PFX "Found %d %s (%d cpu cores) (" VERSION ")\n",
1555		num_online_nodes(), boot_cpu_data.x86_model_id, supported_cpus);
1556
1557	if (boot_cpu_has(X86_FEATURE_CPB)) {
1558
1559		cpb_capable = true;
1560
1561		msrs = msrs_alloc();
1562		if (!msrs) {
1563			printk(KERN_ERR "%s: Error allocating msrs!\n", __func__);
1564			return -ENOMEM;
1565		}
1566
1567		register_cpu_notifier(&cpb_nb);
1568
1569		rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
1570
1571		for_each_cpu(cpu, cpu_online_mask) {
1572			struct msr *reg = per_cpu_ptr(msrs, cpu);
1573			cpb_enabled |= !(!!(reg->l & BIT(25)));
1574		}
1575
1576		printk(KERN_INFO PFX "Core Performance Boosting: %s.\n",
1577			(cpb_enabled ? "on" : "off"));
1578	}
1579
1580	rv = cpufreq_register_driver(&cpufreq_amd64_driver);
1581	if (rv < 0 && boot_cpu_has(X86_FEATURE_CPB)) {
1582		unregister_cpu_notifier(&cpb_nb);
1583		msrs_free(msrs);
1584		msrs = NULL;
1585	}
1586	return rv;
1587}
1588
1589/* driver entry point for term */
1590static void __exit powernowk8_exit(void)
1591{
1592	pr_debug("exit\n");
1593
1594	if (boot_cpu_has(X86_FEATURE_CPB)) {
1595		msrs_free(msrs);
1596		msrs = NULL;
1597
1598		unregister_cpu_notifier(&cpb_nb);
1599	}
1600
1601	cpufreq_unregister_driver(&cpufreq_amd64_driver);
1602}
1603
1604MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and "
1605		"Mark Langsdorf <mark.langsdorf@amd.com>");
1606MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver.");
1607MODULE_LICENSE("GPL");
1608
1609late_initcall(powernowk8_init);
1610module_exit(powernowk8_exit);
1611