/**
 * @file op_model_p4.c
 * P4 model-specific MSR operations
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author Graydon Hoare
 */

#include "op_x86_model.h"
#include "op_msr.h"
#include "op_apic.h"
#include "op_arch.h"

#define NUM_EVENTS 39

#define NUM_COUNTERS_NON_HT 8
#define NUM_ESCRS_NON_HT 45
#define NUM_CCCRS_NON_HT 18
#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)

#define NUM_COUNTERS_HT2 4
#define NUM_ESCRS_HT2 23
#define NUM_CCCRS_HT2 9
#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)

static unsigned int num_counters = NUM_COUNTERS_NON_HT;


/* this has to be checked dynamically since the
   hyper-threadedness of a chip is discovered at
   kernel boot-time. */
static inline void setup_num_counters(void)
{
#ifdef HT_SUPPORT
	if (smp_num_siblings == 2)
		num_counters = NUM_COUNTERS_HT2;
#endif
}
static inline int addr_increment(void)
{
#ifdef HT_SUPPORT
	return smp_num_siblings == 2 ? 2 : 1;
#else
	return 1;
#endif
}
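
/* On HT parts each register bank is shared between the two sibling
   threads, so walking a bank with addr_increment() == 2 touches every
   other MSR and leaves the alternate registers to the sibling. */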


/* tables to simulate a simplified hardware view of the p4 registers */
struct p4_counter_binding {
	int virt_counter;
	int counter_address;
	int cccr_address;
};

struct p4_event_binding {
	/* value to put in CCCR */
	int escr_select;
	/* value to put in ESCR */
	int event_select;
	struct {
		/* for this counter... */
		int virt_counter;
		/* use this ESCR       */
		int escr_address;
	} bindings[2];
};

/* nb: these CTR_* defines are a duplicate of the defines in
   event/i386.p4*events. */


#define CTR_BPU_0      (1 << 0)
#define CTR_MS_0       (1 << 1)
#define CTR_FLAME_0    (1 << 2)
#define CTR_IQ_4       (1 << 3)
#define CTR_BPU_2      (1 << 4)
#define CTR_MS_2       (1 << 5)
#define CTR_FLAME_2    (1 << 6)
#define CTR_IQ_5       (1 << 7)

static struct p4_counter_binding p4_counters[NUM_COUNTERS_NON_HT] = {
	{ CTR_BPU_0,   MSR_P4_BPU_PERFCTR0,   MSR_P4_BPU_CCCR0 },
	{ CTR_MS_0,    MSR_P4_MS_PERFCTR0,    MSR_P4_MS_CCCR0 },
	{ CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
	{ CTR_IQ_4,    MSR_P4_IQ_PERFCTR4,    MSR_P4_IQ_CCCR4 },
	{ CTR_BPU_2,   MSR_P4_BPU_PERFCTR2,   MSR_P4_BPU_CCCR2 },
	{ CTR_MS_2,    MSR_P4_MS_PERFCTR2,    MSR_P4_MS_CCCR2 },
	{ CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
	{ CTR_IQ_5,    MSR_P4_IQ_PERFCTR5,    MSR_P4_IQ_CCCR5 }
};
#define NUM_UNUSED_CCCRS	(NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)

/* all the CCCRs we don't use */
static int p4_unused_cccr[NUM_UNUSED_CCCRS] = {
	MSR_P4_BPU_CCCR1,	MSR_P4_BPU_CCCR3,
	MSR_P4_MS_CCCR1,	MSR_P4_MS_CCCR3,
	MSR_P4_FLAME_CCCR1,	MSR_P4_FLAME_CCCR3,
	MSR_P4_IQ_CCCR0,	MSR_P4_IQ_CCCR1,
	MSR_P4_IQ_CCCR2,	MSR_P4_IQ_CCCR3
};

/* p4 event codes in libop/op_event.h are indices into this table. */

static struct p4_event_binding p4_events[NUM_EVENTS] = {

	{ /* BRANCH_RETIRED */
		0x05, 0x06,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* MISPRED_BRANCH_RETIRED */
		0x04, 0x03,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
	},

	{ /* TC_DELIVER_MODE */
		0x01, 0x01,
		{ { CTR_MS_0, MSR_P4_TC_ESCR0},
		  { CTR_MS_2, MSR_P4_TC_ESCR1} }
	},

	{ /* BPU_FETCH_REQUEST */
		0x00, 0x03,
		{ { CTR_BPU_0, MSR_P4_BPU_ESCR0},
		  { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
	},

	{ /* ITLB_REFERENCE */
		0x03, 0x18,
		{ { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
		  { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
	},

	{ /* MEMORY_CANCEL */
		0x05, 0x02,
		{ { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
		  { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
	},

	{ /* MEMORY_COMPLETE */
		0x02, 0x08,
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
	},

	{ /* LOAD_PORT_REPLAY */
		0x02, 0x04,
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
	},

	{ /* STORE_PORT_REPLAY */
		0x02, 0x05,
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
	},

	{ /* MOB_LOAD_REPLAY */
		0x02, 0x03,
		{ { CTR_BPU_0, MSR_P4_MOB_ESCR0},
		  { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
	},

	{ /* PAGE_WALK_TYPE */
		0x04, 0x01,
		{ { CTR_BPU_0, MSR_P4_PMH_ESCR0},
		  { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
	},

	{ /* BSQ_CACHE_REFERENCE */
		0x07, 0x0c,
		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
		  { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
	},

	/* Intel doc vol 3 table A-1: P4 and Xeon with CPUID signature < 0xf27
	 * don't allow MSR_P4_FSB_ESCR1, so only counter 0 is available */
	{ /* IOQ_ALLOCATION */
		0x06, 0x03,
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
		  { 0, 0 } }
	},

	{ /* IOQ_ACTIVE_ENTRIES */
		0x06, 0x1a,
		{ { CTR_BPU_2, MSR_P4_FSB_ESCR1},
		  { 0, 0 } }
	},

	{ /* FSB_DATA_ACTIVITY */
		0x06, 0x17,
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
	},

	{ /* BSQ_ALLOCATION */
		0x07, 0x05,
		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
		  { 0, 0 } }
	},

	{ /* BSQ_ACTIVE_ENTRIES */
		0x07, 0x06,
		/* FIXME: the Intel docs don't say which ESCR1 to use;
		   BSU_ESCR1 is a sensible guess but needs validation */
		{ { CTR_BPU_2, MSR_P4_BSU_ESCR1 },
		  { 0, 0 } }
	},

	{ /* X87_ASSIST */
		0x05, 0x03,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* SSE_INPUT_ASSIST */
		0x01, 0x34,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* PACKED_SP_UOP */
		0x01, 0x08,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* PACKED_DP_UOP */
		0x01, 0x0c,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* SCALAR_SP_UOP */
		0x01, 0x0a,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* SCALAR_DP_UOP */
		0x01, 0x0e,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* 64BIT_MMX_UOP */
		0x01, 0x02,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* 128BIT_MMX_UOP */
		0x01, 0x1a,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* X87_FP_UOP */
		0x01, 0x04,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* X87_SIMD_MOVES_UOP */
		0x01, 0x2e,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* MACHINE_CLEAR */
		0x05, 0x02,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* GLOBAL_POWER_EVENTS */
		0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
	},

	{ /* TC_MS_XFER */
		0x00, 0x05,
		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
	},

	{ /* UOP_QUEUE_WRITES */
		0x00, 0x09,
		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
	},

	{ /* FRONT_END_EVENT */
		0x05, 0x08,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* EXECUTION_EVENT */
		0x05, 0x0c,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* REPLAY_EVENT */
		0x05, 0x09,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* INSTR_RETIRED */
		0x04, 0x02,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
	},

	{ /* UOPS_RETIRED */
		0x04, 0x01,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
	},

	{ /* UOP_TYPE */
		0x02, 0x02,
		{ { CTR_IQ_4, MSR_P4_RAT_ESCR0},
		  { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
	},

	{ /* RETIRED_MISPRED_BRANCH_TYPE */
		0x02, 0x05,
		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
	},

	{ /* RETIRED_BRANCH_TYPE */
		0x02, 0x04,
		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
	}
};

#define MISC_PMC_ENABLED_P(x) ((x) & (1 << 7))

#define ESCR_RESERVED_BITS 0x80000003
#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
#define ESCR_READ(escr, high, ev, i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
#define ESCR_WRITE(escr, high, ev, i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
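/* A summary of the ESCR layout implied by the macros above (a reading
   of this code, not a quote from the Intel manual): event select in
   bits 30:25, event mask in bits 24:9, T0 OS/USR enables in bits 3:2,
   T1 OS/USR enables in bits 1:0.  Note that ESCR_CLEAR keeps bit 31
   and the T1 bits (mask 0x80000003), presumably so a hyper-threaded
   sibling's enables survive a reprogram. */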

#define CCCR_RESERVED_BITS 0x38030FFF
#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1 << 26))
#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1 << 27))
#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1 << 12))
#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1 << 12))
#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
#define CCCR_OVF_P(cccr) ((cccr) & (1U << 31))
#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U << 31)))
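/* Likewise the CCCR layout encoded above: enable in bit 12, ESCR
   select in bits 15:13, the two always-required bits at 17:16,
   per-thread PMI routing in bits 26 (T0) and 27 (T1), and the
   overflow flag in bit 31. */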

#define CTR_READ(l, h, i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0)
#define CTR_WRITE(l, i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0)
#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
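/* CTR_WRITE preloads a counter with the negated count, sign-extended
   into the high word; the counter then counts up and raises the PMI
   on overflow.  CTR_OVERFLOW_P spots a wrapped counter by the cleared
   top bit of its low word. */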


/* this assigns a "stagger" to the current CPU, which is used throughout
   the code in this module as an extra array offset, to select the "even"
   or "odd" part of all the divided resources. */
static unsigned int get_stagger(void)
{
#ifdef HT_SUPPORT
	int cpu;
	if (smp_num_siblings > 1) {
		cpu = smp_processor_id();
		return (cpu_sibling_map[cpu] > cpu) ? 0 : 1;
	}
#endif
	return 0;
}
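
/* For example, on an HT pair where cpu_sibling_map[0] == 1 and
   cpu_sibling_map[1] == 0, CPU 0 computes stagger 0 and CPU 1
   stagger 1, so the siblings index disjoint halves of the tables. */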


/* finally, mediate access to a real hardware counter
   by passing a "virtual" counter number to this macro,
   along with your stagger setting. */
#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
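/* Worked example: under HT num_counters == 4, so virtual counter 2 on
   the stagger-1 thread maps to slot VIRT_CTR(1, 2) == 6 of
   p4_counters[], i.e. CTR_FLAME_2 / MSR_P4_FLAME_PERFCTR2. */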


static void p4_fill_in_addresses(struct op_msrs * const msrs)
{
	unsigned int i;
	unsigned int addr, stag;

	setup_num_counters();
	stag = get_stagger();

	/* the counter registers we pay attention to */
	for (i = 0; i < num_counters; ++i) {
		msrs->counters.addrs[i] =
			p4_counters[VIRT_CTR(stag, i)].counter_address;
	}

	/* FIXME: this feels wrong; we don't save the 10 counters we don't use. */

	/* 18 CCCR registers */
	for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag;
	     addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) {
		msrs->controls.addrs[i] = addr;
	}

	/* 43 ESCR registers in three or four discontiguous groups */
	for (addr = MSR_P4_BSU_ESCR0 + stag;
	     addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
		msrs->controls.addrs[i] = addr;
	}

	/* Some models have no IQ_ESCR0/1, so on those we save BSU_ESCR0/1 a
	 * second time to avoid a special case in nmi_{save|restore}_registers() */
	if (boot_cpu_data.x86_model >= 0x3) {
		for (addr = MSR_P4_BSU_ESCR0 + stag;
		     addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
			msrs->controls.addrs[i] = addr;
		}
	} else {
		for (addr = MSR_P4_IQ_ESCR0 + stag;
		     addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
			msrs->controls.addrs[i] = addr;
		}
	}

	for (addr = MSR_P4_RAT_ESCR0 + stag;
	     addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
		msrs->controls.addrs[i] = addr;
	}

	for (addr = MSR_P4_MS_ESCR0 + stag;
	     addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
		msrs->controls.addrs[i] = addr;
	}

	for (addr = MSR_P4_IX_ESCR0 + stag;
	     addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
		msrs->controls.addrs[i] = addr;
	}

	/* there are 2 remaining non-contiguously located ESCRs */

	if (num_counters == NUM_COUNTERS_NON_HT) {
		/* standard non-HT CPUs handle both remaining ESCRs */
		msrs->controls.addrs[i++] = MSR_P4_CRU_ESCR5;
		msrs->controls.addrs[i++] = MSR_P4_CRU_ESCR4;

	} else if (stag == 0) {
		/* HT CPUs give the first remainder to the even thread, as
		   the 32nd control register */
		msrs->controls.addrs[i++] = MSR_P4_CRU_ESCR4;

	} else {
		/* and two copies of the second to the odd thread,
		   as the 22nd and 23rd control registers */
		msrs->controls.addrs[i++] = MSR_P4_CRU_ESCR5;
		msrs->controls.addrs[i++] = MSR_P4_CRU_ESCR5;
	}
}


static void pmc_setup_one_p4_counter(unsigned int ctr)
{
	int i;
	int const maxbind = 2;
	unsigned int cccr = 0;
	unsigned int escr = 0;
	unsigned int high = 0;
	unsigned int counter_bit;
	struct p4_event_binding * ev = NULL;
	unsigned int stag;

	stag = get_stagger();

	/* convert from counter *number* to counter *bit* */
	counter_bit = 1 << VIRT_CTR(stag, ctr);
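	/* e.g. stagger 1, ctr 2 yields bit 6, CTR_FLAME_2; this is the
	   bit matched against each binding's virt_counter below. */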

	/* find our event binding structure. */
	if (sysctl.ctr[ctr].event <= 0 || sysctl.ctr[ctr].event > NUM_EVENTS) {
		printk(KERN_ERR
		       "oprofile: P4 event code 0x%x out of range\n",
		       sysctl.ctr[ctr].event);
		return;
	}

	ev = &(p4_events[sysctl.ctr[ctr].event - 1]);

	for (i = 0; i < maxbind; i++) {
		if (ev->bindings[i].virt_counter & counter_bit) {

			/* modify ESCR */
			ESCR_READ(escr, high, ev, i);
			ESCR_CLEAR(escr);
			if (stag == 0) {
				ESCR_SET_USR_0(escr, sysctl.ctr[ctr].user);
				ESCR_SET_OS_0(escr, sysctl.ctr[ctr].kernel);
			} else {
				ESCR_SET_USR_1(escr, sysctl.ctr[ctr].user);
				ESCR_SET_OS_1(escr, sysctl.ctr[ctr].kernel);
			}
			ESCR_SET_EVENT_SELECT(escr, ev->event_select);
			ESCR_SET_EVENT_MASK(escr, sysctl.ctr[ctr].unit_mask);
			ESCR_WRITE(escr, high, ev, i);

			/* modify CCCR */
			CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
			CCCR_CLEAR(cccr);
			CCCR_SET_REQUIRED_BITS(cccr);
			CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
			if (stag == 0) {
				CCCR_SET_PMI_OVF_0(cccr);
			} else {
				CCCR_SET_PMI_OVF_1(cccr);
			}
			CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
			return;
		}
	}

	printk(KERN_ERR
	       "oprofile: P4 event code 0x%x has no binding for ctr %d\n",
	       sysctl.ctr[ctr].event, ctr);
}


static void p4_setup_ctrs(struct op_msrs const * const msrs)
{
	unsigned int i;
	unsigned int low, high;
	unsigned int addr;
	unsigned int stag;

	stag = get_stagger();

	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
	if (!MISC_PMC_ENABLED_P(low)) {
		printk(KERN_ERR "oprofile: P4 PMC not available\n");
		return;
	}

	/* clear the cccrs we will use */
	for (i = 0 ; i < num_counters ; i++) {
		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
		CCCR_CLEAR(low);
		CCCR_SET_REQUIRED_BITS(low);
		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
	}

	/* clear cccrs outside our concern */
	for (i = stag ; i < NUM_UNUSED_CCCRS ; i += addr_increment()) {
		rdmsr(p4_unused_cccr[i], low, high);
		CCCR_CLEAR(low);
		CCCR_SET_REQUIRED_BITS(low);
		wrmsr(p4_unused_cccr[i], low, high);
	}

	/* clear all escrs (including those outside our concern) */
	for (addr = MSR_P4_BSU_ESCR0 + stag;
	     addr < MSR_P4_IQ_ESCR0; addr += addr_increment()) {
		wrmsr(addr, 0, 0);
	}

	/* On older models, also clear MSR_P4_IQ_ESCR0/1 */
	if (boot_cpu_data.x86_model < 0x3) {
		wrmsr(MSR_P4_IQ_ESCR0, 0, 0);
		wrmsr(MSR_P4_IQ_ESCR1, 0, 0);
	}

	for (addr = MSR_P4_RAT_ESCR0 + stag;
	     addr <= MSR_P4_SSU_ESCR0; addr += addr_increment()) {
		wrmsr(addr, 0, 0);
	}

	for (addr = MSR_P4_MS_ESCR0 + stag;
	     addr <= MSR_P4_TC_ESCR1; addr += addr_increment()) {
		wrmsr(addr, 0, 0);
	}

	for (addr = MSR_P4_IX_ESCR0 + stag;
	     addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()) {
		wrmsr(addr, 0, 0);
	}

	if (num_counters == NUM_COUNTERS_NON_HT) {
		wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
		wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
	} else if (stag == 0) {
		wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
	} else {
		wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
	}

	/* setup all counters */
	for (i = 0 ; i < num_counters ; ++i) {
		if (sysctl.ctr[i].event) {
			pmc_setup_one_p4_counter(i);
			CTR_WRITE(sysctl.ctr[i].count, VIRT_CTR(stag, i));
		}
	}
}


static void p4_check_ctrs(unsigned int const cpu,
			  struct op_msrs const * const msrs,
			  struct pt_regs * const regs)
{
	unsigned long ctr, low, high, stag, real;
	int i;

	stag = get_stagger();

	for (i = 0; i < num_counters; ++i) {

		if (!sysctl.ctr[i].enabled)
			continue;

		/*
		 * there is some eccentricity in the hardware which
		 * requires that we perform 2 extra corrections:
		 *
		 * - check both the CCCR:OVF flag for overflow and the
		 *   counter high bit for un-flagged overflows.
		 *
		 * - write the counter back twice to ensure it gets
		 *   updated properly.
		 *
		 * the former seems to be related to extra NMIs happening
		 * during the current NMI; the latter is reported as errata
		 * N15 in intel doc 249199-029, pentium 4 specification
		 * update, though their suggested work-around does not
		 * appear to solve the problem.
		 */

		real = VIRT_CTR(stag, i);

		CCCR_READ(low, high, real);
		CTR_READ(ctr, high, real);
		if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
			op_do_profile(cpu, instruction_pointer(regs), IRQ_ENABLED(regs), i);
			CTR_WRITE(oprof_data[cpu].ctr_count[i], real);
			CCCR_CLEAR_OVF(low);
			CCCR_WRITE(low, high, real);
			CTR_WRITE(oprof_data[cpu].ctr_count[i], real);
		}
	}

	/* P4 quirk: you have to re-unmask the apic vector */
	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
}


static void p4_start(struct op_msrs const * const msrs)
{
	unsigned int low, high, stag;
	int i;

	stag = get_stagger();

	for (i = 0; i < num_counters; ++i) {
		if (!sysctl.ctr[i].enabled)
			continue;
		CCCR_READ(low, high, VIRT_CTR(stag, i));
		CCCR_SET_ENABLE(low);
		CCCR_WRITE(low, high, VIRT_CTR(stag, i));
	}
}


static void p4_stop(struct op_msrs const * const msrs)
{
	unsigned int low, high, stag;
	int i;

	stag = get_stagger();

	for (i = 0; i < num_counters; ++i) {
		/* FIXME: the 2.6 driver doesn't do this? */
		if (!sysctl.ctr[i].enabled)
			continue;
		CCCR_READ(low, high, VIRT_CTR(stag, i));
		CCCR_SET_DISABLE(low);
		CCCR_WRITE(low, high, VIRT_CTR(stag, i));
	}
}

#ifdef HT_SUPPORT

struct op_x86_model_spec const op_p4_ht2_spec = {
	.num_counters = NUM_COUNTERS_HT2,
	.num_controls = NUM_CONTROLS_HT2,
	.fill_in_addresses = &p4_fill_in_addresses,
	.setup_ctrs = &p4_setup_ctrs,
	.check_ctrs = &p4_check_ctrs,
	.start = &p4_start,
	.stop = &p4_stop
};
#endif

struct op_x86_model_spec const op_p4_spec = {
	.num_counters = NUM_COUNTERS_NON_HT,
	.num_controls = NUM_CONTROLS_NON_HT,
	.fill_in_addresses = &p4_fill_in_addresses,
	.setup_ctrs = &p4_setup_ctrs,
	.check_ctrs = &p4_check_ctrs,
	.start = &p4_start,
	.stop = &p4_stop
};